atomicshop 2.18.4__py3-none-any.whl → 2.18.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of atomicshop might be problematic. Click here for more details.
- atomicshop/__init__.py +1 -1
- atomicshop/web_apis/google_custom_search.py +37 -0
- atomicshop/web_apis/google_llm.py +87 -11
- atomicshop/wrappers/playwrightw/scenarios.py +70 -0
- {atomicshop-2.18.4.dist-info → atomicshop-2.18.5.dist-info}/METADATA +1 -1
- {atomicshop-2.18.4.dist-info → atomicshop-2.18.5.dist-info}/RECORD +9 -9
- {atomicshop-2.18.4.dist-info → atomicshop-2.18.5.dist-info}/LICENSE.txt +0 -0
- {atomicshop-2.18.4.dist-info → atomicshop-2.18.5.dist-info}/WHEEL +0 -0
- {atomicshop-2.18.4.dist-info → atomicshop-2.18.5.dist-info}/top_level.txt +0 -0
atomicshop/__init__.py
CHANGED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
from typing import Union
|
|
2
|
+
|
|
3
|
+
from googleapiclient.discovery import build
|
|
4
|
+
import googleapiclient.errors
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def search_google(
        query: str,
        api_key: str,
        search_engine_id: str
) -> tuple[
    Union[list[str], None],
    Union[str, None]]:
    """
    Search Google through the Custom Search API and collect the result links.

    :param query: string, the search query to search on Google Custom Search.
    :param api_key: string, the API key for the Google Custom Search API.
    :param search_engine_id: string, the search engine ID for the Google Custom Search API.

    :return: tuple(links, error).
        On success: (list of link strings, None). The list is empty when the response has no 'items'.
        On HTTP 429 (rate limit): (None, the reason string reported by the API).
    :raises googleapiclient.errors.HttpError: for any HTTP error other than 429.
    """

    # Keep the 'try' limited to the calls that talk to the API.
    try:
        service = build("customsearch", "v1", developerKey=api_key)
        result = service.cse().list(q=query, cx=search_engine_id).execute()
    except googleapiclient.errors.HttpError as e:
        # In case of rate limit error, return the error message instead of raising.
        if e.status_code == 429:
            return None, str(e.reason)
        # Anything else is unexpected: re-raise with the original traceback untouched.
        raise

    items = result.get('items', [])
    # Some result items may come without a 'link' key, skip those.
    links = [item['link'] for item in items if 'link' in item]
    return links, None
|
|
@@ -2,13 +2,31 @@ import os
|
|
|
2
2
|
|
|
3
3
|
import google.generativeai as genai
|
|
4
4
|
|
|
5
|
+
from . import google_custom_search
|
|
6
|
+
from ..wrappers.playwrightw import scenarios
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class GoogleCustomSearchError(Exception):
    """Raised when the Google Custom Search step reports an error message."""
|
|
11
|
+
|
|
5
12
|
|
|
6
13
|
class GoogleLLM:
|
|
7
14
|
def __init__(
|
|
8
15
|
self,
|
|
9
|
-
llm_api_key: str
|
|
16
|
+
llm_api_key: str,
|
|
17
|
+
search_api_key: str,
|
|
18
|
+
search_engine_id: str
|
|
10
19
|
) -> None:
|
|
20
|
+
"""
|
|
21
|
+
Constructor for the GoogleLLM class.
|
|
22
|
+
:param llm_api_key: str, the API key for the Gemini API.
|
|
23
|
+
:param search_api_key: str, the API key for the Google Custom Search API.
|
|
24
|
+
:param search_engine_id: str, the search engine ID for the Google Custom Search API.
|
|
25
|
+
"""
|
|
26
|
+
|
|
11
27
|
self.genai = genai
|
|
28
|
+
self.search_api_key: str = search_api_key
|
|
29
|
+
self.search_engine_id: str = search_engine_id
|
|
12
30
|
|
|
13
31
|
os.environ["API_KEY"] = llm_api_key
|
|
14
32
|
genai.configure(api_key=os.environ["API_KEY"])
|
|
@@ -28,15 +46,73 @@ class GoogleLLM:
|
|
|
28
46
|
number_of_top_links: int = 2,
|
|
29
47
|
number_of_characters_per_link: int = 15000,
|
|
30
48
|
temperature: float = 0,
|
|
31
|
-
max_output_tokens: int = 4096
|
|
32
|
-
|
|
49
|
+
max_output_tokens: int = 4096,
|
|
50
|
+
model_name: str = 'gemini-pro'
|
|
51
|
+
) -> str:
|
|
33
52
|
"""
|
|
34
53
|
Function to get the answer to a question by searching Google Custom Console API and processing the content using Gemini API.
|
|
35
|
-
|
|
36
|
-
:param
|
|
37
|
-
:param
|
|
38
|
-
:param
|
|
39
|
-
:param
|
|
40
|
-
:param
|
|
41
|
-
:
|
|
42
|
-
|
|
54
|
+
|
|
55
|
+
:param search_query: string, the search query to search on Google Custom Search.
|
|
56
|
+
:param additional_llm_instructions: string, additional instructions to provide to the LLM.
|
|
57
|
+
:param number_of_top_links: integer, the number of top links to fetch content from.
|
|
58
|
+
:param number_of_characters_per_link: integer, the number of characters to fetch from each link.
|
|
59
|
+
:param temperature: float, the temperature parameter for the LLM.
|
|
60
|
+
:param max_output_tokens: integer, the maximum number of tokens to generate in the LLM response.
|
|
61
|
+
:param model_name: string, the name of the model to use for the LLM.
|
|
62
|
+
|
|
63
|
+
:return: string, the answer by LLM to the question.
|
|
64
|
+
"""
|
|
65
|
+
|
|
66
|
+
# Search Google for links related to the query
|
|
67
|
+
links, search_error = google_custom_search.search_google(
|
|
68
|
+
query=search_query, api_key=self.search_api_key, search_engine_id=self.search_engine_id)
|
|
69
|
+
|
|
70
|
+
if search_error:
|
|
71
|
+
raise GoogleCustomSearchError(f"Error occurred when searching Google: {search_error}")
|
|
72
|
+
|
|
73
|
+
# Get only the first X links to not overload the LLM.
|
|
74
|
+
contents = scenarios.fetch_urls_content_in_threads(links[:number_of_top_links], number_of_characters_per_link)
|
|
75
|
+
|
|
76
|
+
combined_content = ""
|
|
77
|
+
for content in contents:
|
|
78
|
+
combined_content += f'{content}\n\n\n\n================================================================'
|
|
79
|
+
|
|
80
|
+
final_question = (f'Answer this question: {search_query}\n\n'
|
|
81
|
+
f'Follow these instructions: {additional_llm_instructions}\n\n'
|
|
82
|
+
f'Based on these data contents:\n\n'
|
|
83
|
+
f'{combined_content}')
|
|
84
|
+
|
|
85
|
+
# Ask Gemini to process the combined content
|
|
86
|
+
gemini_response = self.ask_gemini(final_question, temperature, max_output_tokens, model_name)
|
|
87
|
+
return gemini_response
|
|
88
|
+
|
|
89
|
+
@staticmethod
def ask_gemini(
        question: str,
        temperature: float,
        max_output_tokens: int,
        model_name: str = 'gemini-pro'
) -> str:
    """
    Send a single question to a Gemini model and return the text of its reply.

    :param question: str, the question to ask the Gemini API.
    :param temperature: float, the temperature parameter for the LLM.
        While 0 is deterministic, higher values can lead to more creative responses.
    :param max_output_tokens: int, the maximum number of tokens to generate in the LLM response.
    :param model_name: str, the name of the model to use for the LLM.

    :return: str, the response from the Gemini API.
    """

    # Generation settings handed to the model; only temperature and the output
    # token limit are caller-controlled, top_p / top_k are fixed.
    generation_settings = {
        "temperature": temperature,
        "top_p": 0.99,
        "top_k": 0,
        "max_output_tokens": max_output_tokens,
    }

    # noinspection PyTypeChecker
    llm = genai.GenerativeModel(model_name, generation_config=generation_settings)
    return llm.generate_content(question).text
|
|
@@ -3,6 +3,11 @@ Scenarios file contains full execution scenarios of playwright wrapper.
|
|
|
3
3
|
For example: run playwright, navigate to URL, get text from a locator.
|
|
4
4
|
"""
|
|
5
5
|
|
|
6
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
7
|
+
|
|
8
|
+
from playwright.sync_api import sync_playwright
|
|
9
|
+
from playwright.sync_api import TimeoutError as PlaywrightTimeoutError
|
|
10
|
+
|
|
6
11
|
from . import engine, base, combos
|
|
7
12
|
from ...basics import threads, multiprocesses
|
|
8
13
|
|
|
@@ -139,3 +144,68 @@ def _get_page_content_in_process(
|
|
|
139
144
|
html_txt_convert_to_bytes=html_txt_convert_to_bytes,
|
|
140
145
|
print_kwargs=print_kwargs
|
|
141
146
|
)
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def fetch_urls_content_in_threads(
        urls: list[str],
        number_of_characters_per_link: int
) -> list[str]:
    """
    Fetch the text content of all the URLs concurrently, one thread per submitted URL task.

    :param urls: list of strings, the URLs to fetch.
    :param number_of_characters_per_link: integer, the maximum number of characters to keep from each URL.

    :return: list of strings, the fetched contents in the same order as 'urls'.
        A URL whose fetch raised is reported with 'print' and left out of the result.
    """
    contents: list[str] = []

    # Use ThreadPoolExecutor to run multiple threads
    with ThreadPoolExecutor() as executor:
        # Submit all the tasks up front so they run in parallel.
        futures = [executor.submit(_fetch_content, url, number_of_characters_per_link) for url in urls]

        # Collect in submission order (not completion order), so the result keeps
        # the ordering of 'urls' and is the same from run to run.
        for url, future in zip(urls, futures):
            try:
                contents.append(future.result())
            except Exception as exc:
                # Best effort: one failing URL should not discard the others.
                print(f"An error occurred when fetching {url}: {exc}")

    return contents
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def _fetch_content(url, number_of_characters_per_link, *, max_network_wait_seconds=30.0):
    """
    Fetch the visible text of a single URL using the synchronous Playwright API.

    :param url: string, the URL to navigate to.
    :param number_of_characters_per_link: integer, the maximum number of characters to return.
    :param max_network_wait_seconds: float, upper bound on the total time spent waiting for the
        network to go quiet. Without it, a page that keeps sending requests would never return.

    :return: string, the first 'number_of_characters_per_link' characters of 'document.body.innerText'.
    """
    import time

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            page = browser.new_page()
            page.goto(url)

            # Wait for the page to load using all possible methods, since there is no specific method
            # that will work for all websites.
            page.wait_for_load_state("load", timeout=5000)
            page.wait_for_load_state("domcontentloaded", timeout=5000)

            # The above is not enough, wait for network to stop loading files:
            # keep waiting while responses arrive, stop at the first 2 second gap
            # or when the overall deadline is reached.
            deadline = time.monotonic() + max_network_wait_seconds
            while time.monotonic() < deadline:
                try:
                    # "**/*" is the wildcard for all URLs.
                    # When no response arrives within the timeout, a TimeoutError is raised,
                    # which ends the loop.
                    with page.expect_response("**/*", timeout=2000) as response_info:
                        _ = response_info.value
                except PlaywrightTimeoutError:
                    break

            # Use JavaScript to extract only the visible text from the page
            text_content: str = page.evaluate("document.body.innerText")
        finally:
            # Close the browser even when navigation or one of the waits raised.
            browser.close()

    # Return only the first X characters of the text content to not overload the LLM.
    return text_content[:number_of_characters_per_link]
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
atomicshop/__init__.py,sha256=
|
|
1
|
+
atomicshop/__init__.py,sha256=djTLkiQczxSrBfseOQFp0kfiVNR7-UsGEJSqlifxWfA,123
|
|
2
2
|
atomicshop/_basics_temp.py,sha256=6cu2dd6r2dLrd1BRNcVDKTHlsHs_26Gpw8QS6v32lQ0,3699
|
|
3
3
|
atomicshop/_create_pdf_demo.py,sha256=Yi-PGZuMg0RKvQmLqVeLIZYadqEZwUm-4A9JxBl_vYA,3713
|
|
4
4
|
atomicshop/_patch_import.py,sha256=ENp55sKVJ0e6-4lBvZnpz9PQCt3Otbur7F6aXDlyje4,6334
|
|
@@ -177,8 +177,8 @@ atomicshop/startup/win/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3
|
|
|
177
177
|
atomicshop/startup/win/startup_folder.py,sha256=2RZEyF-Mf8eWPlt_-OaoGKKnMs6YhELEzJZ376EI0E0,1891
|
|
178
178
|
atomicshop/startup/win/task_scheduler.py,sha256=qALe-8sfthYxsdCViH2r8OsH3x-WauDqteg5RzElPdk,4348
|
|
179
179
|
atomicshop/web_apis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
180
|
-
atomicshop/web_apis/google_custom_search.py,sha256=
|
|
181
|
-
atomicshop/web_apis/google_llm.py,sha256=
|
|
180
|
+
atomicshop/web_apis/google_custom_search.py,sha256=evixI7y8JYyGwurRZH03nAWdD-417VFaNe1mAtuKPNA,1310
|
|
181
|
+
atomicshop/web_apis/google_llm.py,sha256=UzZkPtyNA03g0xcb4vJ8imdjdNvyjUvmPUtxf9J9HnY,4898
|
|
182
182
|
atomicshop/wrappers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
183
183
|
atomicshop/wrappers/_process_wrapper_curl.py,sha256=XkZZXYl7D0Q6UfdWqy-18AvpU0yVp9i2BVD2qRcXlkk,841
|
|
184
184
|
atomicshop/wrappers/_process_wrapper_tar.py,sha256=WUMZFKNrlG4nJP9tWZ51W7BR1j_pIjsjgyAStmWjRGs,655
|
|
@@ -275,7 +275,7 @@ atomicshop/wrappers/playwrightw/javascript.py,sha256=_bW7CAtm0Y8IHYrAalg5HpPFnk6
|
|
|
275
275
|
atomicshop/wrappers/playwrightw/keyboard.py,sha256=zN3YddGO-qUkn6C0CRVFejP4cTuaUwXLDNFhFREjERY,422
|
|
276
276
|
atomicshop/wrappers/playwrightw/locators.py,sha256=6wsLywZxDuii7mwv-zQsRbqQC8r7j96Bma5b5_7ZoVo,2411
|
|
277
277
|
atomicshop/wrappers/playwrightw/mouse.py,sha256=-2FZbQtjgH7tdXWld6ZPGqlKFUdf5in0ujN0hewxa50,656
|
|
278
|
-
atomicshop/wrappers/playwrightw/scenarios.py,sha256=
|
|
278
|
+
atomicshop/wrappers/playwrightw/scenarios.py,sha256=RY56hH7UKvDoBr5j1JwP5xRoQtaz0AnCAkA602MurPk,8396
|
|
279
279
|
atomicshop/wrappers/playwrightw/waits.py,sha256=PBFdz_PoM7Fo7O8hLqMrxNPzBEYgPoXwZceFFCGGeu8,7182
|
|
280
280
|
atomicshop/wrappers/psutilw/cpus.py,sha256=w6LPBMINqS-T_X8vzdYkLS2Wzuve28Ydp_GafTCngrc,236
|
|
281
281
|
atomicshop/wrappers/psutilw/disks.py,sha256=3ZSVoommKH1TWo37j_83frB-NqXF4Nf5q5mBCX8G4jE,9221
|
|
@@ -319,8 +319,8 @@ atomicshop/wrappers/socketw/ssl_base.py,sha256=kmiif84kMhBr5yjQW17p935sfjR5JKG0L
|
|
|
319
319
|
atomicshop/wrappers/socketw/statistics_csv.py,sha256=fgMzDXI0cybwUEqAxprRmY3lqbh30KAV-jOpoFKT-m8,3395
|
|
320
320
|
atomicshop/wrappers/winregw/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
321
321
|
atomicshop/wrappers/winregw/winreg_network.py,sha256=zZQfps-CdODQaTUADbHAwKHr5RUg7BLafnKWBbKaLN4,8728
|
|
322
|
-
atomicshop-2.18.
|
|
323
|
-
atomicshop-2.18.
|
|
324
|
-
atomicshop-2.18.
|
|
325
|
-
atomicshop-2.18.
|
|
326
|
-
atomicshop-2.18.
|
|
322
|
+
atomicshop-2.18.5.dist-info/LICENSE.txt,sha256=lLU7EYycfYcK2NR_1gfnhnRC8b8ccOTElACYplgZN88,1094
|
|
323
|
+
atomicshop-2.18.5.dist-info/METADATA,sha256=jFNipJDLDZS4YVzhPI5lVuaMh1xDHuH2Fohc6wyNjy8,10499
|
|
324
|
+
atomicshop-2.18.5.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
|
325
|
+
atomicshop-2.18.5.dist-info/top_level.txt,sha256=EgKJB-7xcrAPeqTRF2laD_Np2gNGYkJkd4OyXqpJphA,11
|
|
326
|
+
atomicshop-2.18.5.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|