atomicshop 2.18.4__py3-none-any.whl → 2.18.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of atomicshop might be problematic. Click here for more details.

atomicshop/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
1
  """Atomic Basic functions and classes to make developer life easier"""
2
2
 
3
3
  __author__ = "Den Kras"
4
- __version__ = '2.18.4'
4
+ __version__ = '2.18.5'
@@ -0,0 +1,37 @@
1
+ from typing import Union
2
+
3
+ from googleapiclient.discovery import build
4
+ import googleapiclient.errors
5
+
6
+
7
def search_google(
        query: str,
        api_key: str,
        search_engine_id: str
) -> tuple[
    Union[list[str], None],
    Union[str, None]]:
    """
    Function to search Google using Google Custom Search API for links related to a query.
    :param query: string, the search query to search on Google Custom Search.
    :param api_key: string, the API key for the Google Custom Search API.
    :param search_engine_id: string, the search engine ID for the Google Custom Search API.

    :return: tuple(links, error):
        links: list of strings - the links related to the query. None if the rate limit was hit.
        error: string - the error message if the rate limit was hit (HTTP 429), otherwise None.
    :raises googleapiclient.errors.HttpError: on any HTTP error other than 429.
    """

    try:
        # The service object is used as a context manager, so its underlying HTTP connections
        # are released as soon as the request is done.
        with build("customsearch", "v1", developerKey=api_key) as service:
            result = service.cse().list(q=query, cx=search_engine_id).execute()
    except googleapiclient.errors.HttpError as e:
        # In case of rate limit error, return the error message.
        if e.status_code == 429:
            # The reason may be an empty string; fall back to the full exception text, so the caller's
            # truthiness check on the error never mistakes a failure for a success.
            return None, str(e.reason) or str(e)
        # Any other HTTP error is not handled here; bare 'raise' keeps the original traceback.
        raise

    # Results without a 'link' key are skipped; no 'items' key at all means no results.
    links: list[str] = [item['link'] for item in result.get('items', []) if 'link' in item]
    return links, None
@@ -2,13 +2,31 @@ import os
2
2
 
3
3
  import google.generativeai as genai
4
4
 
5
+ from . import google_custom_search
6
+ from ..wrappers.playwrightw import scenarios
7
+
8
+
9
class GoogleCustomSearchError(Exception):
    """Raised when the Google Custom Search request returned an error message instead of links."""
11
+
5
12
 
6
13
  class GoogleLLM:
7
14
  def __init__(
8
15
  self,
9
- llm_api_key: str
16
+ llm_api_key: str,
17
+ search_api_key: str,
18
+ search_engine_id: str
10
19
  ) -> None:
20
+ """
21
+ Constructor for the GoogleLLM class.
22
+ :param llm_api_key: str, the API key for the Gemini API.
23
+ :param search_api_key: str, the API key for the Google Custom Search API.
24
+ :param search_engine_id: str, the search engine ID for the Google Custom Search API.
25
+ """
26
+
11
27
  self.genai = genai
28
+ self.search_api_key: str = search_api_key
29
+ self.search_engine_id: str = search_engine_id
12
30
 
13
31
  os.environ["API_KEY"] = llm_api_key
14
32
  genai.configure(api_key=os.environ["API_KEY"])
@@ -28,15 +46,73 @@ class GoogleLLM:
28
46
  number_of_top_links: int = 2,
29
47
  number_of_characters_per_link: int = 15000,
30
48
  temperature: float = 0,
31
- max_output_tokens: int = 4096
32
- ):
49
+ max_output_tokens: int = 4096,
50
+ model_name: str = 'gemini-pro'
51
+ ) -> str:
33
52
  """
34
53
  Function to get the answer to a question by searching Google Custom Console API and processing the content using Gemini API.
35
- :param search_query:
36
- :param additional_llm_instructions:
37
- :param number_of_top_links:
38
- :param number_of_characters_per_link:
39
- :param temperature:
40
- :param max_output_tokens:
41
- :return:
42
- """
54
+
55
+ :param search_query: string, the search query to search on Google Custom Search.
56
+ :param additional_llm_instructions: string, additional instructions to provide to the LLM.
57
+ :param number_of_top_links: integer, the number of top links to fetch content from.
58
+ :param number_of_characters_per_link: integer, the number of characters to fetch from each link.
59
+ :param temperature: float, the temperature parameter for the LLM.
60
+ :param max_output_tokens: integer, the maximum number of tokens to generate in the LLM response.
61
+ :param model_name: string, the name of the model to use for the LLM.
62
+
63
+ :return: string, the answer by LLM to the question.
64
+ """
65
+
66
+ # Search Google for links related to the query
67
+ links, search_error = google_custom_search.search_google(
68
+ query=search_query, api_key=self.search_api_key, search_engine_id=self.search_engine_id)
69
+
70
+ if search_error:
71
+ raise GoogleCustomSearchError(f"Error occurred when searching Google: {search_error}")
72
+
73
+ # Get only the first X links to not overload the LLM.
74
+ contents = scenarios.fetch_urls_content_in_threads(links[:number_of_top_links], number_of_characters_per_link)
75
+
76
+ combined_content = ""
77
+ for content in contents:
78
+ combined_content += f'{content}\n\n\n\n================================================================'
79
+
80
+ final_question = (f'Answer this question: {search_query}\n\n'
81
+ f'Follow these instructions: {additional_llm_instructions}\n\n'
82
+ f'Based on these data contents:\n\n'
83
+ f'{combined_content}')
84
+
85
+ # Ask Gemini to process the combined content
86
+ gemini_response = self.ask_gemini(final_question, temperature, max_output_tokens, model_name)
87
+ return gemini_response
88
+
89
@staticmethod
def ask_gemini(
        question: str,
        temperature: float,
        max_output_tokens: int,
        model_name: str = 'gemini-pro'
) -> str:
    """
    Send one question to the Gemini API and return the text of the reply.
    :param question: str, the question to ask the Gemini API.
    :param temperature: float, the temperature parameter for the LLM.
        While 0 is deterministic, higher values can lead to more creative responses.
    :param max_output_tokens: int, the maximum number of tokens to generate in the LLM response.
    :param model_name: str, the name of the model to use for the LLM.

    :return: str, the response from the Gemini API.
    """
    # Generation settings: only temperature and the output length are caller-controlled,
    # 'top_p' and 'top_k' are fixed.
    generation_settings = dict(
        temperature=temperature,
        top_p=0.99,
        top_k=0,
        max_output_tokens=max_output_tokens,
    )

    # noinspection PyTypeChecker
    llm = genai.GenerativeModel(model_name, generation_config=generation_settings)
    return llm.generate_content(question).text
@@ -3,6 +3,11 @@ Scenarios file contains full execution scenarios of playwright wrapper.
3
3
  For example: run playwright, navigate to URL, get text from a locator.
4
4
  """
5
5
 
6
+ from concurrent.futures import ThreadPoolExecutor, as_completed
7
+
8
+ from playwright.sync_api import sync_playwright
9
+ from playwright.sync_api import TimeoutError as PlaywrightTimeoutError
10
+
6
11
  from . import engine, base, combos
7
12
  from ...basics import threads, multiprocesses
8
13
 
@@ -139,3 +144,68 @@ def _get_page_content_in_process(
139
144
  html_txt_convert_to_bytes=html_txt_convert_to_bytes,
140
145
  print_kwargs=print_kwargs
141
146
  )
147
+
148
+
149
def fetch_urls_content_in_threads(
        urls: list[str],
        number_of_characters_per_link: int
) -> list[str]:
    """
    The function to fetch all URLs concurrently using threads.

    :param urls: list of strings, the URLs to fetch the text content from.
    :param number_of_characters_per_link: integer, the maximum number of characters to keep from each URL.
    :return: list of strings, the text content of each URL that was fetched successfully,
        in the same order as the input URLs. URLs that failed are reported with 'print' and skipped.
    """
    # Nothing to fetch, no need to spin up a thread pool.
    if not urls:
        return []

    # Use ThreadPoolExecutor to run multiple threads. Leaving the 'with' block waits for all the tasks.
    with ThreadPoolExecutor() as executor:
        futures = [
            executor.submit(_fetch_content, url, number_of_characters_per_link)
            for url in urls
        ]

    # Collect the results in submission order rather than completion order, so the output keeps
    # the ranking of the input URLs and is the same between runs.
    contents: list[str] = []
    for url, future in zip(urls, futures):
        try:
            contents.append(future.result())
        except Exception as exc:
            # Best effort: one failing URL should not discard the content of the others.
            print(f"An error occurred when fetching {url}: {exc}")

    return contents
171
+
172
+
173
def _fetch_content(
        url: str,
        number_of_characters_per_link: int,
        max_network_wait_seconds: float = 30.0
) -> str:
    """
    Function to fetch content from a single URL using the synchronous Playwright API.

    :param url: string, the URL to navigate to.
    :param number_of_characters_per_link: integer, the maximum number of characters of the page text to return.
    :param max_network_wait_seconds: float, upper bound in seconds for the "wait until the network goes quiet"
        phase. Without it, a page that keeps sending requests more often than every 2 seconds
        would keep this function waiting forever.
    :return: string, the first 'number_of_characters_per_link' characters of the page's 'document.body.innerText'.
    """
    # Local import: only this function needs the monotonic clock.
    import time

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            page = browser.new_page()
            page.goto(url)

            # Wait for the page to load using all possible methods, since there is no specific method
            # that will work for all websites.
            page.wait_for_load_state("load", timeout=5000)
            page.wait_for_load_state("domcontentloaded", timeout=5000)

            # The above is not enough, wait for network to stop loading files:
            # keep waiting while responses arrive, stop when none arrives for 2 seconds
            # or when the overall deadline is reached.
            deadline = time.monotonic() + max_network_wait_seconds
            while time.monotonic() < deadline:
                try:
                    # "**/*" is the wildcard for all URLs.
                    # Exiting the 'with' block waits for the next response; when the timeout is reached,
                    # it raises a TimeoutError, which ends the loop.
                    with page.expect_response("**/*", timeout=2000):
                        pass
                except PlaywrightTimeoutError:
                    break

            # Use JavaScript to extract only the visible text from the page
            text_content: str = page.evaluate("document.body.innerText")
        finally:
            # Close the browser even if navigation or text extraction failed.
            browser.close()

    # Return only the first X characters of the text content to not overload the LLM.
    return text_content[:number_of_characters_per_link]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: atomicshop
3
- Version: 2.18.4
3
+ Version: 2.18.5
4
4
  Summary: Atomic functions and classes to make developer life easier
5
5
  Author: Denis Kras
6
6
  License: MIT License
@@ -1,4 +1,4 @@
1
- atomicshop/__init__.py,sha256=85MhliVqiJ5CTAxGKQv7PfsSM76Jo4Xtu8UHYW8DdFA,123
1
+ atomicshop/__init__.py,sha256=djTLkiQczxSrBfseOQFp0kfiVNR7-UsGEJSqlifxWfA,123
2
2
  atomicshop/_basics_temp.py,sha256=6cu2dd6r2dLrd1BRNcVDKTHlsHs_26Gpw8QS6v32lQ0,3699
3
3
  atomicshop/_create_pdf_demo.py,sha256=Yi-PGZuMg0RKvQmLqVeLIZYadqEZwUm-4A9JxBl_vYA,3713
4
4
  atomicshop/_patch_import.py,sha256=ENp55sKVJ0e6-4lBvZnpz9PQCt3Otbur7F6aXDlyje4,6334
@@ -177,8 +177,8 @@ atomicshop/startup/win/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3
177
177
  atomicshop/startup/win/startup_folder.py,sha256=2RZEyF-Mf8eWPlt_-OaoGKKnMs6YhELEzJZ376EI0E0,1891
178
178
  atomicshop/startup/win/task_scheduler.py,sha256=qALe-8sfthYxsdCViH2r8OsH3x-WauDqteg5RzElPdk,4348
179
179
  atomicshop/web_apis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
180
- atomicshop/web_apis/google_custom_search.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
181
- atomicshop/web_apis/google_llm.py,sha256=WVLqyfZHFIGEncxdBvrHCv2FbvQw40z75uMGzq9lxB4,1291
180
+ atomicshop/web_apis/google_custom_search.py,sha256=evixI7y8JYyGwurRZH03nAWdD-417VFaNe1mAtuKPNA,1310
181
+ atomicshop/web_apis/google_llm.py,sha256=UzZkPtyNA03g0xcb4vJ8imdjdNvyjUvmPUtxf9J9HnY,4898
182
182
  atomicshop/wrappers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
183
183
  atomicshop/wrappers/_process_wrapper_curl.py,sha256=XkZZXYl7D0Q6UfdWqy-18AvpU0yVp9i2BVD2qRcXlkk,841
184
184
  atomicshop/wrappers/_process_wrapper_tar.py,sha256=WUMZFKNrlG4nJP9tWZ51W7BR1j_pIjsjgyAStmWjRGs,655
@@ -275,7 +275,7 @@ atomicshop/wrappers/playwrightw/javascript.py,sha256=_bW7CAtm0Y8IHYrAalg5HpPFnk6
275
275
  atomicshop/wrappers/playwrightw/keyboard.py,sha256=zN3YddGO-qUkn6C0CRVFejP4cTuaUwXLDNFhFREjERY,422
276
276
  atomicshop/wrappers/playwrightw/locators.py,sha256=6wsLywZxDuii7mwv-zQsRbqQC8r7j96Bma5b5_7ZoVo,2411
277
277
  atomicshop/wrappers/playwrightw/mouse.py,sha256=-2FZbQtjgH7tdXWld6ZPGqlKFUdf5in0ujN0hewxa50,656
278
- atomicshop/wrappers/playwrightw/scenarios.py,sha256=OzI3SV0QgazRwMZ0hMEopDHUYG-aygBSxZ50w78lIP8,5310
278
+ atomicshop/wrappers/playwrightw/scenarios.py,sha256=RY56hH7UKvDoBr5j1JwP5xRoQtaz0AnCAkA602MurPk,8396
279
279
  atomicshop/wrappers/playwrightw/waits.py,sha256=PBFdz_PoM7Fo7O8hLqMrxNPzBEYgPoXwZceFFCGGeu8,7182
280
280
  atomicshop/wrappers/psutilw/cpus.py,sha256=w6LPBMINqS-T_X8vzdYkLS2Wzuve28Ydp_GafTCngrc,236
281
281
  atomicshop/wrappers/psutilw/disks.py,sha256=3ZSVoommKH1TWo37j_83frB-NqXF4Nf5q5mBCX8G4jE,9221
@@ -319,8 +319,8 @@ atomicshop/wrappers/socketw/ssl_base.py,sha256=kmiif84kMhBr5yjQW17p935sfjR5JKG0L
319
319
  atomicshop/wrappers/socketw/statistics_csv.py,sha256=fgMzDXI0cybwUEqAxprRmY3lqbh30KAV-jOpoFKT-m8,3395
320
320
  atomicshop/wrappers/winregw/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
321
321
  atomicshop/wrappers/winregw/winreg_network.py,sha256=zZQfps-CdODQaTUADbHAwKHr5RUg7BLafnKWBbKaLN4,8728
322
- atomicshop-2.18.4.dist-info/LICENSE.txt,sha256=lLU7EYycfYcK2NR_1gfnhnRC8b8ccOTElACYplgZN88,1094
323
- atomicshop-2.18.4.dist-info/METADATA,sha256=Eabxa6pWlDm-BYBMU-rJ_YCVnxKylpxEYJ2GmbskdZM,10499
324
- atomicshop-2.18.4.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
325
- atomicshop-2.18.4.dist-info/top_level.txt,sha256=EgKJB-7xcrAPeqTRF2laD_Np2gNGYkJkd4OyXqpJphA,11
326
- atomicshop-2.18.4.dist-info/RECORD,,
322
+ atomicshop-2.18.5.dist-info/LICENSE.txt,sha256=lLU7EYycfYcK2NR_1gfnhnRC8b8ccOTElACYplgZN88,1094
323
+ atomicshop-2.18.5.dist-info/METADATA,sha256=jFNipJDLDZS4YVzhPI5lVuaMh1xDHuH2Fohc6wyNjy8,10499
324
+ atomicshop-2.18.5.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
325
+ atomicshop-2.18.5.dist-info/top_level.txt,sha256=EgKJB-7xcrAPeqTRF2laD_Np2gNGYkJkd4OyXqpJphA,11
326
+ atomicshop-2.18.5.dist-info/RECORD,,