PyPI - atomicshop - Versions diffs - 2.18.4__py3-none-any.whl → 2.18.5__py3-none-any.whl - Mend

atomicshop 2.18.4__py3-none-any.whl → 2.18.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of atomicshop might be problematic. Click here for more details.

Files changed (9)

atomicshop/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
 """Atomic Basic functions and classes to make developer life easier"""
 __author__ = "Den Kras"
-__version__ = '2.18.4'
+__version__ = '2.18.5'

atomicshop/web_apis/google_custom_search.py CHANGED Viewed

@@ -0,0 +1,37 @@
+from typing import Union
+from googleapiclient.discovery import build
+import googleapiclient.errors
+def search_google(
+        query: str,
+        api_key: str,
+        search_engine_id: str
+) -> tuple[
+        Union[list[str], None],
+        str]:
+    """
+    Function to search Google using Google Custom Search API for links related to a query.
+    :param query: string, the search query to search on Google Custom Search.
+    :param api_key: string, the API key for the Google Custom Search API.
+    :param search_engine_id: string, the search engine ID for the Google Custom Search API.
+    :return: tuple(list of strings - the links related to the query, string - the error message if any)
+    """
+    # noinspection PyTypeChecker
+    error: str = None
+    try:
+        service = build("customsearch", "v1", developerKey=api_key)
+        result = service.cse().list(q=query, cx=search_engine_id).execute()
+        items = result.get('items', [])
+        links = [item['link'] for item in items if 'link' in item]
+        return links, error
+    except googleapiclient.errors.HttpError as e:
+        # In case of rate limit error, return the error message.
+        if e.status_code == 429:
+            return None, str(e.reason)
+        else:
+            raise e

atomicshop/web_apis/google_llm.py CHANGED Viewed

@@ -2,13 +2,31 @@ import os
 import google.generativeai as genai
+from . import google_custom_search
+from ..wrappers.playwrightw import scenarios
+class GoogleCustomSearchError(Exception):
+    pass
 class GoogleLLM:
     def __init__(
             self,
-            llm_api_key: str
+            llm_api_key: str,
+            search_api_key: str,
+            search_engine_id: str
     ) -> None:
+        """
+        Constructor for the GoogleLLM class.
+        :param llm_api_key: str, the API key for the Gemini API.
+        :param search_api_key: str, the API key for the Google Custom Search API.
+        :param search_engine_id: str, the search engine ID for the Google Custom Search API.
+        """
         self.genai = genai
+        self.search_api_key: str = search_api_key
+        self.search_engine_id: str = search_engine_id
         os.environ["API_KEY"] = llm_api_key
         genai.configure(api_key=os.environ["API_KEY"])
@@ -28,15 +46,73 @@ class GoogleLLM:
             number_of_top_links: int = 2,
             number_of_characters_per_link: int = 15000,
             temperature: float = 0,
-            max_output_tokens: int = 4096
-    ):
+            max_output_tokens: int = 4096,
+            model_name: str = 'gemini-pro'
+    ) -> str:
         """
         Function to get the answer to a question by searching Google Custom Console API and processing the content using Gemini API.
-        :param search_query:
-        :param additional_llm_instructions:
-        :param number_of_top_links:
-        :param number_of_characters_per_link:
-        :param temperature:
-        :param max_output_tokens:
-        :return:
-        """
+        :param search_query: string, the search query to search on Google Custom Search.
+        :param additional_llm_instructions: string, additional instructions to provide to the LLM.
+        :param number_of_top_links: integer, the number of top links to fetch content from.
+        :param number_of_characters_per_link: integer, the number of characters to fetch from each link.
+        :param temperature: float, the temperature parameter for the LLM.
+        :param max_output_tokens: integer, the maximum number of tokens to generate in the LLM response.
+        :param model_name: string, the name of the model to use for the LLM.
+        :return: string, the answer by LLM to the question.
+        """
+        # Search Google for links related to the query
+        links, search_error = google_custom_search.search_google(
+            query=search_query, api_key=self.search_api_key, search_engine_id=self.search_engine_id)
+        if search_error:
+            raise GoogleCustomSearchError(f"Error occurred when searching Google: {search_error}")
+        # Get only the first X links to not overload the LLM.
+        contents = scenarios.fetch_urls_content_in_threads(links[:number_of_top_links], number_of_characters_per_link)
+        combined_content = ""
+        for content in contents:
+            combined_content += f'{content}\n\n\n\n================================================================'
+        final_question = (f'Answer this question: {search_query}\n\n'
+                          f'Follow these instructions: {additional_llm_instructions}\n\n'
+                          f'Based on these data contents:\n\n'
+                          f'{combined_content}')
+        # Ask Gemini to process the combined content
+        gemini_response = self.ask_gemini(final_question, temperature, max_output_tokens, model_name)
+        return gemini_response
+    @staticmethod
+    def ask_gemini(
+            question: str,
+            temperature: float,
+            max_output_tokens: int,
+            model_name: str = 'gemini-pro'
+    ) -> str:
+        """
+        Function to ask the Gemini API a question and get the response.
+        :param question: str, the question to ask the Gemini API.
+        :param temperature: float, the temperature parameter for the LLM.
+            While 0 is deterministic, higher values can lead to more creative responses.
+        :param max_output_tokens: int, the maximum number of tokens to generate in the LLM response.
+        :param model_name: str, the name of the model to use for the LLM.
+        :return: str, the response from the Gemini API.
+        """
+        # Model Configuration
+        model_config = {
+            "temperature": temperature,
+            "top_p": 0.99,
+            "top_k": 0,
+            "max_output_tokens": max_output_tokens,
+        }
+        # model = genai.GenerativeModel('gemini-1.5-pro-latest',
+        # noinspection PyTypeChecker
+        model = genai.GenerativeModel(model_name, generation_config=model_config)
+        response = model.generate_content(question)
+        return response.text

atomicshop/wrappers/playwrightw/scenarios.py CHANGED Viewed

@@ -3,6 +3,11 @@ Scenarios file contains full execution scenarios of playwright wrapper.
 For example: run playwright, navigate to URL, get text from a locator.
 """
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from playwright.sync_api import sync_playwright
+from playwright.sync_api import TimeoutError as PlaywrightTimeoutError
 from . import engine, base, combos
 from ...basics import threads, multiprocesses
@@ -139,3 +144,68 @@ def _get_page_content_in_process(
             html_txt_convert_to_bytes=html_txt_convert_to_bytes,
             print_kwargs=print_kwargs
         )
+def fetch_urls_content_in_threads(
+        urls: list[str],
+        number_of_characters_per_link: int
+) -> list[str]:
+    """ The function to fetch all URLs concurrently using threads """
+    contents = []
+    # Use ThreadPoolExecutor to run multiple threads
+    with ThreadPoolExecutor() as executor:
+        # Submit tasks for each URL
+        future_to_url = {executor.submit(_fetch_content, url, number_of_characters_per_link): url for url in urls}
+        # Collect results as they complete
+        for future in as_completed(future_to_url):
+            url = future_to_url[future]
+            try:
+                data = future.result()
+                contents.append(data)
+            except Exception as exc:
+                print(f"An error occurred when fetching {url}: {exc}")
+    return contents
+def _fetch_content(url, number_of_characters_per_link):
+    """ Function to fetch content from a single URL using the synchronous Playwright API """
+    with sync_playwright() as p:
+        browser = p.chromium.launch(headless=True)
+        page = browser.new_page()
+        page.goto(url)
+        # Wait for the page to load using all possible methods, since there is no specific method
+        # that will work for all websites.
+        page.wait_for_load_state("load", timeout=5000)
+        page.wait_for_load_state("domcontentloaded", timeout=5000)
+        # The above is not enough, wait for network to stop loading files.
+        response_list: list = []
+        while True:
+            try:
+                # "**/*" is the wildcard for all URLs.
+                # 'page.expect_response' will wait for the response to be received, and then return the response object.
+                # When timeout is reached, it will raise a TimeoutError, which will break the while loop.
+                with page.expect_response("**/*", timeout=2000) as response_info:
+                    response_list.append(response_info.value)
+            except PlaywrightTimeoutError:
+                break
+        # Use JavaScript to extract only the visible text from the page
+        text_content: str = page.evaluate("document.body.innerText")
+        # text = page.evaluate('document.body.textContent')
+        # text = page.eval_on_selector('body', 'element => element.innerText')
+        # text = page.eval_on_selector('body', 'element => element.textContent')
+        # text = page.inner_text('body')
+        # text = page.text_content('body')
+        # text = page.evaluate('document.documentElement.innerText')
+        # text = page.inner_text(':root')
+        # html = page.content()
+        # html = page.evaluate('document.documentElement.outerHTML')
+        browser.close()
+    # Return only the first X characters of the text content to not overload the LLM.
+    return text_content[:number_of_characters_per_link]

{atomicshop-2.18.4.dist-info → atomicshop-2.18.5.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: atomicshop
-Version: 2.18.4
+Version: 2.18.5
 Summary: Atomic functions and classes to make developer life easier
 Author: Denis Kras
 License: MIT License

{atomicshop-2.18.4.dist-info → atomicshop-2.18.5.dist-info}/RECORD RENAMED Viewed

@@ -1,4 +1,4 @@
-atomicshop/__init__.py,sha256=85MhliVqiJ5CTAxGKQv7PfsSM76Jo4Xtu8UHYW8DdFA,123
+atomicshop/__init__.py,sha256=djTLkiQczxSrBfseOQFp0kfiVNR7-UsGEJSqlifxWfA,123
 atomicshop/_basics_temp.py,sha256=6cu2dd6r2dLrd1BRNcVDKTHlsHs_26Gpw8QS6v32lQ0,3699
 atomicshop/_create_pdf_demo.py,sha256=Yi-PGZuMg0RKvQmLqVeLIZYadqEZwUm-4A9JxBl_vYA,3713
 atomicshop/_patch_import.py,sha256=ENp55sKVJ0e6-4lBvZnpz9PQCt3Otbur7F6aXDlyje4,6334
@@ -177,8 +177,8 @@ atomicshop/startup/win/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3
 atomicshop/startup/win/startup_folder.py,sha256=2RZEyF-Mf8eWPlt_-OaoGKKnMs6YhELEzJZ376EI0E0,1891
 atomicshop/startup/win/task_scheduler.py,sha256=qALe-8sfthYxsdCViH2r8OsH3x-WauDqteg5RzElPdk,4348
 atomicshop/web_apis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-atomicshop/web_apis/google_custom_search.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-atomicshop/web_apis/google_llm.py,sha256=WVLqyfZHFIGEncxdBvrHCv2FbvQw40z75uMGzq9lxB4,1291
+atomicshop/web_apis/google_custom_search.py,sha256=evixI7y8JYyGwurRZH03nAWdD-417VFaNe1mAtuKPNA,1310
+atomicshop/web_apis/google_llm.py,sha256=UzZkPtyNA03g0xcb4vJ8imdjdNvyjUvmPUtxf9J9HnY,4898
 atomicshop/wrappers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 atomicshop/wrappers/_process_wrapper_curl.py,sha256=XkZZXYl7D0Q6UfdWqy-18AvpU0yVp9i2BVD2qRcXlkk,841
 atomicshop/wrappers/_process_wrapper_tar.py,sha256=WUMZFKNrlG4nJP9tWZ51W7BR1j_pIjsjgyAStmWjRGs,655
@@ -275,7 +275,7 @@ atomicshop/wrappers/playwrightw/javascript.py,sha256=_bW7CAtm0Y8IHYrAalg5HpPFnk6
 atomicshop/wrappers/playwrightw/keyboard.py,sha256=zN3YddGO-qUkn6C0CRVFejP4cTuaUwXLDNFhFREjERY,422
 atomicshop/wrappers/playwrightw/locators.py,sha256=6wsLywZxDuii7mwv-zQsRbqQC8r7j96Bma5b5_7ZoVo,2411
 atomicshop/wrappers/playwrightw/mouse.py,sha256=-2FZbQtjgH7tdXWld6ZPGqlKFUdf5in0ujN0hewxa50,656
-atomicshop/wrappers/playwrightw/scenarios.py,sha256=OzI3SV0QgazRwMZ0hMEopDHUYG-aygBSxZ50w78lIP8,5310
+atomicshop/wrappers/playwrightw/scenarios.py,sha256=RY56hH7UKvDoBr5j1JwP5xRoQtaz0AnCAkA602MurPk,8396
 atomicshop/wrappers/playwrightw/waits.py,sha256=PBFdz_PoM7Fo7O8hLqMrxNPzBEYgPoXwZceFFCGGeu8,7182
 atomicshop/wrappers/psutilw/cpus.py,sha256=w6LPBMINqS-T_X8vzdYkLS2Wzuve28Ydp_GafTCngrc,236
 atomicshop/wrappers/psutilw/disks.py,sha256=3ZSVoommKH1TWo37j_83frB-NqXF4Nf5q5mBCX8G4jE,9221
@@ -319,8 +319,8 @@ atomicshop/wrappers/socketw/ssl_base.py,sha256=kmiif84kMhBr5yjQW17p935sfjR5JKG0L
 atomicshop/wrappers/socketw/statistics_csv.py,sha256=fgMzDXI0cybwUEqAxprRmY3lqbh30KAV-jOpoFKT-m8,3395
 atomicshop/wrappers/winregw/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 atomicshop/wrappers/winregw/winreg_network.py,sha256=zZQfps-CdODQaTUADbHAwKHr5RUg7BLafnKWBbKaLN4,8728
-atomicshop-2.18.4.dist-info/LICENSE.txt,sha256=lLU7EYycfYcK2NR_1gfnhnRC8b8ccOTElACYplgZN88,1094
-atomicshop-2.18.4.dist-info/METADATA,sha256=Eabxa6pWlDm-BYBMU-rJ_YCVnxKylpxEYJ2GmbskdZM,10499
-atomicshop-2.18.4.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-atomicshop-2.18.4.dist-info/top_level.txt,sha256=EgKJB-7xcrAPeqTRF2laD_Np2gNGYkJkd4OyXqpJphA,11
-atomicshop-2.18.4.dist-info/RECORD,,
+atomicshop-2.18.5.dist-info/LICENSE.txt,sha256=lLU7EYycfYcK2NR_1gfnhnRC8b8ccOTElACYplgZN88,1094
+atomicshop-2.18.5.dist-info/METADATA,sha256=jFNipJDLDZS4YVzhPI5lVuaMh1xDHuH2Fohc6wyNjy8,10499
+atomicshop-2.18.5.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+atomicshop-2.18.5.dist-info/top_level.txt,sha256=EgKJB-7xcrAPeqTRF2laD_Np2gNGYkJkd4OyXqpJphA,11
+atomicshop-2.18.5.dist-info/RECORD,,

{atomicshop-2.18.4.dist-info → atomicshop-2.18.5.dist-info}/LICENSE.txt RENAMED Viewed

File without changes

{atomicshop-2.18.4.dist-info → atomicshop-2.18.5.dist-info}/WHEEL RENAMED Viewed

File without changes

{atomicshop-2.18.4.dist-info → atomicshop-2.18.5.dist-info}/top_level.txt RENAMED Viewed

File without changes

atomicshop 2.18.4__py3-none-any.whl → 2.18.5__py3-none-any.whl

Potentially problematic release.

atomicshop 2.18.4__py3-none-any.whl → 2.18.5__py3-none-any.whl