PyPI - atomicshop - Versions diffs - 2.18.18__py3-none-any.whl → 2.18.19__py3-none-any.whl - Mend

atomicshop 2.18.18__py3-none-any.whl → 2.18.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of atomicshop might be problematic. Click here for more details.

Files changed (9) hide show

atomicshop/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
 """Atomic Basic functions and classes to make developer life easier"""
 __author__ = "Den Kras"
-__version__ = '2.18.18'
+__version__ = '2.18.19'

atomicshop/web_apis/google_custom_search.py CHANGED Viewed

@@ -25,7 +25,14 @@ def search_google(
     try:
         service = build("customsearch", "v1", developerKey=api_key)
-        result = service.cse().list(q=query, cx=search_engine_id).execute()
+        result = service.cse().list(
+            q=query,
+            cx=search_engine_id,
+            # gl="us",  # Country code
+            # lr="lang_en",  # Language restriction
+            # safe="off",  # Safe search off
+            # dateRestrict="m1"  # Restrict results to the last month
+        ).execute()
         items = result.get('items', [])
         links = [item['link'] for item in items if 'link' in item]
         return links, error

atomicshop/web_apis/google_llm.py CHANGED Viewed

@@ -1,9 +1,11 @@
 import os
+from typing import Literal
 import google.generativeai as genai
 from . import google_custom_search
 from ..wrappers.playwrightw import scenarios
+from .. import urls
 class GoogleCustomSearchError(Exception):
@@ -41,8 +43,16 @@ class GoogleLLM:
     def get_answer_online(
             self,
-            search_query: str,
-            additional_llm_instructions: str,
+            search_query_or_url: str,
+            text_fetch_method: Literal[
+                'playwright_text',
+                'js_text',
+                'playwright_html',
+                'js_html',
+                'playwright_copypaste'
+            ],
+            llm_query: str,
+            llm_post_instructions: str,
             number_of_top_links: int = 2,
             number_of_characters_per_link: int = 15000,
             temperature: float = 0,
@@ -52,8 +62,17 @@ class GoogleLLM:
         """
         Function to get the answer to a question by searching Google Custom Console API and processing the content using Gemini API.
-        :param search_query: string, the search query to search on Google Custom Search.
-        :param additional_llm_instructions: string, additional instructions to provide to the LLM.
+        :param search_query_or_url: string, is checked if it is a URL or a search query.
+            Search query: the search query to search on Google Custom Search.
+            URL: the URL to fetch content from without using Google Custom Search.
+        :param text_fetch_method: string, the method to fetch text from the URL.
+            playwright_text: uses native Playwright to fetch text from the URL.
+            js_text: uses Playwright and JavaScript evaluation to fetch text from the URL.
+            playwright_html: uses native Playwright to fetch HTML from the URL and then parse it to text using beautiful soup.
+            js_html: uses Playwright and JavaScript evaluation to fetch HTML from the URL and then parse it to text using beautiful soup.
+            playwright_copypaste: uses native Playwright to fetch text from the URL by copying and pasting the text from rendered page using clipboard.
+        :param llm_query: string, the question to ask the LLM about the text content that is returned from the search query or the URL.
+        :param llm_post_instructions: string, additional instructions to provide to the LLM on the answer it provided after the llm_query.
         :param number_of_top_links: integer, the number of top links to fetch content from.
         :param number_of_characters_per_link: integer, the number of characters to fetch from each link.
         :param temperature: float, the temperature parameter for the LLM.
@@ -63,22 +82,31 @@ class GoogleLLM:
         :return: string, the answer by LLM to the question.
         """
-        # Search Google for links related to the query
-        links, search_error = google_custom_search.search_google(
-            query=search_query, api_key=self.search_api_key, search_engine_id=self.search_engine_id)
-        if search_error:
-            raise GoogleCustomSearchError(f"Error occurred when searching Google: {search_error}")
-        # Get only the first X links to not overload the LLM.
-        contents = scenarios.fetch_urls_content_in_threads(links[:number_of_top_links], number_of_characters_per_link)
+        # Check if the search query is a URL.
+        if urls.is_valid_url(search_query_or_url):
+            # Fetch content from the URL
+            contents = scenarios.fetch_urls_content_in_threads(
+                urls=[search_query_or_url], number_of_characters_per_link=number_of_characters_per_link,
+                text_fetch_method=text_fetch_method)
+        # If not a URL, Search Google for links related to the query
+        else:
+            links, search_error = google_custom_search.search_google(
+                query=search_query_or_url, api_key=self.search_api_key, search_engine_id=self.search_engine_id)
+            if search_error:
+                raise GoogleCustomSearchError(f"Error occurred when searching Google: {search_error}")
+            # Get only the first X links to not overload the LLM.
+            contents = scenarios.fetch_urls_content_in_threads(
+                urls=links[:number_of_top_links], number_of_characters_per_link=number_of_characters_per_link,
+                text_fetch_method=text_fetch_method)
         combined_content = ""
         for content in contents:
             combined_content += f'{content}\n\n\n\n================================================================'
-        final_question = (f'Answer this question: {search_query}\n\n'
-                          f'Follow these instructions: {additional_llm_instructions}\n\n'
+        final_question = (f'Answer this question: {llm_query}\n\n'
+                          f'Follow these instructions: {llm_post_instructions}\n\n'
                           f'Based on these data contents:\n\n'
                           f'{combined_content}')

atomicshop/wrappers/playwrightw/scenarios.py CHANGED Viewed

@@ -4,9 +4,11 @@ For example: run playwright, navigate to URL, get text from a locator.
 """
 from concurrent.futures import ThreadPoolExecutor, as_completed
+from typing import Literal
 from playwright.sync_api import sync_playwright
 from playwright.sync_api import TimeoutError as PlaywrightTimeoutError
+from bs4 import BeautifulSoup
 from . import engine, base, combos
 from ...basics import threads, multiprocesses
@@ -148,7 +150,14 @@ def _get_page_content_in_process(
 def fetch_urls_content_in_threads(
         urls: list[str],
-        number_of_characters_per_link: int
+        number_of_characters_per_link: int,
+        text_fetch_method: Literal[
+            'playwright_text',
+            'js_text',
+            'playwright_html',
+            'js_html',
+            'playwright_copypaste'
+        ]
 ) -> list[str]:
     """ The function to fetch all URLs concurrently using threads """
     contents = []
@@ -156,7 +165,7 @@ def fetch_urls_content_in_threads(
     # Use ThreadPoolExecutor to run multiple threads
     with ThreadPoolExecutor() as executor:
         # Submit tasks for each URL
-        future_to_url = {executor.submit(_fetch_content, url, number_of_characters_per_link): url for url in urls}
+        future_to_url = {executor.submit(_fetch_content, url, number_of_characters_per_link, text_fetch_method): url for url in urls}
         # Collect results as they complete
         for future in as_completed(future_to_url):
@@ -172,23 +181,62 @@ def fetch_urls_content_in_threads(
 def fetch_urls_content(
         urls: list[str],
-        number_of_characters_per_link: int
+        number_of_characters_per_link: int,
+        text_fetch_method: Literal[
+            'playwright_text',
+            'js_text',
+            'playwright_html',
+            'js_html',
+            'playwright_copypaste'
+        ],
 ) -> list[str]:
     """ The function to fetch all URLs not concurrently without using threads """
     contents = []
     for url in urls:
-        data = _fetch_content(url, number_of_characters_per_link)
+        data = _fetch_content(url, number_of_characters_per_link, text_fetch_method)
         contents.append(data)
     return contents
-def _fetch_content(url, number_of_characters_per_link, headless: bool = True):
+def _fetch_content(
+        url,
+        number_of_characters_per_link,
+        text_fetch_method: Literal[
+            'playwright_text',
+            'js_text',
+            'playwright_html',
+            'js_html',
+            'playwright_copypaste'
+        ],
+        headless: bool = True):
     """ Function to fetch content from a single URL using the synchronous Playwright API """
     with sync_playwright() as p:
-        browser = p.chromium.launch(headless=headless)
-        page = browser.new_page()
+        browser = p.chromium.launch(headless=headless)  # Set headless=True if you don't want to see the browser
+        if text_fetch_method == "playwright_copypaste":
+            context = browser.new_context(permissions=["clipboard-read", "clipboard-write"])
+        else:
+            context = browser.new_context()
+        page = context.new_page()
+        # from playwright_stealth import stealth_sync
+        # stealth_sync(page)
+        # # Block specific script by URL or partial URL match
+        # def block_script(route):
+        #     if "custom.js" in route.request.url:
+        #         print(f"Blocking: {route.request.url}")
+        #         route.abort()  # Block the request
+        #     else:
+        #         route.continue_()  # Allow other requests
+        #
+        # # Intercept and handle network requests
+        # page.route("**/*", block_script)
         page.goto(url)
         # Wait for the page to load using all possible methods, since there is no specific method
@@ -207,8 +255,32 @@ def _fetch_content(url, number_of_characters_per_link, headless: bool = True):
             except PlaywrightTimeoutError:
                 break
-        # Use JavaScript to extract only the visible text from the page
-        text_content: str = page.evaluate("document.body.innerText")
+        if text_fetch_method == "playwright_text":
+            text_content = page.inner_text('body')
+        elif text_fetch_method == "js_text":
+            # Use JavaScript to extract only the visible text from the page
+            text_content: str = page.evaluate("document.body.innerText")
+        elif text_fetch_method == "playwright_html":
+            # Get the full HTML content of the page
+            html = page.content()
+            # Parse the HTML using BeautifulSoup and extract the text
+            soup = BeautifulSoup(html, 'html.parser')
+            text_content = soup.get_text()
+        elif text_fetch_method == "js_html":
+            # Use JavaScript to extract the full text from the page
+            html = page.evaluate('document.documentElement.outerHTML')
+            # Parse the HTML using BeautifulSoup and extract the text
+            soup = BeautifulSoup(html, 'html.parser')
+            text_content = soup.get_text()
+        elif text_fetch_method == "playwright_copypaste":
+            # Focus the page and simulate Ctrl+A and Ctrl+C
+            page.keyboard.press("Control+a")  # Select all text
+            page.keyboard.press("Control+c")  # Copy text to clipboard
+            # Retrieve copied text from the clipboard
+            text_content = page.evaluate("navigator.clipboard.readText()")
+        else:
+            raise ValueError(f"Invalid text_fetch_method: {text_fetch_method}")
         # text = page.evaluate('document.body.textContent')
         # text = page.eval_on_selector('body', 'element => element.innerText')
         # text = page.eval_on_selector('body', 'element => element.textContent')
@@ -217,8 +289,6 @@ def _fetch_content(url, number_of_characters_per_link, headless: bool = True):
         # text = page.evaluate('document.documentElement.innerText')
         # text = page.inner_text(':root')
-        # html = page.content()
-        # html = page.evaluate('document.documentElement.outerHTML')
         browser.close()
     # Return only the first X characters of the text content to not overload the LLM.

{atomicshop-2.18.18.dist-info → atomicshop-2.18.19.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: atomicshop
-Version: 2.18.18
+Version: 2.18.19
 Summary: Atomic functions and classes to make developer life easier
 Author: Denis Kras
 License: MIT License

{atomicshop-2.18.18.dist-info → atomicshop-2.18.19.dist-info}/RECORD RENAMED Viewed

@@ -1,4 +1,4 @@
-atomicshop/__init__.py,sha256=eQr9Fof-xo4eR9O81aZCwDxsSgyunNqvIrJeVUh7VjA,124
+atomicshop/__init__.py,sha256=VnFf6mmsMm3Gl5pv70NpCxRaUMo5qq2TDFjlnEeDvJs,124
 atomicshop/_basics_temp.py,sha256=6cu2dd6r2dLrd1BRNcVDKTHlsHs_26Gpw8QS6v32lQ0,3699
 atomicshop/_create_pdf_demo.py,sha256=Yi-PGZuMg0RKvQmLqVeLIZYadqEZwUm-4A9JxBl_vYA,3713
 atomicshop/_patch_import.py,sha256=ENp55sKVJ0e6-4lBvZnpz9PQCt3Otbur7F6aXDlyje4,6334
@@ -177,8 +177,8 @@ atomicshop/startup/win/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3
 atomicshop/startup/win/startup_folder.py,sha256=2RZEyF-Mf8eWPlt_-OaoGKKnMs6YhELEzJZ376EI0E0,1891
 atomicshop/startup/win/task_scheduler.py,sha256=qALe-8sfthYxsdCViH2r8OsH3x-WauDqteg5RzElPdk,4348
 atomicshop/web_apis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-atomicshop/web_apis/google_custom_search.py,sha256=evixI7y8JYyGwurRZH03nAWdD-417VFaNe1mAtuKPNA,1310
-atomicshop/web_apis/google_llm.py,sha256=UzZkPtyNA03g0xcb4vJ8imdjdNvyjUvmPUtxf9J9HnY,4898
+atomicshop/web_apis/google_custom_search.py,sha256=R1BnUmBFWZIWkfizSRWoSYoZTdPEjLJ28F_sS2g1jGQ,1558
+atomicshop/web_apis/google_llm.py,sha256=X_sG3leUvskPCPryN6YszDFih_X2Ne0OSMA3UbDMKIg,6741
 atomicshop/wrappers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 atomicshop/wrappers/_process_wrapper_curl.py,sha256=XkZZXYl7D0Q6UfdWqy-18AvpU0yVp9i2BVD2qRcXlkk,841
 atomicshop/wrappers/_process_wrapper_tar.py,sha256=WUMZFKNrlG4nJP9tWZ51W7BR1j_pIjsjgyAStmWjRGs,655
@@ -275,7 +275,7 @@ atomicshop/wrappers/playwrightw/javascript.py,sha256=_bW7CAtm0Y8IHYrAalg5HpPFnk6
 atomicshop/wrappers/playwrightw/keyboard.py,sha256=zN3YddGO-qUkn6C0CRVFejP4cTuaUwXLDNFhFREjERY,422
 atomicshop/wrappers/playwrightw/locators.py,sha256=6wsLywZxDuii7mwv-zQsRbqQC8r7j96Bma5b5_7ZoVo,2411
 atomicshop/wrappers/playwrightw/mouse.py,sha256=-2FZbQtjgH7tdXWld6ZPGqlKFUdf5in0ujN0hewxa50,656
-atomicshop/wrappers/playwrightw/scenarios.py,sha256=HopJJ-caAHuXxH8kiJHtlcFSI-89Zx7Fc6caGPOHC2A,8786
+atomicshop/wrappers/playwrightw/scenarios.py,sha256=Xvl1jUmQhd4l0MmOUgQKfgGleblIyE-qC3wuoyx16tU,11531
 atomicshop/wrappers/playwrightw/waits.py,sha256=PBFdz_PoM7Fo7O8hLqMrxNPzBEYgPoXwZceFFCGGeu8,7182
 atomicshop/wrappers/psutilw/cpus.py,sha256=w6LPBMINqS-T_X8vzdYkLS2Wzuve28Ydp_GafTCngrc,236
 atomicshop/wrappers/psutilw/disks.py,sha256=3ZSVoommKH1TWo37j_83frB-NqXF4Nf5q5mBCX8G4jE,9221
@@ -320,8 +320,8 @@ atomicshop/wrappers/socketw/statistics_csv.py,sha256=fgMzDXI0cybwUEqAxprRmY3lqbh
 atomicshop/wrappers/winregw/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 atomicshop/wrappers/winregw/winreg_installed_software.py,sha256=Qzmyktvob1qp6Tjk2DjLfAqr_yXV0sgWzdMW_9kwNjY,2345
 atomicshop/wrappers/winregw/winreg_network.py,sha256=AENV88H1qDidrcpyM9OwEZxX5svfi-Jb4N6FkS1xtqA,8851
-atomicshop-2.18.18.dist-info/LICENSE.txt,sha256=lLU7EYycfYcK2NR_1gfnhnRC8b8ccOTElACYplgZN88,1094
-atomicshop-2.18.18.dist-info/METADATA,sha256=8vRMxwI466-ZxRsHeJJLik4TemN7DUHdwI1gf2JmBd0,10577
-atomicshop-2.18.18.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-atomicshop-2.18.18.dist-info/top_level.txt,sha256=EgKJB-7xcrAPeqTRF2laD_Np2gNGYkJkd4OyXqpJphA,11
-atomicshop-2.18.18.dist-info/RECORD,,
+atomicshop-2.18.19.dist-info/LICENSE.txt,sha256=lLU7EYycfYcK2NR_1gfnhnRC8b8ccOTElACYplgZN88,1094
+atomicshop-2.18.19.dist-info/METADATA,sha256=laKTJjYAM6iOz-IeezdJ7sYHK8LFfJX1Zuc3Ru-Ktkg,10577
+atomicshop-2.18.19.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+atomicshop-2.18.19.dist-info/top_level.txt,sha256=EgKJB-7xcrAPeqTRF2laD_Np2gNGYkJkd4OyXqpJphA,11
+atomicshop-2.18.19.dist-info/RECORD,,

{atomicshop-2.18.18.dist-info → atomicshop-2.18.19.dist-info}/LICENSE.txt RENAMED Viewed

File without changes

{atomicshop-2.18.18.dist-info → atomicshop-2.18.19.dist-info}/WHEEL RENAMED Viewed

File without changes

{atomicshop-2.18.18.dist-info → atomicshop-2.18.19.dist-info}/top_level.txt RENAMED Viewed

File without changes

atomicshop 2.18.18__py3-none-any.whl → 2.18.19__py3-none-any.whl

Potentially problematic release.

atomicshop 2.18.18__py3-none-any.whl → 2.18.19__py3-none-any.whl