atomicshop 2.18.18__py3-none-any.whl → 2.18.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of atomicshop might be problematic. Click here for more details.

atomicshop/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
1
  """Atomic Basic functions and classes to make developer life easier"""
2
2
 
3
3
  __author__ = "Den Kras"
4
- __version__ = '2.18.18'
4
+ __version__ = '2.18.19'
@@ -25,7 +25,14 @@ def search_google(
25
25
 
26
26
  try:
27
27
  service = build("customsearch", "v1", developerKey=api_key)
28
- result = service.cse().list(q=query, cx=search_engine_id).execute()
28
+ result = service.cse().list(
29
+ q=query,
30
+ cx=search_engine_id,
31
+ # gl="us", # Country code
32
+ # lr="lang_en", # Language restriction
33
+ # safe="off", # Safe search off
34
+ # dateRestrict="m1" # Restrict results to the last month
35
+ ).execute()
29
36
  items = result.get('items', [])
30
37
  links = [item['link'] for item in items if 'link' in item]
31
38
  return links, error
@@ -1,9 +1,11 @@
1
1
  import os
2
+ from typing import Literal
2
3
 
3
4
  import google.generativeai as genai
4
5
 
5
6
  from . import google_custom_search
6
7
  from ..wrappers.playwrightw import scenarios
8
+ from .. import urls
7
9
 
8
10
 
9
11
  class GoogleCustomSearchError(Exception):
@@ -41,8 +43,16 @@ class GoogleLLM:
41
43
 
42
44
  def get_answer_online(
43
45
  self,
44
- search_query: str,
45
- additional_llm_instructions: str,
46
+ search_query_or_url: str,
47
+ text_fetch_method: Literal[
48
+ 'playwright_text',
49
+ 'js_text',
50
+ 'playwright_html',
51
+ 'js_html',
52
+ 'playwright_copypaste'
53
+ ],
54
+ llm_query: str,
55
+ llm_post_instructions: str,
46
56
  number_of_top_links: int = 2,
47
57
  number_of_characters_per_link: int = 15000,
48
58
  temperature: float = 0,
@@ -52,8 +62,17 @@ class GoogleLLM:
52
62
  """
53
63
  Function to get the answer to a question by searching with the Google Custom Search API and processing the content using the Gemini API.
54
64
 
55
- :param search_query: string, the search query to search on Google Custom Search.
56
- :param additional_llm_instructions: string, additional instructions to provide to the LLM.
65
+ :param search_query_or_url: string, either a URL or a search query; the value is checked to determine which.
66
+ Search query: the search query to search on Google Custom Search.
67
+ URL: the URL to fetch content from without using Google Custom Search.
68
+ :param text_fetch_method: string, the method to fetch text from the URL.
69
+ playwright_text: uses native Playwright to fetch text from the URL.
70
+ js_text: uses Playwright and JavaScript evaluation to fetch text from the URL.
71
+ playwright_html: uses native Playwright to fetch HTML from the URL and then parse it to text using beautiful soup.
72
+ js_html: uses Playwright and JavaScript evaluation to fetch HTML from the URL and then parse it to text using beautiful soup.
73
+ playwright_copypaste: uses native Playwright to fetch text from the URL by copying and pasting the text from rendered page using clipboard.
74
+ :param llm_query: string, the question to ask the LLM about the text content that is returned from the search query or the URL.
75
+ :param llm_post_instructions: string, additional instructions for the LLM on how to answer; they are placed in the prompt after the llm_query.
57
76
  :param number_of_top_links: integer, the number of top links to fetch content from.
58
77
  :param number_of_characters_per_link: integer, the number of characters to fetch from each link.
59
78
  :param temperature: float, the temperature parameter for the LLM.
@@ -63,22 +82,31 @@ class GoogleLLM:
63
82
  :return: string, the answer by LLM to the question.
64
83
  """
65
84
 
66
- # Search Google for links related to the query
67
- links, search_error = google_custom_search.search_google(
68
- query=search_query, api_key=self.search_api_key, search_engine_id=self.search_engine_id)
69
-
70
- if search_error:
71
- raise GoogleCustomSearchError(f"Error occurred when searching Google: {search_error}")
72
-
73
- # Get only the first X links to not overload the LLM.
74
- contents = scenarios.fetch_urls_content_in_threads(links[:number_of_top_links], number_of_characters_per_link)
85
+ # Check if the search query is a URL.
86
+ if urls.is_valid_url(search_query_or_url):
87
+ # Fetch content from the URL
88
+ contents = scenarios.fetch_urls_content_in_threads(
89
+ urls=[search_query_or_url], number_of_characters_per_link=number_of_characters_per_link,
90
+ text_fetch_method=text_fetch_method)
91
+ # If not a URL, Search Google for links related to the query
92
+ else:
93
+ links, search_error = google_custom_search.search_google(
94
+ query=search_query_or_url, api_key=self.search_api_key, search_engine_id=self.search_engine_id)
95
+
96
+ if search_error:
97
+ raise GoogleCustomSearchError(f"Error occurred when searching Google: {search_error}")
98
+
99
+ # Get only the first X links to not overload the LLM.
100
+ contents = scenarios.fetch_urls_content_in_threads(
101
+ urls=links[:number_of_top_links], number_of_characters_per_link=number_of_characters_per_link,
102
+ text_fetch_method=text_fetch_method)
75
103
 
76
104
  combined_content = ""
77
105
  for content in contents:
78
106
  combined_content += f'{content}\n\n\n\n================================================================'
79
107
 
80
- final_question = (f'Answer this question: {search_query}\n\n'
81
- f'Follow these instructions: {additional_llm_instructions}\n\n'
108
+ final_question = (f'Answer this question: {llm_query}\n\n'
109
+ f'Follow these instructions: {llm_post_instructions}\n\n'
82
110
  f'Based on these data contents:\n\n'
83
111
  f'{combined_content}')
84
112
 
@@ -4,9 +4,11 @@ For example: run playwright, navigate to URL, get text from a locator.
4
4
  """
5
5
 
6
6
  from concurrent.futures import ThreadPoolExecutor, as_completed
7
+ from typing import Literal
7
8
 
8
9
  from playwright.sync_api import sync_playwright
9
10
  from playwright.sync_api import TimeoutError as PlaywrightTimeoutError
11
+ from bs4 import BeautifulSoup
10
12
 
11
13
  from . import engine, base, combos
12
14
  from ...basics import threads, multiprocesses
@@ -148,7 +150,14 @@ def _get_page_content_in_process(
148
150
 
149
151
  def fetch_urls_content_in_threads(
150
152
  urls: list[str],
151
- number_of_characters_per_link: int
153
+ number_of_characters_per_link: int,
154
+ text_fetch_method: Literal[
155
+ 'playwright_text',
156
+ 'js_text',
157
+ 'playwright_html',
158
+ 'js_html',
159
+ 'playwright_copypaste'
160
+ ]
152
161
  ) -> list[str]:
153
162
  """ The function to fetch all URLs concurrently using threads """
154
163
  contents = []
@@ -156,7 +165,7 @@ def fetch_urls_content_in_threads(
156
165
  # Use ThreadPoolExecutor to run multiple threads
157
166
  with ThreadPoolExecutor() as executor:
158
167
  # Submit tasks for each URL
159
- future_to_url = {executor.submit(_fetch_content, url, number_of_characters_per_link): url for url in urls}
168
+ future_to_url = {executor.submit(_fetch_content, url, number_of_characters_per_link, text_fetch_method): url for url in urls}
160
169
 
161
170
  # Collect results as they complete
162
171
  for future in as_completed(future_to_url):
@@ -172,23 +181,62 @@ def fetch_urls_content_in_threads(
172
181
 
173
182
  def fetch_urls_content(
174
183
  urls: list[str],
175
- number_of_characters_per_link: int
184
+ number_of_characters_per_link: int,
185
+ text_fetch_method: Literal[
186
+ 'playwright_text',
187
+ 'js_text',
188
+ 'playwright_html',
189
+ 'js_html',
190
+ 'playwright_copypaste'
191
+ ],
176
192
  ) -> list[str]:
177
193
  """ The function to fetch all URLs sequentially, one at a time, without using threads """
178
194
  contents = []
179
195
 
180
196
  for url in urls:
181
- data = _fetch_content(url, number_of_characters_per_link)
197
+ data = _fetch_content(url, number_of_characters_per_link, text_fetch_method)
182
198
  contents.append(data)
183
199
 
184
200
  return contents
185
201
 
186
202
 
187
- def _fetch_content(url, number_of_characters_per_link, headless: bool = True):
203
+ def _fetch_content(
204
+ url,
205
+ number_of_characters_per_link,
206
+ text_fetch_method: Literal[
207
+ 'playwright_text',
208
+ 'js_text',
209
+ 'playwright_html',
210
+ 'js_html',
211
+ 'playwright_copypaste'
212
+ ],
213
+ headless: bool = True):
188
214
  """ Function to fetch content from a single URL using the synchronous Playwright API """
215
+
189
216
  with sync_playwright() as p:
190
- browser = p.chromium.launch(headless=headless)
191
- page = browser.new_page()
217
+ browser = p.chromium.launch(headless=headless) # Set headless=True if you don't want to see the browser
218
+
219
+ if text_fetch_method == "playwright_copypaste":
220
+ context = browser.new_context(permissions=["clipboard-read", "clipboard-write"])
221
+ else:
222
+ context = browser.new_context()
223
+
224
+ page = context.new_page()
225
+
226
+ # from playwright_stealth import stealth_sync
227
+ # stealth_sync(page)
228
+
229
+ # # Block specific script by URL or partial URL match
230
+ # def block_script(route):
231
+ # if "custom.js" in route.request.url:
232
+ # print(f"Blocking: {route.request.url}")
233
+ # route.abort() # Block the request
234
+ # else:
235
+ # route.continue_() # Allow other requests
236
+ #
237
+ # # Intercept and handle network requests
238
+ # page.route("**/*", block_script)
239
+
192
240
  page.goto(url)
193
241
 
194
242
  # Wait for the page to load using all possible methods, since there is no specific method
@@ -207,8 +255,32 @@ def _fetch_content(url, number_of_characters_per_link, headless: bool = True):
207
255
  except PlaywrightTimeoutError:
208
256
  break
209
257
 
210
- # Use JavaScript to extract only the visible text from the page
211
- text_content: str = page.evaluate("document.body.innerText")
258
+ if text_fetch_method == "playwright_text":
259
+ text_content = page.inner_text('body')
260
+ elif text_fetch_method == "js_text":
261
+ # Use JavaScript to extract only the visible text from the page
262
+ text_content: str = page.evaluate("document.body.innerText")
263
+ elif text_fetch_method == "playwright_html":
264
+ # Get the full HTML content of the page
265
+ html = page.content()
266
+ # Parse the HTML using BeautifulSoup and extract the text
267
+ soup = BeautifulSoup(html, 'html.parser')
268
+ text_content = soup.get_text()
269
+ elif text_fetch_method == "js_html":
270
+ # Use JavaScript to extract the full HTML from the page
271
+ html = page.evaluate('document.documentElement.outerHTML')
272
+ # Parse the HTML using BeautifulSoup and extract the text
273
+ soup = BeautifulSoup(html, 'html.parser')
274
+ text_content = soup.get_text()
275
+ elif text_fetch_method == "playwright_copypaste":
276
+ # Focus the page and simulate Ctrl+A and Ctrl+C
277
+ page.keyboard.press("Control+a") # Select all text
278
+ page.keyboard.press("Control+c") # Copy text to clipboard
279
+ # Retrieve copied text from the clipboard
280
+ text_content = page.evaluate("navigator.clipboard.readText()")
281
+ else:
282
+ raise ValueError(f"Invalid text_fetch_method: {text_fetch_method}")
283
+
212
284
  # text = page.evaluate('document.body.textContent')
213
285
  # text = page.eval_on_selector('body', 'element => element.innerText')
214
286
  # text = page.eval_on_selector('body', 'element => element.textContent')
@@ -217,8 +289,6 @@ def _fetch_content(url, number_of_characters_per_link, headless: bool = True):
217
289
 
218
290
  # text = page.evaluate('document.documentElement.innerText')
219
291
  # text = page.inner_text(':root')
220
- # html = page.content()
221
- # html = page.evaluate('document.documentElement.outerHTML')
222
292
 
223
293
  browser.close()
224
294
  # Return only the first X characters of the text content to not overload the LLM.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: atomicshop
3
- Version: 2.18.18
3
+ Version: 2.18.19
4
4
  Summary: Atomic functions and classes to make developer life easier
5
5
  Author: Denis Kras
6
6
  License: MIT License
@@ -1,4 +1,4 @@
1
- atomicshop/__init__.py,sha256=eQr9Fof-xo4eR9O81aZCwDxsSgyunNqvIrJeVUh7VjA,124
1
+ atomicshop/__init__.py,sha256=VnFf6mmsMm3Gl5pv70NpCxRaUMo5qq2TDFjlnEeDvJs,124
2
2
  atomicshop/_basics_temp.py,sha256=6cu2dd6r2dLrd1BRNcVDKTHlsHs_26Gpw8QS6v32lQ0,3699
3
3
  atomicshop/_create_pdf_demo.py,sha256=Yi-PGZuMg0RKvQmLqVeLIZYadqEZwUm-4A9JxBl_vYA,3713
4
4
  atomicshop/_patch_import.py,sha256=ENp55sKVJ0e6-4lBvZnpz9PQCt3Otbur7F6aXDlyje4,6334
@@ -177,8 +177,8 @@ atomicshop/startup/win/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3
177
177
  atomicshop/startup/win/startup_folder.py,sha256=2RZEyF-Mf8eWPlt_-OaoGKKnMs6YhELEzJZ376EI0E0,1891
178
178
  atomicshop/startup/win/task_scheduler.py,sha256=qALe-8sfthYxsdCViH2r8OsH3x-WauDqteg5RzElPdk,4348
179
179
  atomicshop/web_apis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
180
- atomicshop/web_apis/google_custom_search.py,sha256=evixI7y8JYyGwurRZH03nAWdD-417VFaNe1mAtuKPNA,1310
181
- atomicshop/web_apis/google_llm.py,sha256=UzZkPtyNA03g0xcb4vJ8imdjdNvyjUvmPUtxf9J9HnY,4898
180
+ atomicshop/web_apis/google_custom_search.py,sha256=R1BnUmBFWZIWkfizSRWoSYoZTdPEjLJ28F_sS2g1jGQ,1558
181
+ atomicshop/web_apis/google_llm.py,sha256=X_sG3leUvskPCPryN6YszDFih_X2Ne0OSMA3UbDMKIg,6741
182
182
  atomicshop/wrappers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
183
183
  atomicshop/wrappers/_process_wrapper_curl.py,sha256=XkZZXYl7D0Q6UfdWqy-18AvpU0yVp9i2BVD2qRcXlkk,841
184
184
  atomicshop/wrappers/_process_wrapper_tar.py,sha256=WUMZFKNrlG4nJP9tWZ51W7BR1j_pIjsjgyAStmWjRGs,655
@@ -275,7 +275,7 @@ atomicshop/wrappers/playwrightw/javascript.py,sha256=_bW7CAtm0Y8IHYrAalg5HpPFnk6
275
275
  atomicshop/wrappers/playwrightw/keyboard.py,sha256=zN3YddGO-qUkn6C0CRVFejP4cTuaUwXLDNFhFREjERY,422
276
276
  atomicshop/wrappers/playwrightw/locators.py,sha256=6wsLywZxDuii7mwv-zQsRbqQC8r7j96Bma5b5_7ZoVo,2411
277
277
  atomicshop/wrappers/playwrightw/mouse.py,sha256=-2FZbQtjgH7tdXWld6ZPGqlKFUdf5in0ujN0hewxa50,656
278
- atomicshop/wrappers/playwrightw/scenarios.py,sha256=HopJJ-caAHuXxH8kiJHtlcFSI-89Zx7Fc6caGPOHC2A,8786
278
+ atomicshop/wrappers/playwrightw/scenarios.py,sha256=Xvl1jUmQhd4l0MmOUgQKfgGleblIyE-qC3wuoyx16tU,11531
279
279
  atomicshop/wrappers/playwrightw/waits.py,sha256=PBFdz_PoM7Fo7O8hLqMrxNPzBEYgPoXwZceFFCGGeu8,7182
280
280
  atomicshop/wrappers/psutilw/cpus.py,sha256=w6LPBMINqS-T_X8vzdYkLS2Wzuve28Ydp_GafTCngrc,236
281
281
  atomicshop/wrappers/psutilw/disks.py,sha256=3ZSVoommKH1TWo37j_83frB-NqXF4Nf5q5mBCX8G4jE,9221
@@ -320,8 +320,8 @@ atomicshop/wrappers/socketw/statistics_csv.py,sha256=fgMzDXI0cybwUEqAxprRmY3lqbh
320
320
  atomicshop/wrappers/winregw/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
321
321
  atomicshop/wrappers/winregw/winreg_installed_software.py,sha256=Qzmyktvob1qp6Tjk2DjLfAqr_yXV0sgWzdMW_9kwNjY,2345
322
322
  atomicshop/wrappers/winregw/winreg_network.py,sha256=AENV88H1qDidrcpyM9OwEZxX5svfi-Jb4N6FkS1xtqA,8851
323
- atomicshop-2.18.18.dist-info/LICENSE.txt,sha256=lLU7EYycfYcK2NR_1gfnhnRC8b8ccOTElACYplgZN88,1094
324
- atomicshop-2.18.18.dist-info/METADATA,sha256=8vRMxwI466-ZxRsHeJJLik4TemN7DUHdwI1gf2JmBd0,10577
325
- atomicshop-2.18.18.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
326
- atomicshop-2.18.18.dist-info/top_level.txt,sha256=EgKJB-7xcrAPeqTRF2laD_Np2gNGYkJkd4OyXqpJphA,11
327
- atomicshop-2.18.18.dist-info/RECORD,,
323
+ atomicshop-2.18.19.dist-info/LICENSE.txt,sha256=lLU7EYycfYcK2NR_1gfnhnRC8b8ccOTElACYplgZN88,1094
324
+ atomicshop-2.18.19.dist-info/METADATA,sha256=laKTJjYAM6iOz-IeezdJ7sYHK8LFfJX1Zuc3Ru-Ktkg,10577
325
+ atomicshop-2.18.19.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
326
+ atomicshop-2.18.19.dist-info/top_level.txt,sha256=EgKJB-7xcrAPeqTRF2laD_Np2gNGYkJkd4OyXqpJphA,11
327
+ atomicshop-2.18.19.dist-info/RECORD,,