PyPI - abstract-webtools - Versions diffs - 0.1.6.59__tar.gz → 0.1.6.61__tar.gz - Mend

abstract-webtools 0.1.6.59.tar.gz → 0.1.6.61.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43) hide show

{abstract_webtools-0.1.6.59 → abstract_webtools-0.1.6.61}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: abstract_webtools
-Version: 0.1.6.59
+Version: 0.1.6.61
 Summary: Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.
 Home-page: https://github.com/AbstractEndeavors/abstract_essentials/tree/main/abstract_webtools
 Author: putkoff

{abstract_webtools-0.1.6.59 → abstract_webtools-0.1.6.61}/setup.py RENAMED Viewed

@@ -4,7 +4,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
     long_description = fh.read()
 setuptools.setup(
     name='abstract_webtools',
-    version='0.1.6.59',
+    version='0.1.6.61',
     author='putkoff',
     author_email='partners@abstractendeavors.com',
     description='Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.',

{abstract_webtools-0.1.6.59 → abstract_webtools-0.1.6.61}/src/abstract_webtools/abstract_usurpit.py RENAMED Viewed

@@ -5,7 +5,7 @@ import os
 import shutil
 import time
 from abstract_webtools import *
+from abstract_utilities import *
 # Import your custom classes/functions
 # from your_module import linkManager, get_soup_mgr
@@ -53,12 +53,14 @@ def save_page(url, content,output_dir):
     with open(page_full_path, 'w', encoding='utf-8') as f:
         f.write(content)
     print(f"Saved page: {page_full_path}")
-def save_asset(asset_url, base_url,output_dir,downloaded_assets,session):
+def save_asset(asset_url, base_url,output_dir,downloaded_assets=None,session=None):
     """
     Download and save assets like images, CSS, JS files.
     """
+    session=requests.Session()
+    downloaded_assets = downloaded_assets or set()
     asset_url = normalize_url(asset_url, base_url)
-    if asset_url in downloaded_assets:
+    if asset_url in list(downloaded_assets):
         return
     downloaded_assets.add(asset_url)
@@ -102,18 +104,18 @@ class usurpManager():
             "Access-Control-Allow-Origin": "*"})
     def process_page(self,url, depth, base_domain):
-        """
-        Process a single page: download assets, save HTML, and crawl links.
-        """
-        print(url)
-        if url in self.visited_pages or depth > self.MAX_DEPTH:
-            return
-        self.visited_pages.add(url)
-        try:
+            """
+            Process a single page: download assets, save HTML, and crawl links.
+            """
+            print(url)
+            if url in self.visited_pages or depth > self.MAX_DEPTH:
+                return
+            self.visited_pages.add(url)
             # Fetch the page content
             response = self.session.get(url)
-            response.raise_for_status()
+            #response.raise_for_status()
             content = response.text
             # Use your get_soup_mgr function to get the soup and attributes
@@ -144,14 +146,12 @@ class usurpManager():
             all_domains = link_mgr.find_all_domain()
             # Process each domain link
-            for link_url in all_domains:
+            for link_url in make_list(all_domains):
                 normalized_link = normalize_url(link_url, url)
                 if is_valid_url(normalized_link, base_domain):
                     time.sleep(self.WAIT_BETWEEN_REQUESTS)
                     self.process_page(normalized_link, depth + 1, base_domain)
-        except Exception as e:
-            print(f"Failed to process page {url}: {e}")
     def main(self):
         # Ensure output directory exists
@@ -163,7 +163,7 @@ class usurpManager():
         self.process_page(self.BASE_URL, 0, base_domain)
         print("Website copying completed.")
 def test_download(url=None,directory=None):
-    url=url or 'https://algassert.com/quantum/2016/01/07/Delayed-Choice-Quantum-Erasure.html'
+    url=url or 'https://www.youtube.com/watch?v=jRGrNDV2mKc&list=RDMMjRGrNDV2mKc&start_radio=1'
     output_dir= directory or os.path.join(os.getcwd(),'testit')
     os.makedirs(output_dir,exist_ok=True)
     site_mgr = usurpManager(url,output_dir)

{abstract_webtools-0.1.6.59 → abstract_webtools-0.1.6.61}/src/abstract_webtools.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: abstract_webtools
-Version: 0.1.6.59
+Version: 0.1.6.61
 Summary: Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.
 Home-page: https://github.com/AbstractEndeavors/abstract_essentials/tree/main/abstract_webtools
 Author: putkoff