PyPI - abstract-webtools - Versions diffs - 0.1.6.60__py3-none-any.whl → 0.1.6.61__py3-none-any.whl - Mend

abstract-webtools 0.1.6.60__py3-none-any.whl → 0.1.6.61__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

abstract_webtools/abstract_usurpit.py CHANGED Viewed

@@ -53,12 +53,14 @@ def save_page(url, content,output_dir):
     with open(page_full_path, 'w', encoding='utf-8') as f:
         f.write(content)
     print(f"Saved page: {page_full_path}")
-def save_asset(asset_url, base_url,output_dir,downloaded_assets,session):
+def save_asset(asset_url, base_url,output_dir,downloaded_assets=None,session=None):
     """
     Download and save assets like images, CSS, JS files.
     """
+    session=requests.Session()
+    downloaded_assets = downloaded_assets or set()
     asset_url = normalize_url(asset_url, base_url)
-    if asset_url in downloaded_assets:
+    if asset_url in list(downloaded_assets):
         return
     downloaded_assets.add(asset_url)
@@ -102,18 +104,18 @@ class usurpManager():
             "Access-Control-Allow-Origin": "*"})
     def process_page(self,url, depth, base_domain):
-        """
-        Process a single page: download assets, save HTML, and crawl links.
-        """
-        print(url)
-        if url in self.visited_pages or depth > self.MAX_DEPTH:
-            return
-        self.visited_pages.add(url)
-        try:
+            """
+            Process a single page: download assets, save HTML, and crawl links.
+            """
+            print(url)
+            if url in self.visited_pages or depth > self.MAX_DEPTH:
+                return
+            self.visited_pages.add(url)
             # Fetch the page content
             response = self.session.get(url)
-            response.raise_for_status()
+            #response.raise_for_status()
             content = response.text
             # Use your get_soup_mgr function to get the soup and attributes
@@ -150,8 +152,6 @@ class usurpManager():
                     time.sleep(self.WAIT_BETWEEN_REQUESTS)
                     self.process_page(normalized_link, depth + 1, base_domain)
-        except Exception as e:
-            print(f"Failed to process page {url}: {e}")
     def main(self):
         # Ensure output directory exists
@@ -163,7 +163,7 @@ class usurpManager():
         self.process_page(self.BASE_URL, 0, base_domain)
         print("Website copying completed.")
 def test_download(url=None,directory=None):
-    url=url or 'https://algassert.com/quantum/2016/01/07/Delayed-Choice-Quantum-Erasure.html'
+    url=url or 'https://www.youtube.com/watch?v=jRGrNDV2mKc&list=RDMMjRGrNDV2mKc&start_radio=1'
     output_dir= directory or os.path.join(os.getcwd(),'testit')
     os.makedirs(output_dir,exist_ok=True)
     site_mgr = usurpManager(url,output_dir)

{abstract_webtools-0.1.6.60.dist-info → abstract_webtools-0.1.6.61.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: abstract_webtools
-Version: 0.1.6.60
+Version: 0.1.6.61
 Summary: Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.
 Home-page: https://github.com/AbstractEndeavors/abstract_essentials/tree/main/abstract_webtools
 Author: putkoff

{abstract_webtools-0.1.6.60.dist-info → abstract_webtools-0.1.6.61.dist-info}/RECORD RENAMED Viewed

@@ -1,6 +1,6 @@
 abstract_webtools/__init__.py,sha256=zNMp-9f0Q6BXWxR-tgHrEqKP8GeXw9z7VYzbqIeEydo,132
 abstract_webtools/abstract_userpit.py,sha256=Rg_0Orx79rxqEePt6Sf-evGslPq5KLlTiL-P2w1u6ng,6462
-abstract_webtools/abstract_usurpit.py,sha256=AAqelbqntBsRZUxPJ0XiGO4xqsmR-y-LbQwn8sDevPo,7131
+abstract_webtools/abstract_usurpit.py,sha256=7PDUb5LNETjvU1rhfJaToKLIKmSXRkcJAmM4wOX7PsQ,7170
 abstract_webtools/abstract_webtools.py,sha256=3NzGmJlZvrdVtEcUi2K5iUgWr1822IBPhIN9us2e2t0,3859
 abstract_webtools/big_user_agent_list.py,sha256=5ZkrUWmfzYL5yaULREslh9ZiRQeITbSjqZlp2KQON3w,131923
 abstract_webtools/main.py,sha256=_I7pPXPkoLZOoYGLQDrSLGhGuQt6-PVyXEHZSmglk2g,1329
@@ -37,7 +37,7 @@ abstract_webtools/managers/soupManager/soupManager.py,sha256=U3_o189-OWoBRaSCe2s
 abstract_webtools/managers/urlManager/__init__.py,sha256=gaJCHeK91Z-eYsBnxgdhbIUten1-gbx-zqx70R6ag-Y,26
 abstract_webtools/managers/urlManager/urlManager.py,sha256=vCFuLADmv3h7icaaoAsImGqb_49VizPY_ZvMl-C7PYk,7756
 abstract_webtools/managers/videos/Heather brooke swallo from condom.mp4,sha256=h-bKFLAHt7pGLGu4EcMvSSox7BPRK0Nga3u813iMVKQ,8335544
-abstract_webtools-0.1.6.60.dist-info/METADATA,sha256=eOy8Kvm6ipb8jVKRoEyTLtaZrrHSxwYb1tHQP7_PJb0,16029
-abstract_webtools-0.1.6.60.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
-abstract_webtools-0.1.6.60.dist-info/top_level.txt,sha256=2DMJ7RmjTcjCsa-uwAV0K6eXXlIIkFDEjBLg_uyCmCI,18
-abstract_webtools-0.1.6.60.dist-info/RECORD,,
+abstract_webtools-0.1.6.61.dist-info/METADATA,sha256=QENrp5W8V0PoOXOrU0YEi093XXyaoQRmBMJMXmVko2k,16029
+abstract_webtools-0.1.6.61.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
+abstract_webtools-0.1.6.61.dist-info/top_level.txt,sha256=2DMJ7RmjTcjCsa-uwAV0K6eXXlIIkFDEjBLg_uyCmCI,18
+abstract_webtools-0.1.6.61.dist-info/RECORD,,

{abstract_webtools-0.1.6.60.dist-info → abstract_webtools-0.1.6.61.dist-info}/WHEEL RENAMED Viewed

File without changes

{abstract_webtools-0.1.6.60.dist-info → abstract_webtools-0.1.6.61.dist-info}/top_level.txt RENAMED Viewed

File without changes

abstract-webtools 0.1.6.60__py3-none-any.whl → 0.1.6.61__py3-none-any.whl

abstract-webtools 0.1.6.60__py3-none-any.whl → 0.1.6.61__py3-none-any.whl