abstract-webtools 0.1.6.60__py3-none-any.whl → 0.1.6.62__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -53,12 +53,14 @@ def save_page(url, content,output_dir):
53
53
  with open(page_full_path, 'w', encoding='utf-8') as f:
54
54
  f.write(content)
55
55
  print(f"Saved page: {page_full_path}")
56
- def save_asset(asset_url, base_url,output_dir,downloaded_assets,session):
56
+ def save_asset(asset_url, base_url,output_dir,downloaded_assets=None,session=None):
57
57
  """
58
58
  Download and save assets like images, CSS, JS files.
59
59
  """
60
+ session=requests.Session()
61
+ downloaded_assets = downloaded_assets or set()
60
62
  asset_url = normalize_url(asset_url, base_url)
61
- if asset_url in downloaded_assets:
63
+ if asset_url in list(downloaded_assets):
62
64
  return
63
65
  downloaded_assets.add(asset_url)
64
66
 
@@ -102,18 +104,18 @@ class usurpManager():
102
104
  "Access-Control-Allow-Origin": "*"})
103
105
 
104
106
  def process_page(self,url, depth, base_domain):
105
- """
106
- Process a single page: download assets, save HTML, and crawl links.
107
- """
108
- print(url)
109
- if url in self.visited_pages or depth > self.MAX_DEPTH:
110
- return
111
- self.visited_pages.add(url)
112
-
113
- try:
107
+ """
108
+ Process a single page: download assets, save HTML, and crawl links.
109
+ """
110
+ print(url)
111
+ if url in self.visited_pages or depth > self.MAX_DEPTH:
112
+ return
113
+ self.visited_pages.add(url)
114
+
115
+
114
116
  # Fetch the page content
115
117
  response = self.session.get(url)
116
- response.raise_for_status()
118
+ #response.raise_for_status()
117
119
  content = response.text
118
120
 
119
121
  # Use your get_soup_mgr function to get the soup and attributes
@@ -150,8 +152,6 @@ class usurpManager():
150
152
  time.sleep(self.WAIT_BETWEEN_REQUESTS)
151
153
  self.process_page(normalized_link, depth + 1, base_domain)
152
154
 
153
- except Exception as e:
154
- print(f"Failed to process page {url}: {e}")
155
155
 
156
156
  def main(self):
157
157
  # Ensure output directory exists
@@ -163,7 +163,7 @@ class usurpManager():
163
163
  self.process_page(self.BASE_URL, 0, base_domain)
164
164
  print("Website copying completed.")
165
165
  def test_download(url=None,directory=None):
166
- url=url or 'https://algassert.com/quantum/2016/01/07/Delayed-Choice-Quantum-Erasure.html'
166
+ url=url or 'https://www.youtube.com/watch?v=jRGrNDV2mKc&list=RDMMjRGrNDV2mKc&start_radio=1'
167
167
  output_dir= directory or os.path.join(os.getcwd(),'testit')
168
168
  os.makedirs(output_dir,exist_ok=True)
169
169
  site_mgr = usurpManager(url,output_dir)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: abstract_webtools
3
- Version: 0.1.6.60
3
+ Version: 0.1.6.62
4
4
  Summary: Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.
5
5
  Home-page: https://github.com/AbstractEndeavors/abstract_essentials/tree/main/abstract_webtools
6
6
  Author: putkoff
@@ -1,6 +1,6 @@
1
1
  abstract_webtools/__init__.py,sha256=zNMp-9f0Q6BXWxR-tgHrEqKP8GeXw9z7VYzbqIeEydo,132
2
2
  abstract_webtools/abstract_userpit.py,sha256=Rg_0Orx79rxqEePt6Sf-evGslPq5KLlTiL-P2w1u6ng,6462
3
- abstract_webtools/abstract_usurpit.py,sha256=AAqelbqntBsRZUxPJ0XiGO4xqsmR-y-LbQwn8sDevPo,7131
3
+ abstract_webtools/abstract_usurpit.py,sha256=7PDUb5LNETjvU1rhfJaToKLIKmSXRkcJAmM4wOX7PsQ,7170
4
4
  abstract_webtools/abstract_webtools.py,sha256=3NzGmJlZvrdVtEcUi2K5iUgWr1822IBPhIN9us2e2t0,3859
5
5
  abstract_webtools/big_user_agent_list.py,sha256=5ZkrUWmfzYL5yaULREslh9ZiRQeITbSjqZlp2KQON3w,131923
6
6
  abstract_webtools/main.py,sha256=_I7pPXPkoLZOoYGLQDrSLGhGuQt6-PVyXEHZSmglk2g,1329
@@ -37,7 +37,7 @@ abstract_webtools/managers/soupManager/soupManager.py,sha256=U3_o189-OWoBRaSCe2s
37
37
  abstract_webtools/managers/urlManager/__init__.py,sha256=gaJCHeK91Z-eYsBnxgdhbIUten1-gbx-zqx70R6ag-Y,26
38
38
  abstract_webtools/managers/urlManager/urlManager.py,sha256=vCFuLADmv3h7icaaoAsImGqb_49VizPY_ZvMl-C7PYk,7756
39
39
  abstract_webtools/managers/videos/Heather brooke swallo from condom.mp4,sha256=h-bKFLAHt7pGLGu4EcMvSSox7BPRK0Nga3u813iMVKQ,8335544
40
- abstract_webtools-0.1.6.60.dist-info/METADATA,sha256=eOy8Kvm6ipb8jVKRoEyTLtaZrrHSxwYb1tHQP7_PJb0,16029
41
- abstract_webtools-0.1.6.60.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
42
- abstract_webtools-0.1.6.60.dist-info/top_level.txt,sha256=2DMJ7RmjTcjCsa-uwAV0K6eXXlIIkFDEjBLg_uyCmCI,18
43
- abstract_webtools-0.1.6.60.dist-info/RECORD,,
40
+ abstract_webtools-0.1.6.62.dist-info/METADATA,sha256=4cXNFpklULNRdmmGoI0kw6KQm9IcokPKehfHwScitCc,16029
41
+ abstract_webtools-0.1.6.62.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
42
+ abstract_webtools-0.1.6.62.dist-info/top_level.txt,sha256=2DMJ7RmjTcjCsa-uwAV0K6eXXlIIkFDEjBLg_uyCmCI,18
43
+ abstract_webtools-0.1.6.62.dist-info/RECORD,,