abstract-webtools 0.1.6.59__tar.gz → 0.1.6.61__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43):
  1. {abstract_webtools-0.1.6.59 → abstract_webtools-0.1.6.61}/PKG-INFO +1 -1
  2. {abstract_webtools-0.1.6.59 → abstract_webtools-0.1.6.61}/setup.py +1 -1
  3. {abstract_webtools-0.1.6.59 → abstract_webtools-0.1.6.61}/src/abstract_webtools/abstract_usurpit.py +17 -17
  4. {abstract_webtools-0.1.6.59 → abstract_webtools-0.1.6.61}/src/abstract_webtools.egg-info/PKG-INFO +1 -1
  5. {abstract_webtools-0.1.6.59 → abstract_webtools-0.1.6.61}/README.md +0 -0
  6. {abstract_webtools-0.1.6.59 → abstract_webtools-0.1.6.61}/pyproject.toml +0 -0
  7. {abstract_webtools-0.1.6.59 → abstract_webtools-0.1.6.61}/setup.cfg +0 -0
  8. {abstract_webtools-0.1.6.59 → abstract_webtools-0.1.6.61}/src/abstract_webtools/__init__.py +0 -0
  9. {abstract_webtools-0.1.6.59 → abstract_webtools-0.1.6.61}/src/abstract_webtools/abstract_webtools.py +0 -0
  10. {abstract_webtools-0.1.6.59 → abstract_webtools-0.1.6.61}/src/abstract_webtools/big_user_agent_list.py +0 -0
  11. {abstract_webtools-0.1.6.59 → abstract_webtools-0.1.6.61}/src/abstract_webtools/main.py +0 -0
  12. {abstract_webtools-0.1.6.59 → abstract_webtools-0.1.6.61}/src/abstract_webtools/managers/__init__.py +0 -0
  13. {abstract_webtools-0.1.6.59 → abstract_webtools-0.1.6.61}/src/abstract_webtools/managers/cipherManager.py +0 -0
  14. {abstract_webtools-0.1.6.59 → abstract_webtools-0.1.6.61}/src/abstract_webtools/managers/crawlManager.py +0 -0
  15. {abstract_webtools-0.1.6.59 → abstract_webtools-0.1.6.61}/src/abstract_webtools/managers/crawlmgr2.py +0 -0
  16. {abstract_webtools-0.1.6.59 → abstract_webtools-0.1.6.61}/src/abstract_webtools/managers/curlMgr.py +0 -0
  17. {abstract_webtools-0.1.6.59 → abstract_webtools-0.1.6.61}/src/abstract_webtools/managers/domainManager.py +0 -0
  18. {abstract_webtools-0.1.6.59 → abstract_webtools-0.1.6.61}/src/abstract_webtools/managers/dynamicRateLimiter.py +0 -0
  19. {abstract_webtools-0.1.6.59 → abstract_webtools-0.1.6.61}/src/abstract_webtools/managers/get_test.py +0 -0
  20. {abstract_webtools-0.1.6.59 → abstract_webtools-0.1.6.61}/src/abstract_webtools/managers/linkManager/__init__.py +0 -0
  21. {abstract_webtools-0.1.6.59 → abstract_webtools-0.1.6.61}/src/abstract_webtools/managers/linkManager/linkManager.py +0 -0
  22. {abstract_webtools-0.1.6.59 → abstract_webtools-0.1.6.61}/src/abstract_webtools/managers/mySocketClient.py +0 -0
  23. {abstract_webtools-0.1.6.59 → abstract_webtools-0.1.6.61}/src/abstract_webtools/managers/networkManager.py +0 -0
  24. {abstract_webtools-0.1.6.59 → abstract_webtools-0.1.6.61}/src/abstract_webtools/managers/requestManager/__init__.py +0 -0
  25. {abstract_webtools-0.1.6.59 → abstract_webtools-0.1.6.61}/src/abstract_webtools/managers/requestManager/requestManager.py +0 -0
  26. {abstract_webtools-0.1.6.59 → abstract_webtools-0.1.6.61}/src/abstract_webtools/managers/seleniumManager.py +0 -0
  27. {abstract_webtools-0.1.6.59 → abstract_webtools-0.1.6.61}/src/abstract_webtools/managers/soupManager/__init__.py +0 -0
  28. {abstract_webtools-0.1.6.59 → abstract_webtools-0.1.6.61}/src/abstract_webtools/managers/soupManager/asoueces.py +0 -0
  29. {abstract_webtools-0.1.6.59 → abstract_webtools-0.1.6.61}/src/abstract_webtools/managers/soupManager/soupManager.py +0 -0
  30. {abstract_webtools-0.1.6.59 → abstract_webtools-0.1.6.61}/src/abstract_webtools/managers/sslManager.py +0 -0
  31. {abstract_webtools-0.1.6.59 → abstract_webtools-0.1.6.61}/src/abstract_webtools/managers/tlsAdapter.py +0 -0
  32. {abstract_webtools-0.1.6.59 → abstract_webtools-0.1.6.61}/src/abstract_webtools/managers/urlManager/__init__.py +0 -0
  33. {abstract_webtools-0.1.6.59 → abstract_webtools-0.1.6.61}/src/abstract_webtools/managers/urlManager/urlManager.py +0 -0
  34. {abstract_webtools-0.1.6.59 → abstract_webtools-0.1.6.61}/src/abstract_webtools/managers/userAgentManager.py +0 -0
  35. {abstract_webtools-0.1.6.59 → abstract_webtools-0.1.6.61}/src/abstract_webtools/managers/videoDownloader.py +0 -0
  36. {abstract_webtools-0.1.6.59 → abstract_webtools-0.1.6.61}/src/abstract_webtools/managers/videoDownloader2.py +0 -0
  37. {abstract_webtools-0.1.6.59 → abstract_webtools-0.1.6.61}/src/abstract_webtools/soup_gui.py +0 -0
  38. {abstract_webtools-0.1.6.59 → abstract_webtools-0.1.6.61}/src/abstract_webtools/url_grabber.py +0 -0
  39. {abstract_webtools-0.1.6.59 → abstract_webtools-0.1.6.61}/src/abstract_webtools/url_grabber_new.py +0 -0
  40. {abstract_webtools-0.1.6.59 → abstract_webtools-0.1.6.61}/src/abstract_webtools.egg-info/SOURCES.txt +0 -0
  41. {abstract_webtools-0.1.6.59 → abstract_webtools-0.1.6.61}/src/abstract_webtools.egg-info/dependency_links.txt +0 -0
  42. {abstract_webtools-0.1.6.59 → abstract_webtools-0.1.6.61}/src/abstract_webtools.egg-info/requires.txt +0 -0
  43. {abstract_webtools-0.1.6.59 → abstract_webtools-0.1.6.61}/src/abstract_webtools.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: abstract_webtools
3
- Version: 0.1.6.59
3
+ Version: 0.1.6.61
4
4
  Summary: Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.
5
5
  Home-page: https://github.com/AbstractEndeavors/abstract_essentials/tree/main/abstract_webtools
6
6
  Author: putkoff
@@ -4,7 +4,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
4
4
  long_description = fh.read()
5
5
  setuptools.setup(
6
6
  name='abstract_webtools',
7
- version='0.1.6.59',
7
+ version='0.1.6.61',
8
8
  author='putkoff',
9
9
  author_email='partners@abstractendeavors.com',
10
10
  description='Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.',
@@ -5,7 +5,7 @@ import os
5
5
  import shutil
6
6
  import time
7
7
  from abstract_webtools import *
8
-
8
+ from abstract_utilities import *
9
9
 
10
10
  # Import your custom classes/functions
11
11
  # from your_module import linkManager, get_soup_mgr
@@ -53,12 +53,14 @@ def save_page(url, content,output_dir):
53
53
  with open(page_full_path, 'w', encoding='utf-8') as f:
54
54
  f.write(content)
55
55
  print(f"Saved page: {page_full_path}")
56
- def save_asset(asset_url, base_url,output_dir,downloaded_assets,session):
56
+ def save_asset(asset_url, base_url,output_dir,downloaded_assets=None,session=None):
57
57
  """
58
58
  Download and save assets like images, CSS, JS files.
59
59
  """
60
+ session=requests.Session()
61
+ downloaded_assets = downloaded_assets or set()
60
62
  asset_url = normalize_url(asset_url, base_url)
61
- if asset_url in downloaded_assets:
63
+ if asset_url in list(downloaded_assets):
62
64
  return
63
65
  downloaded_assets.add(asset_url)
64
66
 
@@ -102,18 +104,18 @@ class usurpManager():
102
104
  "Access-Control-Allow-Origin": "*"})
103
105
 
104
106
  def process_page(self,url, depth, base_domain):
105
- """
106
- Process a single page: download assets, save HTML, and crawl links.
107
- """
108
- print(url)
109
- if url in self.visited_pages or depth > self.MAX_DEPTH:
110
- return
111
- self.visited_pages.add(url)
112
-
113
- try:
107
+ """
108
+ Process a single page: download assets, save HTML, and crawl links.
109
+ """
110
+ print(url)
111
+ if url in self.visited_pages or depth > self.MAX_DEPTH:
112
+ return
113
+ self.visited_pages.add(url)
114
+
115
+
114
116
  # Fetch the page content
115
117
  response = self.session.get(url)
116
- response.raise_for_status()
118
+ #response.raise_for_status()
117
119
  content = response.text
118
120
 
119
121
  # Use your get_soup_mgr function to get the soup and attributes
@@ -144,14 +146,12 @@ class usurpManager():
144
146
  all_domains = link_mgr.find_all_domain()
145
147
 
146
148
  # Process each domain link
147
- for link_url in all_domains:
149
+ for link_url in make_list(all_domains):
148
150
  normalized_link = normalize_url(link_url, url)
149
151
  if is_valid_url(normalized_link, base_domain):
150
152
  time.sleep(self.WAIT_BETWEEN_REQUESTS)
151
153
  self.process_page(normalized_link, depth + 1, base_domain)
152
154
 
153
- except Exception as e:
154
- print(f"Failed to process page {url}: {e}")
155
155
 
156
156
  def main(self):
157
157
  # Ensure output directory exists
@@ -163,7 +163,7 @@ class usurpManager():
163
163
  self.process_page(self.BASE_URL, 0, base_domain)
164
164
  print("Website copying completed.")
165
165
  def test_download(url=None,directory=None):
166
- url=url or 'https://algassert.com/quantum/2016/01/07/Delayed-Choice-Quantum-Erasure.html'
166
+ url=url or 'https://www.youtube.com/watch?v=jRGrNDV2mKc&list=RDMMjRGrNDV2mKc&start_radio=1'
167
167
  output_dir= directory or os.path.join(os.getcwd(),'testit')
168
168
  os.makedirs(output_dir,exist_ok=True)
169
169
  site_mgr = usurpManager(url,output_dir)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: abstract_webtools
3
- Version: 0.1.6.59
3
+ Version: 0.1.6.61
4
4
  Summary: Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.
5
5
  Home-page: https://github.com/AbstractEndeavors/abstract_essentials/tree/main/abstract_webtools
6
6
  Author: putkoff