abstract-webtools 0.1.6.16__py3-none-any.whl → 0.1.6.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -52,7 +52,7 @@ class crawlManager:
52
52
  """Finds all internal links on the website that belong to the same domain."""
53
53
  all_urls = [self.url_mgr.url]
54
54
  domain = self.url_mgr.domain
55
- all_attribs = get_all_attribute_values(self.url_mgr.url)
55
+ all_attribs = self.extract_links_from_url(self.url_mgr.url)
56
56
 
57
57
  for href in all_attribs.get('href', []):
58
58
  if not href or not self.url_mgr.is_valid_url(href):
@@ -118,8 +118,9 @@ class crawlManager:
118
118
 
119
119
  return meta_info
120
120
 
121
- def generate_sitemap(self):
121
+ def generate_sitemap(self,url=None):
122
122
  """Generates a sitemap.xml file with URLs, images, change frequency, and priority."""
123
+ url = url or self.url
123
124
  urls = self.get_all_website_links()
124
125
  with open('sitemap.xml', 'w', encoding='utf-8') as f:
125
126
  f.write('<?xml version="1.0" encoding="UTF-8"?>\n')
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: abstract_webtools
3
- Version: 0.1.6.16
3
+ Version: 0.1.6.18
4
4
  Summary: Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.
5
5
  Home-page: https://github.com/AbstractEndeavors/abstract_essentials/tree/main/abstract_webtools
6
6
  Author: putkoff
@@ -7,7 +7,7 @@ abstract_webtools/url_grabber.py,sha256=pnCCev7ZIuM-6cAGTLmK5HfzZg_AX-fLcRpB6ZE7
7
7
  abstract_webtools/url_grabber_new.py,sha256=Oh2Kc0gBScCo0xpopNsg8JE5lIbPuzZVKM5f5GoZmw0,3454
8
8
  abstract_webtools/managers/__init__.py,sha256=5aIpbdUsDWTrhPUAjfIKnG54OULqOKan9LBL5EIUllo,407
9
9
  abstract_webtools/managers/cipherManager.py,sha256=NHQGdR11eNSm-1H-GezD5dyQgsPTJwY5kczt8Sher2s,1621
10
- abstract_webtools/managers/crawlManager.py,sha256=VN57a7fOjAHGbRrqcz4KVfDxFdHgWUMTR88_JcMLBhU,7869
10
+ abstract_webtools/managers/crawlManager.py,sha256=fp8Jne0GUUi-pw2Pwd1obUGZHfft8dnqPHtrjPXpobM,7911
11
11
  abstract_webtools/managers/domainManager.py,sha256=95znOBv05W77mW_fbZAfl4RmlENDlYqhEOMkL02L220,3610
12
12
  abstract_webtools/managers/dynamicRateLimiter.py,sha256=gopQcQo50JG2D0KcyepNCIQ_1uDQEBIHBzWf4R2Wgy0,7617
13
13
  abstract_webtools/managers/linkManager.py,sha256=m6y9s8jknrTX8RtOAFKeHd4yd23G7Rgf0T7Sp7wmHUw,12180
@@ -21,8 +21,8 @@ abstract_webtools/managers/tlsAdapter.py,sha256=XZSMZz9EUOhv-h3_Waf6mjV1dA3oN_M_
21
21
  abstract_webtools/managers/urlManager.py,sha256=Dvf-TiSo5j_YjZS2Eq6lFfbhveneD6NA_wEE0xUXy_E,8858
22
22
  abstract_webtools/managers/userAgentManager.py,sha256=33SB2p2FG7EYZl7l2iYm1U4gI9PcdkGTZHw5lg_Ogrw,1653
23
23
  abstract_webtools/managers/videoDownloader.py,sha256=6G_aLc05BTMUYUWc7iqYtHF_BaR7DnCNK_NJ-QnjsYY,10531
24
- abstract_webtools-0.1.6.16.dist-info/LICENSE,sha256=g3WEJFiVS27HyCGRTwKSsMLyciMaGFdWcZGOe1QalZk,3877
25
- abstract_webtools-0.1.6.16.dist-info/METADATA,sha256=E3Z9_JiEo0cYQ4uXnuvpeVtiTXFasutqkWFu17BdWmI,15858
26
- abstract_webtools-0.1.6.16.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
27
- abstract_webtools-0.1.6.16.dist-info/top_level.txt,sha256=2DMJ7RmjTcjCsa-uwAV0K6eXXlIIkFDEjBLg_uyCmCI,18
28
- abstract_webtools-0.1.6.16.dist-info/RECORD,,
24
+ abstract_webtools-0.1.6.18.dist-info/LICENSE,sha256=g3WEJFiVS27HyCGRTwKSsMLyciMaGFdWcZGOe1QalZk,3877
25
+ abstract_webtools-0.1.6.18.dist-info/METADATA,sha256=6zwrJjOURm7sAjpxCvRgRFlw7iAISxIVQWv5wbl_xyc,15858
26
+ abstract_webtools-0.1.6.18.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
27
+ abstract_webtools-0.1.6.18.dist-info/top_level.txt,sha256=2DMJ7RmjTcjCsa-uwAV0K6eXXlIIkFDEjBLg_uyCmCI,18
28
+ abstract_webtools-0.1.6.18.dist-info/RECORD,,