abstract-webtools 0.1.6.15__py3-none-any.whl → 0.1.6.17__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- abstract_webtools/managers/crawlManager.py +4 -4
- abstract_webtools/managers/urlManager.py +19 -14
- {abstract_webtools-0.1.6.15.dist-info → abstract_webtools-0.1.6.17.dist-info}/METADATA +1 -1
- {abstract_webtools-0.1.6.15.dist-info → abstract_webtools-0.1.6.17.dist-info}/RECORD +7 -7
- {abstract_webtools-0.1.6.15.dist-info → abstract_webtools-0.1.6.17.dist-info}/LICENSE +0 -0
- {abstract_webtools-0.1.6.15.dist-info → abstract_webtools-0.1.6.17.dist-info}/WHEEL +0 -0
- {abstract_webtools-0.1.6.15.dist-info → abstract_webtools-0.1.6.17.dist-info}/top_level.txt +0 -0
abstract_webtools/managers/crawlManager.py

```diff
@@ -15,9 +15,8 @@ class crawlManager:
 
     def get_new_source_and_url(self, url=None):
         """Fetches new source code and response for a given URL."""
-
-
-        self.req_mgr.set_url(url)
+        url = url
+        self.req_mgr = get_req_mgr(url=url)
         self.source_code = self.req_mgr.source_code
         self.response = self.req_mgr.response
 
```
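The practical effect is that crawlManager no longer mutates its existing request manager in place; it builds a fresh one with get_req_mgr(url=url) on every call (the added `url = url` line is a leftover no-op from the edit). A minimal usage sketch, assuming the constructor takes a url keyword and that get_req_mgr returns an object exposing source_code and response; neither is confirmed by package documentation:

```python
from abstract_webtools.managers.crawlManager import crawlManager

# Constructor signature and the get_req_mgr return shape are assumptions
# taken from the diff context above.
crawler = crawlManager(url="https://example.com")

# Re-point the crawler at another page: a fresh request manager is built
# and the cached source and response are refreshed in one call.
crawler.get_new_source_and_url(url="https://example.com/about")
print(crawler.response)     # response from the rebuilt request manager
print(crawler.source_code)  # page source fetched for the new URL
```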
```diff
@@ -119,8 +118,9 @@ class crawlManager:
 
         return meta_info
 
-    def generate_sitemap(self):
+    def generate_sitemap(self,url=None):
         """Generates a sitemap.xml file with URLs, images, change frequency, and priority."""
+        url = url or self.url
         urls = self.get_all_website_links()
         with open('sitemap.xml', 'w', encoding='utf-8') as f:
             f.write('<?xml version="1.0" encoding="UTF-8"?>\n')
```
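generate_sitemap now takes an optional url that falls back to self.url, the same pattern applied throughout this release; the output path remains hard-coded to sitemap.xml in the working directory. A usage sketch under the same assumed constructor signature as above:

```python
from abstract_webtools.managers.crawlManager import crawlManager

crawler = crawlManager(url="https://example.com")   # signature assumed
crawler.generate_sitemap()                          # crawls crawler.url
crawler.generate_sitemap(url="https://example.com/blog")  # per-call override
# Both calls write sitemap.xml into the current working directory.
```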
abstract_webtools/managers/urlManager.py

```diff
@@ -41,8 +41,9 @@ class urlManager:
             url (str or None): The URL to manage (default is None).
             session (requests.Session): A custom requests session (default is the requests module's session).
         """
-
-        self.
+        url = url or 'www.example.com'
+        self._url=url
+        self.url = url
         self.session= session or requests
         self.clean_urls = self.clean_url(url=url)
         self.url = self.get_correct_url(clean_urls=self.clean_urls)
```
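The constructor now guarantees a non-None URL ('www.example.com' when none is given) and seeds the _url backing field before the url property is assigned, so the @url.setter shown further down always has a value to work with. A sketch of the resulting behavior, assuming the signature urlManager(url=None, session=None) implied by the docstring:

```python
from abstract_webtools.managers.urlManager import urlManager

mgr = urlManager()                         # no URL: falls back to 'www.example.com'
mgr2 = urlManager(url="example.com/path")  # signature assumed from the docstring

# After __init__, .url holds whichever candidate get_correct_url()
# selected from the clean_url() variants.
print(mgr2.url)
```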
```diff
@@ -68,8 +69,7 @@ class urlManager:
         Given a URL, return a list with potential URL versions including with and without 'www.',
         and with 'http://' and 'https://'.
         """
-
-        url=self.url
+        url = url or self.url
         urls=[]
         if url:
             # Remove http:// or https:// prefix
```
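Here the method stops ignoring its argument: `url = url or self.url` replaces the unconditional `url=self.url`, so callers can expand an arbitrary URL while still defaulting to the managed one. Per the docstring, the expansion covers the scheme and 'www.' variants; an illustration, where the exact order and formatting of the candidates are assumptions rather than confirmed output:

```python
from abstract_webtools.managers.urlManager import urlManager

# clean_urls is populated in __init__ via self.clean_url(url=url).
candidates = urlManager(url="example.com").clean_urls
# plausibly: ['https://example.com', 'https://www.example.com',
#             'http://example.com',  'http://www.example.com']
```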
```diff
@@ -134,7 +134,8 @@ class urlManager:
         self.url =self.correct_url
         self.protocol,self.domain,self.path,self.query=self.url_to_pieces(url=self.url)
         self.all_urls = []
-    def get_domain(self,url):
+    def get_domain(self,url=None):
+        url = url or self.url
         return urlparse(url).netloc
     def url_join(self,url,path):
         url = eatOuter(url,['/'])
```
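get_domain picks up the same optional-argument pattern. Since it is a thin wrapper over urlparse(url).netloc, its behavior is easy to predict:

```python
from abstract_webtools.managers.urlManager import urlManager

mgr = urlManager(url="https://example.com/a/b")
mgr.get_domain()                       # 'example.com', falls back to self.url
mgr.get_domain("https://other.org/x")  # 'other.org'
mgr.get_domain("no-scheme.com")        # '' : without a scheme, urlparse
                                       # puts the host in .path, not .netloc
```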
```diff
@@ -150,16 +151,17 @@ class urlManager:
     @url.setter
     def url(self, new_url):
         self._url = new_url
-
-    def is_valid_url(url):
+    def is_valid_url(self,url=None):
         """
         Check if the given URL is valid.
         """
+        url = url or self.url
         parsed = urlparse(url)
         return bool(parsed.netloc) and bool(parsed.scheme)
-
-    def make_valid(href,url):
+
+    def make_valid(self,href,url=None):
         def is_valid_url(url):
+            url = url or self.url
             """
             Check if the given URL is valid.
             """
```
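This fixes a genuine bug: is_valid_url and make_valid were previously defined inside the class without self, so calling them as instance methods would raise a TypeError (the instance itself was consumed as the first positional argument). Both are now proper methods with the self.url fallback, although the `url = url or self.url` line added inside make_valid's nested is_valid_url sits above that function's docstring, which is harmless but unconventional. Expected behavior, given the urlparse check shown in the hunk:

```python
from abstract_webtools.managers.urlManager import urlManager

mgr = urlManager(url="https://example.com")
mgr.is_valid_url()               # True: self.url has both scheme and netloc
mgr.is_valid_url("not a url")    # False: urlparse finds neither part
mgr.is_valid_url("example.com")  # False: no scheme, so netloc is empty
```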
```diff
@@ -171,24 +173,27 @@ class urlManager:
         if is_valid_url(new_link):
             return new_link
         return False
-
-    def get_relative_href(url,href):
+
+    def get_relative_href(self,url,href):
         # join the URL if it's relative (not an absolute link)
+        url = url or self.url
         href = urljoin(url, href)
         parsed_href = urlparse(href)
         # remove URL GET parameters, URL fragments, etc.
         href = parsed_href.scheme + "://" + parsed_href.netloc + parsed_href.path
         return href
-    def url_basename(url):
+    def url_basename(self,url=None):
+        url = url or self.url
         path = urllib.parse.urlparse(url).path
         return path.strip('/').split('/')[-1]
 
 
-    def base_url(url):
+    def base_url(self,url=None):
+        url = url or self.url
         return re.match(r'https?://[^?#]+/', url).group()
 
 
-    def urljoin(base, path):
+    def urljoin(self,base, path):
         if isinstance(path, bytes):
             path = path.decode()
         if not isinstance(path, str) or not path:
```
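The remaining helpers (get_relative_href, url_basename, base_url, and the class's own urljoin) get the same conversion to instance methods. Two wrinkles: get_relative_href keeps url as a required positional parameter even though it now also falls back to self.url, and the bare urljoin(url, href) call inside it still resolves to the module-level import rather than the new method. A short sketch of the converted helpers, assuming standard urllib.parse semantics:

```python
from abstract_webtools.managers.urlManager import urlManager

mgr = urlManager(url="https://example.com/docs/")

# Relative hrefs are joined against the page URL, then stripped of
# query string and fragment:
mgr.get_relative_href("https://example.com/docs/", "../img/logo.png?v=2")
# -> 'https://example.com/img/logo.png'

mgr.url_basename("https://example.com/a/b/c.html")  # -> 'c.html'
mgr.base_url("https://example.com/a/b?q=1")         # -> 'https://example.com/a/'
```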
{abstract_webtools-0.1.6.15.dist-info → abstract_webtools-0.1.6.17.dist-info}/METADATA

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: abstract_webtools
-Version: 0.1.6.15
+Version: 0.1.6.17
 Summary: Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.
 Home-page: https://github.com/AbstractEndeavors/abstract_essentials/tree/main/abstract_webtools
 Author: putkoff
```
{abstract_webtools-0.1.6.15.dist-info → abstract_webtools-0.1.6.17.dist-info}/RECORD

```diff
@@ -7,7 +7,7 @@ abstract_webtools/url_grabber.py,sha256=pnCCev7ZIuM-6cAGTLmK5HfzZg_AX-fLcRpB6ZE7
 abstract_webtools/url_grabber_new.py,sha256=Oh2Kc0gBScCo0xpopNsg8JE5lIbPuzZVKM5f5GoZmw0,3454
 abstract_webtools/managers/__init__.py,sha256=5aIpbdUsDWTrhPUAjfIKnG54OULqOKan9LBL5EIUllo,407
 abstract_webtools/managers/cipherManager.py,sha256=NHQGdR11eNSm-1H-GezD5dyQgsPTJwY5kczt8Sher2s,1621
-abstract_webtools/managers/crawlManager.py,sha256=
+abstract_webtools/managers/crawlManager.py,sha256=RVRWiceEKuLSPIWtPYGGz85zRxamwOOgsMLIhJBU14Q,7908
 abstract_webtools/managers/domainManager.py,sha256=95znOBv05W77mW_fbZAfl4RmlENDlYqhEOMkL02L220,3610
 abstract_webtools/managers/dynamicRateLimiter.py,sha256=gopQcQo50JG2D0KcyepNCIQ_1uDQEBIHBzWf4R2Wgy0,7617
 abstract_webtools/managers/linkManager.py,sha256=m6y9s8jknrTX8RtOAFKeHd4yd23G7Rgf0T7Sp7wmHUw,12180
```
```diff
@@ -18,11 +18,11 @@ abstract_webtools/managers/seleniumManager.py,sha256=qSY8gH3N5YJIMwE_Alj9HNQRip_
 abstract_webtools/managers/soupManager.py,sha256=7nDB_QKneGjyTZUzchfbdHNvxxYiTyIn8AHon8ObTSY,17148
 abstract_webtools/managers/sslManager.py,sha256=C-QgQw9CW84uOE5kx2MPjC3RsLbE2JQqdwdTs0H4ecc,1370
 abstract_webtools/managers/tlsAdapter.py,sha256=XZSMZz9EUOhv-h3_Waf6mjV1dA3oN_M_oWuoo4VZ_HE,1454
-abstract_webtools/managers/urlManager.py,sha256=
+abstract_webtools/managers/urlManager.py,sha256=Dvf-TiSo5j_YjZS2Eq6lFfbhveneD6NA_wEE0xUXy_E,8858
 abstract_webtools/managers/userAgentManager.py,sha256=33SB2p2FG7EYZl7l2iYm1U4gI9PcdkGTZHw5lg_Ogrw,1653
 abstract_webtools/managers/videoDownloader.py,sha256=6G_aLc05BTMUYUWc7iqYtHF_BaR7DnCNK_NJ-QnjsYY,10531
-abstract_webtools-0.1.6.
-abstract_webtools-0.1.6.
-abstract_webtools-0.1.6.
-abstract_webtools-0.1.6.
-abstract_webtools-0.1.6.
+abstract_webtools-0.1.6.17.dist-info/LICENSE,sha256=g3WEJFiVS27HyCGRTwKSsMLyciMaGFdWcZGOe1QalZk,3877
+abstract_webtools-0.1.6.17.dist-info/METADATA,sha256=L9KnzarSqKPuWx_jpndNebg2haRXUAc1XAwzVj09RyI,15858
+abstract_webtools-0.1.6.17.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
+abstract_webtools-0.1.6.17.dist-info/top_level.txt,sha256=2DMJ7RmjTcjCsa-uwAV0K6eXXlIIkFDEjBLg_uyCmCI,18
+abstract_webtools-0.1.6.17.dist-info/RECORD,,
```
{abstract_webtools-0.1.6.15.dist-info → abstract_webtools-0.1.6.17.dist-info}/LICENSE

File without changes

{abstract_webtools-0.1.6.15.dist-info → abstract_webtools-0.1.6.17.dist-info}/WHEEL

File without changes

{abstract_webtools-0.1.6.15.dist-info → abstract_webtools-0.1.6.17.dist-info}/top_level.txt

File without changes