PyPI - abstract-webtools - Versions diffs - 0.1.6.1__tar.gz → 0.1.6.3__tar.gz - Mend

abstract-webtools 0.1.6.1.tar.gz → 0.1.6.3.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34) [hide / show]

{abstract_webtools-0.1.6.1/src/abstract_webtools.egg-info → abstract_webtools-0.1.6.3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: abstract_webtools
-Version: 0.1.6.1
+Version: 0.1.6.3
 Summary: Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.
 Home-page: https://github.com/AbstractEndeavors/abstract_essentials/tree/main/abstract_webtools
 Author: putkoff

{abstract_webtools-0.1.6.1 → abstract_webtools-0.1.6.3}/setup.py RENAMED Viewed

@@ -4,7 +4,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
     long_description = fh.read()
 setuptools.setup(
     name='abstract_webtools',
-    version='0.1.6.01',
+    version='0.1.6.03',
     author='putkoff',
     author_email='partners@abstractendeavors.com',
     description='Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.',

{abstract_webtools-0.1.6.1 → abstract_webtools-0.1.6.3}/src/abstract_webtools/managers/crawlManager.py RENAMED Viewed

@@ -73,7 +73,7 @@ class crawlManager():
         """
         all_urls=[self.url_mgr.url]
         domain = self.url_mgr.domain
-        all_attribs = get_attribs(self.url_mgr.url)
+        all_attribs = get_all_attribute_values(self.url_mgr.url)
         for href in all_attribs.get('href',[]):
             if href == "" or href is None:
                 # href empty tag
@@ -163,7 +163,7 @@ class crawlManager():
         # Fetch the title if available
         meta_tags = soup_mgr.find_all("meta")
         url = eatAll(str(url),['',' ','\n','\t','\\','/'])
-        attribs = get_attribs(url)
+        attribs = get_all_attribute_values(url)
         soup = get_soup(url)
         for meta_tag in meta_tags:
@@ -194,7 +194,7 @@ class crawlManager():
                 string += f'  <url>\n    <loc>{url}</loc>\n'
                 preprocess=[]
                 self.get_new_source_and_url(url=url)
-                links = get_attribs(url)
+                links = get_all_attribute_values(url)
                 images = [link for link in links if link.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.svg', '.webp'))]
                 for img in images:
@@ -240,22 +240,22 @@ class crawlManager():
             for ext_link in links['external_links']:
                 print(f"\t{ext_link}")
-class CrawlManagerSingleton():
+class crawlManagerSingleton():
     _instance = None
     @staticmethod
     def get_instance(url=None,source_code=None,parse_type="html.parser"):
-        if CrawlManagerSingleton._instance is None:
-            CrawlManagerSingleton._instance = CrawlManager(url=url,parse_type=parse_type,source_code=source_code)
-        elif parse_type != CrawlManagerSingleton._instance.parse_type or url != CrawlManagerSingleton._instance.url  or source_code != CrawlManagerSingleton._instance.source_code:
-            CrawlManagerSingleton._instance = CrawlManager(url=url,parse_type=parse_type,source_code=source_code)
-        return CrawlManagerSingleton._instance
+        if crawlManagerSingleton._instance is None:
+            crawlManagerSingleton._instance = CrawlManager(url=url,parse_type=parse_type,source_code=source_code)
+        elif parse_type != crawlManagerSingleton._instance.parse_type or url != crawlManagerSingleton._instance.url  or source_code != crawlManagerSingleton._instance.source_code:
+            crawlManagerSingleton._instance = CrawlManager(url=url,parse_type=parse_type,source_code=source_code)
+        return crawlManagerSingleton._instance
 def get_crawl_mgr(url=None,req_mgr=None,url_mgr=None,source_code=None,parse_type="html.parser"):
     url = get_url(url=url,url_mgr=url_mgr)
     url_mgr = get_url(url=url,url_mgr=url_mgr)
     req_mgr=get_req_mgr(url=url,url_mgr=url_mgr,source_code=source_code)
     source_code = get_source(url=url,url_mgr=url_mgr,source_code=source_code,req_mgr=req_mgr)
     soup_mgr = get_soup_mgr(url=url,url_mgr=url_mgr,source_code=source_code,req_mgr=req_mgr,parse_type=parse_type)
-    crawl_mgr = CrawlManager(url=url,req_mgr=req_mgr,url_mgr=url_mgr,source_code=source_code,parse_type=parse_type)
+    crawl_mgr = crawlManager(url=url,req_mgr=req_mgr,url_mgr=url_mgr,source_code=source_code,parse_type=parse_type)
     return crawl_mgr
 def get_domain_crawl(url=None,req_mgr=None,url_mgr=None,source_code=None,parse_type="html.parser",max_depth=3, depth=1):
     crawl_mgr = get_crawl_mgr(url=url,req_mgr=req_mgr,url_mgr=url_mgr,source_code=source_code,parse_type=parse_type)

{abstract_webtools-0.1.6.1 → abstract_webtools-0.1.6.3/src/abstract_webtools.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: abstract_webtools
-Version: 0.1.6.1
+Version: 0.1.6.3
 Summary: Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.
 Home-page: https://github.com/AbstractEndeavors/abstract_essentials/tree/main/abstract_webtools
 Author: putkoff