abstract-webtools 0.1.6.1__tar.gz → 0.1.6.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. {abstract_webtools-0.1.6.1/src/abstract_webtools.egg-info → abstract_webtools-0.1.6.3}/PKG-INFO +1 -1
  2. {abstract_webtools-0.1.6.1 → abstract_webtools-0.1.6.3}/setup.py +1 -1
  3. {abstract_webtools-0.1.6.1 → abstract_webtools-0.1.6.3}/src/abstract_webtools/managers/crawlManager.py +10 -10
  4. {abstract_webtools-0.1.6.1 → abstract_webtools-0.1.6.3/src/abstract_webtools.egg-info}/PKG-INFO +1 -1
  5. {abstract_webtools-0.1.6.1 → abstract_webtools-0.1.6.3}/LICENSE +0 -0
  6. {abstract_webtools-0.1.6.1 → abstract_webtools-0.1.6.3}/README.md +0 -0
  7. {abstract_webtools-0.1.6.1 → abstract_webtools-0.1.6.3}/pyproject.toml +0 -0
  8. {abstract_webtools-0.1.6.1 → abstract_webtools-0.1.6.3}/setup.cfg +0 -0
  9. {abstract_webtools-0.1.6.1 → abstract_webtools-0.1.6.3}/src/abstract_webtools/__init__.py +0 -0
  10. {abstract_webtools-0.1.6.1 → abstract_webtools-0.1.6.3}/src/abstract_webtools/abstract_webtools.py +0 -0
  11. {abstract_webtools-0.1.6.1 → abstract_webtools-0.1.6.3}/src/abstract_webtools/big_user_agent_list.py +0 -0
  12. {abstract_webtools-0.1.6.1 → abstract_webtools-0.1.6.3}/src/abstract_webtools/main.py +0 -0
  13. {abstract_webtools-0.1.6.1 → abstract_webtools-0.1.6.3}/src/abstract_webtools/managers/__init__.py +0 -0
  14. {abstract_webtools-0.1.6.1 → abstract_webtools-0.1.6.3}/src/abstract_webtools/managers/cipherManager.py +0 -0
  15. {abstract_webtools-0.1.6.1 → abstract_webtools-0.1.6.3}/src/abstract_webtools/managers/domainManager.py +0 -0
  16. {abstract_webtools-0.1.6.1 → abstract_webtools-0.1.6.3}/src/abstract_webtools/managers/dynamicRateLimiter.py +0 -0
  17. {abstract_webtools-0.1.6.1 → abstract_webtools-0.1.6.3}/src/abstract_webtools/managers/linkManager.py +0 -0
  18. {abstract_webtools-0.1.6.1 → abstract_webtools-0.1.6.3}/src/abstract_webtools/managers/mySocketClient.py +0 -0
  19. {abstract_webtools-0.1.6.1 → abstract_webtools-0.1.6.3}/src/abstract_webtools/managers/networkManager.py +0 -0
  20. {abstract_webtools-0.1.6.1 → abstract_webtools-0.1.6.3}/src/abstract_webtools/managers/requestManager.py +0 -0
  21. {abstract_webtools-0.1.6.1 → abstract_webtools-0.1.6.3}/src/abstract_webtools/managers/seleniumManager.py +0 -0
  22. {abstract_webtools-0.1.6.1 → abstract_webtools-0.1.6.3}/src/abstract_webtools/managers/soupManager.py +0 -0
  23. {abstract_webtools-0.1.6.1 → abstract_webtools-0.1.6.3}/src/abstract_webtools/managers/sslManager.py +0 -0
  24. {abstract_webtools-0.1.6.1 → abstract_webtools-0.1.6.3}/src/abstract_webtools/managers/tlsAdapter.py +0 -0
  25. {abstract_webtools-0.1.6.1 → abstract_webtools-0.1.6.3}/src/abstract_webtools/managers/urlManager.py +0 -0
  26. {abstract_webtools-0.1.6.1 → abstract_webtools-0.1.6.3}/src/abstract_webtools/managers/userAgentManager.py +0 -0
  27. {abstract_webtools-0.1.6.1 → abstract_webtools-0.1.6.3}/src/abstract_webtools/managers/videoDownloader.py +0 -0
  28. {abstract_webtools-0.1.6.1 → abstract_webtools-0.1.6.3}/src/abstract_webtools/soup_gui.py +0 -0
  29. {abstract_webtools-0.1.6.1 → abstract_webtools-0.1.6.3}/src/abstract_webtools/url_grabber.py +0 -0
  30. {abstract_webtools-0.1.6.1 → abstract_webtools-0.1.6.3}/src/abstract_webtools/url_grabber_new.py +0 -0
  31. {abstract_webtools-0.1.6.1 → abstract_webtools-0.1.6.3}/src/abstract_webtools.egg-info/SOURCES.txt +0 -0
  32. {abstract_webtools-0.1.6.1 → abstract_webtools-0.1.6.3}/src/abstract_webtools.egg-info/dependency_links.txt +0 -0
  33. {abstract_webtools-0.1.6.1 → abstract_webtools-0.1.6.3}/src/abstract_webtools.egg-info/requires.txt +0 -0
  34. {abstract_webtools-0.1.6.1 → abstract_webtools-0.1.6.3}/src/abstract_webtools.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: abstract_webtools
3
- Version: 0.1.6.1
3
+ Version: 0.1.6.3
4
4
  Summary: Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.
5
5
  Home-page: https://github.com/AbstractEndeavors/abstract_essentials/tree/main/abstract_webtools
6
6
  Author: putkoff
@@ -4,7 +4,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
4
4
  long_description = fh.read()
5
5
  setuptools.setup(
6
6
  name='abstract_webtools',
7
- version='0.1.6.01',
7
+ version='0.1.6.03',
8
8
  author='putkoff',
9
9
  author_email='partners@abstractendeavors.com',
10
10
  description='Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.',
@@ -73,7 +73,7 @@ class crawlManager():
73
73
  """
74
74
  all_urls=[self.url_mgr.url]
75
75
  domain = self.url_mgr.domain
76
- all_attribs = get_attribs(self.url_mgr.url)
76
+ all_attribs = get_all_attribute_values(self.url_mgr.url)
77
77
  for href in all_attribs.get('href',[]):
78
78
  if href == "" or href is None:
79
79
  # href empty tag
@@ -163,7 +163,7 @@ class crawlManager():
163
163
  # Fetch the title if available
164
164
  meta_tags = soup_mgr.find_all("meta")
165
165
  url = eatAll(str(url),['',' ','\n','\t','\\','/'])
166
- attribs = get_attribs(url)
166
+ attribs = get_all_attribute_values(url)
167
167
  soup = get_soup(url)
168
168
 
169
169
  for meta_tag in meta_tags:
@@ -194,7 +194,7 @@ class crawlManager():
194
194
  string += f' <url>\n <loc>{url}</loc>\n'
195
195
  preprocess=[]
196
196
  self.get_new_source_and_url(url=url)
197
- links = get_attribs(url)
197
+ links = get_all_attribute_values(url)
198
198
  images = [link for link in links if link.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.svg', '.webp'))]
199
199
 
200
200
  for img in images:
@@ -240,22 +240,22 @@ class crawlManager():
240
240
  for ext_link in links['external_links']:
241
241
  print(f"\t{ext_link}")
242
242
 
243
- class CrawlManagerSingleton():
243
+ class crawlManagerSingleton():
244
244
  _instance = None
245
245
  @staticmethod
246
246
  def get_instance(url=None,source_code=None,parse_type="html.parser"):
247
- if CrawlManagerSingleton._instance is None:
248
- CrawlManagerSingleton._instance = CrawlManager(url=url,parse_type=parse_type,source_code=source_code)
249
- elif parse_type != CrawlManagerSingleton._instance.parse_type or url != CrawlManagerSingleton._instance.url or source_code != CrawlManagerSingleton._instance.source_code:
250
- CrawlManagerSingleton._instance = CrawlManager(url=url,parse_type=parse_type,source_code=source_code)
251
- return CrawlManagerSingleton._instance
247
+ if crawlManagerSingleton._instance is None:
248
+ crawlManagerSingleton._instance = CrawlManager(url=url,parse_type=parse_type,source_code=source_code)
249
+ elif parse_type != crawlManagerSingleton._instance.parse_type or url != crawlManagerSingleton._instance.url or source_code != crawlManagerSingleton._instance.source_code:
250
+ crawlManagerSingleton._instance = CrawlManager(url=url,parse_type=parse_type,source_code=source_code)
251
+ return crawlManagerSingleton._instance
252
252
  def get_crawl_mgr(url=None,req_mgr=None,url_mgr=None,source_code=None,parse_type="html.parser"):
253
253
  url = get_url(url=url,url_mgr=url_mgr)
254
254
  url_mgr = get_url(url=url,url_mgr=url_mgr)
255
255
  req_mgr=get_req_mgr(url=url,url_mgr=url_mgr,source_code=source_code)
256
256
  source_code = get_source(url=url,url_mgr=url_mgr,source_code=source_code,req_mgr=req_mgr)
257
257
  soup_mgr = get_soup_mgr(url=url,url_mgr=url_mgr,source_code=source_code,req_mgr=req_mgr,parse_type=parse_type)
258
- crawl_mgr = CrawlManager(url=url,req_mgr=req_mgr,url_mgr=url_mgr,source_code=source_code,parse_type=parse_type)
258
+ crawl_mgr = crawlManager(url=url,req_mgr=req_mgr,url_mgr=url_mgr,source_code=source_code,parse_type=parse_type)
259
259
  return crawl_mgr
260
260
  def get_domain_crawl(url=None,req_mgr=None,url_mgr=None,source_code=None,parse_type="html.parser",max_depth=3, depth=1):
261
261
  crawl_mgr = get_crawl_mgr(url=url,req_mgr=req_mgr,url_mgr=url_mgr,source_code=source_code,parse_type=parse_type)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: abstract_webtools
3
- Version: 0.1.6.1
3
+ Version: 0.1.6.3
4
4
  Summary: Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.
5
5
  Home-page: https://github.com/AbstractEndeavors/abstract_essentials/tree/main/abstract_webtools
6
6
  Author: putkoff