abstract-webtools 0.1.6.16__tar.gz → 0.1.6.18__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {abstract_webtools-0.1.6.16/src/abstract_webtools.egg-info → abstract_webtools-0.1.6.18}/PKG-INFO +1 -1
- {abstract_webtools-0.1.6.16 → abstract_webtools-0.1.6.18}/setup.py +1 -1
- {abstract_webtools-0.1.6.16 → abstract_webtools-0.1.6.18}/src/abstract_webtools/managers/crawlManager.py +3 -2
- {abstract_webtools-0.1.6.16 → abstract_webtools-0.1.6.18/src/abstract_webtools.egg-info}/PKG-INFO +1 -1
- {abstract_webtools-0.1.6.16 → abstract_webtools-0.1.6.18}/LICENSE +0 -0
- {abstract_webtools-0.1.6.16 → abstract_webtools-0.1.6.18}/README.md +0 -0
- {abstract_webtools-0.1.6.16 → abstract_webtools-0.1.6.18}/pyproject.toml +0 -0
- {abstract_webtools-0.1.6.16 → abstract_webtools-0.1.6.18}/setup.cfg +0 -0
- {abstract_webtools-0.1.6.16 → abstract_webtools-0.1.6.18}/src/abstract_webtools/__init__.py +0 -0
- {abstract_webtools-0.1.6.16 → abstract_webtools-0.1.6.18}/src/abstract_webtools/abstract_webtools.py +0 -0
- {abstract_webtools-0.1.6.16 → abstract_webtools-0.1.6.18}/src/abstract_webtools/big_user_agent_list.py +0 -0
- {abstract_webtools-0.1.6.16 → abstract_webtools-0.1.6.18}/src/abstract_webtools/main.py +0 -0
- {abstract_webtools-0.1.6.16 → abstract_webtools-0.1.6.18}/src/abstract_webtools/managers/__init__.py +0 -0
- {abstract_webtools-0.1.6.16 → abstract_webtools-0.1.6.18}/src/abstract_webtools/managers/cipherManager.py +0 -0
- {abstract_webtools-0.1.6.16 → abstract_webtools-0.1.6.18}/src/abstract_webtools/managers/domainManager.py +0 -0
- {abstract_webtools-0.1.6.16 → abstract_webtools-0.1.6.18}/src/abstract_webtools/managers/dynamicRateLimiter.py +0 -0
- {abstract_webtools-0.1.6.16 → abstract_webtools-0.1.6.18}/src/abstract_webtools/managers/linkManager.py +0 -0
- {abstract_webtools-0.1.6.16 → abstract_webtools-0.1.6.18}/src/abstract_webtools/managers/mySocketClient.py +0 -0
- {abstract_webtools-0.1.6.16 → abstract_webtools-0.1.6.18}/src/abstract_webtools/managers/networkManager.py +0 -0
- {abstract_webtools-0.1.6.16 → abstract_webtools-0.1.6.18}/src/abstract_webtools/managers/requestManager.py +0 -0
- {abstract_webtools-0.1.6.16 → abstract_webtools-0.1.6.18}/src/abstract_webtools/managers/seleniumManager.py +0 -0
- {abstract_webtools-0.1.6.16 → abstract_webtools-0.1.6.18}/src/abstract_webtools/managers/soupManager.py +0 -0
- {abstract_webtools-0.1.6.16 → abstract_webtools-0.1.6.18}/src/abstract_webtools/managers/sslManager.py +0 -0
- {abstract_webtools-0.1.6.16 → abstract_webtools-0.1.6.18}/src/abstract_webtools/managers/tlsAdapter.py +0 -0
- {abstract_webtools-0.1.6.16 → abstract_webtools-0.1.6.18}/src/abstract_webtools/managers/urlManager.py +0 -0
- {abstract_webtools-0.1.6.16 → abstract_webtools-0.1.6.18}/src/abstract_webtools/managers/userAgentManager.py +0 -0
- {abstract_webtools-0.1.6.16 → abstract_webtools-0.1.6.18}/src/abstract_webtools/managers/videoDownloader.py +0 -0
- {abstract_webtools-0.1.6.16 → abstract_webtools-0.1.6.18}/src/abstract_webtools/soup_gui.py +0 -0
- {abstract_webtools-0.1.6.16 → abstract_webtools-0.1.6.18}/src/abstract_webtools/url_grabber.py +0 -0
- {abstract_webtools-0.1.6.16 → abstract_webtools-0.1.6.18}/src/abstract_webtools/url_grabber_new.py +0 -0
- {abstract_webtools-0.1.6.16 → abstract_webtools-0.1.6.18}/src/abstract_webtools.egg-info/SOURCES.txt +0 -0
- {abstract_webtools-0.1.6.16 → abstract_webtools-0.1.6.18}/src/abstract_webtools.egg-info/dependency_links.txt +0 -0
- {abstract_webtools-0.1.6.16 → abstract_webtools-0.1.6.18}/src/abstract_webtools.egg-info/requires.txt +0 -0
- {abstract_webtools-0.1.6.16 → abstract_webtools-0.1.6.18}/src/abstract_webtools.egg-info/top_level.txt +0 -0
{abstract_webtools-0.1.6.16/src/abstract_webtools.egg-info → abstract_webtools-0.1.6.18}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: abstract_webtools
|
3
|
-
Version: 0.1.6.16
|
3
|
+
Version: 0.1.6.18
|
4
4
|
Summary: Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.
|
5
5
|
Home-page: https://github.com/AbstractEndeavors/abstract_essentials/tree/main/abstract_webtools
|
6
6
|
Author: putkoff
|
@@ -4,7 +4,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
|
|
4
4
|
long_description = fh.read()
|
5
5
|
setuptools.setup(
|
6
6
|
name='abstract_webtools',
|
7
|
-
version='0.1.6.16',
|
7
|
+
version='0.1.6.18',
|
8
8
|
author='putkoff',
|
9
9
|
author_email='partners@abstractendeavors.com',
|
10
10
|
description='Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.',
|
@@ -52,7 +52,7 @@ class crawlManager:
|
|
52
52
|
"""Finds all internal links on the website that belong to the same domain."""
|
53
53
|
all_urls = [self.url_mgr.url]
|
54
54
|
domain = self.url_mgr.domain
|
55
|
-
all_attribs =
|
55
|
+
all_attribs = self.extract_links_from_url(self.url_mgr.url)
|
56
56
|
|
57
57
|
for href in all_attribs.get('href', []):
|
58
58
|
if not href or not self.url_mgr.is_valid_url(href):
|
@@ -118,8 +118,9 @@ class crawlManager:
|
|
118
118
|
|
119
119
|
return meta_info
|
120
120
|
|
121
|
-
def generate_sitemap(self):
|
121
|
+
def generate_sitemap(self,url=None):
|
122
122
|
"""Generates a sitemap.xml file with URLs, images, change frequency, and priority."""
|
123
|
+
url = url or self.url
|
123
124
|
urls = self.get_all_website_links()
|
124
125
|
with open('sitemap.xml', 'w', encoding='utf-8') as f:
|
125
126
|
f.write('<?xml version="1.0" encoding="UTF-8"?>\n')
|
{abstract_webtools-0.1.6.16 → abstract_webtools-0.1.6.18/src/abstract_webtools.egg-info}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: abstract_webtools
|
3
|
-
Version: 0.1.6.16
|
3
|
+
Version: 0.1.6.18
|
4
4
|
Summary: Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.
|
5
5
|
Home-page: https://github.com/AbstractEndeavors/abstract_essentials/tree/main/abstract_webtools
|
6
6
|
Author: putkoff
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{abstract_webtools-0.1.6.16 → abstract_webtools-0.1.6.18}/src/abstract_webtools/abstract_webtools.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{abstract_webtools-0.1.6.16 → abstract_webtools-0.1.6.18}/src/abstract_webtools/managers/__init__.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{abstract_webtools-0.1.6.16 → abstract_webtools-0.1.6.18}/src/abstract_webtools/url_grabber.py
RENAMED
File without changes
|
{abstract_webtools-0.1.6.16 → abstract_webtools-0.1.6.18}/src/abstract_webtools/url_grabber_new.py
RENAMED
File without changes
|
{abstract_webtools-0.1.6.16 → abstract_webtools-0.1.6.18}/src/abstract_webtools.egg-info/SOURCES.txt
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|