abstract-webtools 0.1.5.94__tar.gz → 0.1.5.95__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {abstract_webtools-0.1.5.94/src/abstract_webtools.egg-info → abstract_webtools-0.1.5.95}/PKG-INFO +1 -1
- {abstract_webtools-0.1.5.94 → abstract_webtools-0.1.5.95}/setup.py +1 -1
- {abstract_webtools-0.1.5.94 → abstract_webtools-0.1.5.95}/src/abstract_webtools/managers/crawlManager.py +3 -3
- {abstract_webtools-0.1.5.94 → abstract_webtools-0.1.5.95/src/abstract_webtools.egg-info}/PKG-INFO +1 -1
- {abstract_webtools-0.1.5.94 → abstract_webtools-0.1.5.95}/LICENSE +0 -0
- {abstract_webtools-0.1.5.94 → abstract_webtools-0.1.5.95}/README.md +0 -0
- {abstract_webtools-0.1.5.94 → abstract_webtools-0.1.5.95}/pyproject.toml +0 -0
- {abstract_webtools-0.1.5.94 → abstract_webtools-0.1.5.95}/setup.cfg +0 -0
- {abstract_webtools-0.1.5.94 → abstract_webtools-0.1.5.95}/src/abstract_webtools/__init__.py +0 -0
- {abstract_webtools-0.1.5.94 → abstract_webtools-0.1.5.95}/src/abstract_webtools/abstract_webtools.py +0 -0
- {abstract_webtools-0.1.5.94 → abstract_webtools-0.1.5.95}/src/abstract_webtools/big_user_agent_list.py +0 -0
- {abstract_webtools-0.1.5.94 → abstract_webtools-0.1.5.95}/src/abstract_webtools/main.py +0 -0
- {abstract_webtools-0.1.5.94 → abstract_webtools-0.1.5.95}/src/abstract_webtools/managers/__init__.py +0 -0
- {abstract_webtools-0.1.5.94 → abstract_webtools-0.1.5.95}/src/abstract_webtools/managers/cipherManager.py +0 -0
- {abstract_webtools-0.1.5.94 → abstract_webtools-0.1.5.95}/src/abstract_webtools/managers/domainManager.py +0 -0
- {abstract_webtools-0.1.5.94 → abstract_webtools-0.1.5.95}/src/abstract_webtools/managers/dynamicRateLimiter.py +0 -0
- {abstract_webtools-0.1.5.94 → abstract_webtools-0.1.5.95}/src/abstract_webtools/managers/linkManager.py +0 -0
- {abstract_webtools-0.1.5.94 → abstract_webtools-0.1.5.95}/src/abstract_webtools/managers/mySocketClient.py +0 -0
- {abstract_webtools-0.1.5.94 → abstract_webtools-0.1.5.95}/src/abstract_webtools/managers/networkManager.py +0 -0
- {abstract_webtools-0.1.5.94 → abstract_webtools-0.1.5.95}/src/abstract_webtools/managers/requestManager.py +0 -0
- {abstract_webtools-0.1.5.94 → abstract_webtools-0.1.5.95}/src/abstract_webtools/managers/seleniumManager.py +0 -0
- {abstract_webtools-0.1.5.94 → abstract_webtools-0.1.5.95}/src/abstract_webtools/managers/soupManager.py +0 -0
- {abstract_webtools-0.1.5.94 → abstract_webtools-0.1.5.95}/src/abstract_webtools/managers/sslManager.py +0 -0
- {abstract_webtools-0.1.5.94 → abstract_webtools-0.1.5.95}/src/abstract_webtools/managers/tlsAdapter.py +0 -0
- {abstract_webtools-0.1.5.94 → abstract_webtools-0.1.5.95}/src/abstract_webtools/managers/urlManager.py +0 -0
- {abstract_webtools-0.1.5.94 → abstract_webtools-0.1.5.95}/src/abstract_webtools/managers/userAgentManager.py +0 -0
- {abstract_webtools-0.1.5.94 → abstract_webtools-0.1.5.95}/src/abstract_webtools/managers/videoDownloader.py +0 -0
- {abstract_webtools-0.1.5.94 → abstract_webtools-0.1.5.95}/src/abstract_webtools/soup_gui.py +0 -0
- {abstract_webtools-0.1.5.94 → abstract_webtools-0.1.5.95}/src/abstract_webtools/url_grabber.py +0 -0
- {abstract_webtools-0.1.5.94 → abstract_webtools-0.1.5.95}/src/abstract_webtools/url_grabber_new.py +0 -0
- {abstract_webtools-0.1.5.94 → abstract_webtools-0.1.5.95}/src/abstract_webtools.egg-info/SOURCES.txt +0 -0
- {abstract_webtools-0.1.5.94 → abstract_webtools-0.1.5.95}/src/abstract_webtools.egg-info/dependency_links.txt +0 -0
- {abstract_webtools-0.1.5.94 → abstract_webtools-0.1.5.95}/src/abstract_webtools.egg-info/requires.txt +0 -0
- {abstract_webtools-0.1.5.94 → abstract_webtools-0.1.5.95}/src/abstract_webtools.egg-info/top_level.txt +0 -0
{abstract_webtools-0.1.5.94/src/abstract_webtools.egg-info → abstract_webtools-0.1.5.95}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: abstract_webtools
|
3
|
-
Version: 0.1.5.94
|
3
|
+
Version: 0.1.5.95
|
4
4
|
Summary: Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.
|
5
5
|
Home-page: https://github.com/AbstractEndeavors/abstract_essentials/tree/main/abstract_webtools
|
6
6
|
Author: putkoff
|
{abstract_webtools-0.1.5.94 → abstract_webtools-0.1.5.95}/setup.py
RENAMED
@@ -4,7 +4,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
|
|
4
4
|
long_description = fh.read()
|
5
5
|
setuptools.setup(
|
6
6
|
name='abstract_webtools',
|
7
|
-
version='0.1.5.94',
|
7
|
+
version='0.1.5.95',
|
8
8
|
author='putkoff',
|
9
9
|
author_email='partners@abstractendeavors.com',
|
10
10
|
description='Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.',
|
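{abstract_webtools-0.1.5.94 → abstract_webtools-0.1.5.95}/src/abstract_webtools/managers/crawlManager.py
RENAMED
@@ -56,7 +56,7 @@ class CrawlManager():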
|
|
56
56
|
"""
|
57
57
|
all_urls=[self.url_mgr.url]
|
58
58
|
domain = self.url_mgr.domain
|
59
|
-
all_attribs =
|
59
|
+
all_attribs = get_all_attribute_values(url_mgr=self.url_mgr.url)
|
60
60
|
for href in all_attribs.get('href',[]):
|
61
61
|
if href == "" or href is None:
|
62
62
|
# href empty tag
|
@@ -146,7 +146,7 @@ class CrawlManager():
|
|
146
146
|
# Fetch the title if available
|
147
147
|
meta_tags = soup_mgr.find_all("meta")
|
148
148
|
url = eatAll(str(url),['',' ','\n','\t','\\','/'])
|
149
|
-
attribs =
|
149
|
+
attribs = get_all_attribute_values(url)
|
150
150
|
soup = get_soup(url)
|
151
151
|
|
152
152
|
for meta_tag in meta_tags:
|
@@ -177,7 +177,7 @@ class CrawlManager():
|
|
177
177
|
string += f' <url>\n <loc>{url}</loc>\n'
|
178
178
|
preprocess=[]
|
179
179
|
self.get_new_source_and_url(url=url)
|
180
|
-
links =
|
180
|
+
links = get_all_attribute_values(url)
|
181
181
|
images = [link for link in links if link.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.svg', '.webp'))]
|
182
182
|
|
183
183
|
for img in images:
|
{abstract_webtools-0.1.5.94 → abstract_webtools-0.1.5.95/src/abstract_webtools.egg-info}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: abstract_webtools
|
3
|
-
Version: 0.1.5.94
|
3
|
+
Version: 0.1.5.95
|
4
4
|
Summary: Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.
|
5
5
|
Home-page: https://github.com/AbstractEndeavors/abstract_essentials/tree/main/abstract_webtools
|
6
6
|
Author: putkoff
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{abstract_webtools-0.1.5.94 → abstract_webtools-0.1.5.95}/src/abstract_webtools/abstract_webtools.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{abstract_webtools-0.1.5.94 → abstract_webtools-0.1.5.95}/src/abstract_webtools/managers/__init__.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{abstract_webtools-0.1.5.94 → abstract_webtools-0.1.5.95}/src/abstract_webtools/url_grabber.py
RENAMED
File without changes
|
{abstract_webtools-0.1.5.94 → abstract_webtools-0.1.5.95}/src/abstract_webtools/url_grabber_new.py
RENAMED
File without changes
|
{abstract_webtools-0.1.5.94 → abstract_webtools-0.1.5.95}/src/abstract_webtools.egg-info/SOURCES.txt
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|