abstract-webtools 0.1.6.57__tar.gz → 0.1.6.59__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {abstract_webtools-0.1.6.57 → abstract_webtools-0.1.6.59}/PKG-INFO +1 -1
- {abstract_webtools-0.1.6.57 → abstract_webtools-0.1.6.59}/setup.py +1 -1
- {abstract_webtools-0.1.6.57 → abstract_webtools-0.1.6.59}/src/abstract_webtools/abstract_usurpit.py +4 -3
- {abstract_webtools-0.1.6.57 → abstract_webtools-0.1.6.59}/src/abstract_webtools.egg-info/PKG-INFO +1 -1
- {abstract_webtools-0.1.6.57 → abstract_webtools-0.1.6.59}/README.md +0 -0
- {abstract_webtools-0.1.6.57 → abstract_webtools-0.1.6.59}/pyproject.toml +0 -0
- {abstract_webtools-0.1.6.57 → abstract_webtools-0.1.6.59}/setup.cfg +0 -0
- {abstract_webtools-0.1.6.57 → abstract_webtools-0.1.6.59}/src/abstract_webtools/__init__.py +0 -0
- {abstract_webtools-0.1.6.57 → abstract_webtools-0.1.6.59}/src/abstract_webtools/abstract_webtools.py +0 -0
- {abstract_webtools-0.1.6.57 → abstract_webtools-0.1.6.59}/src/abstract_webtools/big_user_agent_list.py +0 -0
- {abstract_webtools-0.1.6.57 → abstract_webtools-0.1.6.59}/src/abstract_webtools/main.py +0 -0
- {abstract_webtools-0.1.6.57 → abstract_webtools-0.1.6.59}/src/abstract_webtools/managers/__init__.py +0 -0
- {abstract_webtools-0.1.6.57 → abstract_webtools-0.1.6.59}/src/abstract_webtools/managers/cipherManager.py +0 -0
- {abstract_webtools-0.1.6.57 → abstract_webtools-0.1.6.59}/src/abstract_webtools/managers/crawlManager.py +0 -0
- {abstract_webtools-0.1.6.57 → abstract_webtools-0.1.6.59}/src/abstract_webtools/managers/crawlmgr2.py +0 -0
- {abstract_webtools-0.1.6.57 → abstract_webtools-0.1.6.59}/src/abstract_webtools/managers/curlMgr.py +0 -0
- {abstract_webtools-0.1.6.57 → abstract_webtools-0.1.6.59}/src/abstract_webtools/managers/domainManager.py +0 -0
- {abstract_webtools-0.1.6.57 → abstract_webtools-0.1.6.59}/src/abstract_webtools/managers/dynamicRateLimiter.py +0 -0
- {abstract_webtools-0.1.6.57 → abstract_webtools-0.1.6.59}/src/abstract_webtools/managers/get_test.py +0 -0
- {abstract_webtools-0.1.6.57 → abstract_webtools-0.1.6.59}/src/abstract_webtools/managers/linkManager/__init__.py +0 -0
- {abstract_webtools-0.1.6.57 → abstract_webtools-0.1.6.59}/src/abstract_webtools/managers/linkManager/linkManager.py +0 -0
- {abstract_webtools-0.1.6.57 → abstract_webtools-0.1.6.59}/src/abstract_webtools/managers/mySocketClient.py +0 -0
- {abstract_webtools-0.1.6.57 → abstract_webtools-0.1.6.59}/src/abstract_webtools/managers/networkManager.py +0 -0
- {abstract_webtools-0.1.6.57 → abstract_webtools-0.1.6.59}/src/abstract_webtools/managers/requestManager/__init__.py +0 -0
- {abstract_webtools-0.1.6.57 → abstract_webtools-0.1.6.59}/src/abstract_webtools/managers/requestManager/requestManager.py +0 -0
- {abstract_webtools-0.1.6.57 → abstract_webtools-0.1.6.59}/src/abstract_webtools/managers/seleniumManager.py +0 -0
- {abstract_webtools-0.1.6.57 → abstract_webtools-0.1.6.59}/src/abstract_webtools/managers/soupManager/__init__.py +0 -0
- {abstract_webtools-0.1.6.57 → abstract_webtools-0.1.6.59}/src/abstract_webtools/managers/soupManager/asoueces.py +0 -0
- {abstract_webtools-0.1.6.57 → abstract_webtools-0.1.6.59}/src/abstract_webtools/managers/soupManager/soupManager.py +0 -0
- {abstract_webtools-0.1.6.57 → abstract_webtools-0.1.6.59}/src/abstract_webtools/managers/sslManager.py +0 -0
- {abstract_webtools-0.1.6.57 → abstract_webtools-0.1.6.59}/src/abstract_webtools/managers/tlsAdapter.py +0 -0
- {abstract_webtools-0.1.6.57 → abstract_webtools-0.1.6.59}/src/abstract_webtools/managers/urlManager/__init__.py +0 -0
- {abstract_webtools-0.1.6.57 → abstract_webtools-0.1.6.59}/src/abstract_webtools/managers/urlManager/urlManager.py +0 -0
- {abstract_webtools-0.1.6.57 → abstract_webtools-0.1.6.59}/src/abstract_webtools/managers/userAgentManager.py +0 -0
- {abstract_webtools-0.1.6.57 → abstract_webtools-0.1.6.59}/src/abstract_webtools/managers/videoDownloader.py +0 -0
- {abstract_webtools-0.1.6.57 → abstract_webtools-0.1.6.59}/src/abstract_webtools/managers/videoDownloader2.py +0 -0
- {abstract_webtools-0.1.6.57 → abstract_webtools-0.1.6.59}/src/abstract_webtools/soup_gui.py +0 -0
- {abstract_webtools-0.1.6.57 → abstract_webtools-0.1.6.59}/src/abstract_webtools/url_grabber.py +0 -0
- {abstract_webtools-0.1.6.57 → abstract_webtools-0.1.6.59}/src/abstract_webtools/url_grabber_new.py +0 -0
- {abstract_webtools-0.1.6.57 → abstract_webtools-0.1.6.59}/src/abstract_webtools.egg-info/SOURCES.txt +0 -0
- {abstract_webtools-0.1.6.57 → abstract_webtools-0.1.6.59}/src/abstract_webtools.egg-info/dependency_links.txt +0 -0
- {abstract_webtools-0.1.6.57 → abstract_webtools-0.1.6.59}/src/abstract_webtools.egg-info/requires.txt +0 -0
- {abstract_webtools-0.1.6.57 → abstract_webtools-0.1.6.59}/src/abstract_webtools.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: abstract_webtools
|
3
|
-
Version: 0.1.6.57
|
3
|
+
Version: 0.1.6.59
|
4
4
|
Summary: Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.
|
5
5
|
Home-page: https://github.com/AbstractEndeavors/abstract_essentials/tree/main/abstract_webtools
|
6
6
|
Author: putkoff
|
@@ -4,7 +4,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
|
|
4
4
|
long_description = fh.read()
|
5
5
|
setuptools.setup(
|
6
6
|
name='abstract_webtools',
|
7
|
-
version='0.1.6.57',
|
7
|
+
version='0.1.6.59',
|
8
8
|
author='putkoff',
|
9
9
|
author_email='partners@abstractendeavors.com',
|
10
10
|
description='Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.',
|
{abstract_webtools-0.1.6.57 → abstract_webtools-0.1.6.59}/src/abstract_webtools/abstract_usurpit.py
RENAMED
@@ -53,7 +53,7 @@ def save_page(url, content,output_dir):
|
|
53
53
|
with open(page_full_path, 'w', encoding='utf-8') as f:
|
54
54
|
f.write(content)
|
55
55
|
print(f"Saved page: {page_full_path}")
|
56
|
-
def save_asset(asset_url, base_url,output_dir,session):
|
56
|
+
def save_asset(asset_url, base_url,output_dir,downloaded_assets,session):
|
57
57
|
"""
|
58
58
|
Download and save assets like images, CSS, JS files.
|
59
59
|
"""
|
@@ -78,6 +78,7 @@ def save_asset(asset_url, base_url,output_dir,session):
|
|
78
78
|
print(f"Saved asset: {asset_full_path}")
|
79
79
|
except Exception as e:
|
80
80
|
print(f"Failed to save asset {asset_url}: {e}")
|
81
|
+
return downloaded_assets
|
81
82
|
class usurpManager():
|
82
83
|
def __init__(self,url,output_dir=None,max_depth=None,wait_between_requests=None,operating_system=None, browser=None, version=None,user_agent=None,website_bot=None):
|
83
84
|
self.url = url
|
@@ -130,7 +131,7 @@ class usurpManager():
|
|
130
131
|
parsed_asset_url = urlparse(full_asset_url)
|
131
132
|
|
132
133
|
if is_valid_url(full_asset_url, base_domain):
|
133
|
-
save_asset(full_asset_url, self.url,self.session)
|
134
|
+
self.downloaded_assets = save_asset(full_asset_url, self.url,self.OUTPUT_DIR,self.downloaded_assets,self.session)
|
134
135
|
# Update tag to point to the local asset
|
135
136
|
local_asset_path = '/' + parsed_asset_url.path.lstrip('/')
|
136
137
|
tag[attr] = local_asset_path
|
@@ -170,5 +171,5 @@ def test_download(url=None,directory=None):
|
|
170
171
|
def usurpit(url,output_dir=None,max_depth=None,wait_between_requests=None,operating_system=None, browser=None, version=None,user_agent=None,website_bot=None):
|
171
172
|
output_dir= output_dir or os.path.join(os.getcwd(),'usurped')
|
172
173
|
os.makedirs(output_dir,exist_ok=True)
|
173
|
-
site_mgr = usurpManager(url,output_dir)
|
174
|
+
site_mgr = usurpManager(url,output_dir=output_dir,max_depth=max_depth,wait_between_requests=wait_between_requests,operating_system=operating_system, browser=browser, version=version,user_agent=user_agent,website_bot=website_bot)
|
174
175
|
site_mgr.main()
|
{abstract_webtools-0.1.6.57 → abstract_webtools-0.1.6.59}/src/abstract_webtools.egg-info/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: abstract_webtools
|
3
|
-
Version: 0.1.6.57
|
3
|
+
Version: 0.1.6.59
|
4
4
|
Summary: Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.
|
5
5
|
Home-page: https://github.com/AbstractEndeavors/abstract_essentials/tree/main/abstract_webtools
|
6
6
|
Author: putkoff
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{abstract_webtools-0.1.6.57 → abstract_webtools-0.1.6.59}/src/abstract_webtools/abstract_webtools.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{abstract_webtools-0.1.6.57 → abstract_webtools-0.1.6.59}/src/abstract_webtools/managers/__init__.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{abstract_webtools-0.1.6.57 → abstract_webtools-0.1.6.59}/src/abstract_webtools/managers/curlMgr.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{abstract_webtools-0.1.6.57 → abstract_webtools-0.1.6.59}/src/abstract_webtools/managers/get_test.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{abstract_webtools-0.1.6.57 → abstract_webtools-0.1.6.59}/src/abstract_webtools/url_grabber.py
RENAMED
File without changes
|
{abstract_webtools-0.1.6.57 → abstract_webtools-0.1.6.59}/src/abstract_webtools/url_grabber_new.py
RENAMED
File without changes
|
{abstract_webtools-0.1.6.57 → abstract_webtools-0.1.6.59}/src/abstract_webtools.egg-info/SOURCES.txt
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|