abstract-webtools 0.1.6.58__py3-none-any.whl → 0.1.6.60__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstract_webtools/abstract_usurpit.py +5 -4
- {abstract_webtools-0.1.6.58.dist-info → abstract_webtools-0.1.6.60.dist-info}/METADATA +1 -1
- {abstract_webtools-0.1.6.58.dist-info → abstract_webtools-0.1.6.60.dist-info}/RECORD +5 -5
- {abstract_webtools-0.1.6.58.dist-info → abstract_webtools-0.1.6.60.dist-info}/WHEEL +0 -0
- {abstract_webtools-0.1.6.58.dist-info → abstract_webtools-0.1.6.60.dist-info}/top_level.txt +0 -0
@@ -5,7 +5,7 @@ import os
|
|
5
5
|
import shutil
|
6
6
|
import time
|
7
7
|
from abstract_webtools import *
|
8
|
-
|
8
|
+
from abstract_utilities import *
|
9
9
|
|
10
10
|
# Import your custom classes/functions
|
11
11
|
# from your_module import linkManager, get_soup_mgr
|
@@ -53,7 +53,7 @@ def save_page(url, content,output_dir):
|
|
53
53
|
with open(page_full_path, 'w', encoding='utf-8') as f:
|
54
54
|
f.write(content)
|
55
55
|
print(f"Saved page: {page_full_path}")
|
56
|
-
def save_asset(asset_url, base_url,output_dir,session):
|
56
|
+
def save_asset(asset_url, base_url,output_dir,downloaded_assets,session):
|
57
57
|
"""
|
58
58
|
Download and save assets like images, CSS, JS files.
|
59
59
|
"""
|
@@ -78,6 +78,7 @@ def save_asset(asset_url, base_url,output_dir,session):
|
|
78
78
|
print(f"Saved asset: {asset_full_path}")
|
79
79
|
except Exception as e:
|
80
80
|
print(f"Failed to save asset {asset_url}: {e}")
|
81
|
+
return downloaded_assets
|
81
82
|
class usurpManager():
|
82
83
|
def __init__(self,url,output_dir=None,max_depth=None,wait_between_requests=None,operating_system=None, browser=None, version=None,user_agent=None,website_bot=None):
|
83
84
|
self.url = url
|
@@ -130,7 +131,7 @@ class usurpManager():
|
|
130
131
|
parsed_asset_url = urlparse(full_asset_url)
|
131
132
|
|
132
133
|
if is_valid_url(full_asset_url, base_domain):
|
133
|
-
save_asset(full_asset_url, self.url,self.OUTPUT_DIR,self.session)
|
134
|
+
self.downloaded_assets = save_asset(full_asset_url, self.url,self.OUTPUT_DIR,self.downloaded_assets,self.session)
|
134
135
|
# Update tag to point to the local asset
|
135
136
|
local_asset_path = '/' + parsed_asset_url.path.lstrip('/')
|
136
137
|
tag[attr] = local_asset_path
|
@@ -143,7 +144,7 @@ class usurpManager():
|
|
143
144
|
all_domains = link_mgr.find_all_domain()
|
144
145
|
|
145
146
|
# Process each domain link
|
146
|
-
for link_url in all_domains:
|
147
|
+
for link_url in make_list(all_domains):
|
147
148
|
normalized_link = normalize_url(link_url, url)
|
148
149
|
if is_valid_url(normalized_link, base_domain):
|
149
150
|
time.sleep(self.WAIT_BETWEEN_REQUESTS)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: abstract_webtools
|
3
|
-
Version: 0.1.6.58
|
3
|
+
Version: 0.1.6.60
|
4
4
|
Summary: Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.
|
5
5
|
Home-page: https://github.com/AbstractEndeavors/abstract_essentials/tree/main/abstract_webtools
|
6
6
|
Author: putkoff
|
@@ -1,6 +1,6 @@
|
|
1
1
|
abstract_webtools/__init__.py,sha256=zNMp-9f0Q6BXWxR-tgHrEqKP8GeXw9z7VYzbqIeEydo,132
|
2
2
|
abstract_webtools/abstract_userpit.py,sha256=Rg_0Orx79rxqEePt6Sf-evGslPq5KLlTiL-P2w1u6ng,6462
|
3
|
-
abstract_webtools/abstract_usurpit.py,sha256=
|
3
|
+
abstract_webtools/abstract_usurpit.py,sha256=AAqelbqntBsRZUxPJ0XiGO4xqsmR-y-LbQwn8sDevPo,7131
|
4
4
|
abstract_webtools/abstract_webtools.py,sha256=3NzGmJlZvrdVtEcUi2K5iUgWr1822IBPhIN9us2e2t0,3859
|
5
5
|
abstract_webtools/big_user_agent_list.py,sha256=5ZkrUWmfzYL5yaULREslh9ZiRQeITbSjqZlp2KQON3w,131923
|
6
6
|
abstract_webtools/main.py,sha256=_I7pPXPkoLZOoYGLQDrSLGhGuQt6-PVyXEHZSmglk2g,1329
|
@@ -37,7 +37,7 @@ abstract_webtools/managers/soupManager/soupManager.py,sha256=U3_o189-OWoBRaSCe2s
|
|
37
37
|
abstract_webtools/managers/urlManager/__init__.py,sha256=gaJCHeK91Z-eYsBnxgdhbIUten1-gbx-zqx70R6ag-Y,26
|
38
38
|
abstract_webtools/managers/urlManager/urlManager.py,sha256=vCFuLADmv3h7icaaoAsImGqb_49VizPY_ZvMl-C7PYk,7756
|
39
39
|
abstract_webtools/managers/videos/Heather brooke swallo from condom.mp4,sha256=h-bKFLAHt7pGLGu4EcMvSSox7BPRK0Nga3u813iMVKQ,8335544
|
40
|
-
abstract_webtools-0.1.6.
|
41
|
-
abstract_webtools-0.1.6.58.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
|
42
|
-
abstract_webtools-0.1.6.58.dist-info/top_level.txt,sha256=2DMJ7RmjTcjCsa-uwAV0K6eXXlIIkFDEjBLg_uyCmCI,18
|
43
|
-
abstract_webtools-0.1.6.58.dist-info/RECORD,,
|
40
|
+
abstract_webtools-0.1.6.60.dist-info/METADATA,sha256=eOy8Kvm6ipb8jVKRoEyTLtaZrrHSxwYb1tHQP7_PJb0,16029
|
41
|
+
abstract_webtools-0.1.6.60.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
|
42
|
+
abstract_webtools-0.1.6.60.dist-info/top_level.txt,sha256=2DMJ7RmjTcjCsa-uwAV0K6eXXlIIkFDEjBLg_uyCmCI,18
|
43
|
+
abstract_webtools-0.1.6.60.dist-info/RECORD,,
|
File without changes
|
File without changes
|