abstract-webtools 0.1.6.116__py3-none-any.whl → 0.1.6.118__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstract_webtools/abstract_usurpit.py +22 -4
- {abstract_webtools-0.1.6.116.dist-info → abstract_webtools-0.1.6.118.dist-info}/METADATA +1 -1
- {abstract_webtools-0.1.6.116.dist-info → abstract_webtools-0.1.6.118.dist-info}/RECORD +5 -5
- {abstract_webtools-0.1.6.116.dist-info → abstract_webtools-0.1.6.118.dist-info}/WHEEL +1 -1
- {abstract_webtools-0.1.6.116.dist-info → abstract_webtools-0.1.6.118.dist-info}/top_level.txt +0 -0
@@ -9,7 +9,25 @@ from abstract_utilities import *
|
|
9
9
|
|
10
10
|
# Import your custom classes/functions
|
11
11
|
# from your_module import linkManager, get_soup_mgr
|
12
|
-
|
12
|
+
def make_directory(directory=None, path=None):
    """Create a directory (and any missing parents) and return its path.

    Args:
        directory: Base directory. Falls back to the current working
            directory when omitted or empty.
        path: Optional sub-path joined onto ``directory`` before creation.

    Returns:
        The path of the directory that now exists on disk.
    """
    if not directory:
        directory = os.getcwd()
    if path:
        # Join onto the resolved base directory; no other base name exists
        # in this function's scope.
        directory = os.path.join(directory, path)
    os.makedirs(directory, exist_ok=True)
    return directory
|
19
|
+
def get_domain_name_from_url(url):
    """Derive a short, directory-friendly site name from a URL.

    The host's last label (the TLD) and a leading ``www.`` are dropped, so
    ``https://www.example.com/page`` yields ``example``.

    Args:
        url: Absolute URL string.

    Returns:
        The trimmed host name, or ``''`` when the URL carries no host
        (for example a scheme-less or empty string).
    """
    # ``hostname`` excludes userinfo and port and is lower-cased, so neither
    # credentials nor ``:8080`` can end up in a directory name.
    host = urlparse(url).hostname or ''
    labels = host.split('.')
    # A single-label host such as ``localhost`` has no TLD to strip; keep it
    # whole rather than returning an empty name.
    directory_name = '.'.join(labels[:-1]) if len(labels) > 1 else host
    if directory_name.startswith('www.'):
        directory_name = directory_name[len('www.'):]
    return directory_name
|
27
|
+
def get_domain_directory_from_url(url, base_dir=None):
    """Create and return a per-site directory named after the URL's domain.

    Args:
        url: URL whose domain name (via ``get_domain_name_from_url``) names
            the directory.
        base_dir: Parent directory. Defaults to the current working
            directory.

    Returns:
        Path of the created (or already existing) domain directory.
    """
    base_dir = base_dir or os.getcwd()
    domain_name = get_domain_name_from_url(url)
    # Build the path from the resolved base_dir; an empty domain name leaves
    # the base directory itself as the target.
    domain_dir = os.path.join(base_dir, domain_name) if domain_name else base_dir
    os.makedirs(domain_dir, exist_ok=True)
    return domain_dir
|
13
31
|
# Configuration
|
14
32
|
def normalize_url(url, base_url):
|
15
33
|
"""
|
@@ -164,12 +182,12 @@ class usurpManager():
|
|
164
182
|
print("Website copying completed.")
|
165
183
|
def test_download(url=None, directory=None):
    """Build a ``usurpManager`` for a sample URL as a manual smoke test.

    Args:
        url: Page to copy. Defaults to a sample YouTube URL.
        directory: Output directory. When omitted, a directory named after
            the URL's domain is used under the current working directory,
            falling back to ``testit`` if no domain name can be derived.
    """
    url = url or 'https://www.youtube.com/watch?v=jRGrNDV2mKc&list=RDMMjRGrNDV2mKc&start_radio=1'

    # Resolve the output directory with the helper this module defines.
    output_dir = directory or os.path.join(
        os.getcwd(), get_domain_name_from_url(url) or 'testit'
    )
    os.makedirs(output_dir, exist_ok=True)
    # NOTE(review): only the manager is constructed here; main() is not
    # called — confirm whether construction alone is the intended test.
    usurpManager(url, output_dir)
|
170
189
|
|
171
190
|
def usurpit(url, output_dir=None, max_depth=None, wait_between_requests=None,
            operating_system=None, browser=None, version=None,
            user_agent=None, website_bot=None):
    """Copy a website into a local directory using ``usurpManager``.

    Args:
        url: Start URL of the site to copy.
        output_dir: Destination directory. When omitted, a directory named
            after the URL's domain is used under the current working
            directory, falling back to ``usurped`` if no domain name can be
            derived.
        max_depth, wait_between_requests, operating_system, browser,
        version, user_agent, website_bot: Forwarded unchanged to
            ``usurpManager``.
    """
    # Honour a caller-supplied output_dir; only derive one when none is given.
    if not output_dir:
        output_dir = os.path.join(
            os.getcwd(), get_domain_name_from_url(url) or 'usurped'
        )
    os.makedirs(output_dir, exist_ok=True)
    site_mgr = usurpManager(
        url,
        output_dir=output_dir,
        max_depth=max_depth,
        wait_between_requests=wait_between_requests,
        operating_system=operating_system,
        browser=browser,
        version=version,
        user_agent=user_agent,
        website_bot=website_bot,
    )
    site_mgr.main()
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: abstract_webtools
|
3
|
-
Version: 0.1.6.116
|
3
|
+
Version: 0.1.6.118
|
4
4
|
Summary: Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.
|
5
5
|
Home-page: https://github.com/AbstractEndeavors/abstract_essentials/tree/main/abstract_webtools
|
6
6
|
Author: putkoff
|
@@ -1,6 +1,6 @@
|
|
1
1
|
abstract_webtools/__init__.py,sha256=zNMp-9f0Q6BXWxR-tgHrEqKP8GeXw9z7VYzbqIeEydo,132
|
2
2
|
abstract_webtools/abstract_userpit.py,sha256=Rg_0Orx79rxqEePt6Sf-evGslPq5KLlTiL-P2w1u6ng,6462
|
3
|
-
abstract_webtools/abstract_usurpit.py,sha256=
|
3
|
+
abstract_webtools/abstract_usurpit.py,sha256=uiJ_AZ1aBbWOB19tKf_KEeaTv7YQFooIhv9EblzHdlk,7893
|
4
4
|
abstract_webtools/abstract_webtools.py,sha256=3NzGmJlZvrdVtEcUi2K5iUgWr1822IBPhIN9us2e2t0,3859
|
5
5
|
abstract_webtools/big_user_agent_list.py,sha256=5ZkrUWmfzYL5yaULREslh9ZiRQeITbSjqZlp2KQON3w,131923
|
6
6
|
abstract_webtools/domain_identifier.py,sha256=AvWlGD7C19rySa_J_Brxi3kz43LMWvGsshuuZNg7MvI,3320
|
@@ -42,7 +42,7 @@ abstract_webtools/managers/soupManager/soupManager.py,sha256=U3_o189-OWoBRaSCe2s
|
|
42
42
|
abstract_webtools/managers/urlManager/__init__.py,sha256=gaJCHeK91Z-eYsBnxgdhbIUten1-gbx-zqx70R6ag-Y,26
|
43
43
|
abstract_webtools/managers/urlManager/urlManager.py,sha256=vCFuLADmv3h7icaaoAsImGqb_49VizPY_ZvMl-C7PYk,7756
|
44
44
|
abstract_webtools/managers/videos/Heather brooke swallo from condom.mp4,sha256=h-bKFLAHt7pGLGu4EcMvSSox7BPRK0Nga3u813iMVKQ,8335544
|
45
|
-
abstract_webtools-0.1.6.
|
46
|
-
abstract_webtools-0.1.6.
|
47
|
-
abstract_webtools-0.1.6.
|
48
|
-
abstract_webtools-0.1.6.
|
45
|
+
abstract_webtools-0.1.6.118.dist-info/METADATA,sha256=izj6wB0Wy3jFc2KMAZFC1T08mS1M0OKVrQQxOjPlbYg,7289
|
46
|
+
abstract_webtools-0.1.6.118.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
|
47
|
+
abstract_webtools-0.1.6.118.dist-info/top_level.txt,sha256=2DMJ7RmjTcjCsa-uwAV0K6eXXlIIkFDEjBLg_uyCmCI,18
|
48
|
+
abstract_webtools-0.1.6.118.dist-info/RECORD,,
|
{abstract_webtools-0.1.6.116.dist-info → abstract_webtools-0.1.6.118.dist-info}/top_level.txt
RENAMED
File without changes
|