abstract-webtools 0.1.6.117__py3-none-any.whl → 0.1.6.118__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstract_webtools/abstract_usurpit.py +16 -5
- {abstract_webtools-0.1.6.117.dist-info → abstract_webtools-0.1.6.118.dist-info}/METADATA +1 -1
- {abstract_webtools-0.1.6.117.dist-info → abstract_webtools-0.1.6.118.dist-info}/RECORD +5 -5
- {abstract_webtools-0.1.6.117.dist-info → abstract_webtools-0.1.6.118.dist-info}/WHEEL +0 -0
- {abstract_webtools-0.1.6.117.dist-info → abstract_webtools-0.1.6.118.dist-info}/top_level.txt +0 -0
@@ -9,13 +9,25 @@ from abstract_utilities import *
|
|
9
9
|
|
10
10
|
# Import your custom classes/functions
|
11
11
|
# from your_module import linkManager, get_soup_mgr
|
12
|
-
def
|
12
|
+
def make_directory(directory=None,path=None):
|
13
|
+
if directory==None:
|
14
|
+
directory=os.getcwd()
|
15
|
+
if path:
|
16
|
+
directory = os.path.join(base_dir,path)
|
17
|
+
os.makedirs(directory,exist_ok=True)
|
18
|
+
return directory
|
19
|
+
def get_domain_name_from_url(url):
|
13
20
|
parsed_url = urlparse(url)
|
14
|
-
netloc = parsed_url
|
15
|
-
|
21
|
+
netloc = parsed_url.netloc
|
22
|
+
parsed_spl = netloc.split('.')
|
23
|
+
directory_name = '.'.join(parsed_spl[:-1])
|
16
24
|
if directory_name.startswith('www.'):
|
17
25
|
directory_name = directory_name[len('www.'):]
|
18
26
|
return directory_name
|
27
|
+
def get_domain_directory_from_url(url,base_dir=None):
|
28
|
+
base_dir =base_dir or os.getcwd()
|
29
|
+
domain_name = get_domain_name_from_url(url)
|
30
|
+
return make_directory(directory,domain_name)
|
19
31
|
# Configuration
|
20
32
|
def normalize_url(url, base_url):
|
21
33
|
"""
|
@@ -176,7 +188,6 @@ def test_download(url=None,directory=None):
|
|
176
188
|
site_mgr = usurpManager(url,output_dir)
|
177
189
|
|
178
190
|
def usurpit(url,output_dir=None,max_depth=None,wait_between_requests=None,operating_system=None, browser=None, version=None,user_agent=None,website_bot=None):
|
179
|
-
output_dir=
|
180
|
-
os.makedirs(output_dir,exist_ok=True)
|
191
|
+
output_dir = domain_directory_from_url(url) or make_directory(path='usurped')
|
181
192
|
site_mgr = usurpManager(url,output_dir=output_dir,max_depth=max_depth,wait_between_requests=wait_between_requests,operating_system=operating_system, browser=browser, version=version,user_agent=user_agent,website_bot=website_bot)
|
182
193
|
site_mgr.main()
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: abstract_webtools
|
3
|
-
Version: 0.1.6.117
|
3
|
+
Version: 0.1.6.118
|
4
4
|
Summary: Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.
|
5
5
|
Home-page: https://github.com/AbstractEndeavors/abstract_essentials/tree/main/abstract_webtools
|
6
6
|
Author: putkoff
|
@@ -1,6 +1,6 @@
|
|
1
1
|
abstract_webtools/__init__.py,sha256=zNMp-9f0Q6BXWxR-tgHrEqKP8GeXw9z7VYzbqIeEydo,132
|
2
2
|
abstract_webtools/abstract_userpit.py,sha256=Rg_0Orx79rxqEePt6Sf-evGslPq5KLlTiL-P2w1u6ng,6462
|
3
|
-
abstract_webtools/abstract_usurpit.py,sha256=
|
3
|
+
abstract_webtools/abstract_usurpit.py,sha256=uiJ_AZ1aBbWOB19tKf_KEeaTv7YQFooIhv9EblzHdlk,7893
|
4
4
|
abstract_webtools/abstract_webtools.py,sha256=3NzGmJlZvrdVtEcUi2K5iUgWr1822IBPhIN9us2e2t0,3859
|
5
5
|
abstract_webtools/big_user_agent_list.py,sha256=5ZkrUWmfzYL5yaULREslh9ZiRQeITbSjqZlp2KQON3w,131923
|
6
6
|
abstract_webtools/domain_identifier.py,sha256=AvWlGD7C19rySa_J_Brxi3kz43LMWvGsshuuZNg7MvI,3320
|
@@ -42,7 +42,7 @@ abstract_webtools/managers/soupManager/soupManager.py,sha256=U3_o189-OWoBRaSCe2s
|
|
42
42
|
abstract_webtools/managers/urlManager/__init__.py,sha256=gaJCHeK91Z-eYsBnxgdhbIUten1-gbx-zqx70R6ag-Y,26
|
43
43
|
abstract_webtools/managers/urlManager/urlManager.py,sha256=vCFuLADmv3h7icaaoAsImGqb_49VizPY_ZvMl-C7PYk,7756
|
44
44
|
abstract_webtools/managers/videos/Heather brooke swallo from condom.mp4,sha256=h-bKFLAHt7pGLGu4EcMvSSox7BPRK0Nga3u813iMVKQ,8335544
|
45
|
-
abstract_webtools-0.1.6.
|
46
|
-
abstract_webtools-0.1.6.
|
47
|
-
abstract_webtools-0.1.6.
|
48
|
-
abstract_webtools-0.1.6.
|
45
|
+
abstract_webtools-0.1.6.118.dist-info/METADATA,sha256=izj6wB0Wy3jFc2KMAZFC1T08mS1M0OKVrQQxOjPlbYg,7289
|
46
|
+
abstract_webtools-0.1.6.118.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
|
47
|
+
abstract_webtools-0.1.6.118.dist-info/top_level.txt,sha256=2DMJ7RmjTcjCsa-uwAV0K6eXXlIIkFDEjBLg_uyCmCI,18
|
48
|
+
abstract_webtools-0.1.6.118.dist-info/RECORD,,
|
File without changes
|
{abstract_webtools-0.1.6.117.dist-info → abstract_webtools-0.1.6.118.dist-info}/top_level.txt
RENAMED
File without changes
|