abstract-webtools 0.1.6.101__tar.gz → 0.1.6.103__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/PKG-INFO +1 -1
- {abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/setup.py +1 -1
- {abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/src/abstract_webtools/k2s_downloader.py +41 -8
- {abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/src/abstract_webtools.egg-info/PKG-INFO +1 -1
- {abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/README.md +0 -0
- {abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/pyproject.toml +0 -0
- {abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/setup.cfg +0 -0
- {abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/src/abstract_webtools/__init__.py +0 -0
- {abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/src/abstract_webtools/abstract_usurpit.py +0 -0
- {abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/src/abstract_webtools/abstract_webtools.py +0 -0
- {abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/src/abstract_webtools/big_user_agent_list.py +0 -0
- {abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/src/abstract_webtools/domain_identifier.py +0 -0
- {abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/src/abstract_webtools/extention_list.py +0 -0
- {abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/src/abstract_webtools/find_dirs.py +0 -0
- {abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/src/abstract_webtools/main.py +0 -0
- {abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/src/abstract_webtools/managers/__init__.py +0 -0
- {abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/src/abstract_webtools/managers/allss//.py" +0 -0
- {abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/src/abstract_webtools/managers/cipherManager.py +0 -0
- {abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/src/abstract_webtools/managers/crawlManager.py +0 -0
- {abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/src/abstract_webtools/managers/crawlmgr2.py +0 -0
- {abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/src/abstract_webtools/managers/curlMgr.py +0 -0
- {abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/src/abstract_webtools/managers/domainManager.py +0 -0
- {abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/src/abstract_webtools/managers/dynamicRateLimiter.py +0 -0
- {abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/src/abstract_webtools/managers/get_test.py +0 -0
- {abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/src/abstract_webtools/managers/linkManager/__init__.py +0 -0
- {abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/src/abstract_webtools/managers/linkManager/linkManager.py +0 -0
- {abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/src/abstract_webtools/managers/mySocketClient.py +0 -0
- {abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/src/abstract_webtools/managers/networkManager.py +0 -0
- {abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/src/abstract_webtools/managers/requestManager/__init__.py +0 -0
- {abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/src/abstract_webtools/managers/requestManager/requestManager.py +0 -0
- {abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/src/abstract_webtools/managers/seleniumManager.py +0 -0
- {abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/src/abstract_webtools/managers/soupManager/__init__.py +0 -0
- {abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/src/abstract_webtools/managers/soupManager/asoueces.py +0 -0
- {abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/src/abstract_webtools/managers/soupManager/soupManager.py +0 -0
- {abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/src/abstract_webtools/managers/sslManager.py +0 -0
- {abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/src/abstract_webtools/managers/tlsAdapter.py +0 -0
- {abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/src/abstract_webtools/managers/urlManager/__init__.py +0 -0
- {abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/src/abstract_webtools/managers/urlManager/urlManager.py +0 -0
- {abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/src/abstract_webtools/managers/userAgentManager.py +0 -0
- {abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/src/abstract_webtools/managers/videoDownloader.py +0 -0
- {abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/src/abstract_webtools/managers/videoDownloader2.py +0 -0
- {abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/src/abstract_webtools/soup_gui.py +0 -0
- {abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/src/abstract_webtools/url_grabber.py +0 -0
- {abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/src/abstract_webtools/url_grabber_new.py +0 -0
- {abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/src/abstract_webtools.egg-info/SOURCES.txt +0 -0
- {abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/src/abstract_webtools.egg-info/dependency_links.txt +0 -0
- {abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/src/abstract_webtools.egg-info/requires.txt +0 -0
- {abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/src/abstract_webtools.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: abstract_webtools
|
3
|
-
Version: 0.1.6.101
|
3
|
+
Version: 0.1.6.103
|
4
4
|
Summary: Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.
|
5
5
|
Home-page: https://github.com/AbstractEndeavors/abstract_essentials/tree/main/abstract_webtools
|
6
6
|
Author: putkoff
|
@@ -4,7 +4,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
|
|
4
4
|
long_description = fh.read()
|
5
5
|
setuptools.setup(
|
6
6
|
name='abstract_webtools',
|
7
|
-
version='0.1.6.101',
|
7
|
+
version='0.1.6.103',
|
8
8
|
author='putkoff',
|
9
9
|
author_email='partners@abstractendeavors.com',
|
10
10
|
description='Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.',
|
{abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/src/abstract_webtools/k2s_downloader.py
RENAMED
@@ -13,8 +13,9 @@ from abstract_security import *
|
|
13
13
|
from abstract_webtools import *
|
14
14
|
DOWNLOAD_DIR = os.path.abspath("./downloads")
|
15
15
|
class K2SDownloader:
|
16
|
-
def __init__(self,env_path=None,download_dir=None):
|
16
|
+
def __init__(self,env_path=None,download_dir=None,json_file_path=None):
|
17
17
|
self.download_dir = download_dir or DOWNLOAD_DIR
|
18
|
+
self.json_file_path = json_file_path
|
18
19
|
os.makedirs(self.download_dir, exist_ok=True)
|
19
20
|
self.env_path = env_path
|
20
21
|
self.session = requests.Session()
|
@@ -95,18 +96,36 @@ class K2SDownloader:
|
|
95
96
|
if 'filename=' in cd:
|
96
97
|
return cd.split('filename=')[-1].strip('"')
|
97
98
|
return url.split('/')[-1].split('?')[0]
|
99
|
+
def get_json_key_value(json_data,key):
|
100
|
+
if json_data and isinstance(json_data,dict):
|
101
|
+
return json_data.get(key)
|
102
|
+
def compare_keys(json_data,comp_json_data,key):
|
103
|
+
json_key_value = get_json_key_value(json_data,key)
|
104
|
+
comp_json_key_value = get_json_key_value(comp_json_data,key)
|
105
|
+
if json_key_value and comp_json_key_value and comp_json_key_value==json_key_value:
|
106
|
+
return True
|
107
|
+
def check_json_data(json_list,new_data):
|
108
|
+
keys = ['k2s','link','name']
|
109
|
+
for json_data in json_list:
|
110
|
+
for key in keys:
|
111
|
+
result = compare_keys(json_data,new_data,key)
|
112
|
+
if result:
|
113
|
+
return result
|
98
114
|
|
99
115
|
class dlsManager:
|
100
116
|
def __init__(self, downloader):
|
101
117
|
self.downloader = downloader
|
102
|
-
self.
|
103
|
-
|
118
|
+
self.json_file_path = self.downloader.json_file_path
|
119
|
+
all_dls= None
|
120
|
+
if self.json_file_path:
|
121
|
+
all_dls = safe_load_from_json(self.json_file_path)
|
122
|
+
self.all_dls = all_dls or []
|
123
|
+
self.last_data = None
|
104
124
|
def is_prev_dl(self, data):
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
self.all_dls.append(data)
|
125
|
+
if check_json_data(self.all_dls,data):
|
126
|
+
self.last_data = None
|
127
|
+
return True
|
128
|
+
self.last_data = data
|
110
129
|
return False
|
111
130
|
|
112
131
|
def dl_k2s_link(self, k2s_link):
|
@@ -114,6 +133,10 @@ class dlsManager:
|
|
114
133
|
print(f"Downloading: {k2s_link}")
|
115
134
|
self.downloader.download_file(k2s_link)
|
116
135
|
time.sleep(10)
|
136
|
+
if self.json_file_path:
|
137
|
+
self.all_dls.append(self.last_data)
|
138
|
+
safe_dump_to_file(data=self.all_dls,
|
139
|
+
file_path=self.json_file_path)
|
117
140
|
|
118
141
|
|
119
142
|
def get_soup(url):
|
@@ -128,3 +151,13 @@ def get_soup(url):
|
|
128
151
|
def get_k2s_link(soup):
|
129
152
|
match = re.search(r'https://k2s\.cc/file/[^"<]+', str(soup))
|
130
153
|
return match.group(0) if match else None
|
154
|
+
|
155
|
+
def get_sections_content(content,get_post_attribute,dls_mgr):
|
156
|
+
if not content:
|
157
|
+
return []
|
158
|
+
for section in content:
|
159
|
+
data = get_post_attribute(section)
|
160
|
+
if data and data.get('k2s') and not dls_mgr.is_prev_dl(data):
|
161
|
+
dls_mgr.dl_k2s_link(data['k2s'])
|
162
|
+
results.append(data)
|
163
|
+
return results
|
{abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/src/abstract_webtools.egg-info/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: abstract_webtools
|
3
|
-
Version: 0.1.6.101
|
3
|
+
Version: 0.1.6.103
|
4
4
|
Summary: Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.
|
5
5
|
Home-page: https://github.com/AbstractEndeavors/abstract_essentials/tree/main/abstract_webtools
|
6
6
|
Author: putkoff
|
File without changes
|
File without changes
|
File without changes
|
{abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/src/abstract_webtools/__init__.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/src/abstract_webtools/extention_list.py
RENAMED
File without changes
|
{abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/src/abstract_webtools/find_dirs.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/src/abstract_webtools/soup_gui.py
RENAMED
File without changes
|
{abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/src/abstract_webtools/url_grabber.py
RENAMED
File without changes
|
{abstract_webtools-0.1.6.101 → abstract_webtools-0.1.6.103}/src/abstract_webtools/url_grabber_new.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|