abstract-webtools 0.1.6.104__py3-none-any.whl → 0.1.6.106__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstract_webtools/k2s_downloader.py +18 -9
- {abstract_webtools-0.1.6.104.dist-info → abstract_webtools-0.1.6.106.dist-info}/METADATA +1 -1
- {abstract_webtools-0.1.6.104.dist-info → abstract_webtools-0.1.6.106.dist-info}/RECORD +5 -5
- {abstract_webtools-0.1.6.104.dist-info → abstract_webtools-0.1.6.106.dist-info}/WHEEL +0 -0
- {abstract_webtools-0.1.6.104.dist-info → abstract_webtools-0.1.6.106.dist-info}/top_level.txt +0 -0
@@ -11,6 +11,7 @@ from selenium.webdriver.support.ui import WebDriverWait
|
|
11
11
|
from selenium.webdriver.support import expected_conditions as EC
|
12
12
|
from abstract_security import *
|
13
13
|
from abstract_webtools import *
|
14
|
+
from abstract_utilities import safe_dump_to_file,safe_load_from_json
|
14
15
|
DOWNLOAD_DIR = os.path.abspath("./downloads")
|
15
16
|
class K2SDownloader:
|
16
17
|
def __init__(self,env_path=None,download_dir=None,json_file_path=None):
|
@@ -116,28 +117,36 @@ class dlsManager:
|
|
116
117
|
def __init__(self, downloader):
|
117
118
|
self.downloader = downloader
|
118
119
|
self.json_file_path = self.downloader.json_file_path
|
119
|
-
all_dls= None
|
120
|
+
all_dls = None
|
120
121
|
if self.json_file_path:
|
121
122
|
all_dls = safe_load_from_json(self.json_file_path)
|
122
|
-
self.all_dls = all_dls or
|
123
|
+
self.all_dls = all_dls or []
|
123
124
|
self.last_data = None
|
125
|
+
|
124
126
|
def is_prev_dl(self, data):
|
125
|
-
|
127
|
+
# Include metadata in data for duplicate checking
|
128
|
+
extended_data = data.copy()
|
129
|
+
if data.get('k2s'):
|
130
|
+
metadata = self.downloader.get_file_metadata(data['k2s'])
|
131
|
+
extended_data.update({
|
132
|
+
'filename': metadata['filename'],
|
133
|
+
'size': metadata['size']
|
134
|
+
})
|
135
|
+
if check_json_data(self.all_dls, extended_data):
|
126
136
|
self.last_data = None
|
127
137
|
return True
|
128
|
-
self.last_data =
|
138
|
+
self.last_data = extended_data
|
129
139
|
return False
|
130
140
|
|
131
141
|
def dl_k2s_link(self, k2s_link):
|
132
142
|
if k2s_link:
|
133
143
|
print(f"Downloading: {k2s_link}")
|
134
|
-
self.downloader.download_file(k2s_link)
|
144
|
+
metadata = self.downloader.download_file(k2s_link)
|
135
145
|
time.sleep(10)
|
136
|
-
if self.json_file_path:
|
146
|
+
if metadata and self.json_file_path and self.last_data:
|
147
|
+
self.last_data.update(metadata) # Merge download metadata
|
137
148
|
self.all_dls.append(self.last_data)
|
138
|
-
safe_dump_to_file(data=self.all_dls,
|
139
|
-
file_path=self.json_file_path)
|
140
|
-
|
149
|
+
safe_dump_to_file(data=self.all_dls, file_path=self.json_file_path)
|
141
150
|
|
142
151
|
def get_soup(url):
|
143
152
|
try:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: abstract_webtools
|
3
|
-
Version: 0.1.6.104
|
3
|
+
Version: 0.1.6.106
|
4
4
|
Summary: Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.
|
5
5
|
Home-page: https://github.com/AbstractEndeavors/abstract_essentials/tree/main/abstract_webtools
|
6
6
|
Author: putkoff
|
@@ -6,7 +6,7 @@ abstract_webtools/big_user_agent_list.py,sha256=5ZkrUWmfzYL5yaULREslh9ZiRQeITbSj
|
|
6
6
|
abstract_webtools/domain_identifier.py,sha256=AvWlGD7C19rySa_J_Brxi3kz43LMWvGsshuuZNg7MvI,3320
|
7
7
|
abstract_webtools/extention_list.py,sha256=gRSO4nMbuuXDYzd-ss4s64sS80ZHmUoazMCpgoKG5vE,4884
|
8
8
|
abstract_webtools/find_dirs.py,sha256=BlE4ruzMABqmv03NcutZ1j5N3pCc-Q4uNEAMpNolZCQ,2609
|
9
|
-
abstract_webtools/k2s_downloader.py,sha256=
|
9
|
+
abstract_webtools/k2s_downloader.py,sha256=aiUTLqFSNC_S9lOs98hfuUVj5agDbEiEVWmUqDLXdPU,6708
|
10
10
|
abstract_webtools/main.py,sha256=_I7pPXPkoLZOoYGLQDrSLGhGuQt6-PVyXEHZSmglk2g,1329
|
11
11
|
abstract_webtools/soup_gui.py,sha256=n95YAps1R6DpMwR4UbthSqQby0C5WHUa9tsW-f2qpLg,5184
|
12
12
|
abstract_webtools/url_grabber.py,sha256=pnCCev7ZIuM-6cAGTLmK5HfzZg_AX-fLcRpB6ZE70B8,10441
|
@@ -42,7 +42,7 @@ abstract_webtools/managers/soupManager/soupManager.py,sha256=U3_o189-OWoBRaSCe2s
|
|
42
42
|
abstract_webtools/managers/urlManager/__init__.py,sha256=gaJCHeK91Z-eYsBnxgdhbIUten1-gbx-zqx70R6ag-Y,26
|
43
43
|
abstract_webtools/managers/urlManager/urlManager.py,sha256=vCFuLADmv3h7icaaoAsImGqb_49VizPY_ZvMl-C7PYk,7756
|
44
44
|
abstract_webtools/managers/videos/Heather brooke swallo from condom.mp4,sha256=h-bKFLAHt7pGLGu4EcMvSSox7BPRK0Nga3u813iMVKQ,8335544
|
45
|
-
abstract_webtools-0.1.6.
|
46
|
-
abstract_webtools-0.1.6.104.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
|
47
|
-
abstract_webtools-0.1.6.104.dist-info/top_level.txt,sha256=2DMJ7RmjTcjCsa-uwAV0K6eXXlIIkFDEjBLg_uyCmCI,18
|
48
|
-
abstract_webtools-0.1.6.104.dist-info/RECORD,,
|
45
|
+
abstract_webtools-0.1.6.106.dist-info/METADATA,sha256=Sxl5mcl7sgkg4FHIpNmaGT7cKHSoGp5DXOGpmkLJSWI,7289
|
46
|
+
abstract_webtools-0.1.6.106.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
|
47
|
+
abstract_webtools-0.1.6.106.dist-info/top_level.txt,sha256=2DMJ7RmjTcjCsa-uwAV0K6eXXlIIkFDEjBLg_uyCmCI,18
|
48
|
+
abstract_webtools-0.1.6.106.dist-info/RECORD,,
|
File without changes
|
{abstract_webtools-0.1.6.104.dist-info → abstract_webtools-0.1.6.106.dist-info}/top_level.txt
RENAMED
File without changes
|