abstract-webtools 0.1.5.81__py3-none-any.whl → 0.1.5.83__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstract_webtools/__init__.py +3 -2
- abstract_webtools/abstract_webtools.py +4 -1756
- abstract_webtools/managers/__init__.py +14 -0
- abstract_webtools/managers/cipherManager.py +38 -0
- abstract_webtools/managers/crawlManager.py +187 -0
- abstract_webtools/managers/domainManager.py +48 -0
- abstract_webtools/managers/dynamicRateLimiter.py +138 -0
- abstract_webtools/managers/linkManager.py +189 -0
- abstract_webtools/managers/mySocketClient.py +46 -0
- abstract_webtools/managers/networkManager.py +15 -0
- abstract_webtools/managers/requestManager.py +348 -0
- abstract_webtools/managers/seleniumManager.py +85 -0
- abstract_webtools/managers/soupManager.py +314 -0
- abstract_webtools/managers/sslManager.py +21 -0
- abstract_webtools/managers/tlsAdapter.py +27 -0
- abstract_webtools/managers/urlManager.py +225 -0
- abstract_webtools/managers/userAgentManager.py +42 -0
- abstract_webtools/managers/videoDownloader.py +205 -0
- abstract_webtools/url_grabber.py +1 -1
- {abstract_webtools-0.1.5.81.dist-info → abstract_webtools-0.1.5.83.dist-info}/METADATA +6 -6
- abstract_webtools-0.1.5.83.dist-info/RECORD +28 -0
- {abstract_webtools-0.1.5.81.dist-info → abstract_webtools-0.1.5.83.dist-info}/WHEEL +1 -1
- abstract_webtools-0.1.5.81.dist-info/RECORD +0 -12
- {abstract_webtools-0.1.5.81.dist-info → abstract_webtools-0.1.5.83.dist-info}/LICENSE +0 -0
- {abstract_webtools-0.1.5.81.dist-info → abstract_webtools-0.1.5.83.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,205 @@
|
|
1
|
+
import os
import shutil
|
2
|
+
class VideoDownloader:
    """
    Download one or more videos with yt-dlp while a second thread watches the transfer speed.

    Args:
        link (str or list): The URL(s) of the video(s) to be downloaded.
        temp_directory (str or None): Directory for in-progress files
            (default is None, which uses ``<video_directory>/temp_files``).
        video_directory (str or None): Directory for finished videos
            (default is None, which uses ``videos`` in the current working directory).
        remove_existing (bool): When a freshly downloaded file collides with a file of the
            same name in ``video_directory``, replace the old file (True, the default) or
            keep the old file and discard the new download (False).

    Methods:
        count_outliers(speed, threshold): Track consecutive below-threshold speed samples.
        filter_outliers(speeds): Drop speed samples more than 25% below the average.
        remove_temps(file_name): Delete temp files whose name starts with ``file_name``.
        move_video(): Move the downloaded video to the final directory.
        yt_dlp_downloader(url, ydl_opts=None, download=True): Run ``extract_info`` through yt-dlp.
        progress_callback(d): yt-dlp progress hook.
        download(): Download every URL in ``video_urls``.
        monitor(): Watch the download and restart it when it looks stalled.
        start(): Start and join the download and monitoring threads.

    Note:
        - Constructing an instance calls ``start()``, which joins both threads, so the
          constructor only returns once the download and monitor threads have ended.
        - ``yt_dlp``, ``ThreadManager`` and ``get_time_stamp`` are used here but are not
          imported in the part of the module visible in this view.
          NOTE(review): confirm they are in scope at import time.
    """

    def __init__(self, link, temp_directory=None, video_directory=None, remove_existing=True):
        if video_directory is None:
            video_directory = os.path.join(os.getcwd(), 'videos')
        if temp_directory is None:
            temp_directory = os.path.join(video_directory, 'temp_files')
        self.thread_manager = ThreadManager()
        # 'pause_event' is the name monitor() later passes to thread_manager.wait().
        self.pause_event = self.thread_manager.add_thread('pause_event')
        self.link = link
        self.temp_directory = temp_directory
        self.video_directory = video_directory
        self.remove_existing = remove_existing
        self.video_urls = self.link if isinstance(self.link, list) else [self.link]
        self.starttime = None  # legacy spelling, kept in case external code reads it
        self.start_time = None
        self.downloaded = 0
        self.time_interval = 60
        self.monitoring = True
        self.temp_file_name = None
        self.file_name = None
        # The attributes below are read by move_video(), monitor() and count_outliers();
        # define them up front so those methods never hit AttributeError.
        self.temp_file_path = None
        self.complete_file_path = None
        self.info_dict = None
        self.status_dict = {}
        self.outlier_count = 0
        self.dl_speed = None
        self.dl_eta = None
        self.total_bytes_est = None
        self.percent_speed = None
        self.percent = None
        self.speed_track = []
        self.video_url = None
        self.last_checked = get_time_stamp()
        self.num = 0
        self.start()

    def count_outliers(self, speed, threshold):
        """Increment the consecutive-slow-sample counter, or reset it on a sample at/above threshold."""
        if speed < threshold:
            self.outlier_count += 1
        else:
            self.outlier_count = 0

    def filter_outliers(self, speeds):
        """
        Return ``speeds`` without the samples that are more than 25% below the average.

        The most recent sample (``speeds[-1]``) is also fed to ``count_outliers`` so that
        ``outlier_count`` reflects how many consecutive recent samples were slow.
        An empty input is returned unchanged (and resets ``outlier_count``).
        """
        if not speeds:
            # Nothing to average; avoids ZeroDivisionError.
            self.outlier_count = 0
            return speeds

        initial_avg = sum(speeds) / len(speeds)
        threshold = initial_avg * 0.75  # 25% under average
        filtered_speeds = [speed for speed in speeds if speed >= threshold]

        if filtered_speeds:
            self.count_outliers(speeds[-1], threshold)
            return filtered_speeds

        # Every sample fell under the threshold: keep the data and reset the counter.
        self.outlier_count = 0
        return speeds

    def remove_temps(self, file_name):
        """Delete every entry of ``temp_directory`` whose name starts with ``file_name``."""
        if not file_name or not os.path.isdir(self.temp_directory):
            return
        for temp_vid in os.listdir(self.temp_directory):
            if temp_vid.startswith(file_name):
                os.remove(os.path.join(self.temp_directory, temp_vid))
                print(f"removing {temp_vid} from {self.temp_directory}")

    def move_video(self):
        """
        Move the finished temp file into ``video_directory``.

        Returns:
            bool: True when the video is in ``video_directory`` after the call (either it
            was just moved or it was already there), False when neither the temp file nor
            the final file exists.
        """
        temp_path = self.temp_file_path
        final_path = self.complete_file_path

        if temp_path and os.path.exists(temp_path):
            destination_taken = bool(final_path) and os.path.exists(final_path)
            if destination_taken and self.remove_existing:
                # shutil.move refuses to overwrite a file inside a destination directory,
                # so honour remove_existing by deleting the old copy first.
                os.remove(final_path)
                destination_taken = False
            if not destination_taken:
                shutil.move(temp_path, self.video_directory)
                print(f"moving {self.file_name} from {self.temp_directory} to {self.video_directory}")
                self.remove_temps(self.file_name)
                return True

        if final_path and os.path.exists(final_path):
            print(f"{self.file_name} already existed in {self.video_directory}; removing it from {self.temp_directory}")
            self.remove_temps(self.file_name)
            return True
        return False

    def yt_dlp_downloader(self, url, ydl_opts=None, download=True):
        """
        Run ``YoutubeDL.extract_info`` for ``url`` and store the result in ``self.info_dict``.

        Args:
            url (str): Video URL.
            ydl_opts (dict or None): Options passed to ``yt_dlp.YoutubeDL`` (None means ``{}``).
            download (bool): Forwarded to ``extract_info``.

        Returns:
            bool: True on success, False when any exception was raised (the error is printed).
        """
        if ydl_opts is None:
            ydl_opts = {}
        try:
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                self.info_dict = ydl.extract_info(url=url, download=download)
            return True
        except Exception as exc:
            # Still best-effort, but no longer silent: a missing yt_dlp or a network
            # error is reported instead of disappearing.
            print(f"yt_dlp failed for {url}: {exc!r}")
            return False

    def progress_callback(self, d):
        """
        yt-dlp progress hook: remember the latest status dict, move the file when the
        download finishes, print a progress line at most every 5 time units and collect
        speed samples for the stall detector.
        """
        self.status_dict = d
        if d.get('status') == 'finished':
            print("Done downloading, moving video to final directory...")
            self.move_video()
            return

        now = get_time_stamp()
        if now - self.last_checked > 5:
            progress_line = d.get('_default_template')
            if progress_line is not None:
                print(progress_line)
            self.last_checked = now
            # Start sampling only after more than six 5-unit intervals have elapsed.
            # The original wrote `now - self.start_time/5`, which divides only the start
            # time and is therefore always true.
            # NOTE(review): assumes get_time_stamp() returns seconds — confirm.
            speed = d.get('speed')
            if self.start_time is not None and speed is not None and (now - self.start_time) / 5 > 6:
                self.speed_track.append(speed)
                self.speed_track = self.filter_outliers(self.speed_track)

    @staticmethod
    def _safe_file_name(name):
        """Replace path separators in ``name`` so it stays a single path component."""
        for separator in {os.sep, os.altsep, '/'} - {None}:
            name = name.replace(separator, '_')
        return name

    def _download_one(self, video_url):
        """Fetch metadata for ``video_url`` and download it unless the file already exists."""
        self.video_url = video_url
        self.info_dict = None
        result = self.yt_dlp_downloader(url=self.video_url, ydl_opts={'quiet': True, 'no_warnings': True}, download=False)
        if not result or self.info_dict is None:
            print(f"could not find video info from {self.video_url} Skipping download.")
            return

        self.start_time = get_time_stamp()
        self.downloaded = 0
        self.video_title = self.info_dict.get('title', None)
        self.video_ext = self.info_dict.get('ext', 'mp4')
        self.file_name = self._safe_file_name(f"{self.video_title}.{self.video_ext}")
        self.temp_file_path = os.path.join(self.temp_directory, self.file_name)
        self.complete_file_path = os.path.join(self.video_directory, self.file_name)

        # move_video() doubles as the "already downloaded?" check.
        if self.move_video():
            print(f"The video from {self.video_url} already exists in the directory {self.video_directory}. Skipping download.")
            return

        self.dl_speed = []
        self.percent = None
        self.dl_eta = None
        self.total_bytes_est = None
        self.percent_speed = None
        self.speed_track = []
        self.outlier_count = 0
        ydl_opts = {
            # outtmpl is a yt-dlp output template; a literal '%' must be written as '%%'.
            'outtmpl': self.temp_file_path.replace('%', '%%'),
            'noprogress': True,
            'progress_hooks': [self.progress_callback],
        }

        print("Starting download...")
        result = self.yt_dlp_downloader(url=self.video_url, ydl_opts=ydl_opts, download=True)
        if result:
            print("Download finished!")
        else:
            print(f'error downloding {self.video_url}')
        self.move_video()

    def download(self):
        """
        Download every URL in ``video_urls`` that is not the one currently in progress,
        then tell the monitor loop to stop.
        """
        os.makedirs(self.video_directory, exist_ok=True)
        os.makedirs(self.temp_directory, exist_ok=True)
        try:
            for index, video_url in enumerate(self.video_urls):
                self.num = index
                # Skip the URL that is already being handled (relevant when monitor()
                # re-enters start() while a download is in flight).
                if self.video_url is not None and video_url == self.video_url:
                    continue
                self._download_one(video_url)
        finally:
            # Always release the monitor loop, including for an empty URL list or an
            # unexpected exception, so start() cannot block forever on its join.
            self.monitoring = False
            self.time_interval = 0

    def monitor(self):
        """
        While ``monitoring`` is set, wake up every ``time_interval`` and restart the
        download when at least 3 consecutive speed samples were slow and the reported
        ETA exceeds 10 minutes.
        """
        while self.monitoring:
            self.thread_manager.wait(name='pause_event', n=self.time_interval)  # check every minute
            if not self.monitoring:
                break
            eta = self.status_dict.get('eta')
            if eta is not None and self.outlier_count >= 3 and (eta / 60) > 10:
                self.start()

    def start(self):
        """Register, start and join the download and monitor threads."""
        # NOTE(review): the original registered one thread with `target=` and the other
        # with `target_function=`; both now use `target_function` — confirm against
        # ThreadManager.add_thread's signature.
        self.thread_manager.add_thread(name='download_thread', target_function=self.download)
        self.thread_manager.add_thread(name='monitor_thread', target_function=self.monitor)
        self.thread_manager.start(name='download_thread')
        self.thread_manager.start(name='monitor_thread')
        self.thread_manager.join(name='download_thread')
        self.thread_manager.join(name='monitor_thread')
|
197
|
+
class VideoDownloaderSingleton():
    """Cache holding the most recently requested ``VideoDownloader``."""

    _instance = None
    # Request parameters the cached instance was built for.
    _instance_key = None

    @staticmethod
    def get_instance(url_manager, request_manager, title=None, video_extention='mp4', download_directory=os.getcwd(), user_agent=None, download=True, get_info=False):
        """
        Return the cached ``VideoDownloader``, building a new one when the request differs.

        A new downloader is created when no instance exists yet or when any of url,
        ``title``, ``video_extention``, ``download_directory`` or ``user_agent`` differs
        from the values used for the cached instance.

        The original body read an undefined name ``url`` and passed keyword arguments
        (``url``, ``title``, ``video_extention``, ...) that ``VideoDownloader.__init__``
        does not accept, so every call raised. ``request_manager``, ``title``,
        ``video_extention``, ``user_agent``, ``download`` and ``get_info`` remain in the
        signature for backward compatibility; ``VideoDownloader`` has no matching options.

        Note: the ``download_directory`` default is evaluated once, when the class is defined.
        """
        # NOTE(review): presumably url_manager exposes the target as `.url`; a plain URL
        # string (or list of URLs) is accepted as a fallback — confirm against callers.
        url = getattr(url_manager, 'url', url_manager)
        key = (url, title, video_extention, download_directory, user_agent)
        if VideoDownloaderSingleton._instance is None or VideoDownloaderSingleton._instance_key != key:
            VideoDownloaderSingleton._instance = VideoDownloader(link=url, video_directory=download_directory)
            VideoDownloaderSingleton._instance_key = key
        return VideoDownloaderSingleton._instance
|
abstract_webtools/url_grabber.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
from abstract_gui import make_component,sg
|
2
2
|
import inspect
|
3
3
|
import re
|
4
|
-
from . import
|
4
|
+
from .managers import *
|
5
5
|
window = None
|
6
6
|
|
7
7
|
def get_attrs(values):
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
|
-
Name:
|
3
|
-
Version: 0.1.5.
|
2
|
+
Name: abstract_webtools
|
3
|
+
Version: 0.1.5.83
|
4
4
|
Summary: Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.
|
5
5
|
Home-page: https://github.com/AbstractEndeavors/abstract_essentials/tree/main/abstract_webtools
|
6
6
|
Author: putkoff
|
@@ -13,10 +13,10 @@ Classifier: Programming Language :: Python :: 3.11
|
|
13
13
|
Requires-Python: >=3.6
|
14
14
|
Description-Content-Type: text/markdown
|
15
15
|
License-File: LICENSE
|
16
|
-
Requires-Dist:
|
17
|
-
Requires-Dist:
|
18
|
-
Requires-Dist:
|
19
|
-
Requires-Dist:
|
16
|
+
Requires-Dist: abstract-utilities >=0.2.2.30
|
17
|
+
Requires-Dist: PySimpleGUI >=4.60.5
|
18
|
+
Requires-Dist: urllib3 >=2.0.4
|
19
|
+
Requires-Dist: requests >=2.31.0
|
20
20
|
|
21
21
|
# Abstract WebTools
|
22
22
|
Provides utilities for inspecting and parsing web content, including React components and URL utilities, with enhanced capabilities for managing HTTP requests and TLS configurations.
|
@@ -0,0 +1,28 @@
|
|
1
|
+
abstract_webtools/__init__.py,sha256=rPt-BXa6ksplRXmZoJtn-vMGEPZGpEwfxqqOqZo_EWk,68
|
2
|
+
abstract_webtools/abstract_webtools.py,sha256=6pYoObMhvOnjLiw1oQaNBoX3ipr7QUJYve0YEjoXvC8,3813
|
3
|
+
abstract_webtools/big_user_agent_list.py,sha256=5ZkrUWmfzYL5yaULREslh9ZiRQeITbSjqZlp2KQON3w,131923
|
4
|
+
abstract_webtools/main.py,sha256=_I7pPXPkoLZOoYGLQDrSLGhGuQt6-PVyXEHZSmglk2g,1329
|
5
|
+
abstract_webtools/soup_gui.py,sha256=n95YAps1R6DpMwR4UbthSqQby0C5WHUa9tsW-f2qpLg,5184
|
6
|
+
abstract_webtools/url_grabber.py,sha256=AJN8F6TQ_Kwv3sd8YWXffmC4EHRRrKizuJY1uEXNMlM,10752
|
7
|
+
abstract_webtools/url_grabber_new.py,sha256=Oh2Kc0gBScCo0xpopNsg8JE5lIbPuzZVKM5f5GoZmw0,3454
|
8
|
+
abstract_webtools/managers/__init__.py,sha256=5aIpbdUsDWTrhPUAjfIKnG54OULqOKan9LBL5EIUllo,407
|
9
|
+
abstract_webtools/managers/cipherManager.py,sha256=NHQGdR11eNSm-1H-GezD5dyQgsPTJwY5kczt8Sher2s,1621
|
10
|
+
abstract_webtools/managers/crawlManager.py,sha256=8ico7qFM8sXluIx3Rzgr5TZMmwsb_bnx2eYoS2Vss6I,7661
|
11
|
+
abstract_webtools/managers/domainManager.py,sha256=dluzexs4GdI248kXA7sRaRUm020_dGO86ZLo20xLWOI,1788
|
12
|
+
abstract_webtools/managers/dynamicRateLimiter.py,sha256=gopQcQo50JG2D0KcyepNCIQ_1uDQEBIHBzWf4R2Wgy0,7617
|
13
|
+
abstract_webtools/managers/linkManager.py,sha256=m6y9s8jknrTX8RtOAFKeHd4yd23G7Rgf0T7Sp7wmHUw,12180
|
14
|
+
abstract_webtools/managers/mySocketClient.py,sha256=-j1Q8Ds9RCSbjZdx3ZF9mVpgwxaO0BBssanUcpYVQoY,2045
|
15
|
+
abstract_webtools/managers/networkManager.py,sha256=Op2QDXrP-gmm0tCToe-Ryt9xuOtMppcN2KLKP1WZiu0,952
|
16
|
+
abstract_webtools/managers/requestManager.py,sha256=ZAood3uhs6mi--R8RLTtcNFxACKhu-Jf3m-u4fFMY2Q,17029
|
17
|
+
abstract_webtools/managers/seleniumManager.py,sha256=toG0aM72UO1LMicXGSVm3PCf88yQLJqLpFEMNT9gLU0,3014
|
18
|
+
abstract_webtools/managers/soupManager.py,sha256=03qfg4Ww6T3F9HP0F2RoqxZ-jBFhnwE-lS7m3EOcRCI,14717
|
19
|
+
abstract_webtools/managers/sslManager.py,sha256=C-QgQw9CW84uOE5kx2MPjC3RsLbE2JQqdwdTs0H4ecc,1370
|
20
|
+
abstract_webtools/managers/tlsAdapter.py,sha256=XZSMZz9EUOhv-h3_Waf6mjV1dA3oN_M_oWuoo4VZ_HE,1454
|
21
|
+
abstract_webtools/managers/urlManager.py,sha256=OtEBV26RHHuxtbYLLX56AlMv83s9kOIcsDvlMZ9bKZc,8647
|
22
|
+
abstract_webtools/managers/userAgentManager.py,sha256=33SB2p2FG7EYZl7l2iYm1U4gI9PcdkGTZHw5lg_Ogrw,1653
|
23
|
+
abstract_webtools/managers/videoDownloader.py,sha256=6G_aLc05BTMUYUWc7iqYtHF_BaR7DnCNK_NJ-QnjsYY,10531
|
24
|
+
abstract_webtools-0.1.5.83.dist-info/LICENSE,sha256=g3WEJFiVS27HyCGRTwKSsMLyciMaGFdWcZGOe1QalZk,3877
|
25
|
+
abstract_webtools-0.1.5.83.dist-info/METADATA,sha256=HDnvNqx5Ho0uLgBRSBWh_4TeV5E6tGyOLtSH1841TTw,15858
|
26
|
+
abstract_webtools-0.1.5.83.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
|
27
|
+
abstract_webtools-0.1.5.83.dist-info/top_level.txt,sha256=2DMJ7RmjTcjCsa-uwAV0K6eXXlIIkFDEjBLg_uyCmCI,18
|
28
|
+
abstract_webtools-0.1.5.83.dist-info/RECORD,,
|
@@ -1,12 +0,0 @@
|
|
1
|
-
abstract_webtools/__init__.py,sha256=VtajT5HAiewyI3wKcKU-r5OVDDyMjYqLWVvTmPn8ToY,80
|
2
|
-
abstract_webtools/abstract_webtools.py,sha256=gX-jWctjrzx1IJUg8y8dVzSZYWyX0jve4qS4ZthnL0A,90125
|
3
|
-
abstract_webtools/big_user_agent_list.py,sha256=5ZkrUWmfzYL5yaULREslh9ZiRQeITbSjqZlp2KQON3w,131923
|
4
|
-
abstract_webtools/main.py,sha256=_I7pPXPkoLZOoYGLQDrSLGhGuQt6-PVyXEHZSmglk2g,1329
|
5
|
-
abstract_webtools/soup_gui.py,sha256=n95YAps1R6DpMwR4UbthSqQby0C5WHUa9tsW-f2qpLg,5184
|
6
|
-
abstract_webtools/url_grabber.py,sha256=W8nXAu-b3Ywn0ihmN78X1D6vjvMwfgX0Ry-FP5YDT4U,10876
|
7
|
-
abstract_webtools/url_grabber_new.py,sha256=Oh2Kc0gBScCo0xpopNsg8JE5lIbPuzZVKM5f5GoZmw0,3454
|
8
|
-
abstract_webtools-0.1.5.81.dist-info/LICENSE,sha256=g3WEJFiVS27HyCGRTwKSsMLyciMaGFdWcZGOe1QalZk,3877
|
9
|
-
abstract_webtools-0.1.5.81.dist-info/METADATA,sha256=Fd-vyebRGojkrOjc6udWELuyDR3gbrD_6Weund2w_nQ,15866
|
10
|
-
abstract_webtools-0.1.5.81.dist-info/WHEEL,sha256=AtBG6SXL3KF_v0NxLf0ehyVOh0cold-JbJYXNGorC6Q,92
|
11
|
-
abstract_webtools-0.1.5.81.dist-info/top_level.txt,sha256=2DMJ7RmjTcjCsa-uwAV0K6eXXlIIkFDEjBLg_uyCmCI,18
|
12
|
-
abstract_webtools-0.1.5.81.dist-info/RECORD,,
|
File without changes
|
File without changes
|