abstract-webtools 0.1.5.81__py3-none-any.whl → 0.1.5.83__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
abstract_webtools/managers/videoDownloader.py
@@ -0,0 +1,205 @@
+ import os
+ import shutil   # used by move_video() below
+ import yt_dlp   # used by yt_dlp_downloader() below
+ # ThreadManager and get_time_stamp are referenced below but not defined or
+ # imported in this file; they are presumably supplied by the package's shared
+ # utilities (abstract-utilities is a declared dependency).
+ class VideoDownloader:
+     """
+     VideoDownloader is a class for downloading videos from URLs using YouTube-DL.
+ 
+     Args:
+         link (str or list): The URL(s) of the video(s) to be downloaded.
+         temp_directory (str or None): The directory to store temporary video files (default is None, uses video_directory/temp_files).
+         video_directory (str or None): The directory to store downloaded videos (default is None, uses 'videos' in the current working directory).
+         remove_existing (bool): Whether to remove existing video files with the same name (default is True).
+ 
+     Methods:
+         count_outliers(speed, threshold): Count speed outliers below the threshold.
+         filter_outliers(speeds): Filter out speed outliers in the list of speeds.
+         remove_temps(file_name): Remove temporary video files based on the file name.
+         move_video(): Move the downloaded video to the final directory.
+         yt_dlp_downloader(url, ydl_opts={}, download=True): Download video information using YouTube-DL.
+         progress_callback(d): Callback function to monitor download progress.
+         download(): Download video(s) based on the provided URL(s).
+         monitor(): Monitor the download progress.
+         start(): Start the download and monitoring threads.
+ 
+     Note:
+         - The VideoDownloader class uses YouTube-DL to download videos.
+         - It allows downloading from multiple URLs.
+         - You need to have YouTube-DL installed to use this class.
+     """
+     def __init__(self, link,temp_directory=None,video_directory=None,remove_existing=True):
+         if video_directory==None:
+             video_directory=os.path.join(os.getcwd(),'videos')
+         if temp_directory == None:
+             temp_directory=os.path.join(video_directory,'temp_files')
+         self.thread_manager = ThreadManager()
+         self.pause_event = self.thread_manager.add_thread('pause_event')
+         self.link = link
+         self.temp_directory = temp_directory
+         self.video_directory = video_directory
+         self.remove_existing=remove_existing
+         self.video_urls=self.link if isinstance(self.link,list) else [self.link]
+         self.starttime = None
+         self.downloaded = 0
+         self.time_interval=60
+         self.monitoring=True
+         self.temp_file_name = None
+         self.file_name = None
+         self.dl_speed = None
+         self.dl_eta=None
+         self.total_bytes_est=None
+         self.percent_speed=None
+         self.percent=None
+         self.speed_track = []
+         self.video_url=None
+         self.last_checked = get_time_stamp()
+         self.num=0
+         self.start()
+     def count_outliers(self,speed,threshold):
+         if speed < threshold:
+             self.outlier_count+=1
+         else:
+             self.outlier_count=0
+     def filter_outliers(self,speeds):
+         # Step 1: Compute initial average
+         initial_avg = sum(speeds) / len(speeds)
+ 
+         # Step 2: Remove speeds 25% under the average
+         threshold = initial_avg * 0.75 # 25% under average
+         filtered_speeds = [speed for speed in speeds if speed >= threshold]
+ 
+         # Step 3: Compute the new average of the filtered list
+         if filtered_speeds: # Ensure the list is not empty
+             self.count_outliers(speeds[-1],threshold)
+             return filtered_speeds
+         else:
+             # This can happen if all values are outliers, it's up to you how to handle it
+             self.outlier_count=0
+             return speeds
+     def remove_temps(self,file_name):
+         for temp_vid in os.listdir(self.temp_directory):
+             if len(file_name)<=len(temp_vid):
+                 if temp_vid[:len(file_name)] == file_name:
+                     os.remove(os.path.join(self.temp_directory,temp_vid))
+                     print(f"removing {temp_vid} from {self.temp_directory}")
+     def move_video(self):
+         if os.path.exists(self.temp_file_path):
+             shutil.move(self.temp_file_path, self.video_directory)
+             print(f"moving {self.file_name} from {self.temp_directory} to {self.video_directory}")
+             self.remove_temps(self.file_name)
+             return True
+         if os.path.exists(self.complete_file_path):
+             print(f"{self.file_name} already existed in {self.video_directory}; removing it from {self.temp_directory}")
+             self.remove_temps(self.file_name)
+             return True
+         return False
+     def yt_dlp_downloader(self,url,ydl_opts={},download=True):
+         try:
+             with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+                 self.info_dict=ydl.extract_info(url=url, download=download)
+             return True
+         except:
+             return False
+     def progress_callback(self, d):
+         self.status_dict = d
+         keys = ['status',
+                 'downloaded_bytes',
+                 'fragment_index',
+                 'fragment_count',
+                 'filename',
+                 'tmpfilename',
+                 'max_progress',
+                 'progress_idx',
+                 'elapsed',
+                 'total_bytes_estimate',
+                 'speed',
+                 'eta',
+                 '_eta_str',
+                 '_speed_str',
+                 '_percent_str',
+                 '_total_bytes_str',
+                 '_total_bytes_estimate_str',
+                 '_downloaded_bytes_str',
+                 '_elapsed_str',
+                 '_default_template']
+         if self.status_dict['status'] == 'finished':
+             print("Done downloading, moving video to final directory...")
+             self.move_video()
+             return
+         if get_time_stamp()-self.last_checked>5:
+             print(self.status_dict['_default_template'])
+             self.last_checked = get_time_stamp()
+         if (get_time_stamp()-self.start_time/5)>6:
+             self.speed_track.append(self.status_dict['speed'])
+             self.speed_track=self.filter_outliers(self.speed_track)
+ 
+     def download(self):
+         if not os.path.exists(self.video_directory):
+             os.makedirs(self.video_directory,exist_ok=True)
+         if not os.path.exists(self.temp_directory):
+             os.makedirs(self.temp_directory,exist_ok=True)
+         for self.num,video_url in enumerate(self.video_urls):
+             if video_url != self.video_url or self.video_url == None:
+                 self.video_url=video_url
+                 self.info_dict=None
+                 result = self.yt_dlp_downloader(url=self.video_url,ydl_opts={'quiet': True, 'no_warnings': True},download=False)
+                 if self.info_dict != None and result:
+                     self.start_time = get_time_stamp()
+                     self.downloaded = 0
+                     self.video_title = self.info_dict.get('title', None)
+                     self.video_ext = self.info_dict.get('ext', 'mp4')
+                     self.file_name =f"{self.video_title}.{self.video_ext}"
+                     self.temp_file_path = os.path.join(self.temp_directory, self.file_name)
+                     self.complete_file_path = os.path.join(self.video_directory, self.file_name)
+                     if not self.move_video():
+                         self.dl_speed = []
+                         self.percent=None
+                         self.dl_eta=None
+                         self.total_bytes_est=None
+                         self.percent_speed=None
+                         self.speed_track = []
+                         self.outlier_count=0
+                         ydl_opts = {
+                             'outtmpl': self.temp_file_path,
+                             'noprogress':True,
+                             'progress_hooks': [self.progress_callback]
+                             }
+ 
+ 
+                         print("Starting download...") # Check if this point in code is reached
+                         result = self.yt_dlp_downloader(url=self.video_url,ydl_opts=ydl_opts,download=True)
+                         if result:
+                             print("Download finished!") # Check if download completes
+                         else:
+                             print(f'error downloading {self.video_url}')
+                         self.move_video()
+                     else:
+                         print(f"The video from {self.video_url} already exists in the directory {self.video_directory}. Skipping download.")
+                 else:
+                     print(f"could not find video info from {self.video_url}. Skipping download.")
+             if self.num==len(self.video_urls)-1:
+                 self.monitoring=False
+                 self.time_interval=0
+ 
+     def monitor(self):
+         while self.monitoring:
+             self.thread_manager.wait(name='pause_event',n=self.time_interval)# check every minute
+             if self.monitoring:
+                 if 'eta' in self.status_dict:
+                     if self.outlier_count>=3 and (self.status_dict['eta']/60)>10:
+                         self.start()
+ 
+     def start(self):
+         download_thread = self.thread_manager.add_thread(name='download_thread',target=self.download)
+         monitor_thread = self.thread_manager.add_thread(name='monitor_thread',target_function=self.monitor)
+         self.thread_manager.start(name='download_thread')
+         self.thread_manager.start(name='monitor_thread')
+         self.thread_manager.join(name='download_thread')
+         self.thread_manager.join(name='monitor_thread')
+ class VideoDownloaderSingleton():
+     _instance = None
+     @staticmethod
+     def get_instance(url_manager,request_manager,title=None,video_extention='mp4',download_directory=os.getcwd(),user_agent=None,download=True,get_info=False):
+         if VideoDownloaderSingleton._instance is None:
+             VideoDownloaderSingleton._instance = VideoDownloader(url=url,title=title,video_extention=video_extention,download_directory=download_directory,download=download,get_info=get_info,user_agent=user_agent)
+         elif VideoDownloaderSingleton._instance.title != title or video_extention != VideoDownloaderSingleton._instance.video_extention or url != VideoDownloaderSingleton._instance.url or download_directory != VideoDownloaderSingleton._instance.download_directory or user_agent != VideoDownloaderSingleton._instance.user_agent:
+             VideoDownloaderSingleton._instance = VideoDownloader(url=url,title=title,video_extention=video_extention,download_directory=download_directory,download=download,get_info=get_info,user_agent=user_agent)
+         return VideoDownloaderSingleton._instance
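A minimal usage sketch for the VideoDownloader class added above, assuming yt-dlp is installed and that ThreadManager and get_time_stamp are supplied by the package's shared utilities (they are referenced but not defined in this file); the URL and directory names below are placeholders:

    from abstract_webtools.managers.videoDownloader import VideoDownloader

    # Instantiation is enough to kick everything off: __init__ calls start(),
    # which launches the download and monitor threads and joins them.
    VideoDownloader(
        link=["https://example.com/some-video"],  # placeholder URL(s); a list is downloaded in order
        video_directory="videos",                 # finished files land here
        temp_directory="videos/temp_files",       # partial downloads are staged here
    )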
abstract_webtools/url_grabber.py
@@ -1,7 +1,7 @@
  from abstract_gui import make_component,sg
  import inspect
  import re
- from . import UserAgentManager,UrlManager,SafeRequest,SoupManager,LinkManager,CipherManager,requests,ssl,BeautifulSoup,HTTPAdapter,PoolManager,ssl_
+ from .managers import *
  window = None

  def get_attrs(values):
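With the manager classes split out into their own subpackage, url_grabber now picks them up through the star import above. Assuming the subpackage's __init__ re-exports the same names the old explicit import listed, an equivalent explicit form would be a sketch like:

    from abstract_webtools.managers import UrlManager, SoupManager, LinkManager, CipherManager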
abstract_webtools-0.1.5.83.dist-info/METADATA
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
- Name: abstract-webtools
- Version: 0.1.5.81
+ Name: abstract_webtools
+ Version: 0.1.5.83
  Summary: Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.
  Home-page: https://github.com/AbstractEndeavors/abstract_essentials/tree/main/abstract_webtools
  Author: putkoff
@@ -13,10 +13,10 @@ Classifier: Programming Language :: Python :: 3.11
  Requires-Python: >=3.6
  Description-Content-Type: text/markdown
  License-File: LICENSE
- Requires-Dist: PySimpleGUI (>=4.60.5)
- Requires-Dist: abstract-utilities (>=0.2.2.30)
- Requires-Dist: requests (>=2.31.0)
- Requires-Dist: urllib3 (>=2.0.4)
+ Requires-Dist: abstract-utilities >=0.2.2.30
+ Requires-Dist: PySimpleGUI >=4.60.5
+ Requires-Dist: urllib3 >=2.0.4
+ Requires-Dist: requests >=2.31.0

  # Abstract WebTools
  Provides utilities for inspecting and parsing web content, including React components and URL utilities, with enhanced capabilities for managing HTTP requests and TLS configurations.
abstract_webtools-0.1.5.83.dist-info/RECORD
@@ -0,0 +1,28 @@
+ abstract_webtools/__init__.py,sha256=rPt-BXa6ksplRXmZoJtn-vMGEPZGpEwfxqqOqZo_EWk,68
+ abstract_webtools/abstract_webtools.py,sha256=6pYoObMhvOnjLiw1oQaNBoX3ipr7QUJYve0YEjoXvC8,3813
+ abstract_webtools/big_user_agent_list.py,sha256=5ZkrUWmfzYL5yaULREslh9ZiRQeITbSjqZlp2KQON3w,131923
+ abstract_webtools/main.py,sha256=_I7pPXPkoLZOoYGLQDrSLGhGuQt6-PVyXEHZSmglk2g,1329
+ abstract_webtools/soup_gui.py,sha256=n95YAps1R6DpMwR4UbthSqQby0C5WHUa9tsW-f2qpLg,5184
+ abstract_webtools/url_grabber.py,sha256=AJN8F6TQ_Kwv3sd8YWXffmC4EHRRrKizuJY1uEXNMlM,10752
+ abstract_webtools/url_grabber_new.py,sha256=Oh2Kc0gBScCo0xpopNsg8JE5lIbPuzZVKM5f5GoZmw0,3454
+ abstract_webtools/managers/__init__.py,sha256=5aIpbdUsDWTrhPUAjfIKnG54OULqOKan9LBL5EIUllo,407
+ abstract_webtools/managers/cipherManager.py,sha256=NHQGdR11eNSm-1H-GezD5dyQgsPTJwY5kczt8Sher2s,1621
+ abstract_webtools/managers/crawlManager.py,sha256=8ico7qFM8sXluIx3Rzgr5TZMmwsb_bnx2eYoS2Vss6I,7661
+ abstract_webtools/managers/domainManager.py,sha256=dluzexs4GdI248kXA7sRaRUm020_dGO86ZLo20xLWOI,1788
+ abstract_webtools/managers/dynamicRateLimiter.py,sha256=gopQcQo50JG2D0KcyepNCIQ_1uDQEBIHBzWf4R2Wgy0,7617
+ abstract_webtools/managers/linkManager.py,sha256=m6y9s8jknrTX8RtOAFKeHd4yd23G7Rgf0T7Sp7wmHUw,12180
+ abstract_webtools/managers/mySocketClient.py,sha256=-j1Q8Ds9RCSbjZdx3ZF9mVpgwxaO0BBssanUcpYVQoY,2045
+ abstract_webtools/managers/networkManager.py,sha256=Op2QDXrP-gmm0tCToe-Ryt9xuOtMppcN2KLKP1WZiu0,952
+ abstract_webtools/managers/requestManager.py,sha256=ZAood3uhs6mi--R8RLTtcNFxACKhu-Jf3m-u4fFMY2Q,17029
+ abstract_webtools/managers/seleniumManager.py,sha256=toG0aM72UO1LMicXGSVm3PCf88yQLJqLpFEMNT9gLU0,3014
+ abstract_webtools/managers/soupManager.py,sha256=03qfg4Ww6T3F9HP0F2RoqxZ-jBFhnwE-lS7m3EOcRCI,14717
+ abstract_webtools/managers/sslManager.py,sha256=C-QgQw9CW84uOE5kx2MPjC3RsLbE2JQqdwdTs0H4ecc,1370
+ abstract_webtools/managers/tlsAdapter.py,sha256=XZSMZz9EUOhv-h3_Waf6mjV1dA3oN_M_oWuoo4VZ_HE,1454
+ abstract_webtools/managers/urlManager.py,sha256=OtEBV26RHHuxtbYLLX56AlMv83s9kOIcsDvlMZ9bKZc,8647
+ abstract_webtools/managers/userAgentManager.py,sha256=33SB2p2FG7EYZl7l2iYm1U4gI9PcdkGTZHw5lg_Ogrw,1653
+ abstract_webtools/managers/videoDownloader.py,sha256=6G_aLc05BTMUYUWc7iqYtHF_BaR7DnCNK_NJ-QnjsYY,10531
+ abstract_webtools-0.1.5.83.dist-info/LICENSE,sha256=g3WEJFiVS27HyCGRTwKSsMLyciMaGFdWcZGOe1QalZk,3877
+ abstract_webtools-0.1.5.83.dist-info/METADATA,sha256=HDnvNqx5Ho0uLgBRSBWh_4TeV5E6tGyOLtSH1841TTw,15858
+ abstract_webtools-0.1.5.83.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
+ abstract_webtools-0.1.5.83.dist-info/top_level.txt,sha256=2DMJ7RmjTcjCsa-uwAV0K6eXXlIIkFDEjBLg_uyCmCI,18
+ abstract_webtools-0.1.5.83.dist-info/RECORD,,
abstract_webtools-0.1.5.83.dist-info/WHEEL
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: bdist_wheel (0.41.0)
+ Generator: setuptools (75.2.0)
  Root-Is-Purelib: true
  Tag: py3-none-any

abstract_webtools-0.1.5.81.dist-info/RECORD
@@ -1,12 +0,0 @@
- abstract_webtools/__init__.py,sha256=VtajT5HAiewyI3wKcKU-r5OVDDyMjYqLWVvTmPn8ToY,80
- abstract_webtools/abstract_webtools.py,sha256=gX-jWctjrzx1IJUg8y8dVzSZYWyX0jve4qS4ZthnL0A,90125
- abstract_webtools/big_user_agent_list.py,sha256=5ZkrUWmfzYL5yaULREslh9ZiRQeITbSjqZlp2KQON3w,131923
- abstract_webtools/main.py,sha256=_I7pPXPkoLZOoYGLQDrSLGhGuQt6-PVyXEHZSmglk2g,1329
- abstract_webtools/soup_gui.py,sha256=n95YAps1R6DpMwR4UbthSqQby0C5WHUa9tsW-f2qpLg,5184
- abstract_webtools/url_grabber.py,sha256=W8nXAu-b3Ywn0ihmN78X1D6vjvMwfgX0Ry-FP5YDT4U,10876
- abstract_webtools/url_grabber_new.py,sha256=Oh2Kc0gBScCo0xpopNsg8JE5lIbPuzZVKM5f5GoZmw0,3454
- abstract_webtools-0.1.5.81.dist-info/LICENSE,sha256=g3WEJFiVS27HyCGRTwKSsMLyciMaGFdWcZGOe1QalZk,3877
- abstract_webtools-0.1.5.81.dist-info/METADATA,sha256=Fd-vyebRGojkrOjc6udWELuyDR3gbrD_6Weund2w_nQ,15866
- abstract_webtools-0.1.5.81.dist-info/WHEEL,sha256=AtBG6SXL3KF_v0NxLf0ehyVOh0cold-JbJYXNGorC6Q,92
- abstract_webtools-0.1.5.81.dist-info/top_level.txt,sha256=2DMJ7RmjTcjCsa-uwAV0K6eXXlIIkFDEjBLg_uyCmCI,18
- abstract_webtools-0.1.5.81.dist-info/RECORD,,