abstract-webtools 0.1.4.41__py3-none-any.whl → 0.1.4.43__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -71,14 +71,13 @@ import requests
71
71
  import os
72
72
  # Google Chrome Driver
73
73
  from selenium import webdriver
74
- import yt_dlp
75
74
  import ssl
76
75
  import re
76
+ import yt_dlp
77
77
  import threading
78
78
  import requests
79
- import time
80
79
  from requests.adapters import HTTPAdapter
81
- from typing import Optional, List
80
+ from typing import Optional, List,Union
82
81
  from requests.packages.urllib3.poolmanager import PoolManager
83
82
  from requests.packages.urllib3.util import ssl_
84
83
  from urllib.parse import urlparse
@@ -220,6 +219,7 @@ class DynamicRateLimiterManagerSingleton:
220
219
  DynamicRateLimiterManagerSingleton._instance = DynamicRateLimiterManager(service_name=service_name, low_limit=low_limit, high_limit=limit_epoch, limit_epoch=60,starting_tokens=starting_tokens,epoch_cycle_adjustment=epoch_cycle_adjustment)
221
220
  return DynamicRateLimiterManagerSingleton._instance
222
221
 
222
+
223
223
  class CipherManager:
224
224
  @staticmethod
225
225
  def get_default_ciphers()-> list:
@@ -259,124 +259,15 @@ class CipherManagerSingleton:
259
259
  CipherManagerSingleton._instance = CipherManager(cipher_list=cipher_list)
260
260
  return CipherManagerSingleton._instance
261
261
  class SSLManager:
262
- @staticmethod
263
- def get_default_certification():
264
- return ssl.CERT_REQUIRED
265
-
266
- @staticmethod
267
- def get_default_tls_options():
268
- return ["OP_NO_TLSv1", "OP_NO_TLSv1_1", "OP_NO_COMPRESSION"]
269
-
270
- @staticmethod
271
- def get_all_tls_options() -> int:
272
- """
273
- Returns the SSL options to be used when creating the SSL context.
274
- [
275
- ssl.OP_SINGLE_ECDH_USE,
276
- ssl.OP_SINGLE_DH_USE,
277
- ssl.OP_NO_TLSv1_3,
278
- ssl.OP_NO_TLSv1_2,
279
- ssl.OP_NO_TLSv1_1,
280
- ssl.OP_NO_TLSv1,
281
- ssl.OP_NO_TICKET,
282
- ssl.OP_NO_RENEGOTIATION,
283
- ssl.OP_NO_QUERY_MTU,
284
- ssl.OP_NO_COMPRESSION,
285
- ssl.OP_CIPHER_SERVER_PREFERENCE,
286
- ssl.OP_ALLOW_NO_DHE_KEX,
287
- ssl.OP_ALL
288
- ]
289
- The `ssl` module in the Python standard library provides several constants that you can use to set various SSL options. Here are the available options as of Python 3.9:
290
-
291
- 1. `ssl.OP_ALL`:
292
- - Enables a collection of various bug workaround options.
293
-
294
- 2. `ssl.OP_ALLOW_NO_DHE_KEX`:
295
- - Allow a non-(EC)DHE handshake on a server socket if no suitable security level can be reached.
296
-
297
- 3. `ssl.OP_CIPHER_SERVER_PREFERENCE`:
298
- - Uses the server's cipher ordering preference rather than the client's.
299
-
300
- 4. `ssl.OP_NO_COMPRESSION`:
301
- - Prevents using SSL/TLS compression to avoid CRIME attacks.
302
-
303
- 5. `ssl.OP_NO_QUERY_MTU`:
304
- - Disables automatic querying of kernel for MTU.
305
-
306
- 6. `ssl.OP_NO_RENEGOTIATION`:
307
- - Disallows all renegotiation.
262
+ def __init__(self, ciphers=None, ssl_options=None, certification=None):
263
+ self.ciphers = ciphers or "ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-SHA384:ECDHE-ECDSA-AES256-SHA384:ECDHE-RSA-AES256-SHA:ECDHE-ECDSA-AES256-SHA:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-SHA256:ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES128-SHA256:AES256-SHA:AES128-SHA"
264
+ self.ssl_options = ssl_options or ssl.OP_NO_TLSv1 | ssl.OP_NO_TLSv1_1 | ssl.OP_NO_COMPRESSION
265
+ self.certification = certification or ssl.CERT_REQUIRED
266
+ self.ssl_context = self.get_context()
308
267
 
309
- 7. `ssl.OP_NO_TICKET`:
310
- - Disables use of RFC 5077 session tickets.
268
+ def get_context(self):
269
+ return ssl_.create_urllib3_context(ciphers=self.ciphers, cert_reqs=self.certification, options=self.ssl_options)
311
270
 
312
- 8. `ssl.OP_NO_TLSv1`:
313
- - Prevents the use of TLSv1.
314
-
315
- 9. `ssl.OP_NO_TLSv1_1`:
316
- - Prevents the use of TLSv1.1.
317
-
318
- 10. `ssl.OP_NO_TLSv1_2`:
319
- - Prevents the use of TLSv1.2.
320
-
321
- 11. `ssl.OP_NO_TLSv1_3`:
322
- - Prevents the use of TLSv1.3.
323
-
324
- 12. `ssl.OP_SINGLE_DH_USE`:
325
- - Always create a new key when using temporary/ephemeral DH parameters. This option provides forward secrecy.
326
-
327
- 13. `ssl.OP_SINGLE_ECDH_USE`:
328
- - Always create a new key when using temporary/ephemeral ECDH parameters. This option provides forward secrecy.
329
-
330
- These constants can be combined using the bitwise OR (`|`) operator to set multiple options. For example, to prevent the use of TLSv1 and TLSv1.1, you would use:
331
- Please note that the availability of some options might vary depending on the version of OpenSSL that Python's `ssl` module is linked against and the version of Python itself. You can always check the Python documentation specific to your version to get the most accurate and updated list.
332
-
333
- Returns:
334
- int: The SSL options.
335
-
336
- """
337
- return [
338
- "OP_SINGLE_ECDH_USE",
339
- "OP_SINGLE_DH_USE",
340
- "OP_NO_TLSv1_3",
341
- "OP_NO_TLSv1_2",
342
- "OP_NO_TLSv1_1",
343
- "OP_NO_TLSv1",
344
- "OP_NO_TICKET",
345
- "OP_NO_RENEGOTIATION",
346
- "OP_NO_QUERY_MTU",
347
- "OP_NO_COMPRESSION",
348
- "OP_CIPHER_SERVER_PREFERENCE",
349
- "OP_ALLOW_NO_DHE_KEX",
350
- "OP_ALL"
351
- ]
352
-
353
- @staticmethod
354
- def get_context(ciphers=None, options=None, cert_reqs=None):
355
-
356
- return ssl_.create_urllib3_context(ciphers=ciphers, cert_reqs=cert_reqs, options=options)
357
-
358
- def __init__(self, ciphers=None, ssl_options_list=None, certification=None):
359
- self.ssl_options_list = ssl_options_list
360
- self.create_list()
361
- self.ssl_options_values = self.get_options_values()
362
- self.ssl_options = self.combine_ssl_options()
363
- self.certification = certification or self.get_default_certification()
364
- self.cipher_manager = CipherManagerSingleton().get_instance(cipher_list=ciphers)
365
- self.ssl_context = self.get_context(ciphers=self.cipher_manager.ciphers_string, options=self.ssl_options, cert_reqs=self.certification)
366
- def create_list(self):
367
- if self.ssl_options_list == None:
368
- self.ssl_options_list= []
369
- elif isinstance(self.ssl_options_list, str):
370
- self.ssl_options_list=self.ssl_options_list.split(',')
371
- if isinstance(self.ssl_options_list, str):
372
- self.ssl_options_list=[self.ssl_options_list]
373
- def get_options_values(self):
374
- return [getattr(ssl, option_name) for option_name in self.ssl_options_list]
375
- def combine_ssl_options(self):
376
- combined_options = 0
377
- for option in self.ssl_options_values:
378
- combined_options |= option
379
- return combined_options
380
271
  class SSLManagerSingleton:
381
272
  _instance = None
382
273
  @staticmethod
@@ -387,60 +278,49 @@ class SSLManagerSingleton:
387
278
  SSLManagerSingleton._instance = SSLManager(ciphers=ciphers, ssl_options_list=ssl_options_list, certification=certification)
388
279
  return SSLManagerSingleton._instance
389
280
  class TLSAdapter(HTTPAdapter):
390
- def __init__(self, ciphers: Optional[List[str]] = None, certification: Optional[str] = None, ssl_options: Optional[List[str]] = None):
391
- super().__init__()
392
- self.ciphers = ciphers
393
- self.certification = certification
394
- self.ssl_options = ssl_options
395
-
396
- self.cipher_manager = CipherManagerSingleton.get_instance(cipher_list=self.ciphers)
397
- self.ssl_manager = SSLManagerSingleton.get_instance(
398
- ciphers=self.cipher_manager.ciphers_string,
399
- ssl_options_list=ssl_options,
400
- certification=certification
401
- )
281
+ def __init__(self, ssl_manager=None,ciphers=None, certification: Optional[str] = None, ssl_options: Optional[List[str]] = None):
282
+ if ssl_manager == None:
283
+ ssl_manager = SSLManager(ciphers=ciphers, ssl_options=ssl_options, certification=certification)
284
+ self.ssl_manager = ssl_manager
285
+ self.ciphers = ssl_manager.ciphers
286
+ self.certification = ssl_manager.certification
287
+ self.ssl_options = ssl_manager.ssl_options
402
288
  self.ssl_context = self.ssl_manager.ssl_context
289
+ super().__init__()
403
290
 
404
291
  def init_poolmanager(self, *args, **kwargs):
292
+ kwargs['ssl_context'] = self.ssl_context
405
293
  return super().init_poolmanager(*args, **kwargs)
406
-
407
-
408
294
  class TLSAdapterSingleton:
409
- _instance = None
295
+ _instance: Optional[TLSAdapter] = None
296
+
410
297
  @staticmethod
411
- def get_instance(ciphers=None, certification=None, ssl_options=None):
412
- if TLSAdapterSingleton._instance is None:
413
- TLSAdapterSingleton._instance = TLSAdapter(ciphers=ciphers, certification=certification, ssl_options=ssl_options)
414
- elif TLSAdapterSingleton._instance.ciphers != ciphers or SSLManagerSingleton._instance.certification !=certification or SSLManagerSingleton._instance.ssl_options_list !=ssl_options:
298
+ def get_instance(ciphers: Optional[List[str]] = None, certification: Optional[str] = None, ssl_options: Optional[List[str]] = None) -> TLSAdapter:
299
+ if (not TLSAdapterSingleton._instance) or (
300
+ TLSAdapterSingleton._instance.ciphers != ciphers or
301
+ TLSAdapterSingleton._instance.certification != certification or
302
+ TLSAdapterSingleton._instance.ssl_options != ssl_options
303
+ ):
415
304
  TLSAdapterSingleton._instance = TLSAdapter(ciphers=ciphers, certification=certification, ssl_options=ssl_options)
416
305
  return TLSAdapterSingleton._instance
417
306
  class UserAgentManager:
307
+ def __init__(self, user_agent=None):
308
+ if user_agent == None:
309
+ user_agent = self.desktop_user_agents()[0]
310
+ self.user_agent = user_agent
311
+ self.user_agent=self.get_user_agent(self.user_agent)
418
312
  @staticmethod
419
-
420
313
  def desktop_user_agents() -> list:
421
- """
422
- Returns a list of popular desktop user-agent strings for various browsers.
423
-
424
- Returns:
425
- list: A list of desktop user-agent strings.
426
- """
427
- return ['Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36','Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:87.0) Gecko/20100101 Firefox/87.0','Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1 Safari/605.1.15','Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36 Edg/91.0.864.59','Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko','Opera/9.80 (Windows NT 6.0) Presto/2.12.388 Version/12.14']
428
-
314
+ return ['Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36','Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:87.0) Gecko/20100101 Firefox/87.0','Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1 Safari/605.1.15','Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36 Edg/91.0.864.59','Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko','Opera/9.80 (Windows NT 6.0) Presto/2.12.388 Version/12.14']
429
315
  @staticmethod
430
- def get_user_agent(user_agent:str=desktop_user_agents()[0]) -> dict:
431
- """
432
- Returns the user-agent header dictionary with the specified user-agent.
433
-
434
- Args:
435
- user_agent (str, optional): The user-agent string to be used. Defaults to the first user-agent in the list.
436
-
437
- Returns:
438
- dict: A dictionary containing the 'user-agent' header.
439
- """
440
- return {"user-agent": user_agent}
441
- def __init__(self,user_agent=desktop_user_agents()[0]):
442
- self.user_agent = user_agent
443
- self.get_user_agent(user_agent=user_agent)
316
+ def big_user_agent_list(n=0):
317
+ from .big_user_agent_list import big_user_agent_list
318
+ return big_user_agent_list[n]
319
+ @staticmethod
320
+ def get_user_agent(user_agent: str = desktop_user_agents()[0]) -> dict:
321
+ if isinstance(user_agent,dict):
322
+ return user_agent
323
+ return {"user-agent": user_agent}
444
324
  class UserAgentManagerSingleton:
445
325
  _instance = None
446
326
  @staticmethod
@@ -450,215 +330,20 @@ class UserAgentManagerSingleton:
450
330
  elif UserAgentManagerSingleton._instance.user_agent != user_agent:
451
331
  UserAgentManagerSingleton._instance = UserAgentManager(user_agent=user_agent)
452
332
  return UserAgentManagerSingleton._instance
453
- def safe_json_loads(data):
454
- try:
455
- return json.loads(data)
456
- except json.JSONDecodeError:
457
- return None
458
- def convert_to_json(obj):
459
- if isinstance(obj, dict):
460
- return obj
461
- if isinstance(obj, str):
462
- return safe_json_loads(obj)
463
- return None
464
- class SafeRequest:
465
- def __init__(self,
466
- url_manager,
467
- session=None,
468
- headers=None,
469
- cookies=None,
470
- adapter=None,
471
- protocol=None,
472
- proxies=None,
473
- auth=None,
474
- stream=False,
475
- last_request_time=None,
476
- max_retries=None,
477
- request_wait_limit=None):
478
- self.url_manager=url_manager
479
- self.session=session or requests.Session()
480
- self.headers = headers or {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36'}
481
- self.cookies=cookies or "cb4c883efc59d0e990caf7508902591f4569e7bf-1617321078-0-150"
482
- self.adapter=adapter or TLSAdapterSingleton().get_instance()
483
- self.protocol=protocol or 'https://'
484
- self.proxies=None or {}
485
- self.auth=auth
486
- self.stream=stream if isinstance(stream,bool) else False
487
- self.last_request_time=last_request_time
488
- self.max_retries = max_retries or 3
489
- self.request_wait_limit = request_wait_limit or 1.5
490
- self.session = self.initialize_session()
491
- self.make_request()
492
- self.source_code = None
493
- self.source_code_bytes=None
494
- self.source_code_json = {}
495
- self.react_source_code=[]
496
- self.get_response()
497
- def initialize_session(self):
498
- s = self.session
499
- s.proxies=self.proxies
500
- s.auth=self.auth
501
- s.cookies["cf_clearance"] = self.cookies
502
- s.headers.update(self.headers)
503
- # Add any other headers or cookie settings here
504
- s.mount(self.protocol, self.adapter)
505
- return s
506
- def get_response(self):
507
- if self.request:
508
- self.source_code = self.request.text
509
- self.source_code_bytes=self.request.content
510
- self.source_code_json = {}
511
- if self.request.headers.get('content-type') == 'application/json':
512
- data = convert_to_json(self.source_code)
513
- if data:
514
- self.source_code_json=data.get("response", data)
515
- self.get_react_source_code()
516
- def get_react_source_code(self) -> list:
517
- """
518
- Fetches the source code of the specified URL and extracts JavaScript and JSX source code (React components).
519
-
520
- Args:
521
- url (str): The URL to fetch the source code from.
522
-
523
- Returns:
524
- list: A list of strings containing JavaScript and JSX source code found in <script> tags.
525
- """
526
- if self.url_manager.correct_url is None:
527
- return []
528
- soup = BeautifulSoup(self.source_code_bytes,"html.parser")
529
- script_tags = soup.find_all('script', type=lambda t: t and ('javascript' in t or 'jsx' in t))
530
- for script_tag in script_tags:
531
- self.react_source_code.append(script_tag.string)
532
-
533
-
534
- def get_status(url:str=None) -> int:
535
- """
536
- Gets the HTTP status code of the given URL.
537
-
538
- Args:
539
- url (str): The URL to check the status of.
540
-
541
- Returns:
542
- int: The HTTP status code of the URL, or None if the request fails.
543
- """
544
- # Get the status code of the URL
545
- return try_request(url=url).status_code
546
- def wait_between_requests(self):
547
- """
548
- Wait between requests based on the request_wait_limit.
549
- """
550
- if self.last_request_time:
551
- sleep_time = self.request_wait_limit - (get_time_stamp() - self.last_request_time)
552
- if sleep_time > 0:
553
- logging.info(f"Sleeping for {sleep_time:.2f} seconds.")
554
- get_sleep(sleep_time)
555
-
556
- def make_request(self):
557
- """
558
- Make a request and handle potential errors.
559
- """
560
- # Update the instance attributes if they are passed
561
-
562
- self.wait_between_requests()
563
- for _ in range(self.max_retries):
564
- try:
565
- self.try_request() # 10 seconds timeout
566
- if self.request:
567
- if self.request.status_code == 200:
568
- self.last_request_time = get_time_stamp()
569
- return self.request
570
- elif self.request.status_code == 429:
571
- logging.warning(f"Rate limited by {self.url_manager.correct_url}. Retrying...")
572
- get_sleep(5) # adjust this based on the server's rate limit reset time
573
- except requests.Timeout as e:
574
- logging.error(f"Request to {cleaned_url} timed out: {e}")
575
- except requests.ConnectionError:
576
- logging.error(f"Connection error for URL {self.url_manager.correct_url}.")
577
- except requests.Timeout:
578
- logging.error(f"Request timeout for URL {self.url_manager.correct_url}.")
579
- except requests.RequestException as e:
580
- logging.error(f"Request exception for URL {self.url_manager.correct_url}: {e}")
581
-
582
- logging.error(f"Failed to retrieve content from {self.url_manager.correct_url} after {self.max_retries} retries.")
583
- return None
584
- def try_request(self,timeout=10) -> (requests.Response or None):
585
- """
586
- Tries to make an HTTP request to the given URL using the provided session.
587
-
588
- Args:
589
- url (str): The URL to make the request to.
590
- session (type(requests.Session), optional): The requests session to use for making HTTP requests.
591
- Defaults to requests.
592
-
593
- Returns:
594
- requests.Response or None: The response object if the request is successful, or None if the request fails.
595
- """
596
- try:
597
- self.request = self.session.get(url=self.url_manager.url, timeout=10)
598
- except requests.exceptions.RequestException as e:
599
- print(e)
600
- return False
601
- def get_limited_request(self,request_url,service_name="default"):
602
- manager = DynamicRateLimiterManagerSingleton.get_instance() # Get the singleton instance
603
- unwanted_response=True
604
- # Check with the rate limiter if we can make a request
605
- while True:
606
- if not manager.request(service_name):
607
- print("Rate limit reached for coin_gecko. Waiting for the next epoch...")
608
- sleep_count_down(manager.services[service_name].get_sleep()["current_sleep"]) # Wait for the limit_epoch duration
609
- # Make the actual request
610
- response = try_request(request_url=request_url)
611
-
612
- # If you get a rate-limit error (usually 429 status code but can vary), adjust the rate limiter
613
- if response.status_code ==429:
614
- print(response.json())
615
- manager.services[service_name].request_tracker(False)
616
- print("Rate limited by coin_gecko. Adjusted limit. Retrying...")
617
- if len(manager.services[service_name].calculate_tokens()["succesful"])<2:
618
- sleep_count_down(manager.services[service_name].limit_epoch) # Wait for the limit_epoch duration
619
- else:
620
- manager.services[service_name].current_limit-=1
621
- sleep_count_down(manager.services[service_name].limit_epoch/len(manager.services[service_name].calculate_tokens()["succesful"])) # Wait for the limit_epoch duration
622
- # Return the data if the request was successful
623
- if response.status_code == 200:
624
- manager.services[service_name].request_tracker(True)
625
- return response.json()
626
- elif response.status_code not in [200,429]:
627
- print(f"Unexpected response: {response.status_code}. Message: {response.text}")
628
- return None
629
- @property
630
- def url(self):
631
- return self.url_manager.url
632
-
633
- @url.setter
634
- def url(self, new_url):
635
- self._url_manager.url = new_url
636
- class SafeRequestSingleton:
637
- _instance = None
638
- @staticmethod
639
- def get_instance(url=None,headers:dict=None,max_retries=3,last_request_time=None,request_wait_limit=1.5):
640
- if SafeRequestSingleton._instance is None:
641
- SafeRequestSingleton._instance = SafeRequest(url,url_manager=URLManagerSingleton,headers=headers,max_retries=max_retries,last_request_time=last_request_time,request_wait_limit=request_wait_limit)
642
- elif SafeRequestSingleton._instance.url != url or SafeRequestSingleton._instance.headers != headers or SafeRequestSingleton._instance.max_retries != max_retries or SafeRequestSingleton._instance.request_wait_limit != request_wait_limit:
643
- SafeRequestSingleton._instance = SafeRequest(url,url_manager=URLManagerSingleton,headers=headers,max_retries=max_retries,last_request_time=last_request_time,request_wait_limit=request_wait_limit)
644
- return SafeRequestSingleton._instance
645
- ## ##
646
- # Usage
647
- ## safe_requester = SafeRequest()
648
- ##
649
- ## url = "example.com" # replace with your URL
650
- #### if safe_requester.is_valid_url(url):
651
- ## response = safe_requester.make_request(url)
652
- ## if response:
653
- ## print(response.text)
654
- ## else:
655
- ## logging.error(f"Invalid URL: {url}")
656
- # Usage 2
657
- ## safe_requester = SafeRequest()
658
- ## source_code = safe_requester.get_source_code('https://www.example.com')
659
- ## if source_code:
660
- ## print(source_code)
661
- ## ##
333
+ class NetworkManager:
334
+ def __init__(self, user_agent_manager=None,ssl_manager=None, tls_adapter=None,user_agent=None,proxies=None,auth=None,cookies=None,ciphers=None, certification: Optional[str] = None, ssl_options: Optional[List[str]] = None):
335
+ if ssl_manager == None:
336
+ ssl_manager = SSLManager(ciphers=ciphers, ssl_options=ssl_options, certification=certification)
337
+ self.ssl_manager=ssl_manager
338
+ if tls_adapter == None:
339
+ tls_adapter=TLSAdapter(ssl_manager=ssl_manager,ciphers=ciphers, certification=certification, ssl_options=ssl_options)
340
+ self.tls_adapter=tls_adapter
341
+ self.ciphers=tls_adapter.ciphers
342
+ self.certification=tls_adapter.certification
343
+ self.ssl_options=tls_adapter.ssl_options
344
+ self.proxies=None or {}
345
+ self.auth=auth
346
+ self.cookies=cookies or "cb4c883efc59d0e990caf7508902591f4569e7bf-1617321078-0-150"
662
347
  class MySocketClient:
663
348
  def __init__(self, ip_address=None, port=None,domain_name=None):
664
349
  self.sock
@@ -705,158 +390,21 @@ class MySocketClient():
705
390
  elif MySocketClientSingleton._instance.ip_address != ip_address or MySocketClientSingleton._instance.port != port or URLManagerSingleton._instance.domain_name != domain_name:
706
391
  MySocketClientSingleton._instance = MySocketClient(ip_address=ip_address,port=port,domain_name=domain_name)
707
392
  return MySocketClient
708
- class VideoDownloader:
709
- def __init__(self, url,title=None,download_directory=os.getcwd(),user_agent=None,video_extention='mp4',download_video=True,get_info=False,auto_file_gen=True,standalone_download=False):
710
- self.url=url
711
- self.monitoring = True
712
- self.pause_event = threading.Event()
713
- self.get_download = download_video
714
- self.get_info = get_info
715
- self.user_agent=user_agent
716
- self.title = title
717
- self.auto_file_gen=auto_file_gen
718
- self.standalone_download=standalone_download
719
- self.video_extention=video_extention
720
- self.download_directory=download_directory
721
- self.video_extention=video_extention
722
- self.header = {}#UserAgentManagerSingleton().get_instance(user_agent=user_agent)
723
- self.base_name = os.path.basename(self.url)
724
- self.file_name,self.ext = os.path.splitext(self.base_name)
725
- self.video_urls = [self.url]
726
- self.fetch_video_urls()
727
- self.info={}
728
- self.starttime = None
729
- self.downloaded = 0
730
- self.video_urls = url if isinstance(url,list) else [url]
731
- self.send_to_dl()
732
-
733
-
734
- def get_request(self,url):
735
- self.request_manager = SafeRequestSingleton.get_instance(url=url)
736
- return self.request_manager
737
- def send_to_dl(self):
738
- if self.standalone_download:
739
- self.standalone_downloader()
740
- else:
741
- self.start()
742
- def get_headers(self,url):
743
- # Send an HTTP GET request to the URL
744
- response = requests.get(url)
745
- # Check if the request was successful (status code 200)
746
- if response.status_code == 200:
747
- # Access and print the response headers
748
- return response.headers
749
- else:
750
- print(f"Failed to retrieve the web page. Status code: {response.status_code}")
751
- def progress_callback(self, stream, chunk, bytes_remaining):
752
- total_size = stream.filesize
753
- self.downloaded = total_size - bytes_remaining
754
- def get_directory_path(directory,name,video_extention):
755
- file_path=os.path.join(directory,name+'.'+video_extention)
756
- i=0
757
- while os.path.exists(file_path) == True:
758
- file_path=os.path.join(directory,name+f'_{i}'+'.'+video_extention)
759
- i+=1
760
- return file_path
761
- def fetch_video_urls(self):
762
- driver = webdriver.Chrome()
763
- driver.get(self.url)
764
- self.page_source = driver.page_source
765
- for each in self.page_source.split('<source ')[1:]:
766
- # NOTE: Make sure to import the `eatAll` function and use it here.
767
- self.video_urls.append(eatInner(each.split('.{self.video_extention}'.replace('..','.'))[0].split('http')[-1],['h','t','t','p','s',':','//','/','s','=',' ','\n','\t',''])+'.mp4')
768
- def download(self):
769
- for video_url in self.video_urls:
770
- ydl_opts = {
771
- 'external_downloader': 'ffmpeg',
772
- 'external_downloader_args': "-ss 00:01:00.00 -to 00:02:00.00",
773
- 'format': 'best',
774
- 'outtmpl': f'{self.download_directory} %(title)s-%(id)s.%(ext)s',
775
- 'noprogress': True
776
- }
777
- self.output_video = ydl_opts['outtmpl']
778
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
779
- self.info = ydl.extract_info(video_url, download=self.get_download)
780
- self.starttime = time.time()
781
- if self.auto_file_gen:
782
- file_path = ydl.prepare_filename(self.info)
783
- if self.get_info == True:
784
- self.info['file_path']=file_path
785
-
786
- if self.get_info == True:
787
- self.monitoring = False
788
- self.pause_event.set()
789
- return self.info
790
- self.monitoring = False
791
- self.pause_event.set()
792
- def monitor(self):
793
- name = self.title or False
794
- while self.monitoring:
795
- print("Monitoring...")
796
- self.pause_event.wait(60) # check every minute
797
- if self.starttime:
798
- elapsed_time = time.time() - self.starttime
799
- if self.downloaded != 0 and elapsed_time !=0:
800
- percent = self.downloaded / (self.downloaded + elapsed_time)
801
- else:
802
- percent = 0
803
- if elapsed_time !=0:
804
- try:
805
- # operations that can cause ZeroDivisionError
806
- percent = self.downloaded / (self.downloaded + elapsed_time)
807
- downloaded_minutes = elapsed_time / 60
808
- estimated_download_time = downloaded_minutes / percent - downloaded_minutes
809
- except ZeroDivisionError:
810
- print("Caught a division by zero!")
811
- continue
812
-
813
- if downloaded_minutes != 0 and (percent - downloaded_minutes) !=0:
814
- estimated_download_time = downloaded_minutes / percent - downloaded_minutes
815
- print(estimated_download_time)
816
- if estimated_download_time >= 1.5:
817
- print("Seems like YouTube is limiting our download speed, restarting the download to mitigate the problem..")
818
- # TODO: Find a way to stop the current download and restart. This may not work efficiently since pytube doesn't expose a cancel download method.
819
- self.start() # Restart the download process
820
- def standalone_downloader(self):
821
- name = self.title or False
822
- for video_url in self.video_urls:
823
- self.request_manager
824
- self.info=True
825
- self.download_video=False
826
- headers = self.get_headers(video_url)
827
- for v in soup.select("video source[src]"):
828
- print("Downloading {}".format(v["src"]))
829
- if not name_chosen:
830
- name = v["src"].split("/")[-1].strip()
831
- file_path = get_directory_path(directory,name,video_extention)
832
- print(f"saving to {file_path}")
833
- with open(get_directory_path(directory,name,video_extention), "wb") as f_out:
834
- f_out.write(requests.get(v["src"].strip(), headers=headers).content)
835
-
836
- def start(self):
837
- self.download_thread = threading.Thread(target=self.download)
838
- self.download_thread.daemon = True
839
- self.monitor_thread = threading.Thread(target=self.monitor)
840
- self.download_thread.start()
841
- self.monitor_thread.start()
842
- self.download_thread.join()
843
- self.monitor_thread.join()
844
- def stop(self):
845
- self.monitoring = False
846
-
847
-
848
-
849
- class VideoDownloaderSingleton():
850
- _instance = None
851
- @staticmethod
852
- def get_instance(url_manager,request_manager,title=None,video_extention='mp4',download_directory=os.getcwd(),user_agent=None,download=True,get_info=False):
853
- if VideoDownloaderSingleton._instance is None:
854
- VideoDownloaderSingleton._instance = VideoDownloader(url=url,title=title,video_extention=video_extention,download_directory=download_directory,download=download,get_info=get_info,user_agent=user_agent)
855
- elif VideoDownloaderSingleton._instance.title != title or video_extention != VideoDownloaderSingleton._instance.video_extention or url != VideoDownloaderSingleton._instance.url or download_directory != VideoDownloaderSingleton._instance.download_directory or user_agent != VideoDownloaderSingleton._instance.user_agent:
856
- VideoDownloaderSingleton._instance = VideoDownloader(url=url,title=title,video_extention=video_extention,download_directory=download_directory,download=download,get_info=get_info,user_agent=user_agent)
857
- return VideoDownloaderSingleton._instance
393
+ def safe_json_loads(data):
394
+ try:
395
+ return json.loads(data)
396
+ except json.JSONDecodeError:
397
+ return None
398
+ def convert_to_json(obj):
399
+ if isinstance(obj, dict):
400
+ return obj
401
+ if isinstance(obj, str):
402
+ return safe_json_loads(obj)
403
+ return None
858
404
  class URLManager:
859
- def __init__(self, url, session=requests):
405
+ def __init__(self, url=None, session=requests):
406
+ if url==None:
407
+ url='www.example.com'
860
408
  self.url = url
861
409
  self.session = session
862
410
 
@@ -980,7 +528,225 @@ class URLManagerSingleton:
980
528
  elif URLManagerSingleton._instance.session != session or URLManagerSingleton._instance.url != url:
981
529
  URLManagerSingleton._instance = URLManager(url,session=session)
982
530
  return URLManagerSingleton._instance
531
class SafeRequest:
    """HTTP request wrapper that wires together URL, network, and user-agent
    managers, retries failed requests with a wait limit, and post-processes
    the response (text, bytes, JSON, and React <script> extraction).

    NOTE(review): reconstructed from a mangled diff; collaborator classes
    (URLManager, NetworkManager, UserAgentManager, ...) are defined elsewhere
    in this file.
    """

    def __init__(self,
                 url=None,
                 url_manager=None,
                 network_manager=None,
                 user_agent_manager=None,
                 ssl_manager=None,
                 tls_adapter=None,
                 user_agent=None,
                 proxies=None,
                 headers=None,
                 auth=None,
                 cookies=None,
                 session=None,
                 adapter=None,
                 protocol=None,
                 ciphers=None,
                 certification=None,
                 ssl_options=None,
                 stream=False,
                 last_request_time=None,
                 max_retries=None,
                 request_wait_limit=None):
        # Build any missing collaborators from the loose keyword arguments.
        if url_manager is None:
            url_manager = URLManager(url=url)
        self.url_manager = url_manager
        if network_manager is None:
            network_manager = NetworkManager(user_agent_manager=user_agent_manager, ssl_manager=ssl_manager, tls_adapter=tls_adapter, user_agent=user_agent, proxies=proxies, auth=auth, cookies=cookies, ciphers=ciphers, certification=certification, ssl_options=ssl_options)
        if user_agent_manager is None:
            user_agent_manager = UserAgentManager(user_agent=user_agent)
        self.user_agent_manager = user_agent_manager
        self.user_agent = self.user_agent_manager.user_agent
        self.network_manager = network_manager
        # Mirror the network manager's settings locally for convenience.
        self.tls_adapter = self.network_manager.tls_adapter
        self.ciphers = self.network_manager.ciphers
        self.certification = self.network_manager.certification
        self.ssl_options = self.network_manager.ssl_options
        self.proxies = self.network_manager.proxies
        self.auth = self.network_manager.auth
        self.cookies = self.network_manager.cookies
        self.session = session or requests.session()
        self.protocol = protocol or 'https://'
        # NOTE(review): falls back to the raw user-agent *string* as the
        # headers value when no headers dict is given — looks suspicious;
        # verify against callers.
        self.headers = headers or self.user_agent or {'Accept': '*/*'}
        self.stream = stream if isinstance(stream, bool) else False
        self.initialize_session()
        self.last_request_time = last_request_time
        self.max_retries = max_retries or 3
        self.request_wait_limit = request_wait_limit or 1.5
        self._response = None
        # Eagerly fetch once so response data can be processed below.
        self.make_request()
        self.source_code = None
        self.source_code_bytes = None
        self.source_code_json = {}
        self.react_source_code = []
        self._response_data = None
        self.process_response_data()

    @property
    def response(self):
        """Lazy-loading of response."""
        if self._response is None:
            self._response = self.fetch_response()
        return self._response

    def fetch_response(self) -> Union[requests.Response, None]:
        """Actually fetch the response from the server (single attempt)."""
        return self.try_request()

    def initialize_session(self):
        """Apply proxies, auth, cookies, headers, and the TLS adapter to the
        underlying requests session; returns the configured session."""
        s = self.session
        s.proxies = self.network_manager.proxies  # proxies from the NetworkManager
        s.auth = self.network_manager.auth
        # NOTE(review): stores the whole cookies value under "cf_clearance";
        # presumably a Cloudflare clearance token — confirm.
        s.cookies["cf_clearance"] = self.network_manager.cookies
        s.headers.update(self.headers)
        s.mount(self.protocol, self.network_manager.tls_adapter)  # TLSAdapter from the NetworkManager
        return s

    def process_response_data(self):
        """Processes the fetched response data into text/bytes/JSON caches."""
        if not self.response:
            return  # No data to process
        self.source_code = self.response.text
        self.source_code_bytes = self.response.content
        if self.response.headers.get('content-type') == 'application/json':
            data = convert_to_json(self.source_code)
            if data:
                # Some APIs nest the payload under a "response" key.
                self.source_code_json = data.get("response", data)
        self.get_react_source_code()

    def get_react_source_code(self) -> list:
        """
        Extract JavaScript/JSX source (React components) from <script> tags
        of the already-fetched page, appending them to self.react_source_code.

        Returns:
            list: strings of JavaScript and JSX source found in <script> tags.
        """
        if self.url_manager.correct_url is None:
            return []
        soup = BeautifulSoup(self.source_code_bytes, "html.parser")
        script_tags = soup.find_all('script', type=lambda t: t and ('javascript' in t or 'jsx' in t))
        for script_tag in script_tags:
            self.react_source_code.append(script_tag.string)

    def get_status(self, url: str = None) -> int:
        """
        Get the HTTP status code of the managed URL.

        The original signature lacked ``self`` and called an undefined
        module-level ``try_request`` (both NameError bugs). The *url*
        parameter is kept for backward compatibility, but the request
        targets ``self.url_manager.url`` like every other request here.

        Returns:
            int: the HTTP status code, or None if the request fails.
        """
        response = self.try_request()
        return response.status_code if response is not None else None

    def wait_between_requests(self):
        """
        Wait between requests based on the request_wait_limit.
        """
        if self.last_request_time:
            sleep_time = self.request_wait_limit - (get_time_stamp() - self.last_request_time)
            if sleep_time > 0:
                logging.info(f"Sleeping for {sleep_time:.2f} seconds.")
                get_sleep(sleep_time)

    def make_request(self):
        """
        Make a request with retries and rate-limit handling; returns the
        response on HTTP 200, otherwise None after max_retries attempts.
        """
        self.wait_between_requests()
        for _ in range(self.max_retries):
            try:
                self.try_request()
                if self.response:
                    if self.response.status_code == 200:
                        self.last_request_time = get_time_stamp()
                        return self.response
                    elif self.response.status_code == 429:
                        logging.warning(f"Rate limited by {self.url_manager.correct_url}. Retrying...")
                        get_sleep(5)  # adjust this based on the server's rate limit reset time
            except requests.Timeout as e:
                # Original referenced an undefined `cleaned_url` here and had
                # a second, unreachable `except requests.Timeout` clause.
                logging.error(f"Request to {self.url_manager.correct_url} timed out: {e}")
            except requests.ConnectionError:
                logging.error(f"Connection error for URL {self.url_manager.correct_url}.")
            except requests.RequestException as e:
                logging.error(f"Request exception for URL {self.url_manager.correct_url}: {e}")
        logging.error(f"Failed to retrieve content from {self.url_manager.correct_url} after {self.max_retries} retries.")
        return None

    def try_request(self, timeout=10) -> Union[requests.Response, None]:
        """
        Try a single HTTP GET to the managed URL using the configured session.

        Args:
            timeout (int): Timeout for the request.

        Returns:
            requests.Response or None: the response on success, None on failure.
        """
        try:
            return self.session.get(url=self.url_manager.url, timeout=timeout)
        except requests.exceptions.RequestException as e:
            print(e)
            return None

    def get_limited_request(self, request_url, service_name="default"):
        """Make a rate-limited GET via the DynamicRateLimiterManager singleton,
        retrying on 429 and returning the JSON payload on 200 (else None)."""
        manager = DynamicRateLimiterManagerSingleton.get_instance()  # Get the singleton instance
        unwanted_response = True
        # Check with the rate limiter if we can make a request
        while True:
            if not manager.request(service_name):
                print("Rate limit reached for coin_gecko. Waiting for the next epoch...")
                sleep_count_down(manager.services[service_name].get_sleep()["current_sleep"])  # Wait for the limit_epoch duration
            # Make the actual request. The original called an undefined
            # module-level `try_request(request_url=...)` (NameError); the
            # session GET preserves the intended behavior.
            response = self.session.get(request_url)
            # If you get a rate-limit error (usually 429 status code but can vary), adjust the rate limiter
            if response.status_code == 429:
                print(response.json())
                manager.services[service_name].request_tracker(False)
                print("Rate limited by coin_gecko. Adjusted limit. Retrying...")
                if len(manager.services[service_name].calculate_tokens()["succesful"]) < 2:
                    sleep_count_down(manager.services[service_name].limit_epoch)  # Wait for the limit_epoch duration
                else:
                    manager.services[service_name].current_limit -= 1
                    sleep_count_down(manager.services[service_name].limit_epoch / len(manager.services[service_name].calculate_tokens()["succesful"]))  # Wait for the limit_epoch duration
            # Return the data if the request was successful
            if response.status_code == 200:
                manager.services[service_name].request_tracker(True)
                return response.json()
            elif response.status_code not in [200, 429]:
                print(f"Unexpected response: {response.status_code}. Message: {response.text}")
                return None

    @property
    def url(self):
        """The URL currently managed by self.url_manager."""
        return self.url_manager.url

    @url.setter
    def url(self, new_url):
        # Original wrote to `self._url_manager`, an attribute that is never
        # created anywhere — the setter silently had no effect on requests.
        self.url_manager.url = new_url
741
class SafeRequestSingleton:
    """Process-wide cached SafeRequest, rebuilt whenever a construction
    setting differs from the cached instance's."""
    _instance = None

    @staticmethod
    def get_instance(url=None, headers: dict = None, max_retries=3, last_request_time=None, request_wait_limit=1.5):
        inst = SafeRequestSingleton._instance
        stale = inst is not None and (
            inst.url != url
            or inst.headers != headers
            or inst.max_retries != max_retries
            or inst.request_wait_limit != request_wait_limit
        )
        if inst is None or stale:
            # NOTE(review): passes the URLManagerSingleton *class* (not an
            # instance) as url_manager, exactly as the original did — verify.
            SafeRequestSingleton._instance = SafeRequest(url, url_manager=URLManagerSingleton, headers=headers, max_retries=max_retries, last_request_time=last_request_time, request_wait_limit=request_wait_limit)
        return SafeRequestSingleton._instance
984
750
  class SoupManager:
985
751
  def __init__(self,url_manager,request_manager, parse_type="html.parser"):
986
752
  self.soup=[]
@@ -1185,6 +951,172 @@ class SoupManager:
1185
951
  @url.setter
1186
952
  def url(self, new_url):
1187
953
  self._url = new_url
954
+
955
class SoupManagerSingleton():
    """Cached SoupManager shared across the process; recreated whenever the
    parse type or source code differs from the cached instance's."""
    _instance = None

    @staticmethod
    def get_instance(url_manager, request_manager, parse_type="html.parser", source_code=None):
        cached = SoupManagerSingleton._instance
        needs_new = (
            cached is None
            or parse_type != cached.parse_type
            or source_code != cached.source_code
        )
        if needs_new:
            SoupManagerSingleton._instance = SoupManager(url_manager, request_manager, parse_type=parse_type, source_code=source_code)
        return SoupManagerSingleton._instance
964
class VideoDownloader:
    """Download one or more videos via yt-dlp (or a standalone HTML-scrape
    path), with a companion monitor thread that restarts slow downloads.

    NOTE(review): reconstructed from a mangled diff; thread/monitor timing
    logic is preserved as-is and its exact semantics are unverified.
    """

    def __init__(self, url, title=None, download_directory=os.getcwd(), user_agent=None, video_extention='mp4', download_video=True, get_info=False, auto_file_gen=True, standalone_download=False):
        self.url = url
        self.monitoring = True
        self.downloading = True
        self.pause_event = threading.Event()
        self.get_download = download_video
        self.get_info = get_info
        self.user_agent = user_agent
        self.title = title
        self.auto_file_gen = auto_file_gen
        self.standalone_download = standalone_download
        # Original assigned video_extention (and video_urls) twice; the
        # duplicates are removed here with no behavior change.
        self.video_extention = video_extention
        self.download_directory = download_directory
        self.header = {}  # UserAgentManagerSingleton().get_instance(user_agent=user_agent)
        # NOTE(review): basename/splitext assume `url` is a string even
        # though a list is accepted below — confirm callers.
        self.base_name = os.path.basename(self.url)
        self.file_name, self.ext = os.path.splitext(self.base_name)
        self.info = {}
        self.starttime = None
        self.downloaded = 0
        self.video_urls = url if isinstance(url, list) else [url]
        self.send_to_dl()

    def get_request(self, url):
        """Create/fetch the shared SafeRequest for *url* and cache it."""
        self.request_manager = SafeRequestSingleton.get_instance(url=url)
        return self.request_manager

    def send_to_dl(self):
        """Dispatch to the standalone scraper or the threaded yt-dlp path."""
        if self.standalone_download:
            self.standalone_downloader()
        else:
            self.start()

    def get_headers(self, url):
        """Return the response headers for *url*, or None on non-200."""
        # Send an HTTP GET request to the URL
        response = requests.get(url)
        # Check if the request was successful (status code 200)
        if response.status_code == 200:
            return response.headers
        else:
            print(f"Failed to retrieve the web page. Status code: {response.status_code}")

    def progress_callback(self, stream, chunk, bytes_remaining):
        """Track bytes downloaded so far (pytube-style callback signature)."""
        total_size = stream.filesize
        self.downloaded = total_size - bytes_remaining

    @staticmethod
    def get_directory_path(directory, name, video_extention):
        """Return a non-clobbering path ``directory/name[_i].video_extention``.

        The original definition lacked ``self`` and could not be called as a
        bound method (TypeError); it is now a staticmethod.
        """
        file_path = os.path.join(directory, name + '.' + video_extention)
        i = 0
        while os.path.exists(file_path):
            file_path = os.path.join(directory, name + f'_{i}' + '.' + video_extention)
            i += 1
        return file_path

    def fetch_video_urls(self):
        """Load the page in Chrome and harvest <source> URLs into video_urls.

        NOTE(review): ``eatInner`` is defined elsewhere in this project; its
        trimming semantics are unverified here.
        """
        driver = webdriver.Chrome()
        driver.get(self.url)
        self.page_source = driver.page_source
        # The original string lacked the f prefix, so the split matched the
        # literal text '.{self.video_extention}' instead of e.g. '.mp4'.
        marker = f'.{self.video_extention}'.replace('..', '.')
        for each in self.page_source.split('<source ')[1:]:
            self.video_urls.append(eatInner(each.split(marker)[0].split('http')[-1], ['h', 't', 't', 'p', 's', ':', '//', '/', 's', '=', ' ', '\n', '\t', '']) + '.mp4')

    def download(self):
        """Run yt-dlp over every queued URL; returns the last info dict."""
        def downloading_while():
            # Heartbeat printed while extract/download is in flight.
            while self.downloading:
                self.pause_event.wait(3)
                print('downloading...')
        for video_url in self.video_urls:
            ydl_opts = {
                'external_downloader': 'ffmpeg',
                'external_downloader_args': "-ss 00:01:00.00 -to 00:02:00.00",
                'format': 'best',
                'outtmpl': f'%(title)s-%(id)s.%(ext)s',
                'noprogress': True
            }
            self.output_video = ydl_opts['outtmpl']
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                self.downloading_while = threading.Thread(target=downloading_while)
                self.downloading_while.start()
                self.info = ydl.extract_info(video_url, download=self.get_download)
                self.downloading = False
                self.starttime = get_time_stamp()
                if self.auto_file_gen:
                    file_path = ydl.prepare_filename(self.info)
                    if self.get_info == True:
                        self.info['file_path'] = file_path
                if self.get_info == True:
                    self.stop()
                    return self.info
        self.stop()
        return self.info

    def monitor(self):
        """Periodically estimate remaining time and restart slow downloads."""
        name = self.title or False
        while self.monitoring:
            print("Monitoring...")
            self.pause_event.wait(60)  # check every minute
            if self.starttime:
                elapsed_time = get_time_stamp() - self.starttime
                if self.downloaded != 0 and elapsed_time != 0:
                    percent = self.downloaded / (self.downloaded + elapsed_time)
                else:
                    percent = 0
                if elapsed_time != 0:
                    try:
                        # operations that can cause ZeroDivisionError
                        percent = self.downloaded / (self.downloaded + elapsed_time)
                        downloaded_minutes = elapsed_time / 60
                        estimated_download_time = downloaded_minutes / percent - downloaded_minutes
                    except ZeroDivisionError:
                        print("Caught a division by zero!")
                        continue
                    if downloaded_minutes != 0 and (percent - downloaded_minutes) != 0:
                        estimated_download_time = downloaded_minutes / percent - downloaded_minutes
                        print(estimated_download_time)
                        if estimated_download_time >= 1.5:
                            print("Seems like YouTube is limiting our download speed, restarting the download to mitigate the problem..")
                            # TODO: Find a way to stop the current download and restart. This may not work efficiently since pytube doesn't expose a cancel download method.
                            self.start()  # Restart the download process

    def standalone_downloader(self):
        """Scrape <video><source src> tags from the page and save each file.

        NOTE(review): the original referenced undefined names (soup,
        name_chosen, directory, video_extention, bare fetch_video_urls /
        get_directory_path) and raised NameError on every call; they are now
        derived from instance state.
        """
        name = self.title or False
        name_chosen = bool(self.title)
        self.fetch_video_urls()
        for video_url in self.video_urls:
            self.info = True
            self.download_video = False
            headers = self.get_headers(video_url)
            soup = BeautifulSoup(self.page_source, "html.parser")
            for v in soup.select("video source[src]"):
                print("Downloading {}".format(v["src"]))
                if not name_chosen:
                    name = v["src"].split("/")[-1].strip()
                file_path = self.get_directory_path(self.download_directory, name, self.video_extention)
                print(f"saving to {file_path}")
                with open(file_path, "wb") as f_out:
                    f_out.write(requests.get(v["src"].strip(), headers=headers).content)

    def start(self):
        """Launch download + monitor threads and block until both finish."""
        self.download_thread = threading.Thread(target=self.download)
        self.download_thread.daemon = True
        self.monitor_thread = threading.Thread(target=self.monitor)
        self.download_thread.start()
        self.monitor_thread.start()
        self.download_thread.join()
        self.monitor_thread.join()

    def stop(self):
        """Signal both loops to exit and wake any pause_event waiters."""
        self.monitoring = False
        self.pause_event.set()
1110
class VideoDownloaderSingleton():
    """Cached VideoDownloader, rebuilt when any construction setting changes."""
    _instance = None

    @staticmethod
    def get_instance(url_manager, request_manager, title=None, video_extention='mp4', download_directory=os.getcwd(), user_agent=None, download=True, get_info=False, url=None):
        """Return the shared VideoDownloader, recreating it when settings differ.

        The original body referenced an undefined ``url`` (NameError) and
        passed a ``download=`` kwarg that VideoDownloader.__init__ does not
        accept (TypeError); ``url`` is now an explicit trailing parameter
        (backward compatible) and ``download`` maps to ``download_video``.
        NOTE(review): ``download_directory=os.getcwd()`` is evaluated once at
        definition time, not per call — confirm that is intended.
        """
        inst = VideoDownloaderSingleton._instance
        if (inst is None
                or inst.title != title
                or inst.video_extention != video_extention
                or inst.url != url
                or inst.download_directory != download_directory
                or inst.user_agent != user_agent):
            VideoDownloaderSingleton._instance = VideoDownloader(url=url, title=title, video_extention=video_extention, download_directory=download_directory, download_video=download, get_info=get_info, user_agent=user_agent)
        return VideoDownloaderSingleton._instance
1188
1120
  class LinkManager:
1189
1121
  def __init__(self,url_manager,soup_manager,img_attr_value_desired=None,img_attr_value_undesired=None,link_attr_value_desired=None,link_attr_value_undesired=None,associated_data_attr=["data-title",'alt','title'],get_img=["data-title",'alt','title']):
1190
1122
  self.url_manager= url_manager
@@ -1251,15 +1183,7 @@ class LinkManager:
1251
1183
  valid_assiciated_attrs[-1]["link"]=valid_attr
1252
1184
  desired_links.append(valid_assiciated_attrs)
1253
1185
  return desired_links
1254
- class SoupManagerSingleton():
1255
- _instance = None
1256
- @staticmethod
1257
- def get_instance(url_manager,request_manager,parse_type="html.parser",source_code=None):
1258
- if SoupManagerSingleton._instance is None:
1259
- SoupManagerSingleton._instance = SoupManager(url_manager,request_manager,parse_type=parse_type,source_code=source_code)
1260
- elif parse_type != SoupManagerSingleton._instance.parse_type or source_code != SoupManagerSingleton._instance.source_code:
1261
- SoupManagerSingleton._instance = SoupManager(url_manager,request_manager,parse_type=parse_type,source_code=source_code)
1262
- return SoupManagerSingleton._instance
1186
+
1263
1187
  def CrawlManager():
1264
1188
  def __init__(self,url=None,source_code=None,parse_type="html.parser"):
1265
1189
  self.url=url
@@ -1447,4 +1371,3 @@ class CrawlManagerSingleton():
1447
1371
  elif parse_type != CrawlManagerSingleton._instance.parse_type or url != CrawlManagerSingleton._instance.url or source_code != CrawlManagerSingleton._instance.source_code:
1448
1372
  CrawlManagerSingleton._instance = CrawlManager(url=url,parse_type=parse_type,source_code=source_code)
1449
1373
  return CrawlManagerSingleton._instance
1450
-