abstract-webtools 0.1.4.24__tar.gz → 0.1.4.25__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (19)
  1. {abstract_webtools-0.1.4.24 → abstract_webtools-0.1.4.25}/PKG-INFO +1 -1
  2. {abstract_webtools-0.1.4.24 → abstract_webtools-0.1.4.25}/setup.py +1 -1
  3. {abstract_webtools-0.1.4.24 → abstract_webtools-0.1.4.25}/src/abstract_webtools/abstract_webtools.py +73 -11
  4. {abstract_webtools-0.1.4.24 → abstract_webtools-0.1.4.25}/src/abstract_webtools.egg-info/PKG-INFO +1 -1
  5. {abstract_webtools-0.1.4.24 → abstract_webtools-0.1.4.25}/LICENSE +0 -0
  6. {abstract_webtools-0.1.4.24 → abstract_webtools-0.1.4.25}/README.md +0 -0
  7. {abstract_webtools-0.1.4.24 → abstract_webtools-0.1.4.25}/pyproject.toml +0 -0
  8. {abstract_webtools-0.1.4.24 → abstract_webtools-0.1.4.25}/setup.cfg +0 -0
  9. {abstract_webtools-0.1.4.24 → abstract_webtools-0.1.4.25}/src/abstract_webtools/__init__.py +0 -0
  10. {abstract_webtools-0.1.4.24 → abstract_webtools-0.1.4.25}/src/abstract_webtools/abstract_crawler.py +0 -0
  11. {abstract_webtools-0.1.4.24 → abstract_webtools-0.1.4.25}/src/abstract_webtools/abstract_webtools2.py +0 -0
  12. {abstract_webtools-0.1.4.24 → abstract_webtools-0.1.4.25}/src/abstract_webtools/dfgdsf.py +0 -0
  13. {abstract_webtools-0.1.4.24 → abstract_webtools-0.1.4.25}/src/abstract_webtools/grab_source_gui.py +0 -0
  14. {abstract_webtools-0.1.4.24 → abstract_webtools-0.1.4.25}/src/abstract_webtools/main.py +0 -0
  15. {abstract_webtools-0.1.4.24 → abstract_webtools-0.1.4.25}/src/abstract_webtools/sou.py +0 -0
  16. {abstract_webtools-0.1.4.24 → abstract_webtools-0.1.4.25}/src/abstract_webtools.egg-info/SOURCES.txt +0 -0
  17. {abstract_webtools-0.1.4.24 → abstract_webtools-0.1.4.25}/src/abstract_webtools.egg-info/dependency_links.txt +0 -0
  18. {abstract_webtools-0.1.4.24 → abstract_webtools-0.1.4.25}/src/abstract_webtools.egg-info/requires.txt +0 -0
  19. {abstract_webtools-0.1.4.24 → abstract_webtools-0.1.4.25}/src/abstract_webtools.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: abstract_webtools
3
- Version: 0.1.4.24
3
+ Version: 0.1.4.25
4
4
  Summary: Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.
5
5
  Home-page: https://github.com/AbstractEndeavors/abstract_essentials/tree/main/abstract_webtools
6
6
  Author: putkoff
@@ -4,7 +4,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
4
4
  long_description = fh.read()
5
5
  setuptools.setup(
6
6
  name='abstract_webtools',
7
- version='0.1.4.24',
7
+ version='0.1.4.25',
8
8
  author='putkoff',
9
9
  author_email='partners@abstractendeavors.com',
10
10
  description='Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.',
@@ -85,6 +85,7 @@ from bs4 import BeautifulSoup
85
85
  import xml.etree.ElementTree as ET
86
86
  from abstract_utilities.time_utils import get_time_stamp,get_sleep,sleep_count_down
87
87
  from abstract_utilities.string_clean import eatInner,eatAll
88
+ import socket
88
89
  logging.basicConfig(level=logging.INFO)
89
90
  class DynamicRateLimiterManager:
90
91
  def __init__(self):
@@ -641,6 +642,50 @@ class SafeRequestSingleton:
641
642
  ## if source_code:
642
643
  ## print(source_code)
643
644
  ## ##
645
+ class MySocketClient:
646
+ def __init__(self, ip_address, port,domain_name):
647
+ self.sock
648
+ self.ip_address= ip_address
649
+ self.port = port
650
+ self.sock.connect((host, port))
651
+ self.domain_name =
652
+ def receive_data(self):
653
+ chunks = []
654
+ while True:
655
+ chunk = self.sock.recv(4096)
656
+ if chunk:
657
+ chunks.append(chunk)
658
+ else:
659
+ break
660
+ return b''.join(chunks).decode('utf-8')
661
+ def _parse_socket_response_as_json(self, data, *args, **kwargs):
662
+ return self._parse_json(data[data.find('{'):data.rfind('}') + 1], *args, **kwargs)
663
+ def process_data(self):
664
+ data = self.receive_data()
665
+ return self._parse_socket_response_as_json(data)
666
+ def _parse_json(self,json_string):
667
+ return json.loads(json_string)
668
+ def get_ip(self,domain=None):
669
+ try:
670
+ return self.sock.gethostbyname(domain_name if domain_name != None else self.domain_name)
671
+ except self.sock.gaierror:
672
+ return None
673
+ def grt_host_name(self,ip_address=None):
674
+ return self.sock.gethostbyaddr(ip_address if ip_address != None else self.ip_address)
675
+ def toggle_sock(self):
676
+ if self.sock != None:
677
+ self.sock.close()
678
+ else:
679
+ self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
680
+ class MySocketClient():
681
+ _instance = None
682
+ @staticmethod
683
+ def get_instance(ip_address='local_host',port=22,domain_name="example.com"):
684
+ if MySocketClientSingleton._instance is None:
685
+ MySocketClientSingleton._instance = MySocketClient(ip_address=ip_address,port=port,domain_name=domain_name)
686
+ elif MySocketClientSingleton._instance.ip_address != ip_address or MySocketClientSingleton._instance.port != port or URLManagerSingleton._instance.domain_name != domain_name:
687
+ MySocketClientSingleton._instance = MySocketClient(ip_address=ip_address,port=port,domain_name=domain_name)
688
+ return MySocketClient
644
689
  class URLManager:
645
690
  def __init__(self,url=None,session=requests):
646
691
  self.url = url
@@ -648,8 +693,15 @@ class URLManager:
648
693
  self.striped_url = None if url == None else self.strip_web()
649
694
  self.clean_urls = None if url == None else self.clean_url(url=self.url)
650
695
  self.correct_url = None if url == None else self.get_correct_url()
651
- self.domain_name = None if url == None else self.get_domain_name(self.correct_url)
696
+ if self.correct_url != None:
697
+ self.url = self.correct_url
698
+ self.protocol=None
699
+ self.domain_name= None if url == None else self.get_domain_name(self.correct_url)
700
+ self.path=None
701
+ self.query=None
702
+ self.strip_web()
652
703
  self.all_urls=[]
704
+
653
705
  def strip_web(self) -> str:
654
706
  """
655
707
  Strip the 'http://' or 'https://' prefix from a URL, if present.
@@ -666,6 +718,13 @@ class URLManager:
666
718
  elif self.url.startswith("https://"):
667
719
  url = self.url.replace("https://", '', 1)
668
720
  return url
721
+ def url_to_pieces(self):
722
+ match = re.match(r'^(https?):\/\/([^\/]+)(\/[^?]+)?(\?.+)?', self.correct_url)
723
+ if match:
724
+ self.protocol = match.group(1)
725
+ self.domain = match.group(2)
726
+ self.path = match.group(3) if match.group(3) else "" # Handle None
727
+ self.query = match.group(4) if match.group(4) else "" # Handle None
669
728
  @staticmethod
670
729
  def clean_url(url: str) -> list:
671
730
  """
@@ -780,17 +839,20 @@ class URLManagerSingleton:
780
839
  return URLManagerSingleton._instance
781
840
 
782
841
  class VideoDownloader:
783
-
784
842
  def __init__(self, url,title=None,download_directory=os.getcwd(),user_agent=None,video_extention='mp4',download=True,get_info=False):
785
843
  self.url = url
844
+ self.download = download
845
+ self.get_info = get_info
846
+ self.user_agent=user_agent
847
+ self.download_directory=download_directory
786
848
  self.video_extention=video_extention
787
849
  self.header = UserAgentManagerSingleton().get_instance(user_agent=user_agent).user_agent_header
788
850
  self.base_name = os.path.basename(self.url)
789
851
  self.file_name,self.ext = os.path.splitext(self.base_name)
790
- self.download_directory=download_directory
791
852
  self.title = url.split('/')[3] if title == None else title
792
- self.video_urls = []
853
+ self.video_urls = [self.url]
793
854
  self.fetch_video_urls()
855
+ self.info={}
794
856
  self.download_videos()
795
857
  def fetch_video_urls(self):
796
858
  driver = webdriver.Chrome()
@@ -803,12 +865,12 @@ class VideoDownloader:
803
865
  for video_url in self.video_urls:
804
866
  ydl_opts = {}
805
867
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
806
- info = ydl.extract_info(self.url,download=download)
807
- if get_info == True:
808
- return info
809
- self.base_name = os.path.basename(info['url'])
868
+ self.info = ydl.extract_info(url=video_url,download=self.download)
869
+ if self.get_info == True:
870
+ return self.info
871
+ self.base_name = os.path.basename(self.info['url'])
810
872
  self.file_name,self.ext = os.path.splitext(self.base_name)
811
- video_content =SafeRequestSingleton().get_instance(url=info['url']).response
873
+ video_content =SafeRequestSingleton().get_instance(url=self.info['url']).response
812
874
  print("Start downloading")
813
875
  content_length = int(video_content.headers['content-length'])
814
876
  print(f'Size: {content_length / 1024 / 1024:.2f}MB')
@@ -824,9 +886,9 @@ class VideoDownloaderSingleton():
824
886
  @staticmethod
825
887
  def get_instance(url,title=None,video_extention='mp4',download_directory=os.getcwd(),user_agent=None,download=True,get_info=False):
826
888
  if VideoDownloaderSingleton._instance is None:
827
- VideoDownloaderSingleton._instance = VideoDownloader(url=url,title=title,video_extention=video_extention,download_directory=download_directory,download=download,get_info=get_info)
889
+ VideoDownloaderSingleton._instance = VideoDownloader(url=url,title=title,video_extention=video_extention,download_directory=download_directory,download=download,get_info=get_info,user_agent=user_agent)
828
890
  elif VideoDownloaderSingleton._instance.title != title or video_extention != VideoDownloaderSingleton._instance.video_extention or url != VideoDownloaderSingleton._instance.url or download_directory != VideoDownloaderSingleton._instance.download_directory or user_agent != VideoDownloaderSingleton._instance.user_agent:
829
- VideoDownloaderSingleton._instance = VideoDownloader(url=url,title=title,video_extention=video_extention,download_directory=download_directory,download=download,get_info=get_info)
891
+ VideoDownloaderSingleton._instance = VideoDownloader(url=url,title=title,video_extention=video_extention,download_directory=download_directory,download=download,get_info=get_info,user_agent=user_agent)
830
892
  return VideoDownloaderSingleton._instance
831
893
  class SoupManager:
832
894
  def __init__(self, url=None, source_code=None, parse_type="html.parser"):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: abstract-webtools
3
- Version: 0.1.4.24
3
+ Version: 0.1.4.25
4
4
  Summary: Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.
5
5
  Home-page: https://github.com/AbstractEndeavors/abstract_essentials/tree/main/abstract_webtools
6
6
  Author: putkoff