py2ls-0.2.4.25-py3-none-any.whl → py2ls-0.2.4.26-py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
py2ls/netfinder.py CHANGED
@@ -626,7 +626,7 @@ def filter_links(links, contains="html", driver="requ", booster=False):
626
626
  )
627
627
  if condition:
628
628
  filtered_links.append(link)
629
- return filtered_links
629
+ return ips.unique(filtered_links)
630
630
 
631
631
 
632
632
  def find_domain(links):
@@ -717,7 +717,7 @@ def downloader(
717
717
  kind=[".pdf"],
718
718
  contains=None,
719
719
  rm_folder=False,
720
- booster=False,
720
+ booster=True,# use find_links
721
721
  verbose=True,
722
722
  timeout=30,
723
723
  n_try=3,
@@ -726,7 +726,7 @@ def downloader(
726
726
 
727
727
  from requests.exceptions import ChunkedEncodingError, ConnectionError
728
728
 
729
- if verbose:
729
+ if verbose and ips.run_once_within():
730
730
  print(
731
731
  "usage: downloader(url, dir_save=None, kind=['.pdf','xls'], contains=None, booster=False)"
732
732
  )
@@ -734,8 +734,11 @@ def downloader(
734
734
  def fname_corrector(fname, ext):
735
735
  if not ext.startswith("."):
736
736
  ext = "." + ext
737
- if not fname.endswith("ext"): # if not ext in fname:
737
+ if not fname.endswith(ext): # if not ext in fname:
738
738
  fname = fname[: -len(ext)] + ext
739
+ if not any(fname[: -len(ext)]):
740
+ from datetime import datetime
741
+ fname = datetime.now().strftime("%H%M%S") + ext
739
742
  return fname
740
743
 
741
744
  def check_and_modify_filename(directory, filename):
@@ -784,8 +787,8 @@ def downloader(
784
787
  kind[i] = "." + kind[i]
785
788
  file_links_all = []
786
789
  for kind_ in kind:
787
- if isinstance(contains, str):
788
- contains = [contains]
790
+ # if isinstance(contains, str):
791
+ # contains = [contains]
789
792
  if isinstance(url, str):
790
793
  if any(ext in url for ext in kind):
791
794
  file_links = [url]
@@ -799,7 +802,7 @@ def downloader(
799
802
  if contains is not None:
800
803
  file_links = filter_links(links_all, contains=contains + kind_)
801
804
  else:
802
- file_links = links_all # filter_links(links_all, contains=kind_)
805
+ file_links = filter_links(links_all, contains=kind_)#links_all #
803
806
  elif isinstance(url, list):
804
807
  links_all = url
805
808
  if contains is not None:
@@ -812,6 +815,7 @@ def downloader(
812
815
  file_links = filter_links(links_all, contains=contains + kind_)
813
816
  else:
814
817
  file_links = filter_links(links_all, contains=kind_)
818
+ file_links=ips.unique(file_links)
815
819
  if verbose:
816
820
  if file_links:
817
821
  from pprint import pp
@@ -825,6 +829,7 @@ def downloader(
825
829
  file_links_all = [file_links]
826
830
  elif isinstance(file_links, list):
827
831
  file_links_all.extend(file_links)
832
+ file_links_all=ips.unique(file_links_all)
828
833
  if dir_save:
829
834
  if rm_folder:
830
835
  ips.rm_folder(dir_save)
@@ -847,7 +852,7 @@ def downloader(
847
852
  )
848
853
  if ext is None:
849
854
  ext = kind_
850
- print("ehereerere", ext)
855
+
851
856
  if ext:
852
857
  corrected_fname = fname_corrector(fnames[idx], ext)
853
858
  corrected_fname = check_and_modify_filename(
@@ -860,13 +865,13 @@ def downloader(
860
865
  datetime.now().strftime("%y%m%d_%H%M%S_")
861
866
  + corrected_fname
862
867
  )
863
- fpath_tmp = os.path.join(dir_save, corrected_fname)
868
+ fpath_tmp = os.path.join(dir_save, corrected_fname)
864
869
  with open(fpath_tmp, "wb") as file:
865
870
  for chunk in response.iter_content(chunk_size=8192):
866
871
  if chunk: # Filter out keep-alive chunks
867
872
  file.write(chunk)
868
873
  if verbose:
869
- print(f"Done! {fnames[idx]}")
874
+ print(f"Done{fnames[idx]}")
870
875
  else:
871
876
  if verbose:
872
877
  print(f"Unknown file type for {file_link}")
@@ -886,16 +891,7 @@ def downloader(
886
891
 
887
892
  if itry == n_try:
888
893
  print(f"Failed to download {file_link} after {n_try} attempts.")
889
-
890
- # print(f"\n{len(fnames)} files were downloaded:")
891
- if verbose:
892
- from pprint import pp
893
-
894
- if corrected_fname:
895
- pp(corrected_fname)
896
- print(f"\n\nsaved @:\n{dir_save}")
897
- else:
898
- pp(fnames)
894
+
899
895
 
900
896
 
901
897
  def find_img(url, driver="request", dir_save="images", rm_folder=False, verbose=True):