py2ls 0.2.4.25__py3-none-any.whl → 0.2.4.26__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- py2ls/.DS_Store +0 -0
- py2ls/.git/index +0 -0
- py2ls/corr.py +475 -0
- py2ls/data/.DS_Store +0 -0
- py2ls/data/hyper_param_autogluon_zeroshot2024.json +2383 -0
- py2ls/data/styles/.DS_Store +0 -0
- py2ls/data/styles/example/.DS_Store +0 -0
- py2ls/data/usages_sns.json +6 -1
- py2ls/ips.py +399 -91
- py2ls/ml2ls.py +758 -186
- py2ls/netfinder.py +16 -20
- py2ls/plot.py +916 -141
- {py2ls-0.2.4.25.dist-info → py2ls-0.2.4.26.dist-info}/METADATA +5 -1
- {py2ls-0.2.4.25.dist-info → py2ls-0.2.4.26.dist-info}/RECORD +15 -13
- py2ls/data/usages_pd copy.json +0 -1105
- {py2ls-0.2.4.25.dist-info → py2ls-0.2.4.26.dist-info}/WHEEL +0 -0
py2ls/netfinder.py
CHANGED
@@ -626,7 +626,7 @@ def filter_links(links, contains="html", driver="requ", booster=False):
|
|
626
626
|
)
|
627
627
|
if condition:
|
628
628
|
filtered_links.append(link)
|
629
|
-
return filtered_links
|
629
|
+
return ips.unique(filtered_links)
|
630
630
|
|
631
631
|
|
632
632
|
def find_domain(links):
|
@@ -717,7 +717,7 @@ def downloader(
|
|
717
717
|
kind=[".pdf"],
|
718
718
|
contains=None,
|
719
719
|
rm_folder=False,
|
720
|
-
booster=
|
720
|
+
booster=True,# use find_links
|
721
721
|
verbose=True,
|
722
722
|
timeout=30,
|
723
723
|
n_try=3,
|
@@ -726,7 +726,7 @@ def downloader(
|
|
726
726
|
|
727
727
|
from requests.exceptions import ChunkedEncodingError, ConnectionError
|
728
728
|
|
729
|
-
if verbose:
|
729
|
+
if verbose and ips.run_once_within():
|
730
730
|
print(
|
731
731
|
"usage: downloader(url, dir_save=None, kind=['.pdf','xls'], contains=None, booster=False)"
|
732
732
|
)
|
@@ -734,8 +734,11 @@ def downloader(
|
|
734
734
|
def fname_corrector(fname, ext):
|
735
735
|
if not ext.startswith("."):
|
736
736
|
ext = "." + ext
|
737
|
-
if not fname.endswith(
|
737
|
+
if not fname.endswith(ext): # if not ext in fname:
|
738
738
|
fname = fname[: -len(ext)] + ext
|
739
|
+
if not any(fname[: -len(ext)]):
|
740
|
+
from datetime import datetime
|
741
|
+
fname = datetime.now().strftime("%H%M%S") + ext
|
739
742
|
return fname
|
740
743
|
|
741
744
|
def check_and_modify_filename(directory, filename):
|
@@ -784,8 +787,8 @@ def downloader(
|
|
784
787
|
kind[i] = "." + kind[i]
|
785
788
|
file_links_all = []
|
786
789
|
for kind_ in kind:
|
787
|
-
if isinstance(contains, str):
|
788
|
-
|
790
|
+
# if isinstance(contains, str):
|
791
|
+
# contains = [contains]
|
789
792
|
if isinstance(url, str):
|
790
793
|
if any(ext in url for ext in kind):
|
791
794
|
file_links = [url]
|
@@ -799,7 +802,7 @@ def downloader(
|
|
799
802
|
if contains is not None:
|
800
803
|
file_links = filter_links(links_all, contains=contains + kind_)
|
801
804
|
else:
|
802
|
-
file_links =
|
805
|
+
file_links = filter_links(links_all, contains=kind_)#links_all #
|
803
806
|
elif isinstance(url, list):
|
804
807
|
links_all = url
|
805
808
|
if contains is not None:
|
@@ -812,6 +815,7 @@ def downloader(
|
|
812
815
|
file_links = filter_links(links_all, contains=contains + kind_)
|
813
816
|
else:
|
814
817
|
file_links = filter_links(links_all, contains=kind_)
|
818
|
+
file_links=ips.unique(file_links)
|
815
819
|
if verbose:
|
816
820
|
if file_links:
|
817
821
|
from pprint import pp
|
@@ -825,6 +829,7 @@ def downloader(
|
|
825
829
|
file_links_all = [file_links]
|
826
830
|
elif isinstance(file_links, list):
|
827
831
|
file_links_all.extend(file_links)
|
832
|
+
file_links_all=ips.unique(file_links_all)
|
828
833
|
if dir_save:
|
829
834
|
if rm_folder:
|
830
835
|
ips.rm_folder(dir_save)
|
@@ -847,7 +852,7 @@ def downloader(
|
|
847
852
|
)
|
848
853
|
if ext is None:
|
849
854
|
ext = kind_
|
850
|
-
|
855
|
+
|
851
856
|
if ext:
|
852
857
|
corrected_fname = fname_corrector(fnames[idx], ext)
|
853
858
|
corrected_fname = check_and_modify_filename(
|
@@ -860,13 +865,13 @@ def downloader(
|
|
860
865
|
datetime.now().strftime("%y%m%d_%H%M%S_")
|
861
866
|
+ corrected_fname
|
862
867
|
)
|
863
|
-
fpath_tmp = os.path.join(dir_save, corrected_fname)
|
868
|
+
fpath_tmp = os.path.join(dir_save, corrected_fname)
|
864
869
|
with open(fpath_tmp, "wb") as file:
|
865
870
|
for chunk in response.iter_content(chunk_size=8192):
|
866
871
|
if chunk: # Filter out keep-alive chunks
|
867
872
|
file.write(chunk)
|
868
873
|
if verbose:
|
869
|
-
print(f"Done
|
874
|
+
print(f"Done⤵{fnames[idx]}")
|
870
875
|
else:
|
871
876
|
if verbose:
|
872
877
|
print(f"Unknown file type for {file_link}")
|
@@ -886,16 +891,7 @@ def downloader(
|
|
886
891
|
|
887
892
|
if itry == n_try:
|
888
893
|
print(f"Failed to download {file_link} after {n_try} attempts.")
|
889
|
-
|
890
|
-
# print(f"\n{len(fnames)} files were downloaded:")
|
891
|
-
if verbose:
|
892
|
-
from pprint import pp
|
893
|
-
|
894
|
-
if corrected_fname:
|
895
|
-
pp(corrected_fname)
|
896
|
-
print(f"\n\nsaved @:\n{dir_save}")
|
897
|
-
else:
|
898
|
-
pp(fnames)
|
894
|
+
|
899
895
|
|
900
896
|
|
901
897
|
def find_img(url, driver="request", dir_save="images", rm_folder=False, verbose=True):
|