py2ls 0.2.4.25__py3-none-any.whl → 0.2.4.26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- py2ls/.DS_Store +0 -0
- py2ls/.git/index +0 -0
- py2ls/corr.py +475 -0
- py2ls/data/.DS_Store +0 -0
- py2ls/data/hyper_param_autogluon_zeroshot2024.json +2383 -0
- py2ls/data/styles/.DS_Store +0 -0
- py2ls/data/styles/example/.DS_Store +0 -0
- py2ls/data/usages_sns.json +6 -1
- py2ls/ips.py +399 -91
- py2ls/ml2ls.py +758 -186
- py2ls/netfinder.py +16 -20
- py2ls/plot.py +916 -141
- {py2ls-0.2.4.25.dist-info → py2ls-0.2.4.26.dist-info}/METADATA +5 -1
- {py2ls-0.2.4.25.dist-info → py2ls-0.2.4.26.dist-info}/RECORD +15 -13
- py2ls/data/usages_pd copy.json +0 -1105
- {py2ls-0.2.4.25.dist-info → py2ls-0.2.4.26.dist-info}/WHEEL +0 -0
py2ls/netfinder.py
CHANGED
@@ -626,7 +626,7 @@ def filter_links(links, contains="html", driver="requ", booster=False):
|
|
626
626
|
)
|
627
627
|
if condition:
|
628
628
|
filtered_links.append(link)
|
629
|
-
return filtered_links
|
629
|
+
return ips.unique(filtered_links)
|
630
630
|
|
631
631
|
|
632
632
|
def find_domain(links):
|
@@ -717,7 +717,7 @@ def downloader(
|
|
717
717
|
kind=[".pdf"],
|
718
718
|
contains=None,
|
719
719
|
rm_folder=False,
|
720
|
-
booster=
|
720
|
+
booster=True,# use find_links
|
721
721
|
verbose=True,
|
722
722
|
timeout=30,
|
723
723
|
n_try=3,
|
@@ -726,7 +726,7 @@ def downloader(
|
|
726
726
|
|
727
727
|
from requests.exceptions import ChunkedEncodingError, ConnectionError
|
728
728
|
|
729
|
-
if verbose:
|
729
|
+
if verbose and ips.run_once_within():
|
730
730
|
print(
|
731
731
|
"usage: downloader(url, dir_save=None, kind=['.pdf','xls'], contains=None, booster=False)"
|
732
732
|
)
|
@@ -734,8 +734,11 @@ def downloader(
|
|
734
734
|
def fname_corrector(fname, ext):
|
735
735
|
if not ext.startswith("."):
|
736
736
|
ext = "." + ext
|
737
|
-
if not fname.endswith(
|
737
|
+
if not fname.endswith(ext): # if not ext in fname:
|
738
738
|
fname = fname[: -len(ext)] + ext
|
739
|
+
if not any(fname[: -len(ext)]):
|
740
|
+
from datetime import datetime
|
741
|
+
fname = datetime.now().strftime("%H%M%S") + ext
|
739
742
|
return fname
|
740
743
|
|
741
744
|
def check_and_modify_filename(directory, filename):
|
@@ -784,8 +787,8 @@ def downloader(
|
|
784
787
|
kind[i] = "." + kind[i]
|
785
788
|
file_links_all = []
|
786
789
|
for kind_ in kind:
|
787
|
-
if isinstance(contains, str):
|
788
|
-
|
790
|
+
# if isinstance(contains, str):
|
791
|
+
# contains = [contains]
|
789
792
|
if isinstance(url, str):
|
790
793
|
if any(ext in url for ext in kind):
|
791
794
|
file_links = [url]
|
@@ -799,7 +802,7 @@ def downloader(
|
|
799
802
|
if contains is not None:
|
800
803
|
file_links = filter_links(links_all, contains=contains + kind_)
|
801
804
|
else:
|
802
|
-
file_links =
|
805
|
+
file_links = filter_links(links_all, contains=kind_)#links_all #
|
803
806
|
elif isinstance(url, list):
|
804
807
|
links_all = url
|
805
808
|
if contains is not None:
|
@@ -812,6 +815,7 @@ def downloader(
|
|
812
815
|
file_links = filter_links(links_all, contains=contains + kind_)
|
813
816
|
else:
|
814
817
|
file_links = filter_links(links_all, contains=kind_)
|
818
|
+
file_links=ips.unique(file_links)
|
815
819
|
if verbose:
|
816
820
|
if file_links:
|
817
821
|
from pprint import pp
|
@@ -825,6 +829,7 @@ def downloader(
|
|
825
829
|
file_links_all = [file_links]
|
826
830
|
elif isinstance(file_links, list):
|
827
831
|
file_links_all.extend(file_links)
|
832
|
+
file_links_all=ips.unique(file_links_all)
|
828
833
|
if dir_save:
|
829
834
|
if rm_folder:
|
830
835
|
ips.rm_folder(dir_save)
|
@@ -847,7 +852,7 @@ def downloader(
|
|
847
852
|
)
|
848
853
|
if ext is None:
|
849
854
|
ext = kind_
|
850
|
-
|
855
|
+
|
851
856
|
if ext:
|
852
857
|
corrected_fname = fname_corrector(fnames[idx], ext)
|
853
858
|
corrected_fname = check_and_modify_filename(
|
@@ -860,13 +865,13 @@ def downloader(
|
|
860
865
|
datetime.now().strftime("%y%m%d_%H%M%S_")
|
861
866
|
+ corrected_fname
|
862
867
|
)
|
863
|
-
fpath_tmp = os.path.join(dir_save, corrected_fname)
|
868
|
+
fpath_tmp = os.path.join(dir_save, corrected_fname)
|
864
869
|
with open(fpath_tmp, "wb") as file:
|
865
870
|
for chunk in response.iter_content(chunk_size=8192):
|
866
871
|
if chunk: # Filter out keep-alive chunks
|
867
872
|
file.write(chunk)
|
868
873
|
if verbose:
|
869
|
-
print(f"Done
|
874
|
+
print(f"Done⤵{fnames[idx]}")
|
870
875
|
else:
|
871
876
|
if verbose:
|
872
877
|
print(f"Unknown file type for {file_link}")
|
@@ -886,16 +891,7 @@ def downloader(
|
|
886
891
|
|
887
892
|
if itry == n_try:
|
888
893
|
print(f"Failed to download {file_link} after {n_try} attempts.")
|
889
|
-
|
890
|
-
# print(f"\n{len(fnames)} files were downloaded:")
|
891
|
-
if verbose:
|
892
|
-
from pprint import pp
|
893
|
-
|
894
|
-
if corrected_fname:
|
895
|
-
pp(corrected_fname)
|
896
|
-
print(f"\n\nsaved @:\n{dir_save}")
|
897
|
-
else:
|
898
|
-
pp(fnames)
|
894
|
+
|
899
895
|
|
900
896
|
|
901
897
|
def find_img(url, driver="request", dir_save="images", rm_folder=False, verbose=True):
|