py2ls 0.1.6.7__py3-none-any.whl → 0.1.6.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
py2ls/ips.py
CHANGED
@@ -1022,15 +1022,30 @@ def fupdate(fpath, content=None):
|
|
1022
1022
|
with open(fpath, 'w') as file:
|
1023
1023
|
file.write(content)
|
1024
1024
|
file.write(old_content)
|
1025
|
+
def fappend(fpath, content=None):
|
1026
|
+
"""
|
1027
|
+
append new content at the end.
|
1028
|
+
"""
|
1029
|
+
content = content or ""
|
1030
|
+
if os.path.exists(fpath):
|
1031
|
+
with open(fpath, 'r') as file:
|
1032
|
+
old_content = file.read()
|
1033
|
+
else:
|
1034
|
+
old_content = ''
|
1035
|
+
|
1036
|
+
with open(fpath, 'w') as file:
|
1037
|
+
file.write(old_content)
|
1038
|
+
file.write(content)
|
1025
1039
|
|
1026
1040
|
def fsave(
|
1027
1041
|
fpath,
|
1028
1042
|
content,
|
1043
|
+
mode='w',
|
1044
|
+
how ='overwrite',
|
1029
1045
|
kind=None,
|
1030
1046
|
font_name="Times",
|
1031
1047
|
font_size=10,
|
1032
1048
|
spacing=6,
|
1033
|
-
mode='w',
|
1034
1049
|
**kwargs,
|
1035
1050
|
):
|
1036
1051
|
"""
|
@@ -1046,9 +1061,14 @@ def fsave(
|
|
1046
1061
|
Returns:
|
1047
1062
|
None
|
1048
1063
|
"""
|
1049
|
-
def save_content(fpath, content, mode=mode):
|
1050
|
-
|
1051
|
-
|
1064
|
+
def save_content(fpath, content, mode=mode, how='overwrite'):
|
1065
|
+
if 'wri' in how.lower():
|
1066
|
+
with open(fpath, mode, encoding='utf-8') as file:
|
1067
|
+
file.write(content)
|
1068
|
+
elif 'upd' in how.lower():
|
1069
|
+
fupdate(fpath, content=content)
|
1070
|
+
elif 'app' in how.lower():
|
1071
|
+
fappend(fpath, content=content)
|
1052
1072
|
|
1053
1073
|
|
1054
1074
|
def save_docx(fpath, content, font_name, font_size, spacing):
|
py2ls/netfinder.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
from bs4 import BeautifulSoup
|
2
2
|
import requests
|
3
3
|
from requests.utils import dict_from_cookiejar
|
4
|
+
from requests.exceptions import ChunkedEncodingError, ConnectionError
|
4
5
|
import os
|
5
6
|
from urllib.parse import urlparse, urljoin
|
6
7
|
import base64
|
@@ -150,7 +151,7 @@ def flatten_json(y):
|
|
150
151
|
def get_proxy():
|
151
152
|
list_ = []
|
152
153
|
headers = {"User-Agent": user_agent()}
|
153
|
-
response = requests.get("https://free-proxy-list.net", headers=headers)
|
154
|
+
response = requests.get("https://free-proxy-list.net", headers=headers,timeout=30,stream=True)
|
154
155
|
content = BeautifulSoup(response.content, "html.parser")
|
155
156
|
info = extract_text_from_content(content, where="td", extend=0)[0].split()
|
156
157
|
count, pair_proxy = 0, 2
|
@@ -200,18 +201,18 @@ def fetch_all(url, parser="lxml", driver='request', # request or selenium
|
|
200
201
|
|
201
202
|
headers = {"User-Agent": user_agent()}
|
202
203
|
if 'req' in driver.lower():
|
203
|
-
response = requests.get(url, headers=headers,proxies=proxies_glob)
|
204
|
+
response = requests.get(url, headers=headers,proxies=proxies_glob,timeout=30,stream=True)
|
204
205
|
|
205
206
|
# If the response is a redirect, follow it
|
206
207
|
while response.is_redirect:
|
207
208
|
logger.info(f"Redirecting to: {response.headers['Location']}")
|
208
|
-
response = requests.get(response.headers["Location"], headers=headers,proxies=proxies_glob)
|
209
|
+
response = requests.get(response.headers["Location"], headers=headers,proxies=proxies_glob,timeout=30,stream=True)
|
209
210
|
# Check for a 403 error
|
210
211
|
if response.status_code == 403:
|
211
212
|
logger.warning("403 Forbidden error. Retrying...")
|
212
213
|
# Retry the request after a short delay
|
213
214
|
sleep(random.uniform(1, 3))
|
214
|
-
response = requests.get(url, headers=headers,proxies=proxies_glob)
|
215
|
+
response = requests.get(url, headers=headers,proxies=proxies_glob,timeout=30,stream=True)
|
215
216
|
# Raise an error if retry also fails
|
216
217
|
response.raise_for_status()
|
217
218
|
|
@@ -471,7 +472,7 @@ def pdf_detector(url, contains = None, dir_save = None, booster = False):
|
|
471
472
|
idx += 1
|
472
473
|
print(f'{len(fnames)} files are downloaded:\n{fnames}\n to local: \n{dir_save}')
|
473
474
|
|
474
|
-
def downloader(url, dir_save=dir_save, kind=['.pdf'], contains=None, rm_folder=False, booster=False,verbose=True):
|
475
|
+
def downloader(url, dir_save=dir_save, kind=['.pdf'], contains=None, rm_folder=False, booster=False,verbose=True, timeout=30, n_try=3,timestamp=False):
|
475
476
|
if verbose:
|
476
477
|
print("usage: downloader(url, dir_save=None, kind=['.pdf','xls'], contains=None, booster=False)")
|
477
478
|
def fname_corrector(fname, ext):
|
@@ -482,17 +483,21 @@ def downloader(url, dir_save=dir_save, kind=['.pdf'], contains=None, rm_folder=F
|
|
482
483
|
return fname
|
483
484
|
def check_and_modify_filename(directory, filename):
|
484
485
|
base, ext = os.path.splitext(filename)
|
485
|
-
counter =
|
486
|
+
counter = 1
|
486
487
|
new_filename = filename
|
487
488
|
while os.path.exists(os.path.join(directory, new_filename)):
|
488
|
-
|
489
|
+
if counter<=9:
|
490
|
+
counter_='0'+str(counter)
|
491
|
+
else:
|
492
|
+
counter_=str(counter)
|
493
|
+
new_filename = f"{base}_{counter_}{ext}"
|
489
494
|
counter += 1
|
490
495
|
return new_filename
|
491
496
|
if not isinstance(kind,list):
|
492
497
|
kind=[kind]
|
493
498
|
if isinstance(url, list):
|
494
499
|
for url_ in url:
|
495
|
-
downloader(url_, dir_save=dir_save, kind=kind, contains=contains, booster=booster,verbose=verbose)
|
500
|
+
downloader(url_, dir_save=dir_save, kind=kind, contains=contains, booster=booster,verbose=verbose,timeout=timeout,n_try=n_try,timestamp=timestamp)
|
496
501
|
# sleep(random.uniform(1, 3))
|
497
502
|
for i,k in enumerate(kind):
|
498
503
|
if not k.startswith('.'):
|
@@ -544,25 +549,45 @@ def downloader(url, dir_save=dir_save, kind=['.pdf'], contains=None, rm_folder=F
|
|
544
549
|
fnames = [file_link.split("/")[-1] for file_link in file_links_all]
|
545
550
|
for idx, file_link in enumerate(file_links_all):
|
546
551
|
headers = {"User-Agent": user_agent()}
|
547
|
-
|
548
|
-
|
549
|
-
|
550
|
-
|
551
|
-
|
552
|
-
|
553
|
-
|
554
|
-
|
555
|
-
|
556
|
-
|
557
|
-
|
558
|
-
|
559
|
-
|
560
|
-
|
561
|
-
|
562
|
-
|
552
|
+
itry = 0 # Retry logic with exception handling
|
553
|
+
while itry < n_try:
|
554
|
+
try:
|
555
|
+
# streaming to handle large files and reduce memory usage.
|
556
|
+
response = requests.get(file_link, headers=headers, timeout=timeout, stream=True)
|
557
|
+
if response.status_code == 200:
|
558
|
+
ext = next((ftype for ftype in kind if ftype in file_link), None)
|
559
|
+
if ext:
|
560
|
+
corrected_fname = fname_corrector(fnames[idx], ext)
|
561
|
+
corrected_fname = check_and_modify_filename(dir_save, corrected_fname)
|
562
|
+
if timestamp:
|
563
|
+
corrected_fname=datetime.now().strftime("%y%m%d_%H%M%S_")+corrected_fname
|
564
|
+
fpath_tmp = os.path.join(dir_save, corrected_fname)
|
565
|
+
with open(fpath_tmp, "wb") as file:
|
566
|
+
for chunk in response.iter_content(chunk_size=8192):
|
567
|
+
if chunk: # Filter out keep-alive chunks
|
568
|
+
file.write(chunk)
|
569
|
+
if verbose:
|
570
|
+
print(f"Done! {fnames[idx]}")
|
571
|
+
else:
|
572
|
+
if verbose:
|
573
|
+
print(f"Unknown file type for {file_link}")
|
574
|
+
break # Exit the retry loop if successful
|
575
|
+
else:
|
576
|
+
if verbose:
|
577
|
+
print(f"Failed to download file: HTTP status code {response.status_code}")
|
578
|
+
except (ChunkedEncodingError, ConnectionError) as e:
|
579
|
+
print(f"Attempt {itry+1} failed: {e}. Retrying in a few seconds...")
|
580
|
+
# time.sleep(random.uniform(0, 2)) # Random sleep to mitigate server issues
|
581
|
+
if os.path.exists(fpath_tmp):
|
582
|
+
os.remove(fpath_tmp)
|
583
|
+
itry += 1
|
584
|
+
|
585
|
+
if itry == n_try:
|
586
|
+
print(f"Failed to download {file_link} after {n_try} attempts.")
|
587
|
+
|
563
588
|
print(f'\n{len(fnames)} files were downloaded:')
|
564
589
|
if verbose:
|
565
|
-
pp(fnames)
|
590
|
+
pp(corrected_fname) if corrected_fname in locals() else pp(fnames)
|
566
591
|
print(f"\n\nsaved @:\n{dir_save}")
|
567
592
|
|
568
593
|
def find_img(url, driver='request',dir_save="images", rm_folder=False, verbose=True):
|
@@ -134,14 +134,14 @@ py2ls/db2ls.py,sha256=MMfFX47aIPIyu7fU9aPvX9lbPRPYOpJ_VXwlnWk-8qo,13615
|
|
134
134
|
py2ls/doc.py,sha256=xN3g1OWfoaGUhikbJ0NqbN5eKy1VZVvWwRlhHMgyVEc,4243
|
135
135
|
py2ls/export_requirements.py,sha256=psZtSe-MOD9L_w3dVpA_VJEKfq3J914g3Y1OtRNAb4g,2324
|
136
136
|
py2ls/freqanalysis.py,sha256=F4218VSPbgL5tnngh6xNCYuNnfR-F_QjECUUxrPYZss,32594
|
137
|
-
py2ls/ips.py,sha256=
|
138
|
-
py2ls/netfinder.py,sha256=
|
137
|
+
py2ls/ips.py,sha256=jyvui0X41-AyZfpcujfgBMt6NsQNfyh_SiQ5lQ0gk1Q,86744
|
138
|
+
py2ls/netfinder.py,sha256=aOrgXp2rqpUDREZMlP_875SuAAcQXu3lhnRMk1cPG5M,47269
|
139
139
|
py2ls/plot.py,sha256=8_33-1wpkGZrDUuvRBfTPUi_BRKdf1njoR725OLSLSY,48579
|
140
140
|
py2ls/setuptools-70.1.0-py3-none-any.whl,sha256=2bi3cUVal8ip86s0SOvgspteEF8SKLukECi-EWmFomc,882588
|
141
141
|
py2ls/sleep_events_detectors.py,sha256=36MCuRrpurn0Uvzpo3p3b3_JlVsRNHSWCXbJxCGM3mg,51546
|
142
142
|
py2ls/stats.py,sha256=Wd9yCKQ_61QD29WMEgMuEcreFxF91NmlPW65iWT2B5w,39041
|
143
143
|
py2ls/translator.py,sha256=6S7MmTZmjj8NljVmj0W5uEauu4ePxso3AMf2LvGVRQA,30516
|
144
144
|
py2ls/wb_detector.py,sha256=7y6TmBUj9exCZeIgBAJ_9hwuhkDh1x_-yg4dvNY1_GQ,6284
|
145
|
-
py2ls-0.1.6.
|
146
|
-
py2ls-0.1.6.
|
147
|
-
py2ls-0.1.6.
|
145
|
+
py2ls-0.1.6.8.dist-info/METADATA,sha256=yQ6DRgZ13Lc-Hw3FhI0dbn-nE0N3JwbC9X_8rQlxILQ,20998
|
146
|
+
py2ls-0.1.6.8.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
147
|
+
py2ls-0.1.6.8.dist-info/RECORD,,
|
File without changes
|