py2ls 0.1.6.7__py3-none-any.whl → 0.1.6.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
py2ls/ips.py
CHANGED
@@ -841,7 +841,48 @@ def pdf2img(dir_pdf, dir_save=None, page=None, kind="png",verbose=True, **kws):
|
|
841
841
|
|
842
842
|
# dir_pdf = "/Users/macjianfeng/Dropbox/github/python/240308_Python Data Science Handbook.pdf"
|
843
843
|
# df_page = pdf2img(dir_pdf, page=[1, 5],dpi=300)
|
844
|
+
def get_encoding(fpath, alternative_encodings=None, verbose=False):
    """
    Attempt to determine the encoding of a file by trying multiple encodings.

    Candidates are tried in order; the first one that decodes the whole file
    without error is returned. A candidate whose codec name is unknown to
    Python is treated as a failed attempt instead of aborting the search.

    Parameters:
        fpath (str): The path to the file.
        alternative_encodings (list): List of encodings to try. If None, uses a default list.
        verbose (bool): If True, print detailed information about each attempted encoding.

    Returns:
        str: The encoding that successfully read the file, or None if no encoding worked.

    Raises:
        FileNotFoundError: If fpath is not an existing regular file.
    """
    if alternative_encodings is None:
        # NOTE: 'latin1' maps every possible byte value, so with this default
        # order any file that is not valid UTF-8 is reported as 'latin1' and
        # the remaining entries are never reached. The order is kept as-is for
        # backward compatibility; pass an explicit list for stricter detection.
        alternative_encodings = [
            'utf-8', 'latin1', 'windows-1252', 'iso-8859-1',
            'iso-8859-2', 'iso-8859-3', 'iso-8859-4', 'iso-8859-5',
            'iso-8859-6', 'iso-8859-7', 'iso-8859-8', 'iso-8859-9',
            'windows-1250', 'windows-1251', 'windows-1253', 'windows-1254',
            'windows-1255', 'windows-1256', 'windows-1257', 'windows-1258',
            'big5', 'gb18030', 'shift_jis', 'euc_jp', 'koi8_r',
            'mac_roman', 'mac_central_europe', 'mac_greek', 'mac_cyrillic',
            'mac_arabic', 'mac_hebrew'
        ]

    if not os.path.isfile(fpath):
        raise FileNotFoundError(f"The file {fpath} does not exist.")

    for enc in alternative_encodings:
        try:
            with open(fpath, mode='r', encoding=enc) as file:
                file.read()  # Decoding the full content is the actual check
        except (UnicodeDecodeError, LookupError):
            # UnicodeDecodeError: the bytes are not valid in this encoding.
            # LookupError: Python has no codec registered under this name.
            if verbose:
                print(f"Failed to decode with encoding: {enc}")
            continue
        if verbose:
            print(f"Successfully detected encoding: {enc}")
        return enc

    # If no encoding worked
    print("No suitable encoding found.")
    return None
|
845
886
|
|
846
887
|
|
847
888
|
def fload(fpath, kind=None, **kwargs):
|
@@ -979,7 +1020,7 @@ def fload(fpath, kind=None, **kwargs):
|
|
979
1020
|
elif kind == "ipynb":
|
980
1021
|
return load_ipynb(fpath, **kwargs)
|
981
1022
|
elif kind == "pdf":
|
982
|
-
print('usage:load_pdf(fpath, page="all", verbose=False)')
|
1023
|
+
# print('usage:load_pdf(fpath, page="all", verbose=False)')
|
983
1024
|
return load_pdf(fpath, **kwargs)
|
984
1025
|
elif kind.lower() in img_types:
|
985
1026
|
print(f'Image ".{kind}" is loaded.')
|
@@ -1022,15 +1063,30 @@ def fupdate(fpath, content=None):
|
|
1022
1063
|
with open(fpath, 'w') as file:
|
1023
1064
|
file.write(content)
|
1024
1065
|
file.write(old_content)
|
1066
|
+
def fappend(fpath, content=None):
    """
    Append new content at the end of a file.

    The file is opened in append mode, so existing bytes are left untouched
    (no read-truncate-rewrite cycle) and the file is created when it does not
    exist yet.

    Parameters:
        fpath (str): Path of the file to append to.
        content (str): Text to append. None or an empty value appends nothing
            (the file is still created if it was missing).
    """
    content = content or ""
    # Default (locale) text encoding is used, as for a plain open(fpath, 'w').
    with open(fpath, 'a') as file:
        file.write(content)
|
1025
1080
|
|
1026
1081
|
def fsave(
|
1027
1082
|
fpath,
|
1028
1083
|
content,
|
1084
|
+
mode='w',
|
1085
|
+
how ='overwrite',
|
1029
1086
|
kind=None,
|
1030
1087
|
font_name="Times",
|
1031
1088
|
font_size=10,
|
1032
1089
|
spacing=6,
|
1033
|
-
mode='w',
|
1034
1090
|
**kwargs,
|
1035
1091
|
):
|
1036
1092
|
"""
|
@@ -1046,9 +1102,14 @@ def fsave(
|
|
1046
1102
|
Returns:
|
1047
1103
|
None
|
1048
1104
|
"""
|
1049
|
-
def save_content(fpath, content, mode=mode):
|
1050
|
-
|
1051
|
-
|
1105
|
+
def save_content(fpath, content, mode=mode, how='overwrite'):
    # Write text `content` to `fpath` using the strategy named by `how`
    # (matched case-insensitively by substring):
    #   contains 'wri' -> open the file with `mode` (UTF-8) and write content
    #   contains 'upd' -> delegate to fupdate
    #   contains 'app' -> delegate to fappend
    # Any other value of `how` performs no write at all.
    strategy = how.lower()
    if 'wri' in strategy:
        with open(fpath, mode, encoding='utf-8') as handle:
            handle.write(content)
        return
    if 'upd' in strategy:
        fupdate(fpath, content=content)
        return
    if 'app' in strategy:
        fappend(fpath, content=content)
|
1052
1113
|
|
1053
1114
|
|
1054
1115
|
def save_docx(fpath, content, font_name, font_size, spacing):
|
@@ -1109,16 +1170,16 @@ def fsave(
|
|
1109
1170
|
for i, part in enumerate(parts):
|
1110
1171
|
if i % 2 == 0:
|
1111
1172
|
# Even index: markdown content
|
1112
|
-
cells.append(
|
1173
|
+
cells.append(nbformat.v4.new_markdown_cell(part.strip()))
|
1113
1174
|
else:
|
1114
1175
|
# Odd index: code content
|
1115
|
-
cells.append(
|
1176
|
+
cells.append(nbformat.v4.new_code_cell(part.strip()))
|
1116
1177
|
# Create a new notebook
|
1117
1178
|
nb = nbformat.v4.new_notebook()
|
1118
1179
|
nb['cells'] = cells
|
1119
1180
|
# Write the notebook to a file
|
1120
1181
|
with open(fpath, 'w', encoding='utf-8') as ipynb_file:
|
1121
|
-
|
1182
|
+
nbformat.write(nb, ipynb_file)
|
1122
1183
|
|
1123
1184
|
# def save_json(fpath, data, **kwargs):
|
1124
1185
|
# with open(fpath, "w") as file:
|
@@ -1330,7 +1391,7 @@ def listdir(
|
|
1330
1391
|
ascending=True,
|
1331
1392
|
contains=None,
|
1332
1393
|
orient="list",
|
1333
|
-
output="df"
|
1394
|
+
output="df" # 'list','dict','records','index','series'
|
1334
1395
|
):
|
1335
1396
|
if not kind.startswith("."):
|
1336
1397
|
kind = "." + kind
|
@@ -1432,7 +1493,7 @@ def list_func(lib_name, opt="call"):
|
|
1432
1493
|
def func_list(lib_name, opt="call"):
|
1433
1494
|
return list_func(lib_name, opt=opt)
|
1434
1495
|
|
1435
|
-
def
|
1496
|
+
def mkdir(*args, **kwargs):
|
1436
1497
|
"""
|
1437
1498
|
newfolder(pardir, chdir)
|
1438
1499
|
Args:
|
@@ -1444,7 +1505,7 @@ def newfolder(*args, **kwargs):
|
|
1444
1505
|
"""
|
1445
1506
|
overwrite=kwargs.get("overwrite",False)
|
1446
1507
|
for arg in args:
|
1447
|
-
if isinstance(arg, str):
|
1508
|
+
if isinstance(arg, (str,list)):
|
1448
1509
|
if "/" in arg or "\\" in arg:
|
1449
1510
|
pardir=arg
|
1450
1511
|
print(f'pardir{pardir}')
|
py2ls/netfinder.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
from bs4 import BeautifulSoup
|
2
2
|
import requests
|
3
3
|
from requests.utils import dict_from_cookiejar
|
4
|
+
from requests.exceptions import ChunkedEncodingError, ConnectionError
|
4
5
|
import os
|
5
6
|
from urllib.parse import urlparse, urljoin
|
6
7
|
import base64
|
@@ -150,7 +151,7 @@ def flatten_json(y):
|
|
150
151
|
def get_proxy():
|
151
152
|
list_ = []
|
152
153
|
headers = {"User-Agent": user_agent()}
|
153
|
-
response = requests.get("https://free-proxy-list.net", headers=headers)
|
154
|
+
response = requests.get("https://free-proxy-list.net", headers=headers,timeout=30,stream=True)
|
154
155
|
content = BeautifulSoup(response.content, "html.parser")
|
155
156
|
info = extract_text_from_content(content, where="td", extend=0)[0].split()
|
156
157
|
count, pair_proxy = 0, 2
|
@@ -200,18 +201,18 @@ def fetch_all(url, parser="lxml", driver='request', # request or selenium
|
|
200
201
|
|
201
202
|
headers = {"User-Agent": user_agent()}
|
202
203
|
if 'req' in driver.lower():
|
203
|
-
response = requests.get(url, headers=headers,proxies=proxies_glob)
|
204
|
+
response = requests.get(url, headers=headers,proxies=proxies_glob,timeout=30,stream=True)
|
204
205
|
|
205
206
|
# If the response is a redirect, follow it
|
206
207
|
while response.is_redirect:
|
207
208
|
logger.info(f"Redirecting to: {response.headers['Location']}")
|
208
|
-
response = requests.get(response.headers["Location"], headers=headers,proxies=proxies_glob)
|
209
|
+
response = requests.get(response.headers["Location"], headers=headers,proxies=proxies_glob,timeout=30,stream=True)
|
209
210
|
# Check for a 403 error
|
210
211
|
if response.status_code == 403:
|
211
212
|
logger.warning("403 Forbidden error. Retrying...")
|
212
213
|
# Retry the request after a short delay
|
213
214
|
sleep(random.uniform(1, 3))
|
214
|
-
response = requests.get(url, headers=headers,proxies=proxies_glob)
|
215
|
+
response = requests.get(url, headers=headers,proxies=proxies_glob,timeout=30,stream=True)
|
215
216
|
# Raise an error if retry also fails
|
216
217
|
response.raise_for_status()
|
217
218
|
|
@@ -471,7 +472,7 @@ def pdf_detector(url, contains = None, dir_save = None, booster = False):
|
|
471
472
|
idx += 1
|
472
473
|
print(f'{len(fnames)} files are downloaded:\n{fnames}\n to local: \n{dir_save}')
|
473
474
|
|
474
|
-
def downloader(url, dir_save=dir_save, kind=['.pdf'], contains=None, rm_folder=False, booster=False,verbose=True):
|
475
|
+
def downloader(url, dir_save=dir_save, kind=['.pdf'], contains=None, rm_folder=False, booster=False,verbose=True, timeout=30, n_try=3,timestamp=False):
|
475
476
|
if verbose:
|
476
477
|
print("usage: downloader(url, dir_save=None, kind=['.pdf','xls'], contains=None, booster=False)")
|
477
478
|
def fname_corrector(fname, ext):
|
@@ -482,17 +483,21 @@ def downloader(url, dir_save=dir_save, kind=['.pdf'], contains=None, rm_folder=F
|
|
482
483
|
return fname
|
483
484
|
def check_and_modify_filename(directory, filename):
    """
    Return a file name that does not yet exist inside `directory`.

    If `filename` is free it is returned unchanged. Otherwise a numeric
    suffix is inserted before the extension ("name_01.ext", "name_02.ext",
    ...), zero-padded to at least two digits, until an unused name is found.

    Parameters:
        directory (str): Folder in which the name must be unique.
        filename (str): Desired file name, including extension.

    Returns:
        str: `filename` itself or the first free suffixed variant.
    """
    base, ext = os.path.splitext(filename)
    new_filename = filename
    counter = 1
    while os.path.exists(os.path.join(directory, new_filename)):
        # :02d gives "01".."09", then "10", "11", ... without manual padding.
        new_filename = f"{base}_{counter:02d}{ext}"
        counter += 1
    return new_filename
|
491
496
|
if not isinstance(kind,list):
|
492
497
|
kind=[kind]
|
493
498
|
if isinstance(url, list):
|
494
499
|
for url_ in url:
|
495
|
-
downloader(url_, dir_save=dir_save, kind=kind, contains=contains, booster=booster,verbose=verbose)
|
500
|
+
downloader(url_, dir_save=dir_save, kind=kind, contains=contains, booster=booster,verbose=verbose,timeout=timeout,n_try=n_try,timestamp=timestamp)
|
496
501
|
# sleep(random.uniform(1, 3))
|
497
502
|
for i,k in enumerate(kind):
|
498
503
|
if not k.startswith('.'):
|
@@ -544,25 +549,45 @@ def downloader(url, dir_save=dir_save, kind=['.pdf'], contains=None, rm_folder=F
|
|
544
549
|
fnames = [file_link.split("/")[-1] for file_link in file_links_all]
|
545
550
|
for idx, file_link in enumerate(file_links_all):
|
546
551
|
headers = {"User-Agent": user_agent()}
|
547
|
-
|
548
|
-
|
549
|
-
|
550
|
-
|
551
|
-
|
552
|
-
|
553
|
-
|
554
|
-
|
555
|
-
|
556
|
-
|
557
|
-
|
558
|
-
|
559
|
-
|
560
|
-
|
561
|
-
|
562
|
-
|
552
|
+
itry = 0 # Retry logic with exception handling
|
553
|
+
while itry < n_try:
|
554
|
+
try:
|
555
|
+
# streaming to handle large files and reduce memory usage.
|
556
|
+
response = requests.get(file_link, headers=headers, timeout=timeout, stream=True)
|
557
|
+
if response.status_code == 200:
|
558
|
+
ext = next((ftype for ftype in kind if ftype in file_link), None)
|
559
|
+
if ext:
|
560
|
+
corrected_fname = fname_corrector(fnames[idx], ext)
|
561
|
+
corrected_fname = check_and_modify_filename(dir_save, corrected_fname)
|
562
|
+
if timestamp:
|
563
|
+
corrected_fname=datetime.now().strftime("%y%m%d_%H%M%S_")+corrected_fname
|
564
|
+
fpath_tmp = os.path.join(dir_save, corrected_fname)
|
565
|
+
with open(fpath_tmp, "wb") as file:
|
566
|
+
for chunk in response.iter_content(chunk_size=8192):
|
567
|
+
if chunk: # Filter out keep-alive chunks
|
568
|
+
file.write(chunk)
|
569
|
+
if verbose:
|
570
|
+
print(f"Done! {fnames[idx]}")
|
571
|
+
else:
|
572
|
+
if verbose:
|
573
|
+
print(f"Unknown file type for {file_link}")
|
574
|
+
break # Exit the retry loop if successful
|
575
|
+
else:
|
576
|
+
if verbose:
|
577
|
+
print(f"Failed to download file: HTTP status code {response.status_code}")
|
578
|
+
except (ChunkedEncodingError, ConnectionError) as e:
|
579
|
+
print(f"Attempt {itry+1} failed: {e}. Retrying in a few seconds...")
|
580
|
+
# time.sleep(random.uniform(0, 2)) # Random sleep to mitigate server issues
|
581
|
+
if os.path.exists(fpath_tmp):
|
582
|
+
os.remove(fpath_tmp)
|
583
|
+
itry += 1
|
584
|
+
|
585
|
+
if itry == n_try:
|
586
|
+
print(f"Failed to download {file_link} after {n_try} attempts.")
|
587
|
+
|
563
588
|
print(f'\n{len(fnames)} files were downloaded:')
|
564
589
|
if verbose:
|
565
|
-
pp(fnames)
|
590
|
+
pp(corrected_fname) if corrected_fname in locals() else pp(fnames)
|
566
591
|
print(f"\n\nsaved @:\n{dir_save}")
|
567
592
|
|
568
593
|
def find_img(url, driver='request',dir_save="images", rm_folder=False, verbose=True):
|
py2ls/translator.py
CHANGED
@@ -59,7 +59,7 @@ def get_lang_code_iso639():
|
|
59
59
|
lang_code_iso639=dict([*zip(fullname,shortcut)])
|
60
60
|
return lang_code_iso639
|
61
61
|
|
62
|
-
def detect_lang(text, output='lang',verbose=
|
62
|
+
def detect_lang(text, output='lang',verbose=False):
|
63
63
|
dir_curr_script=os.path.dirname(os.path.abspath(__file__))
|
64
64
|
dir_lang_code=dir_curr_script+"/data/lang_code_iso639.json"
|
65
65
|
with open(dir_lang_code, "r") as file:
|
@@ -85,7 +85,7 @@ def is_text(s):
|
|
85
85
|
# no_special = not re.search(r'[^A-Za-z0-9\s]', s)
|
86
86
|
return has_alpha and has_non_alpha
|
87
87
|
|
88
|
-
def strcmp(search_term, candidates, ignore_case=True, verbose=
|
88
|
+
def strcmp(search_term, candidates, ignore_case=True, verbose=False, scorer='WR'):
|
89
89
|
"""
|
90
90
|
Compares a search term with a list of candidate strings and finds the best match based on similarity score.
|
91
91
|
|
@@ -392,6 +392,8 @@ def translate(
|
|
392
392
|
Translate text to the target language using the specified translation method (Google Translate or DeepL).
|
393
393
|
lang_src (str): e.g., 'english', or 'chinese' when there are two languages, then lang_src must be given
|
394
394
|
"""
|
395
|
+
# error_verbose = verbose or False
|
396
|
+
|
395
397
|
if isinstance(text,list):
|
396
398
|
text=merge_text(text)
|
397
399
|
text = replace_text(text)
|
@@ -508,18 +510,19 @@ def translate_with_retry(
|
|
508
510
|
lang_src = detect_lang(text)
|
509
511
|
lang_src = get_language_code(language=lang_src)
|
510
512
|
lang = get_language_code(language=lang)
|
511
|
-
print(f"lang:{lang},lang_src:{lang_src}")
|
512
513
|
try:
|
513
|
-
print(len(text))
|
514
514
|
return try_translate(text,lang=lang,lang_src=lang_src,user_agent=user_agent,service_url=service_urls[0])
|
515
515
|
except Exception as e:
|
516
|
-
|
516
|
+
if error_verbose:
|
517
|
+
print("Connection error:", e)
|
517
518
|
try:
|
518
519
|
time.sleep(1)
|
519
520
|
return try_translate(text,lang=lang,lang_src=lang_src,user_agent=user_agent,service_url=service_urls[1])
|
520
521
|
except Exception as e:
|
521
|
-
|
522
|
-
|
522
|
+
if error_verbose:
|
523
|
+
print(f"(translate_with_retry):Connection error with {service_urls}: {e}")
|
524
|
+
if error_verbose:
|
525
|
+
print("All service URLs failed. Unable to translate the text.")
|
523
526
|
return text
|
524
527
|
|
525
528
|
|
@@ -134,14 +134,14 @@ py2ls/db2ls.py,sha256=MMfFX47aIPIyu7fU9aPvX9lbPRPYOpJ_VXwlnWk-8qo,13615
|
|
134
134
|
py2ls/doc.py,sha256=xN3g1OWfoaGUhikbJ0NqbN5eKy1VZVvWwRlhHMgyVEc,4243
|
135
135
|
py2ls/export_requirements.py,sha256=psZtSe-MOD9L_w3dVpA_VJEKfq3J914g3Y1OtRNAb4g,2324
|
136
136
|
py2ls/freqanalysis.py,sha256=F4218VSPbgL5tnngh6xNCYuNnfR-F_QjECUUxrPYZss,32594
|
137
|
-
py2ls/ips.py,sha256=
|
138
|
-
py2ls/netfinder.py,sha256=
|
137
|
+
py2ls/ips.py,sha256=KkrkGAF0VQ-N0rH4FQFLyP-C-skY6EPpeO8t_5RngWw,88519
|
138
|
+
py2ls/netfinder.py,sha256=aOrgXp2rqpUDREZMlP_875SuAAcQXu3lhnRMk1cPG5M,47269
|
139
139
|
py2ls/plot.py,sha256=8_33-1wpkGZrDUuvRBfTPUi_BRKdf1njoR725OLSLSY,48579
|
140
140
|
py2ls/setuptools-70.1.0-py3-none-any.whl,sha256=2bi3cUVal8ip86s0SOvgspteEF8SKLukECi-EWmFomc,882588
|
141
141
|
py2ls/sleep_events_detectors.py,sha256=36MCuRrpurn0Uvzpo3p3b3_JlVsRNHSWCXbJxCGM3mg,51546
|
142
142
|
py2ls/stats.py,sha256=Wd9yCKQ_61QD29WMEgMuEcreFxF91NmlPW65iWT2B5w,39041
|
143
|
-
py2ls/translator.py,sha256=
|
143
|
+
py2ls/translator.py,sha256=bc5FB-wqC4TtQz9gyCP1mE38HqNRJ_pmuRIgKnAlMzM,30581
|
144
144
|
py2ls/wb_detector.py,sha256=7y6TmBUj9exCZeIgBAJ_9hwuhkDh1x_-yg4dvNY1_GQ,6284
|
145
|
-
py2ls-0.1.6.
|
146
|
-
py2ls-0.1.6.
|
147
|
-
py2ls-0.1.6.
|
145
|
+
py2ls-0.1.6.9.dist-info/METADATA,sha256=iPwvGzCypApng9Ci3pxCknbx6mek6zOQTy3rWg2VKo4,20998
|
146
|
+
py2ls-0.1.6.9.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
147
|
+
py2ls-0.1.6.9.dist-info/RECORD,,
|
File without changes
|