py2ls 0.1.6.7__py3-none-any.whl → 0.1.6.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
py2ls/ips.py CHANGED
@@ -841,7 +841,48 @@ def pdf2img(dir_pdf, dir_save=None, page=None, kind="png",verbose=True, **kws):
841
841
 
842
842
  # dir_pdf = "/Users/macjianfeng/Dropbox/github/python/240308_Python Data Science Handbook.pdf"
843
843
  # df_page = pdf2img(dir_pdf, page=[1, 5],dpi=300)
844
def get_encoding(fpath, alternative_encodings=None, verbose=False):
    """
    Attempt to determine the encoding of a file by trying multiple encodings.

    The file is read once as raw bytes and every candidate encoding is tried
    against those bytes in the given order; the first one that decodes without
    error is returned.

    Note: 'latin1' maps every possible byte value, so it never fails. With the
    default list the result is therefore always 'utf-8' or 'latin1'; pass a
    custom ``alternative_encodings`` list to probe other encodings first.

    Parameters:
        fpath (str): The path to the file.
        alternative_encodings (list): List of encodings to try. If None, uses a default list.
        verbose (bool): If True, print detailed information about each attempted encoding.

    Returns:
        str: The encoding that successfully read the file, or None if no encoding worked.

    Raises:
        FileNotFoundError: If ``fpath`` is not an existing regular file.
    """
    if alternative_encodings is None:
        alternative_encodings = [
            'utf-8', 'latin1', 'windows-1252', 'iso-8859-1',
            'iso-8859-2', 'iso-8859-3', 'iso-8859-4', 'iso-8859-5',
            'iso-8859-6', 'iso-8859-7', 'iso-8859-8', 'iso-8859-9',
            'windows-1250', 'windows-1251', 'windows-1253', 'windows-1254',
            'windows-1255', 'windows-1256', 'windows-1257', 'windows-1258',
            'big5', 'gb18030', 'shift_jis', 'euc_jp', 'koi8_r',
            'mac_roman', 'mac_central_europe', 'mac_greek', 'mac_cyrillic',
            'mac_arabic', 'mac_hebrew',
        ]

    if not os.path.isfile(fpath):
        raise FileNotFoundError(f"The file {fpath} does not exist.")

    # Read the bytes a single time instead of re-opening the file per encoding.
    with open(fpath, mode='rb') as file:
        raw = file.read()

    for enc in alternative_encodings:
        try:
            raw.decode(enc)
        except UnicodeDecodeError:
            if verbose:
                print(f"Failed to decode with encoding: {enc}")
            continue
        except LookupError:
            # The codec name is not known to this Python build; skip it
            # rather than aborting the whole detection.
            if verbose:
                print(f"Unknown encoding skipped: {enc}")
            continue
        if verbose:
            print(f"Successfully detected encoding: {enc}")
        return enc

    # If no encoding worked
    print("No suitable encoding found.")
    return None
845
886
 
846
887
 
847
888
  def fload(fpath, kind=None, **kwargs):
@@ -979,7 +1020,7 @@ def fload(fpath, kind=None, **kwargs):
979
1020
  elif kind == "ipynb":
980
1021
  return load_ipynb(fpath, **kwargs)
981
1022
  elif kind == "pdf":
982
- print('usage:load_pdf(fpath, page="all", verbose=False)')
1023
+ # print('usage:load_pdf(fpath, page="all", verbose=False)')
983
1024
  return load_pdf(fpath, **kwargs)
984
1025
  elif kind.lower() in img_types:
985
1026
  print(f'Image ".{kind}" is loaded.')
@@ -1022,15 +1063,30 @@ def fupdate(fpath, content=None):
1022
1063
  with open(fpath, 'w') as file:
1023
1064
  file.write(content)
1024
1065
  file.write(old_content)
1066
def fappend(fpath, content=None):
    """
    Append new content at the end of a file.

    The file is created if it does not exist. Existing data is left untouched:
    the file is opened in append mode rather than being read and rewritten, so
    existing line endings are preserved and the file is never truncated.

    Parameters:
        fpath (str): The path to the file.
        content (str): Text to append. None (or any falsy value) appends nothing.

    Returns:
        None
    """
    content = content or ""
    # 'a' creates a missing file and positions every write at the end.
    with open(fpath, 'a') as file:
        file.write(content)
1025
1080
 
1026
1081
  def fsave(
1027
1082
  fpath,
1028
1083
  content,
1084
+ mode='w',
1085
+ how ='overwrite',
1029
1086
  kind=None,
1030
1087
  font_name="Times",
1031
1088
  font_size=10,
1032
1089
  spacing=6,
1033
- mode='w',
1034
1090
  **kwargs,
1035
1091
  ):
1036
1092
  """
@@ -1046,9 +1102,14 @@ def fsave(
1046
1102
  Returns:
1047
1103
  None
1048
1104
  """
1049
- def save_content(fpath, content, mode=mode):
1050
- with open(fpath, mode, encoding='utf-8') as file:
1051
- file.write(content)
1105
+ def save_content(fpath, content, mode=mode, how='overwrite'):
1106
+ if 'wri' in how.lower():
1107
+ with open(fpath, mode, encoding='utf-8') as file:
1108
+ file.write(content)
1109
+ elif 'upd' in how.lower():
1110
+ fupdate(fpath, content=content)
1111
+ elif 'app' in how.lower():
1112
+ fappend(fpath, content=content)
1052
1113
 
1053
1114
 
1054
1115
  def save_docx(fpath, content, font_name, font_size, spacing):
@@ -1109,16 +1170,16 @@ def fsave(
1109
1170
  for i, part in enumerate(parts):
1110
1171
  if i % 2 == 0:
1111
1172
  # Even index: markdown content
1112
- cells.append(nbf.v4.new_markdown_cell(part.strip()))
1173
+ cells.append(nbformat.v4.new_markdown_cell(part.strip()))
1113
1174
  else:
1114
1175
  # Odd index: code content
1115
- cells.append(nbf.v4.new_code_cell(part.strip()))
1176
+ cells.append(nbformat.v4.new_code_cell(part.strip()))
1116
1177
  # Create a new notebook
1117
1178
  nb = nbformat.v4.new_notebook()
1118
1179
  nb['cells'] = cells
1119
1180
  # Write the notebook to a file
1120
1181
  with open(fpath, 'w', encoding='utf-8') as ipynb_file:
1121
- nbf.write(fpath, ipynb_file)
1182
+ nbformat.write(nb, ipynb_file)
1122
1183
 
1123
1184
  # def save_json(fpath, data, **kwargs):
1124
1185
  # with open(fpath, "w") as file:
@@ -1330,7 +1391,7 @@ def listdir(
1330
1391
  ascending=True,
1331
1392
  contains=None,
1332
1393
  orient="list",
1333
- output="df"
1394
+ output="df" # 'list','dict','records','index','series'
1334
1395
  ):
1335
1396
  if not kind.startswith("."):
1336
1397
  kind = "." + kind
@@ -1432,7 +1493,7 @@ def list_func(lib_name, opt="call"):
1432
1493
  def func_list(lib_name, opt="call"):
1433
1494
  return list_func(lib_name, opt=opt)
1434
1495
 
1435
- def newfolder(*args, **kwargs):
1496
+ def mkdir(*args, **kwargs):
1436
1497
  """
1437
1498
  newfolder(pardir, chdir)
1438
1499
  Args:
@@ -1444,7 +1505,7 @@ def newfolder(*args, **kwargs):
1444
1505
  """
1445
1506
  overwrite=kwargs.get("overwrite",False)
1446
1507
  for arg in args:
1447
- if isinstance(arg, str):
1508
+ if isinstance(arg, (str,list)):
1448
1509
  if "/" in arg or "\\" in arg:
1449
1510
  pardir=arg
1450
1511
  print(f'pardir{pardir}')
py2ls/netfinder.py CHANGED
@@ -1,6 +1,7 @@
1
1
  from bs4 import BeautifulSoup
2
2
  import requests
3
3
  from requests.utils import dict_from_cookiejar
4
+ from requests.exceptions import ChunkedEncodingError, ConnectionError
4
5
  import os
5
6
  from urllib.parse import urlparse, urljoin
6
7
  import base64
@@ -150,7 +151,7 @@ def flatten_json(y):
150
151
  def get_proxy():
151
152
  list_ = []
152
153
  headers = {"User-Agent": user_agent()}
153
- response = requests.get("https://free-proxy-list.net", headers=headers)
154
+ response = requests.get("https://free-proxy-list.net", headers=headers,timeout=30,stream=True)
154
155
  content = BeautifulSoup(response.content, "html.parser")
155
156
  info = extract_text_from_content(content, where="td", extend=0)[0].split()
156
157
  count, pair_proxy = 0, 2
@@ -200,18 +201,18 @@ def fetch_all(url, parser="lxml", driver='request', # request or selenium
200
201
 
201
202
  headers = {"User-Agent": user_agent()}
202
203
  if 'req' in driver.lower():
203
- response = requests.get(url, headers=headers,proxies=proxies_glob)
204
+ response = requests.get(url, headers=headers,proxies=proxies_glob,timeout=30,stream=True)
204
205
 
205
206
  # If the response is a redirect, follow it
206
207
  while response.is_redirect:
207
208
  logger.info(f"Redirecting to: {response.headers['Location']}")
208
- response = requests.get(response.headers["Location"], headers=headers,proxies=proxies_glob)
209
+ response = requests.get(response.headers["Location"], headers=headers,proxies=proxies_glob,timeout=30,stream=True)
209
210
  # Check for a 403 error
210
211
  if response.status_code == 403:
211
212
  logger.warning("403 Forbidden error. Retrying...")
212
213
  # Retry the request after a short delay
213
214
  sleep(random.uniform(1, 3))
214
- response = requests.get(url, headers=headers,proxies=proxies_glob)
215
+ response = requests.get(url, headers=headers,proxies=proxies_glob,timeout=30,stream=True)
215
216
  # Raise an error if retry also fails
216
217
  response.raise_for_status()
217
218
 
@@ -471,7 +472,7 @@ def pdf_detector(url, contains = None, dir_save = None, booster = False):
471
472
  idx += 1
472
473
  print(f'{len(fnames)} files are downloaded:\n{fnames}\n to local: \n{dir_save}')
473
474
 
474
- def downloader(url, dir_save=dir_save, kind=['.pdf'], contains=None, rm_folder=False, booster=False,verbose=True):
475
+ def downloader(url, dir_save=dir_save, kind=['.pdf'], contains=None, rm_folder=False, booster=False,verbose=True, timeout=30, n_try=3,timestamp=False):
475
476
  if verbose:
476
477
  print("usage: downloader(url, dir_save=None, kind=['.pdf','xls'], contains=None, booster=False)")
477
478
  def fname_corrector(fname, ext):
@@ -482,17 +483,21 @@ def downloader(url, dir_save=dir_save, kind=['.pdf'], contains=None, rm_folder=F
482
483
  return fname
483
484
  def check_and_modify_filename(directory, filename):
484
485
  base, ext = os.path.splitext(filename)
485
- counter = 2
486
+ counter = 1
486
487
  new_filename = filename
487
488
  while os.path.exists(os.path.join(directory, new_filename)):
488
- new_filename = f"{base}_{counter}{ext}"
489
+ if counter<=9:
490
+ counter_='0'+str(counter)
491
+ else:
492
+ counter_=str(counter)
493
+ new_filename = f"{base}_{counter_}{ext}"
489
494
  counter += 1
490
495
  return new_filename
491
496
  if not isinstance(kind,list):
492
497
  kind=[kind]
493
498
  if isinstance(url, list):
494
499
  for url_ in url:
495
- downloader(url_, dir_save=dir_save, kind=kind, contains=contains, booster=booster,verbose=verbose)
500
+ downloader(url_, dir_save=dir_save, kind=kind, contains=contains, booster=booster,verbose=verbose,timeout=timeout,n_try=n_try,timestamp=timestamp)
496
501
  # sleep(random.uniform(1, 3))
497
502
  for i,k in enumerate(kind):
498
503
  if not k.startswith('.'):
@@ -544,25 +549,45 @@ def downloader(url, dir_save=dir_save, kind=['.pdf'], contains=None, rm_folder=F
544
549
  fnames = [file_link.split("/")[-1] for file_link in file_links_all]
545
550
  for idx, file_link in enumerate(file_links_all):
546
551
  headers = {"User-Agent": user_agent()}
547
- response = requests.get(file_link, headers=headers)
548
- if response.status_code == 200:
549
- ext = next((ftype for ftype in kind if ftype in file_link), None)
550
- if ext:
551
- corrected_fname = fname_corrector(fnames[idx], ext)
552
- corrected_fname = check_and_modify_filename(dir_save, corrected_fname)
553
- with open(os.path.join(dir_save, corrected_fname), "wb") as file:
554
- file.write(response.content)
555
- if verbose:
556
- print(f"Done! {fnames[idx]}")
557
- else:
558
- if verbose:
559
- print(f"Unknown file type for {file_link}")
560
- else:
561
- if verbose:
562
- print(f"Failed to download file: {response.status_code}")
552
+ itry = 0 # Retry logic with exception handling
553
+ while itry < n_try:
554
+ try:
555
+ # streaming to handle large files and reduce memory usage.
556
+ response = requests.get(file_link, headers=headers, timeout=timeout, stream=True)
557
+ if response.status_code == 200:
558
+ ext = next((ftype for ftype in kind if ftype in file_link), None)
559
+ if ext:
560
+ corrected_fname = fname_corrector(fnames[idx], ext)
561
+ corrected_fname = check_and_modify_filename(dir_save, corrected_fname)
562
+ if timestamp:
563
+ corrected_fname=datetime.now().strftime("%y%m%d_%H%M%S_")+corrected_fname
564
+ fpath_tmp = os.path.join(dir_save, corrected_fname)
565
+ with open(fpath_tmp, "wb") as file:
566
+ for chunk in response.iter_content(chunk_size=8192):
567
+ if chunk: # Filter out keep-alive chunks
568
+ file.write(chunk)
569
+ if verbose:
570
+ print(f"Done! {fnames[idx]}")
571
+ else:
572
+ if verbose:
573
+ print(f"Unknown file type for {file_link}")
574
+ break # Exit the retry loop if successful
575
+ else:
576
+ if verbose:
577
+ print(f"Failed to download file: HTTP status code {response.status_code}")
578
+ except (ChunkedEncodingError, ConnectionError) as e:
579
+ print(f"Attempt {itry+1} failed: {e}. Retrying in a few seconds...")
580
+ # time.sleep(random.uniform(0, 2)) # Random sleep to mitigate server issues
581
+ if os.path.exists(fpath_tmp):
582
+ os.remove(fpath_tmp)
583
+ itry += 1
584
+
585
+ if itry == n_try:
586
+ print(f"Failed to download {file_link} after {n_try} attempts.")
587
+
563
588
  print(f'\n{len(fnames)} files were downloaded:')
564
589
  if verbose:
565
- pp(fnames)
590
+ pp(corrected_fname) if corrected_fname in locals() else pp(fnames)
566
591
  print(f"\n\nsaved @:\n{dir_save}")
567
592
 
568
593
  def find_img(url, driver='request',dir_save="images", rm_folder=False, verbose=True):
py2ls/translator.py CHANGED
@@ -59,7 +59,7 @@ def get_lang_code_iso639():
59
59
  lang_code_iso639=dict([*zip(fullname,shortcut)])
60
60
  return lang_code_iso639
61
61
 
62
- def detect_lang(text, output='lang',verbose=True):
62
+ def detect_lang(text, output='lang',verbose=False):
63
63
  dir_curr_script=os.path.dirname(os.path.abspath(__file__))
64
64
  dir_lang_code=dir_curr_script+"/data/lang_code_iso639.json"
65
65
  with open(dir_lang_code, "r") as file:
@@ -85,7 +85,7 @@ def is_text(s):
85
85
  # no_special = not re.search(r'[^A-Za-z0-9\s]', s)
86
86
  return has_alpha and has_non_alpha
87
87
 
88
- def strcmp(search_term, candidates, ignore_case=True, verbose=True, scorer='WR'):
88
+ def strcmp(search_term, candidates, ignore_case=True, verbose=False, scorer='WR'):
89
89
  """
90
90
  Compares a search term with a list of candidate strings and finds the best match based on similarity score.
91
91
 
@@ -392,6 +392,8 @@ def translate(
392
392
  Translate text to the target language using the specified translation method (Google Translate or DeepL).
393
393
  lang_src (str): e.g., 'english', or 'chinese' when there are two languages, then lang_src must be given
394
394
  """
395
+ # error_verbose = verbose or False
396
+
395
397
  if isinstance(text,list):
396
398
  text=merge_text(text)
397
399
  text = replace_text(text)
@@ -508,18 +510,19 @@ def translate_with_retry(
508
510
  lang_src = detect_lang(text)
509
511
  lang_src = get_language_code(language=lang_src)
510
512
  lang = get_language_code(language=lang)
511
- print(f"lang:{lang},lang_src:{lang_src}")
512
513
  try:
513
- print(len(text))
514
514
  return try_translate(text,lang=lang,lang_src=lang_src,user_agent=user_agent,service_url=service_urls[0])
515
515
  except Exception as e:
516
- print("Connection error:", e)
516
+ if error_verbose:
517
+ print("Connection error:", e)
517
518
  try:
518
519
  time.sleep(1)
519
520
  return try_translate(text,lang=lang,lang_src=lang_src,user_agent=user_agent,service_url=service_urls[1])
520
521
  except Exception as e:
521
- print(f"(translate_with_retry):Connection error with {service_urls}: {e}")
522
- print("All service URLs failed. Unable to translate the text.")
522
+ if error_verbose:
523
+ print(f"(translate_with_retry):Connection error with {service_urls}: {e}")
524
+ if error_verbose:
525
+ print("All service URLs failed. Unable to translate the text.")
523
526
  return text
524
527
 
525
528
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: py2ls
3
- Version: 0.1.6.7
3
+ Version: 0.1.6.9
4
4
  Summary: py(thon)2(too)ls
5
5
  Author: Jianfeng
6
6
  Author-email: Jianfeng.Liu0413@gmail.com
@@ -134,14 +134,14 @@ py2ls/db2ls.py,sha256=MMfFX47aIPIyu7fU9aPvX9lbPRPYOpJ_VXwlnWk-8qo,13615
134
134
  py2ls/doc.py,sha256=xN3g1OWfoaGUhikbJ0NqbN5eKy1VZVvWwRlhHMgyVEc,4243
135
135
  py2ls/export_requirements.py,sha256=psZtSe-MOD9L_w3dVpA_VJEKfq3J914g3Y1OtRNAb4g,2324
136
136
  py2ls/freqanalysis.py,sha256=F4218VSPbgL5tnngh6xNCYuNnfR-F_QjECUUxrPYZss,32594
137
- py2ls/ips.py,sha256=B88jol_EYZ4IERWAeabkHSwMkGytqyBheEbbpM_YDiQ,86146
138
- py2ls/netfinder.py,sha256=ZsLWGYMeRuGvxj2nqE0Z8ANoaVl18Necfw0HQfh2q7I,45548
137
+ py2ls/ips.py,sha256=KkrkGAF0VQ-N0rH4FQFLyP-C-skY6EPpeO8t_5RngWw,88519
138
+ py2ls/netfinder.py,sha256=aOrgXp2rqpUDREZMlP_875SuAAcQXu3lhnRMk1cPG5M,47269
139
139
  py2ls/plot.py,sha256=8_33-1wpkGZrDUuvRBfTPUi_BRKdf1njoR725OLSLSY,48579
140
140
  py2ls/setuptools-70.1.0-py3-none-any.whl,sha256=2bi3cUVal8ip86s0SOvgspteEF8SKLukECi-EWmFomc,882588
141
141
  py2ls/sleep_events_detectors.py,sha256=36MCuRrpurn0Uvzpo3p3b3_JlVsRNHSWCXbJxCGM3mg,51546
142
142
  py2ls/stats.py,sha256=Wd9yCKQ_61QD29WMEgMuEcreFxF91NmlPW65iWT2B5w,39041
143
- py2ls/translator.py,sha256=6S7MmTZmjj8NljVmj0W5uEauu4ePxso3AMf2LvGVRQA,30516
143
+ py2ls/translator.py,sha256=bc5FB-wqC4TtQz9gyCP1mE38HqNRJ_pmuRIgKnAlMzM,30581
144
144
  py2ls/wb_detector.py,sha256=7y6TmBUj9exCZeIgBAJ_9hwuhkDh1x_-yg4dvNY1_GQ,6284
145
- py2ls-0.1.6.7.dist-info/METADATA,sha256=-gKMv_eCD4spLGvNAidRLyfI07hf1eq_ldQsfFmxlsA,20998
146
- py2ls-0.1.6.7.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
147
- py2ls-0.1.6.7.dist-info/RECORD,,
145
+ py2ls-0.1.6.9.dist-info/METADATA,sha256=iPwvGzCypApng9Ci3pxCknbx6mek6zOQTy3rWg2VKo4,20998
146
+ py2ls-0.1.6.9.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
147
+ py2ls-0.1.6.9.dist-info/RECORD,,