PyPI - py2ls - Versions diffs - 0.1.6.7__py3-none-any.whl → 0.1.6.9__py3-none-any.whl - Mend

py2ls 0.1.6.7__py3-none-any.whl → 0.1.6.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

py2ls/ips.py +72 -11
py2ls/netfinder.py +50 -25
py2ls/translator.py +10 -7
{py2ls-0.1.6.7.dist-info → py2ls-0.1.6.9.dist-info}/METADATA +1 -1
{py2ls-0.1.6.7.dist-info → py2ls-0.1.6.9.dist-info}/RECORD +6 -6
{py2ls-0.1.6.7.dist-info → py2ls-0.1.6.9.dist-info}/WHEEL +0 -0

py2ls/ips.py CHANGED Viewed

@@ -841,7 +841,48 @@ def pdf2img(dir_pdf, dir_save=None, page=None, kind="png",verbose=True, **kws):
 # dir_pdf = "/Users/macjianfeng/Dropbox/github/python/240308_Python Data Science Handbook.pdf"
 # df_page = pdf2img(dir_pdf, page=[1, 5],dpi=300)
+def get_encoding(fpath, alternative_encodings=None, verbose=False):
+    """
+    Attempt to determine the encoding of a file by trying multiple encodings.
+    Parameters:
+    fpath (str): The path to the file.
+    alternative_encodings (list): List of encodings to try. If None, uses a default list.
+    verbose (bool): If True, print detailed information about each attempted encoding.
+    Returns:
+    str: The encoding that successfully read the file, or None if no encoding worked.
+    """
+    if alternative_encodings is None:
+        alternative_encodings = [
+            'utf-8', 'latin1', 'windows-1252', 'iso-8859-1',
+            'iso-8859-2', 'iso-8859-3', 'iso-8859-4', 'iso-8859-5',
+            'iso-8859-6', 'iso-8859-7', 'iso-8859-8', 'iso-8859-9',
+            'windows-1250', 'windows-1251', 'windows-1253', 'windows-1254',
+            'windows-1255', 'windows-1256', 'windows-1257', 'windows-1258',
+            'big5', 'gb18030', 'shift_jis', 'euc_jp', 'koi8_r',
+            'mac_roman', 'mac_central_europe', 'mac_greek', 'mac_cyrillic',
+            'mac_arabic', 'mac_hebrew'
+        ]
+    if not os.path.isfile(fpath):
+        raise FileNotFoundError(f"The file {fpath} does not exist.")
+    for enc in alternative_encodings:
+        try:
+            with open(fpath, mode='r', encoding=enc) as file:
+                file.read()  # Try to read the file
+            if verbose:
+                print(f"Successfully detected encoding: {enc}")
+            return enc
+        except UnicodeDecodeError:
+            if verbose:
+                print(f"Failed to decode with encoding: {enc}")
+            continue
+    # If no encoding worked
+    print("No suitable encoding found.")
+    return None
 def fload(fpath, kind=None, **kwargs):
@@ -979,7 +1020,7 @@ def fload(fpath, kind=None, **kwargs):
     elif kind == "ipynb":
         return load_ipynb(fpath, **kwargs)
     elif kind == "pdf":
-        print('usage:load_pdf(fpath, page="all", verbose=False)')
+        # print('usage:load_pdf(fpath, page="all", verbose=False)')
         return load_pdf(fpath, **kwargs)
     elif kind.lower() in img_types:
         print(f'Image ".{kind}" is loaded.')
@@ -1022,15 +1063,30 @@ def fupdate(fpath, content=None):
     with open(fpath, 'w') as file:
         file.write(content)
         file.write(old_content)
+def fappend(fpath, content=None):
+    """
+    append new content at the end.
+    """
+    content = content or ""
+    if os.path.exists(fpath):
+        with open(fpath, 'r') as file:
+            old_content = file.read()
+    else:
+        old_content = ''
+    with open(fpath, 'w') as file:
+        file.write(old_content)
+        file.write(content)
 def fsave(
     fpath,
     content,
+    mode='w',
+    how ='overwrite',
     kind=None,
     font_name="Times",
     font_size=10,
     spacing=6,
-    mode='w',
     **kwargs,
 ):
     """
@@ -1046,9 +1102,14 @@ def fsave(
     Returns:
         None
     """
-    def save_content(fpath, content, mode=mode):
-        with open(fpath, mode, encoding='utf-8') as file:
-            file.write(content)
+    def save_content(fpath, content, mode=mode, how='overwrite'):
+        if 'wri' in how.lower():
+            with open(fpath, mode, encoding='utf-8') as file:
+                file.write(content)
+        elif 'upd' in how.lower():
+            fupdate(fpath, content=content)
+        elif 'app' in how.lower():
+            fappend(fpath, content=content)
     def save_docx(fpath, content, font_name, font_size, spacing):
@@ -1109,16 +1170,16 @@ def fsave(
         for i, part in enumerate(parts):
             if i % 2 == 0:
                 # Even index: markdown content
-                cells.append(nbf.v4.new_markdown_cell(part.strip()))
+                cells.append(nbformat.v4.new_markdown_cell(part.strip()))
             else:
                 # Odd index: code content
-                cells.append(nbf.v4.new_code_cell(part.strip()))
+                cells.append(nbformat.v4.new_code_cell(part.strip()))
         # Create a new notebook
         nb = nbformat.v4.new_notebook()
         nb['cells'] = cells
         # Write the notebook to a file
         with open(fpath, 'w', encoding='utf-8') as ipynb_file:
-            nbf.write(fpath, ipynb_file)
+            nbformat.write(nb, ipynb_file)
     # def save_json(fpath, data, **kwargs):
     #     with open(fpath, "w") as file:
@@ -1330,7 +1391,7 @@ def listdir(
     ascending=True,
     contains=None,
     orient="list",
-    output="df"
+    output="df" # 'list','dict','records','index','series'
 ):
     if not kind.startswith("."):
         kind = "." + kind
@@ -1432,7 +1493,7 @@ def list_func(lib_name, opt="call"):
 def func_list(lib_name, opt="call"):
     return list_func(lib_name, opt=opt)
-def newfolder(*args, **kwargs):
+def mkdir(*args, **kwargs):
     """
     newfolder(pardir, chdir)
     Args:
@@ -1444,7 +1505,7 @@ def newfolder(*args, **kwargs):
     """
     overwrite=kwargs.get("overwrite",False)
     for arg in args:
-        if isinstance(arg, str):
+        if isinstance(arg, (str,list)):
             if "/" in arg or "\\" in arg:
                 pardir=arg
                 print(f'pardir{pardir}')

py2ls/netfinder.py CHANGED Viewed

@@ -1,6 +1,7 @@
 from bs4 import BeautifulSoup
 import requests
 from requests.utils import dict_from_cookiejar
+from requests.exceptions import ChunkedEncodingError, ConnectionError
 import os
 from urllib.parse import urlparse, urljoin
 import base64
@@ -150,7 +151,7 @@ def flatten_json(y):
 def get_proxy():
     list_ = []
     headers = {"User-Agent": user_agent()}
-    response = requests.get("https://free-proxy-list.net", headers=headers)
+    response = requests.get("https://free-proxy-list.net", headers=headers,timeout=30,stream=True)
     content = BeautifulSoup(response.content, "html.parser")
     info = extract_text_from_content(content, where="td", extend=0)[0].split()
     count, pair_proxy = 0, 2
@@ -200,18 +201,18 @@ def fetch_all(url, parser="lxml", driver='request', # request or selenium
         headers = {"User-Agent": user_agent()}
         if 'req' in driver.lower():
-            response = requests.get(url, headers=headers,proxies=proxies_glob)
+            response = requests.get(url, headers=headers,proxies=proxies_glob,timeout=30,stream=True)
             # If the response is a redirect, follow it
             while response.is_redirect:
                 logger.info(f"Redirecting to: {response.headers['Location']}")
-                response = requests.get(response.headers["Location"], headers=headers,proxies=proxies_glob)
+                response = requests.get(response.headers["Location"], headers=headers,proxies=proxies_glob,timeout=30,stream=True)
             # Check for a 403 error
             if response.status_code == 403:
                 logger.warning("403 Forbidden error. Retrying...")
                 # Retry the request after a short delay
                 sleep(random.uniform(1, 3))
-                response = requests.get(url, headers=headers,proxies=proxies_glob)
+                response = requests.get(url, headers=headers,proxies=proxies_glob,timeout=30,stream=True)
                 # Raise an error if retry also fails
                 response.raise_for_status()
@@ -471,7 +472,7 @@ def pdf_detector(url, contains = None, dir_save = None, booster = False):
             idx += 1
         print(f'{len(fnames)} files are downloaded:\n{fnames}\n to local: \n{dir_save}')
-def downloader(url, dir_save=dir_save, kind=['.pdf'], contains=None, rm_folder=False, booster=False,verbose=True):
+def downloader(url, dir_save=dir_save, kind=['.pdf'], contains=None, rm_folder=False, booster=False,verbose=True, timeout=30, n_try=3,timestamp=False):
     if verbose:
         print("usage: downloader(url, dir_save=None, kind=['.pdf','xls'], contains=None, booster=False)")
     def fname_corrector(fname, ext):
@@ -482,17 +483,21 @@ def downloader(url, dir_save=dir_save, kind=['.pdf'], contains=None, rm_folder=F
         return fname
     def check_and_modify_filename(directory, filename):
         base, ext = os.path.splitext(filename)
-        counter = 2
+        counter = 1
         new_filename = filename
         while os.path.exists(os.path.join(directory, new_filename)):
-            new_filename = f"{base}_{counter}{ext}"
+            if counter<=9:
+                counter_='0'+str(counter)
+            else:
+                counter_=str(counter)
+            new_filename = f"{base}_{counter_}{ext}"
             counter += 1
         return new_filename
     if not isinstance(kind,list):
         kind=[kind]
     if isinstance(url, list):
         for url_ in url:
-            downloader(url_, dir_save=dir_save, kind=kind, contains=contains, booster=booster,verbose=verbose)
+            downloader(url_, dir_save=dir_save, kind=kind, contains=contains, booster=booster,verbose=verbose,timeout=timeout,n_try=n_try,timestamp=timestamp)
             # sleep(random.uniform(1, 3))
     for i,k in enumerate(kind):
         if not k.startswith('.'):
@@ -544,25 +549,45 @@ def downloader(url, dir_save=dir_save, kind=['.pdf'], contains=None, rm_folder=F
         fnames = [file_link.split("/")[-1] for file_link in file_links_all]
         for idx, file_link in enumerate(file_links_all):
             headers = {"User-Agent": user_agent()}
-            response = requests.get(file_link, headers=headers)
-            if response.status_code == 200:
-                ext = next((ftype for ftype in kind if ftype in file_link), None)
-                if ext:
-                    corrected_fname = fname_corrector(fnames[idx], ext)
-                    corrected_fname = check_and_modify_filename(dir_save, corrected_fname)
-                    with open(os.path.join(dir_save, corrected_fname), "wb") as file:
-                        file.write(response.content)
-                    if verbose:
-                        print(f"Done! {fnames[idx]}")
-                else:
-                    if verbose:
-                        print(f"Unknown file type for {file_link}")
-            else:
-                if verbose:
-                    print(f"Failed to download file: {response.status_code}")
+            itry = 0 # Retry logic with exception handling
+            while itry < n_try:
+                try:
+                    # streaming to handle large files and reduce memory usage.
+                    response = requests.get(file_link, headers=headers, timeout=timeout, stream=True)
+                    if response.status_code == 200:
+                        ext = next((ftype for ftype in kind if ftype in file_link), None)
+                        if ext:
+                            corrected_fname = fname_corrector(fnames[idx], ext)
+                            corrected_fname = check_and_modify_filename(dir_save, corrected_fname)
+                            if timestamp:
+                                corrected_fname=datetime.now().strftime("%y%m%d_%H%M%S_")+corrected_fname
+                            fpath_tmp = os.path.join(dir_save, corrected_fname)
+                            with open(fpath_tmp, "wb") as file:
+                                for chunk in response.iter_content(chunk_size=8192):
+                                    if chunk:  # Filter out keep-alive chunks
+                                        file.write(chunk)
+                            if verbose:
+                                print(f"Done! {fnames[idx]}")
+                        else:
+                            if verbose:
+                                print(f"Unknown file type for {file_link}")
+                        break  # Exit the retry loop if successful
+                    else:
+                        if verbose:
+                            print(f"Failed to download file: HTTP status code {response.status_code}")
+                except (ChunkedEncodingError, ConnectionError) as e:
+                    print(f"Attempt {itry+1} failed: {e}. Retrying in a few seconds...")
+                    # time.sleep(random.uniform(0, 2))  # Random sleep to mitigate server issues
+                    if os.path.exists(fpath_tmp):
+                                os.remove(fpath_tmp)
+                    itry += 1
+            if itry == n_try:
+                print(f"Failed to download {file_link} after {n_try} attempts.")
         print(f'\n{len(fnames)} files were downloaded:')
         if verbose:
-            pp(fnames)
+            pp(corrected_fname) if corrected_fname in locals() else pp(fnames)
             print(f"\n\nsaved @:\n{dir_save}")
 def find_img(url, driver='request',dir_save="images", rm_folder=False, verbose=True):

py2ls/translator.py CHANGED Viewed

@@ -59,7 +59,7 @@ def get_lang_code_iso639():
     lang_code_iso639=dict([*zip(fullname,shortcut)])
     return lang_code_iso639
-def detect_lang(text, output='lang',verbose=True):
+def detect_lang(text, output='lang',verbose=False):
     dir_curr_script=os.path.dirname(os.path.abspath(__file__))
     dir_lang_code=dir_curr_script+"/data/lang_code_iso639.json"
     with open(dir_lang_code, "r") as file:
@@ -85,7 +85,7 @@ def is_text(s):
     # no_special = not re.search(r'[^A-Za-z0-9\s]', s)
     return has_alpha and has_non_alpha
-def strcmp(search_term, candidates, ignore_case=True, verbose=True, scorer='WR'):
+def strcmp(search_term, candidates, ignore_case=True, verbose=False, scorer='WR'):
     """
     Compares a search term with a list of candidate strings and finds the best match based on similarity score.
@@ -392,6 +392,8 @@ def translate(
     Translate text to the target language using the specified translation method (Google Translate or DeepL).
     lang_src (str): e.g., 'english', or 'chinese' when there are two languages, then lang_src must be given
     """
+    # error_verbose = verbose or False
     if isinstance(text,list):
         text=merge_text(text)
     text = replace_text(text)
@@ -508,18 +510,19 @@ def translate_with_retry(
         lang_src = detect_lang(text)
         lang_src = get_language_code(language=lang_src)
     lang = get_language_code(language=lang)
-    print(f"lang:{lang},lang_src:{lang_src}")
     try:
-        print(len(text))
         return try_translate(text,lang=lang,lang_src=lang_src,user_agent=user_agent,service_url=service_urls[0])
     except Exception as e:
-        print("Connection error:", e)
+        if error_verbose:
+            print("Connection error:", e)
         try:
             time.sleep(1)
             return try_translate(text,lang=lang,lang_src=lang_src,user_agent=user_agent,service_url=service_urls[1])
         except Exception as e:
-            print(f"(translate_with_retry):Connection error with {service_urls}: {e}")
-        print("All service URLs failed. Unable to translate the text.")
+            if error_verbose:
+                print(f"(translate_with_retry):Connection error with {service_urls}: {e}")
+        if error_verbose:
+            print("All service URLs failed. Unable to translate the text.")
         return text

{py2ls-0.1.6.7.dist-info → py2ls-0.1.6.9.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: py2ls
-Version: 0.1.6.7
+Version: 0.1.6.9
 Summary: py(thon)2(too)ls
 Author: Jianfeng
 Author-email: Jianfeng.Liu0413@gmail.com

{py2ls-0.1.6.7.dist-info → py2ls-0.1.6.9.dist-info}/RECORD RENAMED Viewed

@@ -134,14 +134,14 @@ py2ls/db2ls.py,sha256=MMfFX47aIPIyu7fU9aPvX9lbPRPYOpJ_VXwlnWk-8qo,13615
 py2ls/doc.py,sha256=xN3g1OWfoaGUhikbJ0NqbN5eKy1VZVvWwRlhHMgyVEc,4243
 py2ls/export_requirements.py,sha256=psZtSe-MOD9L_w3dVpA_VJEKfq3J914g3Y1OtRNAb4g,2324
 py2ls/freqanalysis.py,sha256=F4218VSPbgL5tnngh6xNCYuNnfR-F_QjECUUxrPYZss,32594
-py2ls/ips.py,sha256=B88jol_EYZ4IERWAeabkHSwMkGytqyBheEbbpM_YDiQ,86146
-py2ls/netfinder.py,sha256=ZsLWGYMeRuGvxj2nqE0Z8ANoaVl18Necfw0HQfh2q7I,45548
+py2ls/ips.py,sha256=KkrkGAF0VQ-N0rH4FQFLyP-C-skY6EPpeO8t_5RngWw,88519
+py2ls/netfinder.py,sha256=aOrgXp2rqpUDREZMlP_875SuAAcQXu3lhnRMk1cPG5M,47269
 py2ls/plot.py,sha256=8_33-1wpkGZrDUuvRBfTPUi_BRKdf1njoR725OLSLSY,48579
 py2ls/setuptools-70.1.0-py3-none-any.whl,sha256=2bi3cUVal8ip86s0SOvgspteEF8SKLukECi-EWmFomc,882588
 py2ls/sleep_events_detectors.py,sha256=36MCuRrpurn0Uvzpo3p3b3_JlVsRNHSWCXbJxCGM3mg,51546
 py2ls/stats.py,sha256=Wd9yCKQ_61QD29WMEgMuEcreFxF91NmlPW65iWT2B5w,39041
-py2ls/translator.py,sha256=6S7MmTZmjj8NljVmj0W5uEauu4ePxso3AMf2LvGVRQA,30516
+py2ls/translator.py,sha256=bc5FB-wqC4TtQz9gyCP1mE38HqNRJ_pmuRIgKnAlMzM,30581
 py2ls/wb_detector.py,sha256=7y6TmBUj9exCZeIgBAJ_9hwuhkDh1x_-yg4dvNY1_GQ,6284
-py2ls-0.1.6.7.dist-info/METADATA,sha256=-gKMv_eCD4spLGvNAidRLyfI07hf1eq_ldQsfFmxlsA,20998
-py2ls-0.1.6.7.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
-py2ls-0.1.6.7.dist-info/RECORD,,
+py2ls-0.1.6.9.dist-info/METADATA,sha256=iPwvGzCypApng9Ci3pxCknbx6mek6zOQTy3rWg2VKo4,20998
+py2ls-0.1.6.9.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+py2ls-0.1.6.9.dist-info/RECORD,,

{py2ls-0.1.6.7.dist-info → py2ls-0.1.6.9.dist-info}/WHEEL RENAMED Viewed

File without changes

py2ls 0.1.6.7__py3-none-any.whl → 0.1.6.9__py3-none-any.whl

py2ls 0.1.6.7__py3-none-any.whl → 0.1.6.9__py3-none-any.whl