py2ls 0.2.4.12__py3-none-any.whl → 0.2.4.14__py3-none-any.whl

py2ls/ips.py CHANGED
@@ -6289,6 +6289,12 @@ def df_reducer(
  hue: str = None,  # lda-specific
  scale: bool = True,
  fill_missing: bool = True,
+ size=2,  # plot marker size
+ markerscale=4,  # plot: legend marker size scale
+ edgecolor='none',  # plot: marker edge color
+ legend_loc='best',  # plot: legend location
+ bbox_to_anchor=None,
+ ncols=1,
  debug: bool = False,
  inplace: bool = True,  # replace the original data
  plot_: bool = False,  # plot a scatterplot; without 'hue' it is meaningless
@@ -6312,6 +6318,13 @@ def df_reducer(
  "ica": "ica (Independent Component Analysis):\n\tEffective for blind source separation (e.g., EEG, audio signal processing). It is generally categorized under non-linear dimensionality reduction, but it also serves a distinct role in blind source separation. While ICA is commonly used for dimensionality reduction, particularly in contexts where data sources need to be disentangled (e.g., separating mixed signals like EEG or audio data), it focuses on finding statistically independent components rather than maximizing variance (like PCA) or preserving distances (like MDS or UMAP). Advantage: Extracts independent signals/components, useful in mixed-signal scenarios. Limitation: Assumes statistical independence, sensitive to noise and algorithm choice.",
  #! Anomaly Detection: Specialized for detecting outliers or unusual patterns
  "isolation_forest": "Isolation Forest:\n\tDesigned for anomaly detection, especially in high-dimensional data. Advantage: Effective in detecting outliers, efficient for large datasets. Limitation: Sensitive to the contamination ratio parameter, not ideal for highly structured or non-anomalous data.",
+ #! more methods
+ "truncated_svd": "Truncated Singular Value Decomposition (SVD):\n\tEfficient for large sparse datasets, useful for feature reduction in natural language processing (e.g., Latent Semantic Analysis). Advantage: Efficient in memory usage for large datasets. Limitation: Limited in non-linear transformation.",
+ "spectral_embedding": "Spectral Embedding:\n\tBased on graph theory, it can be useful for clustering and visualization, especially for data with connected structures. Advantage: Preserves global structure, good for graph-type data. Limitation: Sensitive to parameter choice, not ideal for arbitrary non-connected data.",
+ "autoencoder": "Autoencoder:\n\tA neural network-based approach for complex feature learning and non-linear dimensionality reduction. Advantage: Can capture very complex relationships. Limitation: Computationally expensive, requires neural network expertise for effective tuning.",
+ "nmf": "Non-negative Matrix Factorization:\n\tEffective for parts-based decomposition, commonly used for sparse and non-negative data, e.g., text data or images. Advantage: Interpretability with non-negativity, efficient with sparse data. Limitation: Less effective for negative or zero-centered data.",
+ "umap_hdbscan": "UMAP + HDBSCAN:\n\tCombination of UMAP for dimensionality reduction and HDBSCAN for density-based clustering, suitable for cluster discovery in high-dimensional data. Advantage: Effective in discovering clusters in embeddings. Limitation: Requires careful tuning of both UMAP and HDBSCAN parameters.",
+ "manifold_learning": "Manifold Learning (Isomap, Hessian LLE, etc.):\n\tMethods designed to capture intrinsic geometrical structure. Advantage: Preserves non-linear relationships in low dimensions. Limitation: Computationally expensive and sensitive to noise."
  }
 
  from sklearn.preprocessing import StandardScaler
@@ -6322,17 +6335,9 @@ def df_reducer(
  import seaborn as sns
  # Check valid method input
  methods = [
- "pca",
- "umap",
- "tsne",
- "factor",
- "isolation_forest",
- "lda",
- "kpca",
- "ica",
- "mds",
- "lle",
- "svd",
+ "pca", "umap", "umap_hdbscan", "tsne", "factor", "isolation_forest", "manifold_learning", "lda", "kpca", "ica",
+ "mds", "lle", "svd", "truncated_svd", "spectral_embedding",
+ # "autoencoder", "nmf",
  ]
  method = strcmp(method, methods)[0]
  print(f"\nprocessing with using {dict_methods[method]}:")
@@ -6637,12 +6642,131 @@ def df_reducer(
  index=data.index,
  columns=[f"SVD_{i+1}" for i in range(n_components)],
  )
+ colname_met = "SVD_"
  if hue:
  svd_df[hue] = y
  if debug:
  print("Singular Value Decomposition (SVD) completed.")
+ elif method=="truncated_svd":
+ from sklearn.decomposition import TruncatedSVD
+ svd = TruncatedSVD(n_components=n_components, random_state=random_state)
+ X_reduced = svd.fit_transform(X)
+ reduced_df = pd.DataFrame(
+ X_reduced,
+ columns=[f"SVD Component {i+1}" for i in range(n_components)],
+ index=data.index,
+ )
+ colname_met = "SVD Component "
+
+ if debug:
+ print("Truncated SVD completed.")
+ print("Explained Variance Ratio:", svd.explained_variance_ratio_)
+ if hue:
+ reduced_df[hue] = y
+
+ elif method == "spectral_embedding":
+ from sklearn.manifold import SpectralEmbedding
+
+ spectral = SpectralEmbedding(n_components=n_components, random_state=random_state)
+ X_reduced = spectral.fit_transform(X)
+ reduced_df = pd.DataFrame(
+ X_reduced,
+ columns=[f"Dimension_{i+1}" for i in range(n_components)],
+ index=data.index,
+ )
+ colname_met = "Dimension_"
+
+ if debug:
+ print("Spectral Embedding completed.")
+ if hue:
+ reduced_df[hue] = y
+
+ elif method == "autoencoder":
+ from tensorflow.keras.models import Model
+ from tensorflow.keras.layers import Input, Dense
+
+ input_dim = X.shape[1]
+ input_layer = Input(shape=(input_dim,))
+ encoded = Dense(n_components * 2, activation="relu")(input_layer)
+ encoded = Dense(n_components, activation="relu")(encoded)
+ autoencoder = Model(input_layer, encoded)
+ autoencoder.compile(optimizer="adam", loss="mean_squared_error")
+ autoencoder.fit(X, X, epochs=50, batch_size=256, shuffle=True, verbose=0)
+
+ X_reduced = autoencoder.predict(X)
+ reduced_df = pd.DataFrame(
+ X_reduced,
+ columns=[f"Score_{i+1}" for i in range(n_components)],
+ index=data.index,
+ )
+ colname_met = "Score_"
+
+ if debug:
+ print("Autoencoder reduction completed.")
+ if hue:
+ reduced_df[hue] = y
+
+ elif method == "nmf":
+ from sklearn.decomposition import NMF
+
+ nmf = NMF(n_components=n_components, random_state=random_state)
+ X_reduced = nmf.fit_transform(X)
+ reduced_df = pd.DataFrame(
+ X_reduced,
+ columns=[f"NMF_{i+1}" for i in range(n_components)],
+ index=data.index,
+ )
+ colname_met = "NMF_"
 
- # Return reduced data and info as a new DataFrame with the same index
+ if debug:
+ print("Non-negative Matrix Factorization completed.")
+ if hue:
+ reduced_df[hue] = y
+
+ elif method == "umap_hdbscan":
+ import umap
+ import hdbscan
+
+ umap_model = umap.UMAP(
+ n_neighbors=umap_neighbors,
+ min_dist=umap_min_dist,
+ n_components=n_components,
+ )
+ X_umap = umap_model.fit_transform(X)
+
+ clusterer = hdbscan.HDBSCAN()
+ clusters = clusterer.fit_predict(X_umap)
+
+ reduced_df = pd.DataFrame(
+ X_umap,
+ columns=[f"UMAP_{i+1}" for i in range(n_components)],
+ index=data.index,
+ )
+ reduced_df["Cluster"] = clusters
+ colname_met = "UMAP_"
+ if debug:
+ print("UMAP + HDBSCAN reduction and clustering completed.")
+ if hue:
+ reduced_df[hue] = y
+
+ elif method == "manifold_learning":
+ from sklearn.manifold import Isomap
+
+ isomap = Isomap(n_components=n_components)
+ X_reduced = isomap.fit_transform(X)
+ reduced_df = pd.DataFrame(
+ X_reduced,
+ columns=[f"Manifold_{i+1}" for i in range(n_components)],
+ index=data.index,
+ )
+ colname_met = "Manifold_"
+
+ if debug:
+ print("Manifold Learning (Isomap) completed.")
+ if hue:
+ reduced_df[hue] = y
+
+ #! Return reduced data and info as a new DataFrame with the same index
  if method == "pca":
  reduced_df = pca_df
  colname_met = "PC_"
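As a standalone illustration of the umap_hdbscan branch added above (a UMAP embedding followed by HDBSCAN labelling), here is a minimal sketch outside of df_reducer; the n_neighbors and min_dist values are arbitrary examples, not the function's defaults:

import numpy as np
import pandas as pd
import umap      # umap-learn
import hdbscan

X = np.random.default_rng(0).normal(size=(300, 8))

# 1) non-linear embedding into 2 components
embedding = umap.UMAP(n_neighbors=15, min_dist=0.1, n_components=2).fit_transform(X)

# 2) density-based cluster labels on the embedded points (-1 marks noise)
labels = hdbscan.HDBSCAN().fit_predict(embedding)

reduced = pd.DataFrame(embedding, columns=["UMAP_1", "UMAP_2"])
reduced["Cluster"] = labels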
@@ -6699,7 +6823,6 @@ def df_reducer(
  # Quick plots
  if plot_ and (not method in ["isolation_forest"]):
  from .plot import plotxy
-
  if ax is None:
  if figsize is None:
  _, ax = plt.subplots(figsize=cm2inch(8, 8))
@@ -6707,16 +6830,18 @@ def df_reducer(
  _, ax = plt.subplots(figsize=figsize)
  else:
  ax = ax.cla()
+ xlabel = f"{colname_met}1" if xlabel is None else xlabel
+ ylabel = f"{colname_met}2" if ylabel is None else ylabel
  ax = plotxy(
  data=reduced_df,
  x=colname_met + "1",
  y=colname_met + "2",
  hue=hue,
- s=1,
- edgecolor="none",
+ s=size,
+ edgecolor=edgecolor,
  kind="scater",
  figsets=dict(
- legend=dict(loc="best", markerscale=4),
+ legend=dict(loc=legend_loc, markerscale=markerscale, bbox_to_anchor=bbox_to_anchor, ncols=ncols, fontsize=8),
  xlabel=xlabel if xlabel else None,
  ylabel=ylabel if ylabel else None,
  ),
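For orientation, a minimal usage sketch of the new plotting keywords added to df_reducer above. The data= and columns= argument names are assumptions inferred from the surrounding function (they are not shown in these hunks); only size, markerscale, edgecolor, legend_loc, bbox_to_anchor and ncols are confirmed by this diff, and they only take effect when plot_=True:

import numpy as np
import pandas as pd
from py2ls import ips

rng = np.random.default_rng(0)
df = pd.DataFrame(rng.normal(size=(200, 10)),
                  columns=[f"feat_{i}" for i in range(10)])
df["label"] = rng.integers(0, 3, size=200).astype(str)

reduced = ips.df_reducer(
    data=df,                                   # assumed argument name
    columns=[f"feat_{i}" for i in range(10)],  # assumed argument name
    method="umap_hdbscan",                     # one of the newly registered methods
    n_components=2,
    hue="label",
    plot_=True,
    size=8,                    # marker size forwarded to plotxy(s=...)
    markerscale=2,             # legend marker scaling
    edgecolor="k",             # marker edge color
    legend_loc="upper right",  # legend placement
    bbox_to_anchor=(1.02, 1),
    ncols=1,
)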
py2ls/ml2ls.py CHANGED
@@ -1298,10 +1298,11 @@ def plot_validate_features_single(res_val, figsize=None):
  mean_auc = res_val["roc_curve"][model_name]["auc"]
 
  # Plotting
- plot_roc_curve(fpr, tpr, mean_auc, lower_ci, upper_ci, ax=nexttile())
+ plot_roc_curve(fpr, tpr, mean_auc, lower_ci, upper_ci,
+ model_name=model_name, ax=nexttile())
  plot.figsets(title=model_name, sp=2)
 
- plot_pr_curve(
+ plot_pr_binary(
  recall=res_val["pr_curve"][model_name]["recall"],
  precision=res_val["pr_curve"][model_name]["precision"],
  avg_precision=res_val["pr_curve"][model_name]["avg_precision"],
@@ -1410,7 +1411,6 @@ def plot_roc_curve(
  # ml2ls.plot_roc_curve(fpr, tpr, mean_auc, lower_ci, upper_ci)
  # figsets(title=model_name)
 
-
  def plot_pr_curve(
  recall=None,
  precision=None,
@@ -1436,7 +1436,7 @@ def plot_pr_curve(
  precision,
  lw=lw,
  color=color,
- label=(f"{model_name} (AUC={avg_precision:.2f})"),
+ label=(f"{model_name} (AP={avg_precision:.2f})"),
  clip_on=False,
  **kwargs,
  )
@@ -1453,7 +1453,6 @@ def plot_pr_curve(
  ax.legend(loc=legend_loc)
  return ax
 
-
  # * usage: ml2ls.plot_pr_curve()
  # for md_name in flatten(validation_results["pr_curve"].keys()):
  # ml2ls.plot_pr_curve(
@@ -1466,7 +1465,91 @@ def plot_pr_curve(
  # color="r",
  # )
 
+ def plot_pr_binary(
+ recall=None,
+ precision=None,
+ avg_precision=None,
+ model_name=None,
+ lw=2,
+ figsize=[5, 5],
+ title="Precision-Recall Curve",
+ xlabel="Recall",
+ ylabel="Precision",
+ alpha=0.1,
+ color="#FF8F00",
+ legend_loc="lower left",
+ ax=None,
+ show_avg_precision=False,
+ **kwargs,
+ ):
+ from scipy.interpolate import interp1d
+ if ax is None:
+ fig, ax = plt.subplots(figsize=figsize)
+ model_name = "Binary PR Curve" if model_name is None else model_name
+
+ #* use sklearn's built-in 'PrecisionRecallDisplay' instead?
+ # from sklearn.metrics import PrecisionRecallDisplay
+ # disp = PrecisionRecallDisplay(precision=precision,
+ # recall=recall,
+ # average_precision=avg_precision, **kwargs)
+ # disp.plot(ax=ax, name=model_name, color=color)
+
+ # Plot Precision-Recall curve
+ ax.plot(
+ recall,
+ precision,
+ lw=lw,
+ color=color,
+ label=(f"{model_name} (AP={avg_precision:.2f})"),
+ clip_on=False,
+ **kwargs,
+ )
 
+ # Fill area under the curve
+ ax.fill_between(recall, precision, alpha=alpha, color=color)
+ # Add F1 score iso-contours
+ f_scores = np.linspace(0.2, 0.8, num=4)
+ # for f_score in f_scores:
+ # x = np.linspace(0.01, 1)
+ # y = f_score * x / (2 * x - f_score)
+ # plt.plot(x[y >= 0], y[y >= 0], color="gray", alpha=1)
+ # plt.annotate(f"$f_1={f_score:0.1f}$", xy=(0.8, y[45] + 0.02))
+
+ pr_boundary = interp1d(recall, precision, kind="linear", fill_value="extrapolate")
+ for f_score in f_scores:
+ x_vals = np.linspace(0.01, 1, 10000)
+ y_vals = f_score * x_vals / (2 * x_vals - f_score)
+ y_vals_clipped = np.minimum(y_vals, pr_boundary(x_vals))
+ y_vals_clipped = np.clip(y_vals_clipped, 1e-3, None)  # prevent going to zero
+ valid = y_vals_clipped < pr_boundary(x_vals)
+ valid_ = y_vals_clipped > 1e-3
+ valid = valid & valid_
+ x_vals = x_vals[valid]
+ y_vals_clipped = y_vals_clipped[valid]
+ if len(x_vals) > 0:  # annotate only if a line segment exists
+ ax.plot(x_vals, y_vals_clipped, color="gray", alpha=1)
+ plt.annotate(f"$f_1={f_score:0.1f}$", xy=(0.8, y_vals_clipped[-int(len(y_vals_clipped)*0.35)] + 0.02))
+
+ # Plot the average precision line
+ if show_avg_precision:
+ plt.axhline(
+ y=avg_precision,
+ color="red",
+ ls="--",
+ lw=lw,
+ label=f"Avg. precision={avg_precision:.2f}",
+ )
+ # Customize axes
+ ax.set_title(title)
+ ax.set_xlabel(xlabel)
+ ax.set_ylabel(ylabel)
+ ax.set_xlim([-0.01, 1.0])
+ ax.set_ylim([0.0, 1.0])
+ ax.grid(False)
+ ax.legend(loc=legend_loc)
+ return ax
+
  def plot_cm(
  cm,
  labels_name=None,
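A hedged usage sketch of the new plot_pr_binary above, assuming it is exposed at module level like plot_pr_curve; the toy labels and scores below are fabricated purely for illustration:

import numpy as np
from sklearn.metrics import precision_recall_curve, average_precision_score
from py2ls import ml2ls

# toy binary labels and scores, just to produce a curve
rng = np.random.default_rng(0)
y_true = rng.integers(0, 2, size=500)
y_score = np.clip(0.4 * y_true + rng.normal(0.3, 0.25, size=500), 0, 1)

precision, recall, _ = precision_recall_curve(y_true, y_score)
ap = average_precision_score(y_true, y_score)

# AP (average precision), not ROC AUC, is what the corrected label reports
ax = ml2ls.plot_pr_binary(
    recall=recall,
    precision=precision,
    avg_precision=ap,
    model_name="toy classifier",
)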
py2ls/netfinder.py CHANGED
@@ -1,36 +1,11 @@
  from bs4 import BeautifulSoup
  import requests
- from requests.utils import dict_from_cookiejar
- from requests.exceptions import ChunkedEncodingError, ConnectionError
  import os
- from urllib.parse import urlparse, urljoin
- import base64
  import pandas as pd
- from collections import Counter
- import random
  import logging
- from time import sleep
- import stem.process
- from stem import Signal
- from stem.control import Controller
  import json
- from fake_useragent import UserAgent
- from selenium import webdriver
- from selenium.webdriver.chrome.service import Service
- from selenium.webdriver.common.by import By
- from selenium.webdriver.chrome.options import Options
- from selenium.webdriver.support.ui import WebDriverWait
- from selenium.webdriver.support import expected_conditions as EC
- from webdriver_manager.chrome import ChromeDriverManager
- from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
- from pprint import pp
- import mimetypes
- import io
- import matplotlib.pyplot as plt
- from PIL import Image
- from duckduckgo_search import DDGS
- from datetime import datetime
  import time
+ from selenium.webdriver.common.by import By
  from . import ips
 
  dir_save = "/Users/macjianfeng/Dropbox/Downloads/"
@@ -56,6 +31,8 @@ def user_agent(
  verbose=False,
  os=["windows", "macos", "linux"],
  ):
+ from fake_useragent import UserAgent
+
  ua = UserAgent(browsers=browsers, platforms=platforms, os=os)
  output_ua = ua.random
  if verbose:
@@ -109,6 +86,8 @@ def get_attr(content, where=None, attr=None, **kwargs):
  else:
  print(f"The attribute '{attr}' is not found in the elements.")
  else:
+ from pprint import pp
+
  print(f"Cannot find tag '{where}' in the content.")
  print("Available tags:")
  pp(all_tags)
@@ -237,6 +216,8 @@ def flatten_json(y):
 
 
  def get_proxy():
+ import random
+
  list_ = []
  headers = {"User-Agent": user_agent()}
  response = requests.get(
@@ -275,6 +256,8 @@ def get_cookies(url, login={"username": "your_username", "password": "your_passw
 
  ### Scroll more smoothly; this makes it easier to evade anti-scraping checks
  def scroll_smth_steps(driver, scroll_pause=0.5, min_step=200, max_step=600):
+ import random
+
  """Smoothly scrolls down the page to trigger lazy loading."""
  current_scroll_position = 0
  end_of_page = driver.execute_script("return document.body.scrollHeight")
@@ -383,7 +366,7 @@ def fetch_all(
  if response.status_code == 403:
  logger.warning("403 Forbidden error. Retrying...")
  # Retry the request after a short delay
- sleep(random.uniform(1, 3))
+ time.sleep(random.uniform(1, 3))
  response = requests.get(
  url, headers=headers, proxies=proxies_glob, timeout=30, stream=True
  )
@@ -410,6 +393,18 @@ def fetch_all(
  logger.warning("Unsupported content type")
  return None, None
  elif "se" in driver.lower():
+ import random
+ from selenium import webdriver
+ from selenium.webdriver.chrome.service import Service
+ from selenium.webdriver.common.by import By
+ from selenium.webdriver.chrome.options import Options
+ from selenium.webdriver.support.ui import WebDriverWait
+ from selenium.webdriver.support import expected_conditions as EC
+ from webdriver_manager.chrome import ChromeDriverManager
+ from selenium.webdriver.common.desired_capabilities import (
+ DesiredCapabilities,
+ )
+
  chrome_options = Options()
  chrome_options.add_argument("--headless")
  chrome_options.add_argument("--no-sandbox")
@@ -501,7 +496,7 @@ def fetch_all(
  content = BeautifulSoup(page_source, "html.parser")
  if content and content.find_all(by):
  break
- sleep(
+ time.sleep(
  random.uniform(2, 4)
  )  # Wait for a random time before polling again
 
@@ -575,6 +570,8 @@ def fetch_all(
  # else:
  # return None
  def find_links(url, driver="request", booster=False):
+ from urllib.parse import urlparse, urljoin
+
  links_href, cond_ex = [], ["javascript:", "mailto:", "tel:", "fax:"]
  content_type, soup = fetch_all(url, driver=driver)
 
@@ -615,6 +612,8 @@ def find_links(url, driver="request", booster=False):
 
  # To determine which links are related to target domains (e.g., pages) you are interested in
  def filter_links(links, contains="html", driver="requ", booster=False):
+ from urllib.parse import urlparse, urljoin
+
  filtered_links = []
  if isinstance(contains, str):
  contains = [contains]
@@ -631,6 +630,9 @@ def filter_links(links, contains="html", driver="requ", booster=False):
 
 
  def find_domain(links):
+ from urllib.parse import urlparse, urljoin
+ from collections import Counter
+
  if not links:
  return None
  domains = [urlparse(link).netloc for link in links]
@@ -685,6 +687,8 @@ def pdf_detector(url, contains=None, dir_save=None, booster=False):
  pdf_links = filter_links(links=links_all, contains=["pdf"])
 
  if pdf_links:
+ from pprint import pp
+
  pp(f"pdf detected{pdf_links}")
  else:
  print("no pdf file")
@@ -719,6 +723,9 @@ def downloader(
  n_try=3,
  timestamp=False,
  ):
+
+ from requests.exceptions import ChunkedEncodingError, ConnectionError
+
  if verbose:
  print(
  "usage: downloader(url, dir_save=None, kind=['.pdf','xls'], contains=None, booster=False)"
@@ -742,14 +749,14 @@ def downloader(
  counter_ = str(counter)
  new_filename = f"{base}_{counter_}{ext}"
  counter += 1
- return new_filename
-
+ return new_filename
+
  if url.startswith("ftp"):
  import urllib.request
 
  if dir_save is None:
- dir_save = "./"
- dir_save+= os.path.basename(url)
+ dir_save = "./"
+ dir_save += os.path.basename(url)
  print(dir_save)
  urllib.request.urlretrieve(url, dir_save)
  print(f"Downloaded file to: {dir_save}")
@@ -807,6 +814,8 @@ def downloader(
  file_links = filter_links(links_all, contains=kind_)
  if verbose:
  if file_links:
+ from pprint import pp
+
  print("Files detected:")
  pp(file_links)
  else:
@@ -845,6 +854,8 @@ def downloader(
  dir_save, corrected_fname
  )
  if timestamp:
+ from datetime import datetime
+
  corrected_fname = (
  datetime.now().strftime("%y%m%d_%H%M%S_")
  + corrected_fname
@@ -878,6 +889,8 @@ def downloader(
 
  # print(f"\n{len(fnames)} files were downloaded:")
  if verbose:
+ from pprint import pp
+
  if corrected_fname:
  pp(corrected_fname)
  print(f"\n\nsaved @:\n{dir_save}")
@@ -896,6 +909,9 @@ def find_img(url, driver="request", dir_save="images", rm_folder=False, verbose=
  Returns:
  str: HTML content with updated image URLs pointing to local files.
  """
+ from urllib.parse import urlparse, urljoin
+ import base64
+
  if rm_folder:
  ips.rm_folder(dir_save)
  content_type, content = fetch_all(url, driver=driver)
@@ -961,6 +977,9 @@ def find_img(url, driver="request", dir_save="images", rm_folder=False, verbose=
 
 
  def svg_to_png(svg_file):
+ import io
+ from PIL import Image
+
  with WandImage(filename=svg_file, resolution=300) as img:
  img.format = "png"
  png_image = img.make_blob()
@@ -1026,6 +1045,16 @@ def fetch_selenium(
  iframe_name=None,  # Add option to handle iframe
  **kwargs,
  ):
+ import random
+ from selenium import webdriver
+ from selenium.webdriver.chrome.service import Service
+ from selenium.webdriver.common.by import By
+ from selenium.webdriver.chrome.options import Options
+ from selenium.webdriver.support.ui import WebDriverWait
+ from selenium.webdriver.support import expected_conditions as EC
+ from webdriver_manager.chrome import ChromeDriverManager
+ from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+
  chrome_options = Options()
  chrome_options.add_argument("--headless")
  chrome_options.add_argument("--no-sandbox")
@@ -1085,7 +1114,7 @@ def fetch_selenium(
  if attempt == retry - 1:
  logger.error("Failed to fetch the content after all retries")
  return []
- sleep(random.uniform(1, 3))
+ time.sleep(random.uniform(1, 3))
  # Return empty list if nothing found after all retries
  return []
 
@@ -1102,6 +1131,9 @@ def fetch(
  output="text",
  **kws,
  ):
+ import random
+ from urllib.parse import urlparse, urljoin
+
  if "xt" in output.lower():
  for attempt in range(retry):
  if verbose and attempt == 0:
@@ -1127,7 +1159,7 @@ def fetch(
  else:
  if texts:
  break
- sleep(random.uniform(0.5, 1.5))
+ time.sleep(random.uniform(0.5, 1.5))
  if isinstance(texts, pd.core.frame.DataFrame):
  condition_ = [texts.empty, booster]
  else:
@@ -1453,6 +1485,8 @@ def isa(fpath, kind="img"):
 
 
  def is_image(fpath):
+ import mimetypes
+
  mime_type, _ = mimetypes.guess_type(fpath)
  if mime_type and mime_type.startswith("image"):
  return True
@@ -1461,6 +1495,8 @@ def is_image(fpath):
 
 
  def is_document(fpath):
+ import mimetypes
+
  mime_type, _ = mimetypes.guess_type(fpath)
  if mime_type and (
  mime_type.startswith("text/")
@@ -1481,6 +1517,8 @@ def is_document(fpath):
 
 
  def is_zip(fpath):
+ import mimetypes
+
  mime_type, _ = mimetypes.guess_type(fpath)
  if mime_type == "application/zip":
  return True
@@ -1500,6 +1538,8 @@ def search(
  ):
 
  if "te" in kind.lower():
+ from duckduckgo_search import DDGS
+
  results = DDGS().text(query, max_results=limit)
  res = pd.DataFrame(results)
  res.rename(columns={"href": "links"}, inplace=True)
@@ -1517,6 +1557,8 @@ def search(
 
 
  def echo(query, model="gpt", verbose=True, log=True, dir_save=dir_save):
+ from duckduckgo_search import DDGS
+
  def is_in_any(str_candi_short, str_full, ignore_case=True):
  if isinstance(str_candi_short, str):
  str_candi_short = [str_candi_short]
@@ -1545,8 +1587,12 @@ def echo(query, model="gpt", verbose=True, log=True, dir_save=dir_save):
  model_valid = valid_mod_name(model)
  res = DDGS().chat(query, model=model_valid)
  if verbose:
+ from pprint import pp
+
  pp(res)
  if log:
+ from datetime import datetime
+
  dt_str = datetime.fromtimestamp(time.time()).strftime("%Y-%m-%d_%H:%M:%S")
  res_ = f"###{dt_str}\n\n>{res}\n"
  os.makedirs(dir_save, exist_ok=True)
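The netfinder.py changes above follow one pattern: heavy or optional dependencies (selenium, fake_useragent, duckduckgo_search, PIL, ...) move from module level into the functions that use them, and `from time import sleep` is dropped so every delay goes through the module-level `import time` as time.sleep(...). A minimal sketch of the same pattern, using a hypothetical helper that is not part of netfinder.py:

import time  # lightweight stdlib import stays at module level

def fetch_with_browser(url: str) -> str:
    # heavy, optional dependency imported lazily: importing the module
    # stays fast and does not fail when selenium is not installed
    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options

    opts = Options()
    opts.add_argument("--headless")
    driver = webdriver.Chrome(options=opts)
    try:
        driver.get(url)
        time.sleep(1)  # was sleep(1) back when `from time import sleep` existed
        return driver.page_source
    finally:
        driver.quit()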
py2ls/plot.py CHANGED
@@ -735,7 +735,10 @@ def catplot(data, *args, **kwargs):
  bx_opt["EdgeColor"] = "none"
  else:
  bx_opt["EdgeColor"] = bx_opt["EdgeColor"]
-
+ if not isinstance(bx_opt["FaceColor"], list):
+ bx_opt["FaceColor"] = [bx_opt["FaceColor"]]
+ if len(bxp["boxes"]) != len(bx_opt["FaceColor"]) and (len(bx_opt["FaceColor"]) == 1):
+ bx_opt["FaceColor"] = bx_opt["FaceColor"] * len(bxp["boxes"])
  for patch, color in zip(bxp["boxes"], bx_opt["FaceColor"]):
  patch.set_facecolor(to_rgba(color, bx_opt["FaceAlpha"]))
 
@@ -2315,16 +2318,8 @@ def split_legend(ax, n=2, loc=None, title=None, bbox=None, ncol=1, **kwargs):
  return legends
 
 
- def get_colors(
- n: int = 1,
- cmap: str = "auto",
- by: str = "start",
- alpha: float = 1.0,
- output: str = "hue",
- *args,
- **kwargs,
- ):
- return get_color(n, cmap, alpha, output, *args, **kwargs)
+ def get_colors(n: int = 1, cmap: str = "auto", by: str = "start", alpha: float = 1.0, output: str = "hue", *args, **kwargs):
+ return get_color(n=n, cmap=cmap, alpha=alpha, output=output, *args, **kwargs)
 
 
  def get_color(
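The catplot change above broadcasts a single FaceColor to every box before it is zipped with bxp["boxes"]; without it, zip() would stop after the first box. A standalone sketch of that broadcast logic, with hypothetical stand-in values:

# stand-ins for bxp["boxes"] and bx_opt["FaceColor"] in catplot
boxes = ["box_1", "box_2", "box_3"]
face_color = "#1f77b4"          # a single color, not a list

if not isinstance(face_color, list):
    face_color = [face_color]
if len(boxes) != len(face_color) and len(face_color) == 1:
    face_color = face_color * len(boxes)   # repeat so zip() covers every box

for box, color in zip(boxes, face_color):
    print(box, "->", color)     # every box now receives the same face color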
py2ls-0.2.4.12.dist-info/METADATA → py2ls-0.2.4.14.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: py2ls
- Version: 0.2.4.12
+ Version: 0.2.4.14
  Summary: py(thon)2(too)ls
  Author: Jianfeng
  Author-email: Jianfeng.Liu0413@gmail.com
@@ -132,7 +132,7 @@ Requires-Dist: nltk (>=3.8.1)
  Requires-Dist: numba (>=0.59.1)
  Requires-Dist: numcodecs (>=0.13.0)
  Requires-Dist: numerizer (>=0.2.3)
- Requires-Dist: numpy (>=1.26.4)
+ Requires-Dist: numpy (>=1.26.4,<2.0.0)
  Requires-Dist: onnxruntime (>=1.18.1)
  Requires-Dist: opencv-contrib-python (>=4.10.0.84)
  Requires-Dist: opencv-python (>=4.10.0.84)
py2ls-0.2.4.12.dist-info/RECORD → py2ls-0.2.4.14.dist-info/RECORD CHANGED
@@ -234,17 +234,17 @@ py2ls/export_requirements.py,sha256=x2WgUF0jYKz9GfA1MVKN-MdsM-oQ8yUeC6Ua8oCymio,
  py2ls/fetch_update.py,sha256=9LXj661GpCEFII2wx_99aINYctDiHni6DOruDs_fdt8,4752
  py2ls/freqanalysis.py,sha256=F4218VSPbgL5tnngh6xNCYuNnfR-F_QjECUUxrPYZss,32594
  py2ls/ich2ls.py,sha256=3E9R8oVpyYZXH5PiIQgT3CN5NxLe4Dwtm2LwaeacE6I,21381
- py2ls/ips.py,sha256=eXDteBS2ODd4qOjKhEQAgvLWerPXOjBvIe1kHQnI-Ww,265294
- py2ls/ml2ls.py,sha256=DPVbitW1Z-YwMXl6DR4ciB-OoCHFMNv5oWnEIi918LA,109898
+ py2ls/ips.py,sha256=O2QdLo6-vPbHvWtlVdtMA49LAn2y0CNVM27cxLbqqYA,271496
+ py2ls/ml2ls.py,sha256=LovnWDV9ptdWuWwJF5EEdf3sGY4EniGBBNxRJJbzStw,112784
  py2ls/mol.py,sha256=AZnHzarIk_MjueKdChqn1V6e4tUle3X1NnHSFA6n3Nw,10645
- py2ls/netfinder.py,sha256=RJFr80tGEJiuwEx99IBOhI5-ZuXnPdWnGUYpF7XCEwI,56426
+ py2ls/netfinder.py,sha256=R70NkrnO8LlXjT1y7bf2TN-yE4yOeAYhb0jDBiNp8XA,57536
  py2ls/ocr.py,sha256=5lhUbJufIKRSOL6wAWVLEo8TqMYSjoI_Q-IO-_4u3DE,31419
- py2ls/plot.py,sha256=5eoCgyQ7Bi4HyB60nrWdOh0tEJJEkLlFHfxM6ydT9PA,171262
+ py2ls/plot.py,sha256=X0R1KK_UTdeJazjnqTqYvP-uWu6wY8szQHyJMsDDz2s,171515
  py2ls/setuptools-70.1.0-py3-none-any.whl,sha256=2bi3cUVal8ip86s0SOvgspteEF8SKLukECi-EWmFomc,882588
  py2ls/sleep_events_detectors.py,sha256=bQA3HJqv5qnYKJJEIhCyhlDtkXQfIzqksnD0YRXso68,52145
  py2ls/stats.py,sha256=qBn2rJmNa_QLLUqjwYqXUlGzqmW94sgA1bxJU2FC3r0,39175
  py2ls/translator.py,sha256=zBeq4pYZeroqw3DT-5g7uHfVqKd-EQptT6LJ-Adi8JY,34244
  py2ls/wb_detector.py,sha256=7y6TmBUj9exCZeIgBAJ_9hwuhkDh1x_-yg4dvNY1_GQ,6284
- py2ls-0.2.4.12.dist-info/METADATA,sha256=mvipE6Wd7de3FX-AU3u2IV9oL3zTQuAbcn8yhLcRA_4,20039
- py2ls-0.2.4.12.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
- py2ls-0.2.4.12.dist-info/RECORD,,
+ py2ls-0.2.4.14.dist-info/METADATA,sha256=SSjNh_FXmxwIF_Xx2fZvSGKZaX997x4sfJxUQckMuGY,20046
+ py2ls-0.2.4.14.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
+ py2ls-0.2.4.14.dist-info/RECORD,,