PyPI - py2ls - Versions diffs - 0.1.10.12__py3-none-any.whl → 0.2.7.10__py3-none-any.whl - Mend

py2ls 0.1.10.12__py3-none-any.whl → 0.2.7.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of py2ls might be problematic. Click here for more details.

Files changed (72) hide show

py2ls/.DS_Store +0 -0
py2ls/.git/.DS_Store +0 -0
py2ls/.git/index +0 -0
py2ls/.git/logs/refs/remotes/origin/HEAD +1 -0
py2ls/.git/objects/.DS_Store +0 -0
py2ls/.git/refs/.DS_Store +0 -0
py2ls/ImageLoader.py +621 -0
py2ls/__init__.py +7 -5
py2ls/apptainer2ls.py +3940 -0
py2ls/batman.py +164 -42
py2ls/bio.py +2595 -0
py2ls/cell_image_clf.py +1632 -0
py2ls/container2ls.py +4635 -0
py2ls/corr.py +475 -0
py2ls/data/.DS_Store +0 -0
py2ls/data/email/email_html_template.html +88 -0
py2ls/data/hyper_param_autogluon_zeroshot2024.json +2383 -0
py2ls/data/hyper_param_tabrepo_2024.py +1753 -0
py2ls/data/mygenes_fields_241022.txt +355 -0
py2ls/data/re_common_pattern.json +173 -0
py2ls/data/sns_info.json +74 -0
py2ls/data/styles/.DS_Store +0 -0
py2ls/data/styles/example/.DS_Store +0 -0
py2ls/data/styles/stylelib/.DS_Store +0 -0
py2ls/data/styles/stylelib/grid.mplstyle +15 -0
py2ls/data/styles/stylelib/high-contrast.mplstyle +6 -0
py2ls/data/styles/stylelib/high-vis.mplstyle +4 -0
py2ls/data/styles/stylelib/ieee.mplstyle +15 -0
py2ls/data/styles/stylelib/light.mplstyl +6 -0
py2ls/data/styles/stylelib/muted.mplstyle +6 -0
py2ls/data/styles/stylelib/nature-reviews-latex.mplstyle +616 -0
py2ls/data/styles/stylelib/nature-reviews.mplstyle +616 -0
py2ls/data/styles/stylelib/nature.mplstyle +31 -0
py2ls/data/styles/stylelib/no-latex.mplstyle +10 -0
py2ls/data/styles/stylelib/notebook.mplstyle +36 -0
py2ls/data/styles/stylelib/paper.mplstyle +290 -0
py2ls/data/styles/stylelib/paper2.mplstyle +305 -0
py2ls/data/styles/stylelib/retro.mplstyle +4 -0
py2ls/data/styles/stylelib/sans.mplstyle +10 -0
py2ls/data/styles/stylelib/scatter.mplstyle +7 -0
py2ls/data/styles/stylelib/science.mplstyle +48 -0
py2ls/data/styles/stylelib/std-colors.mplstyle +4 -0
py2ls/data/styles/stylelib/vibrant.mplstyle +6 -0
py2ls/data/tiles.csv +146 -0
py2ls/data/usages_pd.json +1417 -0
py2ls/data/usages_sns.json +31 -0
py2ls/docker2ls.py +5446 -0
py2ls/ec2ls.py +61 -0
py2ls/fetch_update.py +145 -0
py2ls/ich2ls.py +1955 -296
py2ls/im2.py +8242 -0
py2ls/image_ml2ls.py +2100 -0
py2ls/ips.py +33909 -3418
py2ls/ml2ls.py +7700 -0
py2ls/mol.py +289 -0
py2ls/mount2ls.py +1307 -0
py2ls/netfinder.py +873 -351
py2ls/nl2ls.py +283 -0
py2ls/ocr.py +1581 -458
py2ls/plot.py +10394 -314
py2ls/rna2ls.py +311 -0
py2ls/ssh2ls.md +456 -0
py2ls/ssh2ls.py +5933 -0
py2ls/ssh2ls_v01.py +2204 -0
py2ls/stats.py +66 -172
py2ls/temp20251124.py +509 -0
py2ls/translator.py +2 -0
py2ls/utils/decorators.py +3564 -0
py2ls/utils_bio.py +3453 -0
{py2ls-0.1.10.12.dist-info → py2ls-0.2.7.10.dist-info}/METADATA +113 -224
{py2ls-0.1.10.12.dist-info → py2ls-0.2.7.10.dist-info}/RECORD +72 -16
{py2ls-0.1.10.12.dist-info → py2ls-0.2.7.10.dist-info}/WHEEL +0 -0

py2ls/stats.py CHANGED Viewed

@@ -8,7 +8,7 @@ import matplotlib.pyplot as plt
 import warnings
 warnings.filterwarnings("ignore", category=RuntimeWarning)
+from .ips import df2array
 # FuncStars --v 0.1.1
 def FuncStars(
@@ -37,7 +37,7 @@ def FuncStars(
     report=None,
     report_scale=-0.1,
     report_loc=None,
-    **kwargs
+    **kwargs,
 ):
     if ax is None:
         ax = plt.gca()
@@ -53,7 +53,7 @@ def FuncStars(
     if y_loc is None:
         y_loc = np.min(ylim) + yscale * (np.max(ylim) - np.min(ylim))
     else:
-        y_loc=y_loc+(1-yscale) * np.abs(np.diff(ylim))+0.1 *y_loc
+        y_loc = y_loc + (1 - yscale) * np.abs(np.diff(ylim)) + 0.1 * y_loc
     xcenter = np.mean([x1, x2])
     if pval is not None:
         # ns / *
@@ -79,7 +79,7 @@ def FuncStars(
                 y_loc,
                 symbol,
                 ha="center",
-                va="top",#"center_baseline",
+                va="top",  # "center_baseline",
                 fontsize=fontsize,
                 fontname=fontname,
                 color=symbolcolor,
@@ -90,28 +90,31 @@ def FuncStars(
                 y_loc,
                 symbol * 2,
                 ha="center",
-                va="top",#"center_baseline",
+                va="top",  # "center_baseline",
                 fontsize=fontsize,
                 fontname=fontname,
                 color=symbolcolor,
             )
-        elif 0 < pval <= 0.001:
+        elif 0 <= pval <= 0.001:
             ax.text(
                 xcenter,
                 y_loc,
                 symbol * 3,
                 ha="center",
-                va="top",#"center_baseline",
+                va="top",  # "center_baseline",
                 fontsize=fontsize,
                 fontname=fontname,
                 color=symbolcolor,
             )
         # lines indicators
-        if linego and 0 < pval <= 0.05:
+        if linego and 0 <= pval <= 0.05:
             # horizontal line
             if yscale <= 0.99:
                 ax.plot(
-                    [x1 + np.abs(np.diff(xlim)) * 0.01, x2 - np.abs(np.diff(xlim)) * 0.01],
+                    [
+                        x1 + np.abs(np.diff(xlim)) * 0.01,
+                        x2 - np.abs(np.diff(xlim)) * 0.01,
+                    ],
                     [
                         y_loc - np.abs(np.diff(ylim)) * 0.03,
                         y_loc - np.abs(np.diff(ylim)) * 0.03,
@@ -122,7 +125,10 @@ def FuncStars(
                 )
                 # vertical line
                 ax.plot(
-                    [x1 + np.abs(np.diff(xlim)) * 0.01, x1 + np.abs(np.diff(xlim)) * 0.01],
+                    [
+                        x1 + np.abs(np.diff(xlim)) * 0.01,
+                        x1 + np.abs(np.diff(xlim)) * 0.01,
+                    ],
                     [
                         y_loc - np.abs(np.diff(ylim)) * tailindicator[0],
                         y_loc - np.abs(np.diff(ylim)) * 0.03,
@@ -132,7 +138,10 @@ def FuncStars(
                     linewidth=linewidth,
                 )
                 ax.plot(
-                    [x2 - np.abs(np.diff(xlim)) * 0.01, x2 - np.abs(np.diff(xlim)) * 0.01],
+                    [
+                        x2 - np.abs(np.diff(xlim)) * 0.01,
+                        x2 - np.abs(np.diff(xlim)) * 0.01,
+                    ],
                     [
                         y_loc - np.abs(np.diff(ylim)) * tailindicator[1],
                         y_loc - np.abs(np.diff(ylim)) * 0.03,
@@ -143,7 +152,10 @@ def FuncStars(
                 )
             else:
                 ax.plot(
-                    [x1 + np.abs(np.diff(xlim)) * 0.01, x2 - np.abs(np.diff(xlim)) * 0.01],
+                    [
+                        x1 + np.abs(np.diff(xlim)) * 0.01,
+                        x2 - np.abs(np.diff(xlim)) * 0.01,
+                    ],
                     [
                         np.min(ylim)
                         + 0.95 * (np.max(ylim) - np.min(ylim))
@@ -158,7 +170,10 @@ def FuncStars(
                 )
                 # vertical line
                 ax.plot(
-                    [x1 + np.abs(np.diff(xlim)) * 0.01, x1 + np.abs(np.diff(xlim)) * 0.01],
+                    [
+                        x1 + np.abs(np.diff(xlim)) * 0.01,
+                        x1 + np.abs(np.diff(xlim)) * 0.01,
+                    ],
                     [
                         np.min(ylim)
                         + 0.95 * (np.max(ylim) - np.min(ylim))
@@ -172,7 +187,10 @@ def FuncStars(
                     linewidth=linewidth,
                 )
                 ax.plot(
-                    [x2 - np.abs(np.diff(xlim)) * 0.01, x2 - np.abs(np.diff(xlim)) * 0.01],
+                    [
+                        x2 - np.abs(np.diff(xlim)) * 0.01,
+                        x2 - np.abs(np.diff(xlim)) * 0.01,
+                    ],
                     [
                         np.min(ylim)
                         + 0.95 * (np.max(ylim) - np.min(ylim))
@@ -312,9 +330,7 @@ def FuncCmpt(x1, x2, pmc="auto", pair="unpaired", verbose=True):
                 )
                 notes_stat = "paired t test"
                 # note: APA FORMAT
-                notes_APA = (
-                    f"t({sum([nX1-1])})={round(stat_value,3)},p={round(pval,3)}"
-                )
+                notes_APA = f"t({sum([nX1-1])})={round(stat_value,3)},p={round(pval,3)}"
         elif cfg_pmc == "non-parametric":
             if "np" in pair:  # Perform Mann-Whitney
                 stat_value, pval = stats.mannwhitneyu(
@@ -324,7 +340,9 @@ def FuncCmpt(x1, x2, pmc="auto", pair="unpaired", verbose=True):
                 if nX1 == nX2:
                     notes_APA = f"U(n={nX1})={round(stat_value,3)},p={round(pval,3)}"
                 else:
-                    notes_APA = f"U(n1={nX1},n2={nX2})={round(stat_value,3)},p={round(pval,3)}"
+                    notes_APA = (
+                        f"U(n1={nX1},n2={nX2})={round(stat_value,3)},p={round(pval,3)}"
+                    )
             elif "pa" in pair and "np" not in pair:  # Wilcoxon signed-rank test
                 stat_value, pval = stats.wilcoxon(
                     x1, x2, method="exact", nan_policy="omit"
@@ -333,7 +351,9 @@ def FuncCmpt(x1, x2, pmc="auto", pair="unpaired", verbose=True):
                 if nX1 == nX2:
                     notes_APA = f"Z(n={nX1})={round(stat_value,3)},p={round(pval,3)}"
                 else:
-                    notes_APA = f"Z(n1={nX1},n2={nX2})={round(stat_value,3)},p={round(pval,3)}"
+                    notes_APA = (
+                        f"Z(n1={nX1},n2={nX2})={round(stat_value,3)},p={round(pval,3)}"
+                    )
         # filling output
         output["stat"] = stat_value
@@ -408,7 +428,7 @@ def FuncMultiCmpt(
     subject=None,
     group=None,
     verbose=True,
-    post_hoc=False
+    post_hoc=False,
 ):
     if group is None:
         group = factor
@@ -520,12 +540,16 @@ def FuncMultiCmpt(
         if "np" in cfg_pair:  # 'unpaired'
             res_tab = run_kruskal(data, dv, factor)
             notes_stat = f"Non-parametric Kruskal: {data[factor].nunique()} Way ANOVA"
-            notes_APA = [f'H({res_tab.ddof1[0]},N={data.shape[0]})={round(res_tab.H[0],3)},p={round(res_tab["p-unc"][0],3)}']
+            notes_APA = [
+                f'H({res_tab.ddof1[0]},N={data.shape[0]})={round(res_tab.H[0],3)},p={round(res_tab["p-unc"][0],3)}'
+            ]
         elif "pa" in cfg_pair and "np" not in cfg_pair:  # 'paired'
             res_tab = run_friedman(data, dv, factor, subject, method="chisq")
             notes_stat = f"Non-parametric {data[factor].nunique()} Way Friedman repeated measures ANOVA"
-            notes_APA = [f'X^2({res_tab.ddof1[0]})={round(res_tab.Q[0],3)},p={round(res_tab["p-unc"][0],3)}']
+            notes_APA = [
+                f'X^2({res_tab.ddof1[0]})={round(res_tab.Q[0],3)},p={round(res_tab["p-unc"][0],3)}'
+            ]
     # =============================================================================
     # # Post-hoc
@@ -542,7 +566,7 @@ def FuncMultiCmpt(
     go_mix_within = factor if ("pa" in cfg_pair) or ("np" not in cfg_pair) else None
     if res_tab["p-unc"][0] <= 0.05:
-        post_hoc=True
+        post_hoc = True
     if post_hoc:
         # Pairwise Comparisons
         method_post_hoc = [
@@ -610,9 +634,9 @@ def FuncMultiCmpt(
     #     # filling output
     # =============================================================================
-    pd.set_option('display.max_columns', None)  # Show all columns
-    pd.set_option('display.max_colwidth', None)  # No limit on column width
-    pd.set_option('display.expand_frame_repr', False)  # Prevent line-wrapping
+    pd.set_option("display.max_columns", None)  # Show all columns
+    pd.set_option("display.max_colwidth", None)  # No limit on column width
+    pd.set_option("display.expand_frame_repr", False)  # Prevent line-wrapping
     output["stat"] = notes_stat
     # print(output['APA'])
@@ -627,7 +651,7 @@ def FuncMultiCmpt(
 def display_output(output: dict):
     if isinstance(output, pd.DataFrame):
         output = output.to_dict(orient="list")
-    # ['res_posthoc', 'stat', 'APA', 'pval', 'res_tab']
+    # ['res_posthoc', 'stat', 'APA', 'pval', 'res_tab']
     # ? show APA
     # print(f"\n\ndisplay stat_output")
@@ -641,7 +665,7 @@ def display_output(output: dict):
     except:
         pass
     try:
-        print(f"APA  ⤵\n{output["APA"][0]}  ⤵\npost-hoc analysis ⤵")
+        print(f"APA  ⤵\n{output['APA'][0]}  ⤵\npost-hoc analysis ⤵")
         display(output["res_posthoc"])
     except:
         pass
@@ -659,14 +683,24 @@ def corr_pair(pair):
 def check_normality(data, verbose=True):
-    stat_shapiro, pval_shapiro = stats.shapiro(data)
-    if pval_shapiro > 0.05:
+    if len(data) <= 5000:
+        # Shapiro-Wilk test is designed to test the normality of a small sample, typically less than 5000 observations.
+        stat_shapiro, pval4norm = stats.shapiro(data)
+        method = "Shapiro-Wilk test"
+    else:
+        from scipy.stats import kstest, zscore
+        data_scaled = zscore(data)  # a standard normal distribution(mean=0,sd=1)
+        stat_kstest, pval4norm = kstest(data_scaled, "norm")
+        method = "Kolmogorov–Smirnov test"
+    if pval4norm >= 0.05:
         Normality = True
     else:
         Normality = False
     if verbose:
+        print(f"'{method}' was used to test for normality")
         (
-            print(f"\n normally distributed\n")
+            print("\nnormally distributed")
             if Normality
             else print(f"\n NOT normally distributed\n")
         )
@@ -705,7 +739,7 @@ def extract_apa(res_tab):
         for irow in range(res_tab.shape[0]):
             note_tmp = f'{res_tab.Source[irow]}:F{round(res_tab.ddof1[irow]),round(res_tab.ddof2[irow])}={round(res_tab.F[irow],3)},p={round(res_tab["p-unc"][irow],3)}'
             notes_APA.append(note_tmp)
-    elif "DF" in res_tab:
+    elif "DF" in res_tab:
         for irow in range(res_tab.shape[0] - 1):
             note_tmp = f'{res_tab.Source[irow]}:F{round(res_tab.DF[irow]),round(res_tab.DF[res_tab.shape[0]-1])}={round(res_tab.F[irow],3)},p={round(res_tab["p-unc"][irow],3)}'
             notes_APA.append(note_tmp)
@@ -867,146 +901,6 @@ def df_wide_long(df):
     elif rows > columns:
         return "Long"
-def sort_rows_move_nan(arr, sort=False):
-    # Handle edge cases where all values are NaN
-    if np.all(np.isnan(arr)):
-        return arr  # Return unchanged if the entire array is NaN
-    if sort:
-        # Replace NaNs with a temporary large value for sorting
-        temp_value = (
-            np.nanmax(arr[np.isfinite(arr)]) + 1 if np.any(np.isfinite(arr)) else np.inf
-        )
-        arr_no_nan = np.where(np.isnan(arr), temp_value, arr)
-        # Sort each row
-        sorted_arr = np.sort(arr_no_nan, axis=1)
-        # Move NaNs to the end
-        result_arr = np.where(sorted_arr == temp_value, np.nan, sorted_arr)
-    else:
-        result_rows = []
-        for row in arr:
-            # Separate non-NaN and NaN values
-            non_nan_values = row[~np.isnan(row)]
-            nan_count = np.isnan(row).sum()
-            # Create a new row with non-NaN values followed by NaNs
-            new_row = np.concatenate([non_nan_values, [np.nan] * nan_count])
-            result_rows.append(new_row)
-        # Convert the list of rows back into a 2D NumPy array
-        result_arr = np.array(result_rows)
-    # Remove rows/columns that contain only NaNs
-    clean_arr = result_arr[~np.isnan(result_arr).all(axis=1)]
-    clean_arr_ = clean_arr[:, ~np.isnan(clean_arr).all(axis=0)]
-    return clean_arr_
-def df2array(data: pd.DataFrame, x=None, y=None, hue=None, sort=False):
-    if hue is None:
-        a = []
-        if sort:
-            cat_x=np.sort(data[x].unique().tolist()).tolist()
-        else:
-            cat_x = data[x].unique().tolist()
-        for i, x_ in enumerate(cat_x):
-            new_ = data.loc[data[x] == x_, y].to_list()
-            a = padcat(a, new_, axis=0)
-        return sort_rows_move_nan(a).T
-    else:
-        a = []
-        if sort:
-            cat_x = np.sort(data[x].unique().tolist()).tolist()
-            cat_hue = np.sort(data[hue].unique().tolist()).tolist()
-        else:
-            cat_x = data[x].unique().tolist()
-            cat_hue = data[hue].unique().tolist()
-        for i, x_ in enumerate(cat_x):
-            for j, hue_ in enumerate(cat_hue):
-                new_ = data.loc[(data[x] == x_) & (data[hue] == hue_), y].to_list()
-                a = padcat(a, new_, axis=0)
-        return sort_rows_move_nan(a).T
-def array2df(data: np.ndarray):
-    df = pd.DataFrame()
-    df["group"] = (
-        np.tile(
-            ["group" + str(i) for i in range(1, data.shape[1] + 1)], [data.shape[0], 1]
-        )
-        .reshape(-1, 1, order="F")[:, 0]
-        .tolist()
-    )
-    df["value"] = data.reshape(-1, 1, order="F")
-    return df
-def padcat(*args, fill_value=np.nan, axis=1, order="row"):
-    """
-    Concatenate vectors with padding.
-    Parameters:
-    *args : variable number of list or 1D arrays
-        Input arrays to concatenate.
-    fill_value : scalar, optional
-        The value to use for padding the shorter lists (default is np.nan).
-    axis : int, optional
-        The axis along which to concatenate (0 for rows, 1 for columns, default is 1).
-    order : str, optional
-        The order for flattening when required: "row" or "column" (default is "row").
-    Returns:
-    np.ndarray
-        A 2D array with the input arrays concatenated along the specified axis,
-        padded with fill_value where necessary.
-    """
-    # Set the order for processing
-    if "ro" in order.lower():
-        order = "C"  # row-major order
-    else:
-        order = "F"  # column-major order
-    # Process input arrays based on their dimensions
-    processed_arrays = []
-    for arg in args:
-        arr = np.asarray(arg)
-        if arr.ndim == 1:
-            processed_arrays.append(arr)  # Keep 1D arrays as is
-        elif arr.ndim == 2:
-            if axis == 0:
-                # If concatenating along rows, split 2D arrays into 1D arrays row-wise
-                processed_arrays.extend(arr)
-            elif axis == 1:
-                # If concatenating along columns, split 2D arrays into 1D arrays column-wise
-                processed_arrays.extend(arr.T)
-            else:
-                raise ValueError("axis must be 0 or 1")
-        else:
-            raise ValueError("Input arrays must be 1D or 2D")
-    if axis == 0:
-        # Concatenate along rows
-        max_len = max(arr.size for arr in processed_arrays)
-        result = np.full((len(processed_arrays), max_len), fill_value)
-        for i, arr in enumerate(processed_arrays):
-            result[i, : arr.size] = arr
-    elif axis == 1:
-        # Concatenate along columns
-        max_len = max(arr.size for arr in processed_arrays)
-        result = np.full((max_len, len(processed_arrays)), fill_value)
-        for i, arr in enumerate(processed_arrays):
-            result[: arr.size, i] = arr
-    else:
-        raise ValueError("axis must be 0 or 1")
-    return result
-# # Example usage:
-# a = [1, np.nan]
-# b = [1, 3, 4, np.nan, 2, np.nan]
-# c = [1, 2, 3, 4, 5, 6, 7, 8, 10]
-# d = padcat(a, b)
-# result1 = padcat(d, c)
-# result2 = padcat(a, b, c)
-# print("Result of padcat(d, c):\n", result1)
-# print("Result of padcat(a, b, c):\n", result2)

py2ls 0.1.10.12__py3-none-any.whl → 0.2.7.10__py3-none-any.whl

Potentially problematic release.

py2ls 0.1.10.12__py3-none-any.whl → 0.2.7.10__py3-none-any.whl