PyPI - py2ls - Versions diffs - 0.1.9.1__py3-none-any.whl → 0.1.9.3__py3-none-any.whl - Mend

py2ls 0.1.9.1__py3-none-any.whl → 0.1.9.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

py2ls/plot.py +147 -188
py2ls/stats.py +806 -412
{py2ls-0.1.9.1.dist-info → py2ls-0.1.9.3.dist-info}/METADATA +1 -1
{py2ls-0.1.9.1.dist-info → py2ls-0.1.9.3.dist-info}/RECORD +5 -5
{py2ls-0.1.9.1.dist-info → py2ls-0.1.9.3.dist-info}/WHEEL +0 -0

py2ls/plot.py CHANGED Viewed

@@ -10,6 +10,7 @@ from cycler import cycler
 import logging
 import os
 from .ips import fsave, fload, mkdir
+from .stats import *
 # Suppress INFO messages from fontTools
 logging.getLogger("fontTools").setLevel(logging.WARNING)
@@ -220,8 +221,11 @@ def catplot(data, *args, **kwargs):
         # MeanLine or MedianLine only keep only one
         if bx_opt["MeanLine"]:  # MeanLine has priority
             bx_opt["MedianLine"] = False
+        # rm NaNs
+        cleaned_data = [data[~np.isnan(data[:, i]), i] for i in range(data.shape[1])]
         bxp = ax.boxplot(
-            data,
+            cleaned_data,
             positions=X_bx,
             notch=bx_opt["Notch"],
             patch_artist=True,
@@ -463,6 +467,9 @@ def catplot(data, *args, **kwargs):
         # custom_order = ['s', 'bx', 'e']
         # full_order = sort_catplot_layers(custom_order)
+    ax = kwargs.get("ax", None)
+    if "ax" not in locals() or ax is None:
+        ax = plt.gca()
     col = kwargs.get("col", None)
     if not col:
         # figsets
@@ -488,7 +495,41 @@ def catplot(data, *args, **kwargs):
                 default_x_width = 0.85
                 legend_hue = df[hue].unique().tolist()
                 default_colors = get_color(hue_len)
+                # ! stats info
+                stats_param = kwargs.get("stats", False)
+                res = pd.DataFrame()  # Initialize an empty DataFrame to store results
+                ihue = 1
+                for i in df[x].unique().tolist():
+                    print(i)  # to indicate which 'x'
+                    if hue and stats_param:
+                        if isinstance(stats_param, dict):
+                            if "factor" in stats_param.keys():
+                                res_tmp = FuncMultiCmpt(data=df, dv=y, **stats_param)
+                            else:
+                                res_tmp = FuncMultiCmpt(
+                                    data=df[df[x] == i], dv=y, factor=hue, **stats_param
+                                )
+                        elif bool(stats_param):
+                            res_tmp = FuncMultiCmpt(data=df, dv=y, factor=hue)
+                        else:
+                            res_tmp = "did not work properly"
+                        display_output(res_tmp)
+                        xloc_curr = hue_len * (ihue - 1)
+                        # add_asterisks(ax,res_tmp,xticks[xloc_curr:xloc_curr+hue_len],legend_hue)
+                        # res_tmp = [{"x": i, **res_tmp}]
+                        # print("here")
+                        # df_=pd.DataFrame([res_tmp])
+                        # display(df_['pval'][0].tolist()[0])
+                        res = pd.concat(
+                            [res, pd.DataFrame([res_tmp])], ignore_index=True
+                        )
+                    ihue += 1
+                display_output(res)
             else:
+                # ! stats info
+                stats_param = kwargs.get("stats", False)
                 for i in df[x].unique().tolist():
                     xticklabels.append(i)
                 xticks = np.arange(1, len(xticklabels) + 1).tolist()
@@ -496,9 +537,17 @@ def catplot(data, *args, **kwargs):
                 legend_hue = xticklabels
                 default_colors = get_color(len(xticklabels))
                 default_x_width = 0.5
+                res = None
+                if x and stats_param:
+                    if isinstance(stats_param, dict):
+                        res = FuncMultiCmpt(data=df, dv=y, factor=x, **stats_param)
+                    elif bool(stats_param):
+                        res = FuncMultiCmpt(data=df, dv=y, factor=x)
+                    else:
+                        res = "did not work properly"
+                display_output(res)
             # when the xticklabels are too long, rotate the labels a bit
             xangle = 30 if max([len(i) for i in xticklabels]) > 50 else 0
             if kw_figsets is not None:
                 kw_figsets = {
@@ -526,9 +575,23 @@ def catplot(data, *args, **kwargs):
         # full_order
         opt = kwargs.get("opt", {})
-        ax = kwargs.get("ax", None)
-        if "ax" not in locals() or ax is None:
-            ax = plt.gca()
+        # load style:
+        style_use = None
+        for k, v in kwargs.items():
+            if "style" in k and "exp" not in k:
+                style_use = v
+                break
+        if style_use:
+            try:
+                dir_curr_script = os.path.dirname(os.path.abspath(__file__))
+                dir_style = dir_curr_script + "/data/styles/"
+                style_load = fload(dir_style + style_use + ".json")
+                style_load = remove_colors_in_dict(style_load)
+                opt.update(style_load)
+            except:
+                print(f"cannot find the style'{style_name}'")
         opt.setdefault("c", default_colors)
         # if len(opt["c"]) < data.shape[1]:
         #     additional_colors = plt.cm.winter(
@@ -654,16 +717,6 @@ def catplot(data, *args, **kwargs):
         opt["v"].setdefault("NumPoints", 500)
         opt["v"].setdefault("BoundaryCorrection", "reflection")
-        # load style:
-        style_use = kwargs.get("style_use", None)
-        if style_use:
-            try:
-                dir_curr_script = os.path.dirname(os.path.abspath(__file__))
-                dir_style = dir_curr_script + "/data/styles/"
-                style_load = fload(dir_style + style_use + ".json")
-                opt.update(style_load)
-            except:
-                print(f"cannot find the style'{style_name}'")
         data_m = np.nanmean(data, axis=0)
         nr, nc = data.shape
@@ -692,7 +745,6 @@ def catplot(data, *args, **kwargs):
             legend_which = "v"
         else:
             legend_which = None
         for layer in layers:
             if layer == "b" and opt["b"]["go"]:
                 if legend_which == "b":
@@ -721,19 +773,47 @@ def catplot(data, *args, **kwargs):
                     plot_violin(data, opt["v"], xloc, ax, label=None)
             elif all([layer == "l", opt["l"]["go"], opt["s"]["go"]]):
                 plot_lines(data, opt["l"], opt["s"], ax)
-            else:
-                print("layers run some problems")
         if kw_figsets is not None:
             figsets(ax=ax, **kw_figsets)
         show_legend = kwargs.get("show_legend", True)
         if show_legend:
             ax.legend()
+        # ! add asterisks in the plot
+        if stats_param:
+            if hue is None:
+                display(res)
+                add_asterisks(ax, res, xticks_x_loc, xticklabels)
+            else:  # hue is not None
+                ihue = 1
+                for i in df[x].unique().tolist():
+                    if hue and stats_param:
+                        if isinstance(stats_param, dict):
+                            if "factor" in stats_param.keys():
+                                res_tmp = FuncMultiCmpt(data=df, dv=y, **stats_param)
+                            else:
+                                res_tmp = FuncMultiCmpt(
+                                    data=df[df[x] == i], dv=y, factor=hue, **stats_param
+                                )
+                        elif bool(stats_param):
+                            res_tmp = FuncMultiCmpt(data=df, dv=y, factor=hue)
+                        else:
+                            res_tmp = "did not work properly"
+                        xloc_curr = hue_len * (ihue - 1)
+                        add_asterisks(
+                            ax,
+                            res_tmp,
+                            xticks[xloc_curr : xloc_curr + hue_len],
+                            legend_hue,
+                        )
+                    ihue += 1
         style_export = kwargs.get("style_export", None)
         if style_export and (style_export != style_use):
             dir_curr_script = os.path.dirname(os.path.abspath(__file__))
             dir_style = dir_curr_script + "/data/styles/"
             fsave(dir_style + style_export + ".json", opt)
         return ax, opt
     else:
         col_names = data[col].unique().tolist()
@@ -750,7 +830,10 @@ def catplot(data, *args, **kwargs):
             if i < len(col_names):
                 df_sub = data.loc[data[col] == col_names[i]]
                 _, opt = catplot(ax=ax, data=df_sub, **kwargs)
-                ax.set_title(col_names[i])
+                ax.set_title(f"{col}={col_names[i]}")
+                x_label = kwargs.get("x", None)
+                if x_label:
+                    ax.set_xlabel(x_label)
         print(f"Axis layout shape: {axs.shape}")
         return axs, opt
@@ -1530,175 +1613,6 @@ def add_colorbar(im, width=None, pad=None, **kwargs):
     return fig.colorbar(im, cax=cax, **kwargs)  # draw cbar
-# def padcat(*args, fill_value=np.nan, axis=1):
-#     """
-#     Concatenate vectors with padding.
-#     Parameters:
-#     *args : variable number of list or 1D arrays
-#         Input arrays to concatenate.
-#     fill_value : scalar, optional
-#         The value to use for padding the shorter lists (default is np.nan).
-#     axis : int, optional
-#         The axis along which to concatenate (0 for rows, 1 for columns, default is 0).
-#     Returns:
-#     np.ndarray
-#         A 2D array with the input arrays concatenated along the specified axis, padded with fill_value where necessary.
-#     """
-#     if axis == 0:
-#         # Concatenate along rows
-#         max_len = max(len(lst) for lst in args)
-#         result = np.full((len(args), max_len), fill_value)
-#         for i, lst in enumerate(args):
-#             result[i, : len(lst)] = lst
-#     elif axis == 1:
-#         # Concatenate along columns
-#         max_len = max(len(lst) for lst in args)
-#         result = np.full((max_len, len(args)), fill_value)
-#         for i, lst in enumerate(args):
-#             result[: len(lst), i] = lst
-#     else:
-#         raise ValueError("axis must be 0 or 1")
-#     return result
-import numpy as np
-def padcat(*args, fill_value=np.nan, axis=1, order="row"):
-    """
-    Concatenate vectors with padding.
-    Parameters:
-    *args : variable number of list or 1D arrays
-        Input arrays to concatenate.
-    fill_value : scalar, optional
-        The value to use for padding the shorter lists (default is np.nan).
-    axis : int, optional
-        The axis along which to concatenate (0 for rows, 1 for columns, default is 1).
-    order : str, optional
-        The order for flattening when required: "row" or "column" (default is "row").
-    Returns:
-    np.ndarray
-        A 2D array with the input arrays concatenated along the specified axis,
-        padded with fill_value where necessary.
-    """
-    # Set the order for processing
-    if "ro" in order.lower():
-        order = "C"  # row-major order
-    else:
-        order = "F"  # column-major order
-    # Process input arrays based on their dimensions
-    processed_arrays = []
-    for arg in args:
-        arr = np.asarray(arg)
-        if arr.ndim == 1:
-            processed_arrays.append(arr)  # Keep 1D arrays as is
-        elif arr.ndim == 2:
-            if axis == 0:
-                # If concatenating along rows, split 2D arrays into 1D arrays row-wise
-                processed_arrays.extend(arr)
-            elif axis == 1:
-                # If concatenating along columns, split 2D arrays into 1D arrays column-wise
-                processed_arrays.extend(arr.T)
-            else:
-                raise ValueError("axis must be 0 or 1")
-        else:
-            raise ValueError("Input arrays must be 1D or 2D")
-    if axis == 0:
-        # Concatenate along rows
-        max_len = max(arr.size for arr in processed_arrays)
-        result = np.full((len(processed_arrays), max_len), fill_value)
-        for i, arr in enumerate(processed_arrays):
-            result[i, : arr.size] = arr
-    elif axis == 1:
-        # Concatenate along columns
-        max_len = max(arr.size for arr in processed_arrays)
-        result = np.full((max_len, len(processed_arrays)), fill_value)
-        for i, arr in enumerate(processed_arrays):
-            result[: arr.size, i] = arr
-    else:
-        raise ValueError("axis must be 0 or 1")
-    return result
-# # Example usage:
-# a = [1, np.nan]
-# b = [1, 3, 4, np.nan, 2, np.nan]
-# c = [1, 2, 3, 4, 5, 6, 7, 8, 10]
-# d = padcat(a, b)
-# result1 = padcat(d, c)
-# result2 = padcat(a, b, c)
-# print("Result of padcat(d, c):\n", result1)
-# print("Result of padcat(a, b, c):\n", result2)
-def sort_rows_move_nan(arr, sort=False):
-    # Handle edge cases where all values are NaN
-    if np.all(np.isnan(arr)):
-        return arr  # Return unchanged if the entire array is NaN
-    if sort:
-        # Replace NaNs with a temporary large value for sorting
-        temp_value = (
-            np.nanmax(arr[np.isfinite(arr)]) + 1 if np.any(np.isfinite(arr)) else np.inf
-        )
-        arr_no_nan = np.where(np.isnan(arr), temp_value, arr)
-        # Sort each row
-        sorted_arr = np.sort(arr_no_nan, axis=1)
-        # Move NaNs to the end
-        result_arr = np.where(sorted_arr == temp_value, np.nan, sorted_arr)
-    else:
-        result_rows = []
-        for row in arr:
-            # Separate non-NaN and NaN values
-            non_nan_values = row[~np.isnan(row)]
-            nan_count = np.isnan(row).sum()
-            # Create a new row with non-NaN values followed by NaNs
-            new_row = np.concatenate([non_nan_values, [np.nan] * nan_count])
-            result_rows.append(new_row)
-        # Convert the list of rows back into a 2D NumPy array
-        result_arr = np.array(result_rows)
-    # Remove rows/columns that contain only NaNs
-    clean_arr = result_arr[~np.isnan(result_arr).all(axis=1)]
-    clean_arr_ = clean_arr[:, ~np.isnan(clean_arr).all(axis=0)]
-    return clean_arr_
-def df2array(data: pd.DataFrame, x, y, hue=None, sort=False):
-    if hue is None:
-        a = []
-        if sort:
-            np.sort(data[x].unique().tolist()).tolist()
-        else:
-            cat_x = data[x].unique().tolist()
-        for i, x_ in enumerate(cat_x):
-            new_ = data.loc[data[x] == x_, y].to_list()
-            a = padcat(a, new_, axis=0)
-        return sort_rows_move_nan(a).T
-    else:
-        a = []
-        if sort:
-            cat_x = np.sort(data[x].unique().tolist()).tolist()
-            cat_hue = np.sort(data[hue].unique().tolist()).tolist()
-        else:
-            cat_x = data[x].unique().tolist()
-            cat_hue = data[hue].unique().tolist()
-        for i, x_ in enumerate(cat_x):
-            for j, hue_ in enumerate(cat_hue):
-                new_ = data.loc[(data[x] == x_) & (data[hue] == hue_), y].to_list()
-                a = padcat(a, new_, axis=0)
-        return sort_rows_move_nan(a).T
 def generate_xticks_with_gap(x_len, hue_len):
     """
     Generate a concatenated array based on x_len and hue_len,
@@ -1728,3 +1642,48 @@ def generate_xticks_x_labels(x_len, hue_len):
         for i in range(max(x_len, hue_len), 0, -1)  # i iterates from 3 to 1
     ]
     return [np.mean(i) for i in arrays if np.mean(i) > 0]
+def remove_colors_in_dict(
+    data: dict, sections_to_remove_facecolor=["b", "e", "s", "bx", "v"]
+):
+    # Remove "FaceColor" from specified sections
+    for section in sections_to_remove_facecolor:
+        if section in data and ("FaceColor" in data[section]):
+            del data[section]["FaceColor"]
+    if "c" in data:
+        del data["c"]
+    if "loc" in data:
+        del data["loc"]
+    return data
+def add_asterisks(ax, res, xticks_x_loc, xticklabels, **kwargs_funcstars):
+    pval_groups = res["res_tab"]["PR(>F)"].tolist()[0]
+    # print(f"p=:{pval_groups}")
+    # print(f"xticks:{xticks}")
+    # print(f"xticks_x_loc:{xticks_x_loc}")
+    if pval_groups <= 0.05:
+        A_list = res["res_posthoc"]["A"].tolist()
+        B_list = res["res_posthoc"]["B"].tolist()
+        xticklabels_array = np.array(xticklabels)
+        yscal_ = 0.99
+        for A, B, P in zip(
+            res["res_posthoc"]["A"].tolist(),
+            res["res_posthoc"]["B"].tolist(),
+            res["res_posthoc"]["p-unc"].tolist(),
+        ):
+            index_A = np.where(xticklabels_array == A)[0][0]
+            index_B = np.where(xticklabels_array == B)[0][0]
+            print(index_A, A, index_B, B, P)
+            FuncStars(
+                ax=ax,
+                x1=xticks_x_loc[index_A],
+                x2=xticks_x_loc[index_B],
+                pval=P,
+                yscale=yscal_,
+                **kwargs_funcstars,
+            )
+            if P <= 0.05:
+                yscal_ -= 0.1

py2ls 0.1.9.1__py3-none-any.whl → 0.1.9.3__py3-none-any.whl

py2ls 0.1.9.1__py3-none-any.whl → 0.1.9.3__py3-none-any.whl