py2ls 0.1.9.1__py3-none-any.whl → 0.1.9.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
py2ls/plot.py
CHANGED
@@ -10,6 +10,7 @@ from cycler import cycler
|
|
10
10
|
import logging
|
11
11
|
import os
|
12
12
|
from .ips import fsave, fload, mkdir
|
13
|
+
from .stats import *
|
13
14
|
|
14
15
|
# Suppress INFO messages from fontTools
|
15
16
|
logging.getLogger("fontTools").setLevel(logging.WARNING)
|
@@ -220,8 +221,11 @@ def catplot(data, *args, **kwargs):
|
|
220
221
|
# MeanLine or MedianLine only keep only one
|
221
222
|
if bx_opt["MeanLine"]: # MeanLine has priority
|
222
223
|
bx_opt["MedianLine"] = False
|
224
|
+
# rm NaNs
|
225
|
+
cleaned_data = [data[~np.isnan(data[:, i]), i] for i in range(data.shape[1])]
|
226
|
+
|
223
227
|
bxp = ax.boxplot(
|
224
|
-
|
228
|
+
cleaned_data,
|
225
229
|
positions=X_bx,
|
226
230
|
notch=bx_opt["Notch"],
|
227
231
|
patch_artist=True,
|
@@ -463,6 +467,9 @@ def catplot(data, *args, **kwargs):
|
|
463
467
|
# custom_order = ['s', 'bx', 'e']
|
464
468
|
# full_order = sort_catplot_layers(custom_order)
|
465
469
|
|
470
|
+
ax = kwargs.get("ax", None)
|
471
|
+
if "ax" not in locals() or ax is None:
|
472
|
+
ax = plt.gca()
|
466
473
|
col = kwargs.get("col", None)
|
467
474
|
if not col:
|
468
475
|
# figsets
|
@@ -488,7 +495,41 @@ def catplot(data, *args, **kwargs):
|
|
488
495
|
default_x_width = 0.85
|
489
496
|
legend_hue = df[hue].unique().tolist()
|
490
497
|
default_colors = get_color(hue_len)
|
498
|
+
|
499
|
+
# ! stats info
|
500
|
+
stats_param = kwargs.get("stats", False)
|
501
|
+
res = pd.DataFrame() # Initialize an empty DataFrame to store results
|
502
|
+
ihue = 1
|
503
|
+
for i in df[x].unique().tolist():
|
504
|
+
print(i) # to indicate which 'x'
|
505
|
+
if hue and stats_param:
|
506
|
+
if isinstance(stats_param, dict):
|
507
|
+
if "factor" in stats_param.keys():
|
508
|
+
res_tmp = FuncMultiCmpt(data=df, dv=y, **stats_param)
|
509
|
+
else:
|
510
|
+
res_tmp = FuncMultiCmpt(
|
511
|
+
data=df[df[x] == i], dv=y, factor=hue, **stats_param
|
512
|
+
)
|
513
|
+
elif bool(stats_param):
|
514
|
+
res_tmp = FuncMultiCmpt(data=df, dv=y, factor=hue)
|
515
|
+
else:
|
516
|
+
res_tmp = "did not work properly"
|
517
|
+
display_output(res_tmp)
|
518
|
+
xloc_curr = hue_len * (ihue - 1)
|
519
|
+
# add_asterisks(ax,res_tmp,xticks[xloc_curr:xloc_curr+hue_len],legend_hue)
|
520
|
+
# res_tmp = [{"x": i, **res_tmp}]
|
521
|
+
# print("here")
|
522
|
+
# df_=pd.DataFrame([res_tmp])
|
523
|
+
# display(df_['pval'][0].tolist()[0])
|
524
|
+
res = pd.concat(
|
525
|
+
[res, pd.DataFrame([res_tmp])], ignore_index=True
|
526
|
+
)
|
527
|
+
ihue += 1
|
528
|
+
display_output(res)
|
529
|
+
|
491
530
|
else:
|
531
|
+
# ! stats info
|
532
|
+
stats_param = kwargs.get("stats", False)
|
492
533
|
for i in df[x].unique().tolist():
|
493
534
|
xticklabels.append(i)
|
494
535
|
xticks = np.arange(1, len(xticklabels) + 1).tolist()
|
@@ -496,9 +537,17 @@ def catplot(data, *args, **kwargs):
|
|
496
537
|
legend_hue = xticklabels
|
497
538
|
default_colors = get_color(len(xticklabels))
|
498
539
|
default_x_width = 0.5
|
540
|
+
res = None
|
541
|
+
if x and stats_param:
|
542
|
+
if isinstance(stats_param, dict):
|
543
|
+
res = FuncMultiCmpt(data=df, dv=y, factor=x, **stats_param)
|
544
|
+
elif bool(stats_param):
|
545
|
+
res = FuncMultiCmpt(data=df, dv=y, factor=x)
|
546
|
+
else:
|
547
|
+
res = "did not work properly"
|
548
|
+
display_output(res)
|
499
549
|
|
500
550
|
# when the xticklabels are too long, rotate the labels a bit
|
501
|
-
|
502
551
|
xangle = 30 if max([len(i) for i in xticklabels]) > 50 else 0
|
503
552
|
if kw_figsets is not None:
|
504
553
|
kw_figsets = {
|
@@ -526,9 +575,23 @@ def catplot(data, *args, **kwargs):
|
|
526
575
|
|
527
576
|
# full_order
|
528
577
|
opt = kwargs.get("opt", {})
|
529
|
-
|
530
|
-
|
531
|
-
|
578
|
+
|
579
|
+
# load style:
|
580
|
+
style_use = None
|
581
|
+
for k, v in kwargs.items():
|
582
|
+
if "style" in k and "exp" not in k:
|
583
|
+
style_use = v
|
584
|
+
break
|
585
|
+
if style_use:
|
586
|
+
try:
|
587
|
+
dir_curr_script = os.path.dirname(os.path.abspath(__file__))
|
588
|
+
dir_style = dir_curr_script + "/data/styles/"
|
589
|
+
style_load = fload(dir_style + style_use + ".json")
|
590
|
+
style_load = remove_colors_in_dict(style_load)
|
591
|
+
opt.update(style_load)
|
592
|
+
except:
|
593
|
+
print(f"cannot find the style'{style_name}'")
|
594
|
+
|
532
595
|
opt.setdefault("c", default_colors)
|
533
596
|
# if len(opt["c"]) < data.shape[1]:
|
534
597
|
# additional_colors = plt.cm.winter(
|
@@ -654,16 +717,6 @@ def catplot(data, *args, **kwargs):
|
|
654
717
|
opt["v"].setdefault("NumPoints", 500)
|
655
718
|
opt["v"].setdefault("BoundaryCorrection", "reflection")
|
656
719
|
|
657
|
-
# load style:
|
658
|
-
style_use = kwargs.get("style_use", None)
|
659
|
-
if style_use:
|
660
|
-
try:
|
661
|
-
dir_curr_script = os.path.dirname(os.path.abspath(__file__))
|
662
|
-
dir_style = dir_curr_script + "/data/styles/"
|
663
|
-
style_load = fload(dir_style + style_use + ".json")
|
664
|
-
opt.update(style_load)
|
665
|
-
except:
|
666
|
-
print(f"cannot find the style'{style_name}'")
|
667
720
|
data_m = np.nanmean(data, axis=0)
|
668
721
|
nr, nc = data.shape
|
669
722
|
|
@@ -692,7 +745,6 @@ def catplot(data, *args, **kwargs):
|
|
692
745
|
legend_which = "v"
|
693
746
|
else:
|
694
747
|
legend_which = None
|
695
|
-
|
696
748
|
for layer in layers:
|
697
749
|
if layer == "b" and opt["b"]["go"]:
|
698
750
|
if legend_which == "b":
|
@@ -721,19 +773,47 @@ def catplot(data, *args, **kwargs):
|
|
721
773
|
plot_violin(data, opt["v"], xloc, ax, label=None)
|
722
774
|
elif all([layer == "l", opt["l"]["go"], opt["s"]["go"]]):
|
723
775
|
plot_lines(data, opt["l"], opt["s"], ax)
|
724
|
-
|
725
|
-
print("layers run some problems")
|
776
|
+
|
726
777
|
if kw_figsets is not None:
|
727
778
|
figsets(ax=ax, **kw_figsets)
|
728
779
|
show_legend = kwargs.get("show_legend", True)
|
729
780
|
if show_legend:
|
730
781
|
ax.legend()
|
731
782
|
|
783
|
+
# ! add asterisks in the plot
|
784
|
+
if stats_param:
|
785
|
+
if hue is None:
|
786
|
+
display(res)
|
787
|
+
add_asterisks(ax, res, xticks_x_loc, xticklabels)
|
788
|
+
else: # hue is not None
|
789
|
+
ihue = 1
|
790
|
+
for i in df[x].unique().tolist():
|
791
|
+
if hue and stats_param:
|
792
|
+
if isinstance(stats_param, dict):
|
793
|
+
if "factor" in stats_param.keys():
|
794
|
+
res_tmp = FuncMultiCmpt(data=df, dv=y, **stats_param)
|
795
|
+
else:
|
796
|
+
res_tmp = FuncMultiCmpt(
|
797
|
+
data=df[df[x] == i], dv=y, factor=hue, **stats_param
|
798
|
+
)
|
799
|
+
elif bool(stats_param):
|
800
|
+
res_tmp = FuncMultiCmpt(data=df, dv=y, factor=hue)
|
801
|
+
else:
|
802
|
+
res_tmp = "did not work properly"
|
803
|
+
xloc_curr = hue_len * (ihue - 1)
|
804
|
+
add_asterisks(
|
805
|
+
ax,
|
806
|
+
res_tmp,
|
807
|
+
xticks[xloc_curr : xloc_curr + hue_len],
|
808
|
+
legend_hue,
|
809
|
+
)
|
810
|
+
ihue += 1
|
732
811
|
style_export = kwargs.get("style_export", None)
|
733
812
|
if style_export and (style_export != style_use):
|
734
813
|
dir_curr_script = os.path.dirname(os.path.abspath(__file__))
|
735
814
|
dir_style = dir_curr_script + "/data/styles/"
|
736
815
|
fsave(dir_style + style_export + ".json", opt)
|
816
|
+
|
737
817
|
return ax, opt
|
738
818
|
else:
|
739
819
|
col_names = data[col].unique().tolist()
|
@@ -750,7 +830,10 @@ def catplot(data, *args, **kwargs):
|
|
750
830
|
if i < len(col_names):
|
751
831
|
df_sub = data.loc[data[col] == col_names[i]]
|
752
832
|
_, opt = catplot(ax=ax, data=df_sub, **kwargs)
|
753
|
-
ax.set_title(col_names[i])
|
833
|
+
ax.set_title(f"{col}={col_names[i]}")
|
834
|
+
x_label = kwargs.get("x", None)
|
835
|
+
if x_label:
|
836
|
+
ax.set_xlabel(x_label)
|
754
837
|
print(f"Axis layout shape: {axs.shape}")
|
755
838
|
return axs, opt
|
756
839
|
|
@@ -1530,175 +1613,6 @@ def add_colorbar(im, width=None, pad=None, **kwargs):
|
|
1530
1613
|
return fig.colorbar(im, cax=cax, **kwargs) # draw cbar
|
1531
1614
|
|
1532
1615
|
|
1533
|
-
# def padcat(*args, fill_value=np.nan, axis=1):
|
1534
|
-
# """
|
1535
|
-
# Concatenate vectors with padding.
|
1536
|
-
|
1537
|
-
# Parameters:
|
1538
|
-
# *args : variable number of list or 1D arrays
|
1539
|
-
# Input arrays to concatenate.
|
1540
|
-
# fill_value : scalar, optional
|
1541
|
-
# The value to use for padding the shorter lists (default is np.nan).
|
1542
|
-
# axis : int, optional
|
1543
|
-
# The axis along which to concatenate (0 for rows, 1 for columns, default is 0).
|
1544
|
-
|
1545
|
-
# Returns:
|
1546
|
-
# np.ndarray
|
1547
|
-
# A 2D array with the input arrays concatenated along the specified axis, padded with fill_value where necessary.
|
1548
|
-
# """
|
1549
|
-
# if axis == 0:
|
1550
|
-
# # Concatenate along rows
|
1551
|
-
# max_len = max(len(lst) for lst in args)
|
1552
|
-
# result = np.full((len(args), max_len), fill_value)
|
1553
|
-
# for i, lst in enumerate(args):
|
1554
|
-
# result[i, : len(lst)] = lst
|
1555
|
-
# elif axis == 1:
|
1556
|
-
# # Concatenate along columns
|
1557
|
-
# max_len = max(len(lst) for lst in args)
|
1558
|
-
# result = np.full((max_len, len(args)), fill_value)
|
1559
|
-
# for i, lst in enumerate(args):
|
1560
|
-
# result[: len(lst), i] = lst
|
1561
|
-
# else:
|
1562
|
-
# raise ValueError("axis must be 0 or 1")
|
1563
|
-
|
1564
|
-
# return result
|
1565
|
-
import numpy as np
|
1566
|
-
|
1567
|
-
|
1568
|
-
def padcat(*args, fill_value=np.nan, axis=1, order="row"):
    """
    Concatenate vectors of different lengths into one padded 2D array.

    Parameters:
    *args : variable number of lists, 1D arrays or 2D arrays
        Inputs to concatenate. A 2D input is split into its rows (axis=0)
        or its columns (axis=1) before padding.
    fill_value : scalar, optional
        The value used to pad the shorter vectors (default is np.nan).
    axis : int, optional
        0 stacks every vector as a row, 1 stacks every vector as a column
        (default is 1).
    order : str, optional
        Accepted for backward compatibility only; it does not influence
        the result.

    Returns:
    np.ndarray
        A 2D array holding the vectors along the requested axis, padded with
        fill_value where necessary. With nothing to concatenate the result
        has shape (0, 0).

    Raises:
    ValueError
        If axis is not 0 or 1, or an input has more than two dimensions
        (or is a scalar).
    """
    if axis not in (0, 1):
        raise ValueError("axis must be 0 or 1")

    # Flatten every input into a list of 1D vectors.
    vectors = []
    for arg in args:
        arr = np.asarray(arg)
        if arr.ndim == 1:
            vectors.append(arr)
        elif arr.ndim == 2:
            # Iterating a 2D array yields rows; transpose first to get columns.
            vectors.extend(arr if axis == 0 else arr.T)
        else:
            raise ValueError("Input arrays must be 1D or 2D")

    # Choose a dtype that can hold both the data and the padding. Deriving it
    # from fill_value alone (np.full's default) silently truncates float data
    # when an integer fill_value is given.
    fill_dtype = np.asarray(fill_value).dtype
    numeric_kinds = "biufc"
    if fill_dtype.kind in numeric_kinds and all(
        vec.dtype.kind in numeric_kinds for vec in vectors
    ):
        dtype = np.result_type(fill_dtype, *(vec.dtype for vec in vectors))
    else:
        dtype = fill_dtype

    max_len = max((vec.size for vec in vectors), default=0)
    if axis == 0:
        result = np.full((len(vectors), max_len), fill_value, dtype=dtype)
        for i, vec in enumerate(vectors):
            result[i, : vec.size] = vec
    else:
        result = np.full((max_len, len(vectors)), fill_value, dtype=dtype)
        for i, vec in enumerate(vectors):
            result[: vec.size, i] = vec

    return result
|
1627
|
-
|
1628
|
-
|
1629
|
-
# # Example usage:
|
1630
|
-
# a = [1, np.nan]
|
1631
|
-
# b = [1, 3, 4, np.nan, 2, np.nan]
|
1632
|
-
# c = [1, 2, 3, 4, 5, 6, 7, 8, 10]
|
1633
|
-
# d = padcat(a, b)
|
1634
|
-
# result1 = padcat(d, c)
|
1635
|
-
# result2 = padcat(a, b, c)
|
1636
|
-
# print("Result of padcat(d, c):\n", result1)
|
1637
|
-
# print("Result of padcat(a, b, c):\n", result2)
|
1638
|
-
|
1639
|
-
|
1640
|
-
def sort_rows_move_nan(arr, sort=False):
    """
    Move the NaNs of every row to the end, then drop all-NaN rows and columns.

    Parameters:
    arr : 2D array-like of numbers
        Input data; NaN marks missing entries.
    sort : bool, optional
        If True, the values of each row are also sorted in ascending order.
        If False (default), the non-NaN values keep their original order.

    Returns:
    np.ndarray
        The rearranged array without the rows and columns that contain only
        NaN. If every entry is NaN (or the input is empty) the input is
        returned as an array without further cleaning.
    """
    arr = np.asarray(arr)
    nan_mask = np.isnan(arr)

    # Nothing to rearrange when the whole array is NaN.
    if np.all(nan_mask):
        return arr

    if sort:
        # np.sort places NaN after every other value (inf included), so no
        # sentinel is needed. A "max + 1" sentinel collides with the real
        # maximum for large floats and would turn it into NaN.
        packed = np.sort(arr, axis=1)
    else:
        # A stable argsort of the boolean mask lists the non-NaN positions
        # first, in their original order, followed by the NaN positions.
        positions = np.argsort(nan_mask, axis=1, kind="stable")
        packed = np.take_along_axis(arr, positions, axis=1)

    # Remove rows, then columns, that contain only NaNs.
    packed = packed[~np.isnan(packed).all(axis=1)]
    return packed[:, ~np.isnan(packed).all(axis=0)]
|
1674
|
-
|
1675
|
-
|
1676
|
-
def df2array(data: pd.DataFrame, x, y, hue=None, sort=False):
    """
    Collect the y values of each category into the columns of a 2D array.

    Parameters:
    data : pd.DataFrame
        Long-format table holding the columns named by x, y and hue.
    x : column label
        Column whose unique values define the groups.
    y : column label
        Column holding the values to collect.
    hue : column label, optional
        Second grouping column. When given, one group is built for every
        (x, hue) combination, with x varying slowest.
    sort : bool, optional
        If True, the categories of x (and hue) are ordered with np.sort;
        otherwise they keep their order of first appearance.

    Returns:
    np.ndarray
        One column per group, padded with NaN to the longest group. The array
        is cleaned by sort_rows_move_nan before transposing, which removes
        all-NaN groups, so combinations without data do not get a column.
    """

    def _levels(column):
        # Unique categories of a column, optionally sorted.
        values = data[column].unique().tolist()
        return np.sort(values).tolist() if sort else values

    # The sorted categories must be assigned in every branch; discarding them
    # left cat_x unbound for hue=None with sort=True.
    cat_x = _levels(x)
    if hue is None:
        groups = [data.loc[data[x] == x_, y].to_list() for x_ in cat_x]
    else:
        cat_hue = _levels(hue)
        groups = [
            data.loc[(data[x] == x_) & (data[hue] == hue_), y].to_list()
            for x_ in cat_x
            for hue_ in cat_hue
        ]

    # Pad all groups in one call instead of rebuilding the array per group.
    # The leading empty list is the seed the incremental build started from;
    # it keeps the call valid when there are no categories at all.
    a = padcat([], *groups, axis=0)
    return sort_rows_move_nan(a).T
|
1700
|
-
|
1701
|
-
|
1702
1616
|
def generate_xticks_with_gap(x_len, hue_len):
|
1703
1617
|
"""
|
1704
1618
|
Generate a concatenated array based on x_len and hue_len,
|
@@ -1728,3 +1642,48 @@ def generate_xticks_x_labels(x_len, hue_len):
|
|
1728
1642
|
for i in range(max(x_len, hue_len), 0, -1) # i iterates from 3 to 1
|
1729
1643
|
]
|
1730
1644
|
return [np.mean(i) for i in arrays if np.mean(i) > 0]
|
1645
|
+
|
1646
|
+
|
1647
|
+
def remove_colors_in_dict(
    data: dict, sections_to_remove_facecolor=("b", "e", "s", "bx", "v")
):
    """
    Strip the colour-related entries from a style dictionary, in place.

    Parameters:
    data : dict
        Style options. It is modified in place.
    sections_to_remove_facecolor : iterable of str, optional
        Keys of the nested sections whose "FaceColor" entry is removed.
        Sections that are missing, or whose value is not a dict, are left
        untouched.

    Returns:
    dict
        The same data object, without the "FaceColor" entries of the listed
        sections and without the top-level "c" and "loc" keys.
    """
    for section in sections_to_remove_facecolor:
        settings = data.get(section)
        # Only dict sections can carry a "FaceColor" key; a None or string
        # value would otherwise raise TypeError on the membership test/delete.
        if isinstance(settings, dict):
            settings.pop("FaceColor", None)

    for key in ("c", "loc"):
        data.pop(key, None)
    return data
|
1660
|
+
|
1661
|
+
|
1662
|
+
def add_asterisks(ax, res, xticks_x_loc, xticklabels, **kwargs_funcstars):
    """
    Annotate pairwise post-hoc comparisons on a categorical plot.

    Nothing is drawn unless the first "PR(>F)" value of res["res_tab"] is
    at most 0.05. Otherwise FuncStars is called once for every row of
    res["res_posthoc"].

    Parameters:
    ax : axes object
        Passed through to FuncStars as ax.
    res : mapping
        Must provide "res_tab" (with a "PR(>F)" column) and "res_posthoc"
        (with "A", "B" and "p-unc" columns). catplot passes the value
        returned by FuncMultiCmpt here.
    xticks_x_loc : sequence
        x position of every group, indexed in the same order as xticklabels.
    xticklabels : sequence
        Group labels; the "A"/"B" entries are matched against these.
    **kwargs_funcstars
        Extra keyword arguments forwarded unchanged to FuncStars.

    Raises:
    IndexError
        If an "A" or "B" label does not occur in xticklabels.
    """
    pval_groups = res["res_tab"]["PR(>F)"].tolist()[0]
    # Written as a negated "<=" so that a NaN p-value also skips the drawing.
    if not (pval_groups <= 0.05):
        return

    posthoc = res["res_posthoc"]
    xticklabels_array = np.array(xticklabels)
    yscal_ = 0.99
    for A, B, P in zip(
        posthoc["A"].tolist(),
        posthoc["B"].tolist(),
        posthoc["p-unc"].tolist(),
    ):
        index_A = np.where(xticklabels_array == A)[0][0]
        index_B = np.where(xticklabels_array == B)[0][0]
        print(index_A, A, index_B, B, P)
        FuncStars(
            ax=ax,
            x1=xticks_x_loc[index_A],
            x2=xticks_x_loc[index_B],
            pval=P,
            yscale=yscal_,
            **kwargs_funcstars,
        )
        # Lower the next annotation after a significant pair
        # (presumably so the markers do not overlap).
        if P <= 0.05:
            yscal_ -= 0.1
|