py2ls 0.1.9.1__py3-none-any.whl → 0.1.9.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
py2ls/plot.py CHANGED
@@ -10,6 +10,7 @@ from cycler import cycler
10
10
  import logging
11
11
  import os
12
12
  from .ips import fsave, fload, mkdir
13
+ from .stats import *
13
14
 
14
15
  # Suppress INFO messages from fontTools
15
16
  logging.getLogger("fontTools").setLevel(logging.WARNING)
@@ -220,8 +221,11 @@ def catplot(data, *args, **kwargs):
220
221
  # MeanLine or MedianLine only keep only one
221
222
  if bx_opt["MeanLine"]: # MeanLine has priority
222
223
  bx_opt["MedianLine"] = False
224
+ # rm NaNs
225
+ cleaned_data = [data[~np.isnan(data[:, i]), i] for i in range(data.shape[1])]
226
+
223
227
  bxp = ax.boxplot(
224
- data,
228
+ cleaned_data,
225
229
  positions=X_bx,
226
230
  notch=bx_opt["Notch"],
227
231
  patch_artist=True,
@@ -463,6 +467,9 @@ def catplot(data, *args, **kwargs):
463
467
  # custom_order = ['s', 'bx', 'e']
464
468
  # full_order = sort_catplot_layers(custom_order)
465
469
 
470
+ ax = kwargs.get("ax", None)
471
+ if "ax" not in locals() or ax is None:
472
+ ax = plt.gca()
466
473
  col = kwargs.get("col", None)
467
474
  if not col:
468
475
  # figsets
@@ -488,7 +495,41 @@ def catplot(data, *args, **kwargs):
488
495
  default_x_width = 0.85
489
496
  legend_hue = df[hue].unique().tolist()
490
497
  default_colors = get_color(hue_len)
498
+
499
+ # ! stats info
500
+ stats_param = kwargs.get("stats", False)
501
+ res = pd.DataFrame() # Initialize an empty DataFrame to store results
502
+ ihue = 1
503
+ for i in df[x].unique().tolist():
504
+ print(i) # to indicate which 'x'
505
+ if hue and stats_param:
506
+ if isinstance(stats_param, dict):
507
+ if "factor" in stats_param.keys():
508
+ res_tmp = FuncMultiCmpt(data=df, dv=y, **stats_param)
509
+ else:
510
+ res_tmp = FuncMultiCmpt(
511
+ data=df[df[x] == i], dv=y, factor=hue, **stats_param
512
+ )
513
+ elif bool(stats_param):
514
+ res_tmp = FuncMultiCmpt(data=df, dv=y, factor=hue)
515
+ else:
516
+ res_tmp = "did not work properly"
517
+ display_output(res_tmp)
518
+ xloc_curr = hue_len * (ihue - 1)
519
+ # add_asterisks(ax,res_tmp,xticks[xloc_curr:xloc_curr+hue_len],legend_hue)
520
+ # res_tmp = [{"x": i, **res_tmp}]
521
+ # print("here")
522
+ # df_=pd.DataFrame([res_tmp])
523
+ # display(df_['pval'][0].tolist()[0])
524
+ res = pd.concat(
525
+ [res, pd.DataFrame([res_tmp])], ignore_index=True
526
+ )
527
+ ihue += 1
528
+ display_output(res)
529
+
491
530
  else:
531
+ # ! stats info
532
+ stats_param = kwargs.get("stats", False)
492
533
  for i in df[x].unique().tolist():
493
534
  xticklabels.append(i)
494
535
  xticks = np.arange(1, len(xticklabels) + 1).tolist()
@@ -496,9 +537,17 @@ def catplot(data, *args, **kwargs):
496
537
  legend_hue = xticklabels
497
538
  default_colors = get_color(len(xticklabels))
498
539
  default_x_width = 0.5
540
+ res = None
541
+ if x and stats_param:
542
+ if isinstance(stats_param, dict):
543
+ res = FuncMultiCmpt(data=df, dv=y, factor=x, **stats_param)
544
+ elif bool(stats_param):
545
+ res = FuncMultiCmpt(data=df, dv=y, factor=x)
546
+ else:
547
+ res = "did not work properly"
548
+ display_output(res)
499
549
 
500
550
  # when the xticklabels are too long, rotate the labels a bit
501
-
502
551
  xangle = 30 if max([len(i) for i in xticklabels]) > 50 else 0
503
552
  if kw_figsets is not None:
504
553
  kw_figsets = {
@@ -526,9 +575,23 @@ def catplot(data, *args, **kwargs):
526
575
 
527
576
  # full_order
528
577
  opt = kwargs.get("opt", {})
529
- ax = kwargs.get("ax", None)
530
- if "ax" not in locals() or ax is None:
531
- ax = plt.gca()
578
+
579
+ # load style:
580
+ style_use = None
581
+ for k, v in kwargs.items():
582
+ if "style" in k and "exp" not in k:
583
+ style_use = v
584
+ break
585
+ if style_use:
586
+ try:
587
+ dir_curr_script = os.path.dirname(os.path.abspath(__file__))
588
+ dir_style = dir_curr_script + "/data/styles/"
589
+ style_load = fload(dir_style + style_use + ".json")
590
+ style_load = remove_colors_in_dict(style_load)
591
+ opt.update(style_load)
592
+ except:
593
+ print(f"cannot find the style'{style_name}'")
594
+
532
595
  opt.setdefault("c", default_colors)
533
596
  # if len(opt["c"]) < data.shape[1]:
534
597
  # additional_colors = plt.cm.winter(
@@ -654,16 +717,6 @@ def catplot(data, *args, **kwargs):
654
717
  opt["v"].setdefault("NumPoints", 500)
655
718
  opt["v"].setdefault("BoundaryCorrection", "reflection")
656
719
 
657
- # load style:
658
- style_use = kwargs.get("style_use", None)
659
- if style_use:
660
- try:
661
- dir_curr_script = os.path.dirname(os.path.abspath(__file__))
662
- dir_style = dir_curr_script + "/data/styles/"
663
- style_load = fload(dir_style + style_use + ".json")
664
- opt.update(style_load)
665
- except:
666
- print(f"cannot find the style'{style_name}'")
667
720
  data_m = np.nanmean(data, axis=0)
668
721
  nr, nc = data.shape
669
722
 
@@ -692,7 +745,6 @@ def catplot(data, *args, **kwargs):
692
745
  legend_which = "v"
693
746
  else:
694
747
  legend_which = None
695
-
696
748
  for layer in layers:
697
749
  if layer == "b" and opt["b"]["go"]:
698
750
  if legend_which == "b":
@@ -721,19 +773,47 @@ def catplot(data, *args, **kwargs):
721
773
  plot_violin(data, opt["v"], xloc, ax, label=None)
722
774
  elif all([layer == "l", opt["l"]["go"], opt["s"]["go"]]):
723
775
  plot_lines(data, opt["l"], opt["s"], ax)
724
- else:
725
- print("layers run some problems")
776
+
726
777
  if kw_figsets is not None:
727
778
  figsets(ax=ax, **kw_figsets)
728
779
  show_legend = kwargs.get("show_legend", True)
729
780
  if show_legend:
730
781
  ax.legend()
731
782
 
783
+ # ! add asterisks in the plot
784
+ if stats_param:
785
+ if hue is None:
786
+ display(res)
787
+ add_asterisks(ax, res, xticks_x_loc, xticklabels)
788
+ else: # hue is not None
789
+ ihue = 1
790
+ for i in df[x].unique().tolist():
791
+ if hue and stats_param:
792
+ if isinstance(stats_param, dict):
793
+ if "factor" in stats_param.keys():
794
+ res_tmp = FuncMultiCmpt(data=df, dv=y, **stats_param)
795
+ else:
796
+ res_tmp = FuncMultiCmpt(
797
+ data=df[df[x] == i], dv=y, factor=hue, **stats_param
798
+ )
799
+ elif bool(stats_param):
800
+ res_tmp = FuncMultiCmpt(data=df, dv=y, factor=hue)
801
+ else:
802
+ res_tmp = "did not work properly"
803
+ xloc_curr = hue_len * (ihue - 1)
804
+ add_asterisks(
805
+ ax,
806
+ res_tmp,
807
+ xticks[xloc_curr : xloc_curr + hue_len],
808
+ legend_hue,
809
+ )
810
+ ihue += 1
732
811
  style_export = kwargs.get("style_export", None)
733
812
  if style_export and (style_export != style_use):
734
813
  dir_curr_script = os.path.dirname(os.path.abspath(__file__))
735
814
  dir_style = dir_curr_script + "/data/styles/"
736
815
  fsave(dir_style + style_export + ".json", opt)
816
+
737
817
  return ax, opt
738
818
  else:
739
819
  col_names = data[col].unique().tolist()
@@ -750,7 +830,10 @@ def catplot(data, *args, **kwargs):
750
830
  if i < len(col_names):
751
831
  df_sub = data.loc[data[col] == col_names[i]]
752
832
  _, opt = catplot(ax=ax, data=df_sub, **kwargs)
753
- ax.set_title(col_names[i])
833
+ ax.set_title(f"{col}={col_names[i]}")
834
+ x_label = kwargs.get("x", None)
835
+ if x_label:
836
+ ax.set_xlabel(x_label)
754
837
  print(f"Axis layout shape: {axs.shape}")
755
838
  return axs, opt
756
839
 
@@ -1530,175 +1613,6 @@ def add_colorbar(im, width=None, pad=None, **kwargs):
1530
1613
  return fig.colorbar(im, cax=cax, **kwargs) # draw cbar
1531
1614
 
1532
1615
 
1533
- # def padcat(*args, fill_value=np.nan, axis=1):
1534
- # """
1535
- # Concatenate vectors with padding.
1536
-
1537
- # Parameters:
1538
- # *args : variable number of list or 1D arrays
1539
- # Input arrays to concatenate.
1540
- # fill_value : scalar, optional
1541
- # The value to use for padding the shorter lists (default is np.nan).
1542
- # axis : int, optional
1543
- # The axis along which to concatenate (0 for rows, 1 for columns, default is 0).
1544
-
1545
- # Returns:
1546
- # np.ndarray
1547
- # A 2D array with the input arrays concatenated along the specified axis, padded with fill_value where necessary.
1548
- # """
1549
- # if axis == 0:
1550
- # # Concatenate along rows
1551
- # max_len = max(len(lst) for lst in args)
1552
- # result = np.full((len(args), max_len), fill_value)
1553
- # for i, lst in enumerate(args):
1554
- # result[i, : len(lst)] = lst
1555
- # elif axis == 1:
1556
- # # Concatenate along columns
1557
- # max_len = max(len(lst) for lst in args)
1558
- # result = np.full((max_len, len(args)), fill_value)
1559
- # for i, lst in enumerate(args):
1560
- # result[: len(lst), i] = lst
1561
- # else:
1562
- # raise ValueError("axis must be 0 or 1")
1563
-
1564
- # return result
1565
- import numpy as np
1566
-
1567
-
1568
def padcat(*args, fill_value=np.nan, axis=1, order="row"):
    """
    Concatenate vectors into a single 2D array, padding the shorter ones.

    Parameters:
        *args : variable number of lists, 1D arrays or 2D arrays
            Inputs to concatenate. A 2D input is split into its rows
            (axis=0) or its columns (axis=1) before concatenation.
        fill_value : scalar, optional
            The value used to pad the shorter vectors (default is np.nan).
        axis : int, optional
            0 stacks every vector as a row, 1 stacks every vector as a
            column (default is 1).
        order : str, optional
            Kept for backward compatibility only; it does not influence
            the result.

    Returns:
        np.ndarray
            A 2D array holding one vector per row (axis=0) or per column
            (axis=1), padded with fill_value where necessary.

    Raises:
        ValueError
            If axis is not 0 or 1, if an input is neither 1D nor 2D, or if
            there is nothing to concatenate.
    """
    # Fail fast on a bad axis instead of discovering it after the work is done.
    if axis not in (0, 1):
        raise ValueError("axis must be 0 or 1")

    # Flatten every input into a list of 1D vectors.
    vectors = []
    for arg in args:
        arr = np.asarray(arg)
        if arr.ndim == 1:
            vectors.append(arr)
        elif arr.ndim == 2:
            # Rows when stacking along axis 0, columns when stacking along axis 1.
            vectors.extend(arr if axis == 0 else arr.T)
        else:
            raise ValueError("Input arrays must be 1D or 2D")

    if not vectors:
        raise ValueError("padcat() needs at least one input to concatenate")

    # Build the row-wise layout once; the column-wise result is its transpose.
    max_len = max(vec.size for vec in vectors)
    padded = np.full((len(vectors), max_len), fill_value)
    for i, vec in enumerate(vectors):
        padded[i, : vec.size] = vec

    if axis == 0:
        return padded
    return np.ascontiguousarray(padded.T)
1627
-
1628
-
1629
- # # Example usage:
1630
- # a = [1, np.nan]
1631
- # b = [1, 3, 4, np.nan, 2, np.nan]
1632
- # c = [1, 2, 3, 4, 5, 6, 7, 8, 10]
1633
- # d = padcat(a, b)
1634
- # result1 = padcat(d, c)
1635
- # result2 = padcat(a, b, c)
1636
- # print("Result of padcat(d, c):\n", result1)
1637
- # print("Result of padcat(a, b, c):\n", result2)
1638
-
1639
-
1640
def sort_rows_move_nan(arr, sort=False):
    """
    Push the NaNs of every row to the end, then drop all-NaN rows/columns.

    Parameters:
        arr : 2D array-like of numbers
            The data to compact; NaN marks a missing value.
        sort : bool, optional
            If True, each row is additionally sorted in ascending order.
            If False (default), the non-NaN values keep their original order.

    Returns:
        np.ndarray
            A float array without rows or columns made only of NaN. When the
            input contains nothing but NaN (or is empty) it is returned
            unchanged.
    """
    values = np.asarray(arr, dtype=float)
    nan_mask = np.isnan(values)

    # Nothing to compact when the whole input is NaN: hand it back untouched.
    if nan_mask.all():
        return arr

    if sort:
        # np.sort already places NaN after every number (including inf), so no
        # placeholder value is needed; a "max + 1" placeholder collides with
        # real data when max + 1 == max (large floats) or when inf is present.
        compacted = np.sort(values, axis=1)
    else:
        # A stable argsort of the NaN mask moves NaNs right while preserving
        # the relative order of the real values.
        keep_order = np.argsort(nan_mask, axis=1, kind="stable")
        compacted = np.take_along_axis(values, keep_order, axis=1)

    # Remove rows, then columns, that contain only NaNs.
    compacted = compacted[~np.isnan(compacted).all(axis=1)]
    return compacted[:, ~np.isnan(compacted).all(axis=0)]
1674
-
1675
-
1676
def df2array(data: pd.DataFrame, x, y, hue=None, sort=False):
    """
    Reshape a long-format DataFrame into a 2D array with one column per group.

    Parameters:
        data : pd.DataFrame
            Long-format table holding the columns named by x, y and hue.
        x : column label
            Column whose unique values define the groups.
        y : column label
            Column holding the values collected for every group.
        hue : column label, optional
            Second grouping column; when given, one group is built for each
            (x, hue) combination, iterating x first and hue second.
        sort : bool, optional
            If True the unique values of x (and hue) are sorted, otherwise
            they are used in order of first appearance.

    Returns:
        np.ndarray
            The transposed output of sort_rows_move_nan applied to the padded
            groups, i.e. one column per remaining group.
    """

    def _levels(column):
        # Unique values of a column, optionally sorted.
        found = data[column].unique().tolist()
        return np.sort(found).tolist() if sort else found

    # The sorted levels are now actually bound for the hue-less case too;
    # previously the sorted list was computed and thrown away, leaving cat_x
    # undefined when sort=True and hue=None.
    cat_x = _levels(x)

    if hue is None:
        groups = [data.loc[data[x] == x_, y].to_list() for x_ in cat_x]
    else:
        cat_hue = _levels(hue)
        groups = [
            data.loc[(data[x] == x_) & (data[hue] == hue_), y].to_list()
            for x_ in cat_x
            for hue_ in cat_hue
        ]

    # One row per group, padded with NaN to the longest group.
    # NOTE(review): sort_rows_move_nan drops rows that are entirely NaN, so a
    # group without any value disappears from the output - confirm callers
    # do not rely on a fixed column position per group.
    return sort_rows_move_nan(padcat(*groups, axis=0)).T
1700
-
1701
-
1702
1616
  def generate_xticks_with_gap(x_len, hue_len):
1703
1617
  """
1704
1618
  Generate a concatenated array based on x_len and hue_len,
@@ -1728,3 +1642,48 @@ def generate_xticks_x_labels(x_len, hue_len):
1728
1642
  for i in range(max(x_len, hue_len), 0, -1) # i iterates from 3 to 1
1729
1643
  ]
1730
1644
  return [np.mean(i) for i in arrays if np.mean(i) > 0]
1645
+
1646
+
1647
def remove_colors_in_dict(
    data: dict, sections_to_remove_facecolor=("b", "e", "s", "bx", "v")
):
    """
    Strip color- and position-related entries from a style dictionary.

    The dictionary is modified in place and also returned.

    Parameters:
        data : dict
            Style options; may hold one nested dict per section key.
        sections_to_remove_facecolor : iterable of str, optional
            Section keys whose nested "FaceColor" entry is removed.

    Returns:
        dict
            The same object as data, without the "FaceColor" entries of the
            listed sections and without the top-level "c" and "loc" keys.
    """
    # Remove "FaceColor" from the specified sections. Sections that are
    # missing or are not dicts (e.g. None or a flag) are left untouched
    # instead of raising.
    for section in sections_to_remove_facecolor:
        section_opts = data.get(section)
        if isinstance(section_opts, dict):
            section_opts.pop("FaceColor", None)

    # Drop the top-level color list and location entries if present.
    data.pop("c", None)
    data.pop("loc", None)
    return data
1660
+
1661
+
1662
def add_asterisks(ax, res, xticks_x_loc, xticklabels, **kwargs_funcstars):
    """
    Annotate pairwise comparisons on ax when the overall test is significant.

    Parameters:
        ax : axes object forwarded to FuncStars.
        res : mapping with the entries "res_tab" and "res_posthoc"
            "res_tab" must provide a "PR(>F)" column and "res_posthoc" the
            columns "A", "B" and "p-unc" (presumably the output of
            FuncMultiCmpt - verify against that function). A string or None
            is accepted and ignored, because callers in this file fall back
            to a plain message when no result is available.
        xticks_x_loc : sequence
            x positions, indexed in parallel with xticklabels.
        xticklabels : sequence
            Group labels used to locate the "A" and "B" entries.
        **kwargs_funcstars :
            Extra keyword arguments forwarded to FuncStars.

    Returns:
        None
    """
    # No usable statistics (e.g. the "did not work properly" sentinel).
    if res is None or isinstance(res, str):
        return

    pval_groups = res["res_tab"]["PR(>F)"].tolist()[0]
    # Written as "not <=" so that a NaN p-value is treated as non-significant.
    if not pval_groups <= 0.05:
        return

    posthoc = res["res_posthoc"]
    labels = np.array(xticklabels)
    yscal_ = 0.99
    for A, B, P in zip(
        posthoc["A"].tolist(),
        posthoc["B"].tolist(),
        posthoc["p-unc"].tolist(),
    ):
        hits_A = np.where(labels == A)[0]
        hits_B = np.where(labels == B)[0]
        # A pair whose label is not on this axis cannot be positioned; skip it
        # rather than abort the whole annotation with an IndexError.
        if hits_A.size == 0 or hits_B.size == 0:
            print(f"cannot locate '{A}' or '{B}' in xticklabels, skipped")
            continue
        index_A, index_B = hits_A[0], hits_B[0]
        print(index_A, A, index_B, B, P)
        FuncStars(
            ax=ax,
            x1=xticks_x_loc[index_A],
            x2=xticks_x_loc[index_B],
            pval=P,
            yscale=yscal_,
            **kwargs_funcstars,
        )
        # Lower the next marker after each significant pair so they do not overlap.
        if P <= 0.05:
            yscal_ -= 0.1