py2ls 0.1.4.9__py3-none-any.whl → 0.1.5.0__py3-none-any.whl

This diff shows the changes between publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
py2ls/ips.py CHANGED
@@ -1,8 +1,3 @@
- from scipy.ndimage import convolve1d
- from scipy.signal import savgol_filter
- import pingouin as pg
- from scipy import stats
-
  import numpy as np
  import pandas as pd
 
@@ -11,7 +6,7 @@ import matplotlib
  import matplotlib.pyplot as plt
  import matplotlib.ticker as tck
  from mpl_toolkits.mplot3d import Axes3D
- import seaborn as sns
+ # import seaborn as sns
 
  import sys, os,shutil,re, yaml,json
  from cycler import cycler
@@ -1483,120 +1478,6 @@ def figsave(*args,dpi=300):
  print(f'\nSaved @: dpi={dpi}\n{fname}')
 
 
- # ==============FuncStars(ax,x1=1,x2=2, yscale=0.9, pval=0.01)====================================================
- # Usage:
- # FuncStars(ax, x1=2, x2=3, yscale=0.99, pval=0.02)
- # =============================================================================
-
- # FuncStars --v 0.1.1
- def FuncStars(ax,
- pval=None,
- Ylim=None,
- Xlim=None,
- symbol='*',
- yscale=0.95,
- x1=0,
- x2=1,
- alpha=0.05,
- fontsize=14,
- fontsize_note=6,
- rotation=0,
- fontname='Arial',
- values_below=None,
- linego=True,
- linestyle='-',
- linecolor='k',
- linewidth=.8,
- nsshow='off',
- symbolcolor='k',
- tailindicator=[0.06, 0.06],
- report=None,
- report_scale=-0.1,
- report_loc=None):
- if ax is None:
- ax = plt.gca()
- if Ylim is None:
- Ylim = plt.gca().get_ylim()
- if Xlim is None:
- Xlim = ax.get_xlim()
- if report_loc is None and report is not None:
- report_loc = np.min(Ylim) + report_scale*np.abs(np.diff(Ylim))
- if report_scale > 0:
- report_scale = -np.abs(report_scale)
- yscale = np.float64(yscale)
- y_loc = np.min(Ylim) + yscale*(np.max(Ylim)-np.min(Ylim))
- xcenter = np.mean([x1, x2])
- # ns / *
- if alpha < pval:
- if nsshow == 'on':
- ns_str = f'p={round(pval, 3)}' if pval < 0.9 else 'ns'
- color = 'm' if pval < 0.1 else 'k'
- plt.text(xcenter, y_loc, ns_str,
- ha='center', va='bottom', # 'center_baseline',
- fontsize=fontsize-6 if fontsize > 6 else fontsize,
- fontname=fontname, color=color, rotation=rotation
- # bbox=dict(facecolor=None, edgecolor=None, color=None, linewidth=None)
- )
- elif 0.01 < pval <= alpha:
- plt.text(xcenter, y_loc, symbol,
- ha='center', va='center_baseline',
- fontsize=fontsize, fontname=fontname, color=symbolcolor)
- elif 0.001 < pval <= 0.01:
- plt.text(xcenter, y_loc, symbol * 2,
- ha='center', va='center_baseline',
- fontsize=fontsize, fontname=fontname, color=symbolcolor)
- elif 0 < pval <= 0.001:
- plt.text(xcenter, y_loc, symbol * 3,
- ha='center', va='center_baseline',
- fontsize=fontsize, fontname=fontname, color=symbolcolor)
- # lines indicators
- if linego: # and 0 < pval <= 0.05:
- print(pval)
- print(linego)
- # horizontal line
- if yscale < 0.99:
- plt.plot([x1 + np.abs(np.diff(Xlim)) * 0.01,
- x2 - np.abs(np.diff(Xlim)) * 0.01],
- [y_loc - np.abs(np.diff(Ylim)) * .03,
- y_loc - np.abs(np.diff(Ylim)) * .03],
- linestyle=linestyle, color=linecolor, linewidth=linewidth)
- # vertical line
- plt.plot([x1 + np.abs(np.diff(Xlim)) * 0.01,
- x1 + np.abs(np.diff(Xlim)) * 0.01],
- [y_loc - np.abs(np.diff(Ylim)) * tailindicator[0],
- y_loc - np.abs(np.diff(Ylim)) * .03],
- linestyle=linestyle, color=linecolor, linewidth=linewidth)
- plt.plot([x2 - np.abs(np.diff(Xlim)) * 0.01,
- x2 - np.abs(np.diff(Xlim)) * 0.01],
- [y_loc - np.abs(np.diff(Ylim)) * tailindicator[1],
- y_loc - np.abs(np.diff(Ylim)) * .03],
- linestyle=linestyle, color=linecolor, linewidth=linewidth)
- else:
- plt.plot([x1 + np.abs(np.diff(Xlim)) * 0.01,
- x2 - np.abs(np.diff(Xlim)) * 0.01],
- [np.min(Ylim) + 0.95*(np.max(Ylim)-np.min(Ylim)) - np.abs(np.diff(Ylim)) * 0.002,
- np.min(Ylim) + 0.95*(np.max(Ylim)-np.min(Ylim)) - np.abs(np.diff(Ylim)) * 0.002],
- linestyle=linestyle, color=linecolor, linewidth=linewidth)
- # vertical line
- plt.plot([x1 + np.abs(np.diff(Xlim)) * 0.01,
- x1 + np.abs(np.diff(Xlim)) * 0.01],
- [np.min(Ylim) + 0.95*(np.max(Ylim)-np.min(Ylim)) - np.abs(np.diff(Ylim)) * tailindicator[0],
- np.min(Ylim) + 0.95*(np.max(Ylim)-np.min(Ylim)) - np.abs(np.diff(Ylim)) * 0.002],
- linestyle=linestyle, color=linecolor, linewidth=linewidth)
- plt.plot([x2 - np.abs(np.diff(Xlim)) * 0.01,
- x2 - np.abs(np.diff(Xlim)) * 0.01],
- [np.min(Ylim) + 0.95*(np.max(Ylim)-np.min(Ylim)) - np.abs(np.diff(Ylim)) * tailindicator[1],
- np.min(Ylim) + 0.95*(np.max(Ylim)-np.min(Ylim)) - np.abs(np.diff(Ylim)) * 0.002],
- linestyle=linestyle, color=linecolor, linewidth=linewidth)
- if values_below is not None:
- plt.text(xcenter, y_loc * (-0.1), values_below,
- ha='center', va='bottom', # 'center_baseline', rotation=rotation,
- fontsize=fontsize_note, fontname=fontname, color='k')
- # report / comments
- if report is not None:
- plt.text(xcenter, report_loc, report,
- ha='left', va='bottom', # 'center_baseline', rotation=rotation,
- fontsize=fontsize_note, fontname=fontname, color='.7')
  def is_str_color(s):
  # Regular expression pattern for hexadecimal color codes
  color_code_pattern = r"^#([A-Fa-f0-9]{6}|[A-Fa-f0-9]{8})$"
@@ -1643,163 +1524,6 @@ def is_zip(fpath):
  return True
  else:
  return False
-
- def stdshade(ax=None,*args, **kwargs):
- if (
- isinstance(ax, np.ndarray)
- and ax.ndim == 2
- and min(ax.shape) > 1
- and max(ax.shape) > 1
- ):
- y = ax
- ax = plt.gca()
- if ax is None:
- ax = plt.gca()
- alpha = 0.5
- acolor = "k"
- paraStdSem = "sem"
- plotStyle = "-"
- plotMarker = "none"
- smth = 1
- l_c_one = ["r", "g", "b", "m", "c", "y", "k", "w"]
- l_style2 = ["--", "-."]
- l_style1 = ["-", ":"]
- l_mark = ["o", "+", "*", ".", "x", "_", "|", "s", "d", "^", "v", ">", "<", "p", "h"]
- # Check each argument
- for iarg in range(len(args)):
- if (
- isinstance(args[iarg], np.ndarray)
- and args[iarg].ndim == 2
- and min(args[iarg].shape) > 1
- and max(args[iarg].shape) > 1
- ):
- y = args[iarg]
- # Except y, continuous data is 'F'
- if (isinstance(args[iarg], np.ndarray) and args[iarg].ndim == 1) or isinstance(
- args[iarg], range
- ):
- x = args[iarg]
- if isinstance(x, range):
- x = np.arange(start=x.start, stop=x.stop, step=x.step)
- # Only one number( 0~1), 'alpha' / color
- if isinstance(args[iarg], (int, float)):
- if np.size(args[iarg]) == 1 and 0 <= args[iarg] <= 1:
- alpha = args[iarg]
- if isinstance(args[iarg], (list, tuple)) and np.size(args[iarg]) == 3:
- acolor = args[iarg]
- acolor = tuple(acolor) if isinstance(acolor, list) else acolor
- # Color / plotStyle /
- if (
- isinstance(args[iarg], str)
- and len(args[iarg]) == 1
- and args[iarg] in l_c_one
- ):
- acolor = args[iarg]
- else:
- if isinstance(args[iarg], str):
- if args[iarg] in ["sem", "std"]:
- paraStdSem = args[iarg]
- if args[iarg].startswith("#"):
- acolor=hue2rgb(args[iarg])
- if str2list(args[iarg])[0] in l_c_one:
- if len(args[iarg]) == 3:
- k = [i for i in str2list(args[iarg]) if i in l_c_one]
- if k != []:
- acolor = k[0]
- st = [i for i in l_style2 if i in args[iarg]]
- if st != []:
- plotStyle = st[0]
- elif len(args[iarg]) == 2:
- k = [i for i in str2list(args[iarg]) if i in l_c_one]
- if k != []:
- acolor = k[0]
- mk = [i for i in str2list(args[iarg]) if i in l_mark]
- if mk != []:
- plotMarker = mk[0]
- st = [i for i in l_style1 if i in args[iarg]]
- if st != []:
- plotStyle = st[0]
- if len(args[iarg]) == 1:
- k = [i for i in str2list(args[iarg]) if i in l_c_one]
- if k != []:
- acolor = k[0]
- mk = [i for i in str2list(args[iarg]) if i in l_mark]
- if mk != []:
- plotMarker = mk[0]
- st = [i for i in l_style1 if i in args[iarg]]
- if st != []:
- plotStyle = st[0]
- if len(args[iarg]) == 2:
- st = [i for i in l_style2 if i in args[iarg]]
- if st != []:
- plotStyle = st[0]
- # smth
- if (
- isinstance(args[iarg], (int, float))
- and np.size(args[iarg]) == 1
- and args[iarg] >= 1
- ):
- smth = args[iarg]
-
- if "x" not in locals() or x is None:
- x = np.arange(1, y.shape[1] + 1)
- elif len(x) < y.shape[1]:
- y = y[:, x]
- nRow = y.shape[0]
- nCol = y.shape[1]
- print(f"y was corrected, please confirm that {nRow} row, {nCol} col")
- else:
- x = np.arange(1, y.shape[1] + 1)
-
- if x.shape[0] != 1:
- x = x.T
- yMean = np.nanmean(y, axis=0)
- if smth > 1:
- yMean = savgol_filter(np.nanmean(y, axis=0), smth, 1)
- else:
- yMean = np.nanmean(y, axis=0)
- if paraStdSem == "sem":
- if smth > 1:
- wings = savgol_filter(np.nanstd(y, axis=0) / np.sqrt(y.shape[0]), smth, 1)
- else:
- wings = np.nanstd(y, axis=0) / np.sqrt(y.shape[0])
- elif paraStdSem == "std":
- if smth > 1:
- wings = savgol_filter(np.nanstd(y, axis=0), smth, 1)
- else:
- wings = np.nanstd(y, axis=0)
-
- fill_kws = kwargs.get('fill_kws', {})
- line_kws = kwargs.get('line_kws', {})
- fill = ax.fill_between(x, yMean + wings, yMean - wings, color=acolor, alpha=alpha, lw=0,**fill_kws)
- if line_kws != {} and not any(key.lower() in ['lw', 'linewidth'] for key in line_kws.keys()):
- line = ax.plot(x, yMean, color=acolor, lw=1.5, ls=plotStyle, marker=plotMarker, **line_kws)
- else:
- line = ax.plot(x, yMean, color=acolor, ls=plotStyle, marker=plotMarker, **line_kws)
- return line[0], fill
- # =============================================================================
- # # for plot figures {Qiu et al.2023}
- # =============================================================================
- # =============================================================================
- # plt.rcParams.update({'figure.max_open_warning': 0})
- # # Output matplotlib figure to SVG with text as text, not curves
- # plt.rcParams['svg.fonttype'] = 'none'
- # plt.rcParams['pdf.fonttype'] = 42
- #
- # plt.rc('text', usetex=False)
- # # plt.style.use('ggplot')
- # plt.style.use('science')
- # plt.rc('font', family='serif')
- # plt.rcParams.update({
- # "font.family": "serif", # specify font family here
- # "font.serif": ["Arial"], # specify font here
- # "font.size": 11})
- # # plt.tight_layout()
- # =============================================================================
- # =============================================================================
- # # axis spine
- # # use it like: adjust_spines(ax, ['left', 'bottom'])
- # =============================================================================
 
 
  def adjust_spines(ax=None, spines=['left', 'bottom'],distance=2):
@@ -1842,688 +1566,6 @@ def add_colorbar(im, width=None, pad=None, **kwargs):
  # =============================================================================
 
 
- def FuncCmpt(X1, X2, pmc='auto', pair='unpaired'):
- # output = {}
-
- # pmc correction: 'parametric'/'non-parametric'/'auto'
- # meawhile get the opposite setting (to compare the results)
- def corr_pmc(pmc):
- cfg_pmc = None
- if pmc.lower() in {'pmc', 'parametric'} and pmc.lower() not in {'npmc', 'nonparametric', 'non-parametric'}:
- cfg_pmc = 'parametric'
- elif pmc.lower() in {'npmc', 'nonparametric', 'non-parametric'} and pmc.lower() not in {'pmc', 'parametric'}:
- cfg_pmc = 'non-parametric'
- else:
- cfg_pmc = 'auto'
- return cfg_pmc
-
- def corr_pair(pair):
- cfg_pair = None
- if 'pa' in pair.lower() and 'np' not in pair.lower():
- cfg_pair = 'paired'
- elif 'np' in pair.lower():
- cfg_pair = 'unpaired'
- return cfg_pair
-
- def check_normality(data):
- stat_shapiro, pval_shapiro = stats.shapiro(data)
- if pval_shapiro > 0.05:
- Normality = True
- else:
- Normality = False
- print(f'\n normally distributed\n') if Normality else print(
- f'\n NOT normally distributed\n')
- return Normality
-
- def sub_cmpt_2group(X1, X2, cfg_pmc='pmc', pair='unpaired'):
- output = {}
- nX1 = np.sum(~np.isnan(X1))
- nX2 = np.sum(~np.isnan(X2))
- if cfg_pmc == 'parametric' or cfg_pmc == 'auto':
- # VarType correction by checking variance Type via "levene"
- stat_lev, pval_lev = stats.levene(
- X1, X2, center='median', proportiontocut=0.05)
- VarType = True if pval_lev > 0.05 and nX1 == nX2 else False
-
- if 'np' in pair: # 'unpaired'
- if VarType and Normality:
- # The independent t-test requires that the dependent variable is approximately normally
- # distributed within each group
- # Note: Technically, it is the residuals that need to be normally distributed, but for
- # an independent t-test, both will give you the same result.
- stat_value, pval= stats.ttest_ind(
- X1, X2, axis=0, equal_var=True, nan_policy='omit', alternative='two-sided')
- notes_stat = 'unpaired t test'
- notes_APA = f't({nX1+nX2-2})={round(stat_value, 5)},p={round(pval, 5)}'
- else:
- # If the Levene's Test for Equality of Variances is statistically significant,
- # which indicates that the group variances are unequal in the population, you
- # can correct for this violation by not using the pooled estimate for the error
- # term for the t-statistic, but instead using an adjustment to the degrees of
- # freedom using the Welch-Satterthwaite method
- stat_value, pval= stats.ttest_ind(
- X1, X2, axis=0, equal_var=False, nan_policy='omit', alternative='two-sided')
- notes_stat = 'Welchs t-test'
- # note: APA FORMAT
- notes_APA = f't({nX1+nX2-2})={round(stat_value, 5)},p={round(pval, 5)}'
- elif 'pa' in pair and 'np' not in pair: # 'paired'
- # the paired-samples t-test is considered “robust” in handling violations of normality
- # to some extent. It can still yield valid results even if the data is not normally
- # distributed. Therefore, this test typically requires only approximately normal data
- stat_value, pval= stats.ttest_rel(
- X1, X2, axis=0, nan_policy='omit', alternative='two-sided')
- notes_stat = 'paired t test'
- # note: APA FORMAT
- notes_APA = f't({sum([nX1-1])})={round(stat_value, 5)},p={round(pval, 5)}'
- elif cfg_pmc == 'non-parametric':
- if 'np' in pair: # Perform Mann-Whitney
- stat_value, pval = stats.mannwhitneyu(
- X1, X2, method='exact', nan_policy='omit')
- notes_stat = 'Mann-Whitney U'
- if nX1 == nX2:
- notes_APA = f'U(n={nX1})={round(stat_value, 5)},p={round(pval, 5)}'
- else:
- notes_APA = f'U(n1={nX1},n2={nX2})={round(stat_value, 5)},p={round(pval, 5)}'
- elif 'pa' in pair and 'np' not in pair: # Wilcoxon signed-rank test
- stat_value, pval = stats.wilcoxon(
- X1, X2, method='exact', nan_policy='omit')
- notes_stat = 'Wilcoxon signed-rank'
- if nX1 == nX2:
- notes_APA = f'Z(n={nX1})={round(stat_value, 5)},p={round(pval, 5)}'
- else:
- notes_APA = f'Z(n1={nX1},n2={nX2})={round(stat_value, 5)},p={round(pval, 5)}'
-
- # filling output
- output['stat'] = stat_value
- output['pval'] = pval
- output['method'] = notes_stat
- output['APA'] = notes_APA
-
- print(f"{output['method']}\n {notes_APA}\n\n")
-
- return output, pval
-
- Normality1 = check_normality(X1)
- Normality2 = check_normality(X2)
- Normality = True if all([Normality1, Normality2]) else False
-
- nX1 = np.sum(~np.isnan(X1))
- nX2 = np.sum(~np.isnan(X2))
-
- cfg_pmc = corr_pmc(pmc)
- cfg_pair = corr_pair(pair)
-
- output, p = sub_cmpt_2group(
- X1, X2, cfg_pmc=cfg_pmc, pair=cfg_pair)
- return p, output
-
- # ======compare 2 group test===================================================
- # # Example
- # X1 = [19, 22, 16, 29, 24]
- # X2 = [20, 11, 17, 12, 22]
-
- # p, res= FuncCmpt(X1, X2, pmc='pmc', pair='unparrr')
-
- # =============================================================================
-
1969
- # =============================================================================
1970
- # # method = ['anova', # 'One-way and N-way ANOVA',
1971
- # # 'rm_anova', # 'One-way and two-way repeated measures ANOVA',
1972
- # # 'mixed_anova', # 'Two way mixed ANOVA',
1973
- # # 'welch_anova', # 'One-way Welch ANOVA',
1974
- # # 'kruskal', # 'Non-parametric one-way ANOVA'
1975
- # # 'friedman', # Non-parametric one-way repeated measures ANOVA
1976
- # # ]
1977
- # =============================================================================
1978
-
1979
-
1980
- # =============================================================================
1981
- # # method = ['anova', # 'One-way and N-way ANOVA',
1982
- # # 'rm_anova', # 'One-way and two-way repeated measures ANOVA',
1983
- # # 'mixed_anova', # 'Two way mixed ANOVA',
1984
- # # 'welch_anova', # 'One-way Welch ANOVA',
1985
- # # 'kruskal', # 'Non-parametric one-way ANOVA'
1986
- # # 'friedman', # Non-parametric one-way repeated measures ANOVA
1987
- # # ]
1988
- # =============================================================================
1989
- def df_wide_long(df):
1990
- rows, columns = df.shape
1991
- if columns > rows:
1992
- return "Wide"
1993
- elif rows > columns:
1994
- return "Long"
1995
-
1996
- def FuncMultiCmpt(pmc='pmc', pair='unpair', data=None, dv=None, factor=None,
1997
- ss_type=2, detailed=True, effsize='np2',
1998
- correction='auto', between=None, within=None,
1999
- subject=None, group=None
2000
- ):
2001
-
2002
- def corr_pair(pair):
2003
- cfg_pair = None
2004
- if 'pa' in pair.lower() and 'np' not in pair.lower():
2005
- cfg_pair = 'paired'
2006
- elif 'np' in pair.lower():
2007
- cfg_pair = 'unpaired'
2008
- elif 'mix' in pair.lower():
2009
- cfg_pair = 'mix'
2010
- return cfg_pair
2011
-
2012
- def check_normality(data):
2013
- stat_shapiro, pval_shapiro = stats.shapiro(data)
2014
- if pval_shapiro > 0.05:
2015
- Normality = True
2016
- else:
2017
- Normality = False
2018
- print(f'\n normally distributed\n') if Normality else print(
2019
- f'\n NOT normally distributed\n')
2020
- return Normality
2021
-
2022
- def corr_pmc(pmc):
2023
- cfg_pmc = None
2024
- if pmc.lower() in {'pmc', 'parametric'} and pmc.lower() not in {'upmc', 'npmc', 'nonparametric', 'non-parametric'}:
2025
- cfg_pmc = 'parametric'
2026
- elif pmc.lower() in {'upmc', 'npmc', 'nonparametric', 'non-parametric'} and pmc.lower() not in {'pmc', 'parametric'}:
2027
- cfg_pmc = 'non-parametric'
2028
- else:
2029
- cfg_pmc = 'auto'
2030
- return cfg_pmc
2031
-
2032
- def extract_apa(res_tab):
2033
- notes_APA = []
2034
- if "ddof1" in res_tab:
2035
- for irow in range(res_tab.shape[0]):
2036
- note_tmp = f'{res_tab.Source[irow]}:F{round(res_tab.ddof1[irow]),round(res_tab.ddof2[irow])}={round(res_tab.F[irow], 5)},p={round(res_tab["p-unc"][irow], 5)}'
2037
- notes_APA.append([note_tmp])
2038
- elif "DF" in res_tab:
2039
- print(res_tab.shape[0])
2040
- for irow in range(res_tab.shape[0]-1):
2041
- note_tmp = f'{res_tab.Source[irow]}:F{round(res_tab.DF[irow]),round(res_tab.DF[res_tab.shape[0]-1])}={round(res_tab.F[irow], 5)},p={round(res_tab["p-unc"][irow], 5)}'
2042
- notes_APA.append([note_tmp])
2043
- notes_APA.append(['NaN'])
2044
- elif "DF1" in res_tab: # in 'mix' case
2045
- for irow in range(res_tab.shape[0]):
2046
- note_tmp = f'{res_tab.Source[irow]}:F{round(res_tab.DF1[irow]),round(res_tab.DF2[irow])}={round(res_tab.F[irow], 5)},p={round(res_tab["p-unc"][irow], 5)}'
2047
- notes_APA.append([note_tmp])
2048
- return notes_APA
2049
-
2050
- def anovatable(res_tab):
2051
- if 'df' in res_tab: # statsmodels
2052
- res_tab['mean_sq'] = res_tab[:]['sum_sq']/res_tab[:]['df']
2053
- res_tab['est_sq'] = res_tab[:-1]['sum_sq'] / \
2054
- sum(res_tab['sum_sq'])
2055
- res_tab['omega_sq'] = (res_tab[:-1]['sum_sq']-(res_tab[:-1]['df'] *
2056
- res_tab['mean_sq'][-1]))/(sum(res_tab['sum_sq'])+res_tab['mean_sq'][-1])
2057
- elif 'DF' in res_tab:
2058
- res_tab['MS'] = res_tab[:]['SS']/res_tab[:]['DF']
2059
- res_tab['est_sq'] = res_tab[:-1]['SS']/sum(res_tab['SS'])
2060
- res_tab['omega_sq'] = (res_tab[:-1]['SS']-(res_tab[:-1]['DF'] *
2061
- res_tab['MS'][1]))/(sum(res_tab['SS'])+res_tab['MS'][1])
2062
- if 'p-unc' in res_tab:
2063
- if 'np2' in res_tab:
2064
- res_tab['est_sq'] = res_tab['np2']
2065
- if 'p-unc' in res_tab:
2066
- res_tab['PR(>F)'] = res_tab['p-unc']
2067
- return res_tab
2068
-
2069
- def run_anova(data, dv, factor, ss_type=2, detailed=True, effsize='np2'):
2070
- # perform ANOVA
2071
- # =============================================================================
2072
- # # # ANOVA (input: formula, dataset)
2073
- # =============================================================================
2074
- # # note: if the data is balanced (equal sample size for each group), Type 1, 2, and 3 sums of squares
2075
- # # (typ parameter) will produce similar results.
2076
- # lm = ols("values ~ C(group)", data=df).fit()
2077
- # res_tab = anova_lm(lm, typ=ss_type)
2078
-
2079
- # # however, it does not provide any effect size measures to tell if the
2080
- # # statistical significance is meaningful. The function below calculates
2081
- # # eta-squared () and omega-squared (). A quick note, is the exact same
2082
- # # thing as except when coming from the ANOVA framework people call it ;
2083
- # # is considered a better measure of effect size since it is unbiased in
2084
- # # it's calculation by accounting for the degrees of freedom in the model.
2085
- # # note: No effect sizes are calculated when using statsmodels.
2086
- # # to calculate eta squared, use the sum of squares from the table
2087
- # res_tab = anovatable(res_tab)
2088
-
2089
- # =============================================================================
2090
- # # alternativ for ANOVA
2091
- # =============================================================================
2092
- res_tab = pg.anova(dv=dv, between=factor, data=data,
2093
- detailed=detailed, ss_type=ss_type, effsize=effsize)
2094
- res_tab = anovatable(res_tab)
2095
- return res_tab
2096
-
2097
- def run_rmanova(data, dv, factor, subject, correction='auto', detailed=True, effsize='ng2'):
2098
- # One-way repeated-measures ANOVA using a long-format dataset.
2099
- res_tab = pg.rm_anova(data=data, dv=dv, within=factor,
2100
- subject=subject, detailed=detailed, effsize=effsize)
2101
- return res_tab
2102
-
2103
- def run_welchanova(data, dv, factor):
2104
- # When the groups are balanced and have equal variances, the optimal
2105
- # post-hoc test is the Tukey-HSD test (pingouin.pairwise_tukey()). If the
2106
- # groups have unequal variances, the Games-Howell test is more adequate
2107
- # (pingouin.pairwise_gameshowell()). Results have been tested against R.
2108
- res_tab = pg.welch_anova(data=data, dv=dv, between=factor)
2109
- res_tab = anovatable(res_tab)
2110
- return res_tab
2111
-
2112
- def run_mixedanova(data, dv, between, within, subject, correction='auto', effsize='np2'):
2113
- # Notes
2114
- # Data are expected to be in long-format (even the repeated measures).
2115
- # If your data is in wide-format, you can use the pandas.melt() function
2116
- # to convert from wide to long format.
2117
-
2118
- # Warning
2119
- # If the between-subject groups are unbalanced(=unequal sample sizes), a
2120
- # type II ANOVA will be computed. Note however that SPSS, JAMOVI and JASP
2121
- # by default return a type III ANOVA, which may lead to slightly different
2122
- # results.
2123
- res_tab = pg.mixed_anova(data=data, dv=dv, within=within, subject=subject,
2124
- between=between, correction=correction, effsize=effsize)
2125
- res_tab = anovatable(res_tab)
2126
- return res_tab
2127
-
2128
- def run_friedman(data, dv, factor, subject, method='chisq'):
2129
- # Friedman test for repeated measurements
2130
- # The Friedman test is used for non-parametric (rank-based) one-way
2131
- # repeated measures ANOVA
2132
-
2133
- # check df form ('long' or 'wide')
2134
- # df_long = data.melt(ignore_index=False).reset_index()
2135
- # if data.describe().shape[1] >= df_long.describe().shape[1]:
2136
- # res_tab = pg.friedman(data, method=method)
2137
- # else:
2138
- # res_tab = pg.friedman(data=df_long, dv='value',
2139
- # within="variable", subject="index", method=method)
2140
- if "Wide" in df_wide_long(data):
2141
- df_long = data.melt(ignore_index=False).reset_index()
2142
- res_tab = pg.friedman(data=df_long, dv='value',
2143
- within="variable", subject="index", method=method)
2144
- else:
2145
- res_tab = pg.friedman(data, dv=dv, within=factor, subject=subject,method=method)
2146
- res_tab = anovatable(res_tab)
2147
- return res_tab
2148
-
2149
- def run_kruskal(data, dv, factor):
2150
- # Kruskal-Wallis H-test for independent samples
2151
- res_tab = pg.kruskal(data=data, dv=dv, between=factor)
2152
- res_tab = anovatable(res_tab)
2153
- return res_tab
2154
-
2155
- # Normality Check:
2156
- # Conduct normality tests (Shapiro-Wilk) for each group.
2157
- # If the data is approximately normally distributed, ANOVA is robust to
2158
- # moderate departures from normality, especially with larger sample sizes.
2159
-
2160
- # print(data[factor])
2161
- # print(type(data[factor]))
2162
- # print(len(data[factor].columns))
2163
- # print(data[factor].nunique())
2164
- # print(data[factor[0]])
2165
- # print(data[factor[0]].unique())
2166
- if group is None:
2167
- group = factor
2168
-
2169
- # print(f'\ngroup is :\n{data[group]},\ndv is :\n{dv}\n')
2170
- norm_array = []
2171
- for sub_group in data[group].unique():
2172
- norm_curr = check_normality(
2173
- data.loc[data[group] == sub_group, dv])
2174
- norm_array.append(norm_curr)
2175
- norm_all = True if all(norm_array) else False
2176
-
2177
- # Homogeneity of Variances:
2178
- # Check for homogeneity of variances (homoscedasticity) among groups.
2179
- # Levene's test or Bartlett's test can be used for this purpose.
2180
- # If variances are significantly different, consider transformations or use a
2181
- # robust ANOVA method.
2182
-
2183
- # # =============================================================================
2184
- # # # method1: stats.levene
2185
- # # =============================================================================
2186
- # # data_array = []
2187
- # # for sub_group in df["group"].unique():
2188
- # # data_array.append(df.loc[df['group'] == sub_group, 'values'].values)
2189
- # # print(data_array)
2190
- # # variance_all = stats.levene(data_array[0],data_array[1],data_array[2])
2191
-
2192
- # =============================================================================
2193
- # # method2: pingouin.homoscedasticity
2194
- # =============================================================================
2195
- res_levene = None
2196
- variance_all = pg.homoscedasticity(
2197
- data, dv=dv, group=group, method='levene', alpha=0.05)
2198
- res_levene = True if variance_all.iloc[0,1] > 0.05 else False
2199
- # =============================================================================
2200
- # # ANOVA Assumptions:
2201
- # # Ensure that the assumptions of independence, homogeneity of variances, and
2202
- # # normality are reasonably met before proceeding.
2203
- # =============================================================================
2204
- notes_norm = 'normally' if norm_all else 'NOT-normally'
2205
- notes_variance = 'equal' if res_levene else 'unequal'
2206
- print(f'Data is {notes_norm} distributed, shows {notes_variance} variance')
2207
-
2208
- cfg_pmc = corr_pmc(pmc)
2209
- cfg_pair = corr_pair(pair)
2210
- output = {}
2211
- if (cfg_pmc == 'parametric') or (cfg_pmc == 'auto'):
2212
- if 'np' in cfg_pair: # 'unpaired'
2213
- if cfg_pmc == 'auto':
2214
- if norm_all:
2215
- if res_levene:
2216
- res_tab = run_anova(data, dv, factor, ss_type=ss_type,
2217
- detailed=True, effsize='np2')
2218
- notes_stat = f'{data[factor].nunique()} Way ANOVA'
2219
- notes_APA = extract_apa(res_tab)
2220
-
2221
- else:
2222
- res_tab = run_welchanova(data, dv, factor)
2223
- notes_stat = f'{data[factor].nunique()} Way Welch ANOVA'
2224
- notes_APA = extract_apa(res_tab)
2225
-
2226
- else:
2227
-
2228
- res_tab = run_kruskal(data, dv, factor)
2229
- notes_stat = f'Non-parametric Kruskal: {data[factor].nunique()} Way ANOVA'
2230
- notes_APA = extract_apa(res_tab)
2231
-
2232
- elif cfg_pmc == 'parametric':
2233
- res_tab = run_anova(data, dv, factor, ss_type=ss_type,
2234
- detailed=True, effsize='np2')
2235
- notes_stat = f'{data[factor].nunique()} Way ANOVA'
2236
- notes_APA = extract_apa(res_tab)
2237
-
2238
- elif 'pa' in cfg_pair and 'np' not in cfg_pair: # 'paired'
2239
- res_tab = run_rmanova(data, dv, factor, subject, correction='auto',
2240
- detailed=True, effsize='ng2')
2241
- notes_stat = f'{data[factor].nunique()} Way Repeated measures ANOVA'
2242
- notes_APA = extract_apa(res_tab)
2243
-
2244
- elif 'mix' in cfg_pair or 'both' in cfg_pair:
2245
- res_tab = run_mixedanova(data, dv, between, within, subject)
2246
- # notes_stat = f'{len(sum(len(between)+sum(len(within))))} Way Mixed ANOVA'
2247
- notes_stat = ""
2248
- # n_inter = res_tab.loc(res_tab["Source"] == "Interaction")
2249
- # print(n_inter)
2250
- notes_APA = extract_apa(res_tab)
2251
-
2252
- elif cfg_pmc == 'non-parametric':
2253
- if 'np' in cfg_pair: # 'unpaired'
2254
- res_tab = run_kruskal(data, dv, factor)
2255
- notes_stat = f'Non-parametric Kruskal: {data[factor].nunique()} Way ANOVA'
2256
- notes_APA = f'H({res_tab.ddof1[0]},n={data.shape[0]})={round(res_tab.H[0], 5)},p={round(res_tab["p-unc"][0], 5)}'
2257
-
2258
- elif 'pa' in cfg_pair and 'np' not in cfg_pair: # 'paired'
2259
- res_tab = run_friedman(data, dv, factor, subject, method='chisq')
2260
- notes_stat = f'Non-parametric {data[factor].nunique()} Way Friedman repeated measures ANOVA'
2261
- notes_APA = f'X^2({res_tab.ddof1[0]})={round(res_tab.Q[0], 5)},p={round(res_tab["p-unc"][0], 5)}'
2262
-
2263
- # =============================================================================
2264
- # # Post-hoc
2265
- # Post-Hoc Tests (if significant):
2266
- # If ANOVA indicates significant differences, perform post-hoc tests (e.g.,
2267
- # Tukey's HSD, Bonferroni, or Scheffé) to identify which groups differ from each other.
2268
- # # https://pingouin-stats.org/build/html/generated/pingouin.pairwise_tests.html
2269
- # =============================================================================
2270
- go_pmc = True if cfg_pmc == 'parametric' else False
2271
- go_subject = subject if ('pa' in cfg_pair) and (
2272
- 'np' not in cfg_pair) else None
2273
- go_mix_between = between if ('mix' in cfg_pair) or (
2274
- 'both' in cfg_pair) else None
2275
- go_mix_between = None if ('pa' in cfg_pair) or (
2276
- 'np' not in cfg_pair) else factor
2277
- go_mix_within = within if ('mix' in cfg_pair) or (
2278
- 'both' in cfg_pair) else None
2279
- go_mix_within = factor if ('pa' in cfg_pair) or (
2280
- 'np' not in cfg_pair) else None
2281
- if res_tab['p-unc'][0] <= .05:
2282
- # Pairwise Comparisons
2283
- method_post_hoc = [
2284
- "bonf", # 'bonferroni', # : one-step correction
2285
- "sidak", # one-step correction
2286
- "holm", # step-down method using Bonferroni adjustments
2287
- "fdr_bh", # Benjamini/Hochberg (non-negative)
2288
- "fdr_by", # Benjamini/Yekutieli (negative)
2289
- ]
2290
- res_posthoc = pd.DataFrame()
2291
- for met in method_post_hoc:
2292
- post_curr = pg.pairwise_tests(data=data, dv=dv, between=go_mix_between, within=go_mix_within, subject=go_subject, parametric=go_pmc, marginal=True, alpha=0.05, alternative='two-sided',
2293
- padjust=met)
2294
- res_posthoc = pd.concat([res_posthoc, post_curr],
2295
- ignore_index=True)
2296
- else:
2297
- res_posthoc = None
2298
- output['res_posthoc'] = res_posthoc
2299
- # =============================================================================
2300
- # # filling output
2301
- # =============================================================================
2302
-
2303
- pd.set_option('display.max_columns', None)
2304
- output['stat'] = notes_stat
2305
- # print(output['APA'])
2306
- output['APA'] = notes_APA
2307
- output['pval'] = res_tab['p-unc']
2308
- output['res_tab'] = res_tab
2309
- if res_tab.shape[0] == len(notes_APA):
2310
- output['res_tab']['APA'] = output['APA'] # note APA in the table
2311
- # print(output['stat'])
2312
- # print(output['res_tab'])
2313
- return output
2314
-
2315
-
2316
- # =============================================================================
2317
- # # One-way ANOVA
2318
- # =============================================================================
2319
- # url = "http://stats191.stanford.edu/data/rehab.csv"
2320
- # rehab_table = pd.read_table(url, delimiter=",")
2321
- # rehab_table.to_csv("rehab.table")
2322
- # fig, ax = plt.subplots(figsize=(8, 6))
2323
- # fig = rehab_table.boxplot("Time", "Fitness", ax=ax, grid=False)
2324
- # # fig, ax = plt.subplots(figsize=(8, 6))
2325
- # # set_pub()
2326
- # # sns.boxenplot(x="Time",y="Fitness",data = rehab_table)
2327
-
2328
- # out2 = FuncMultiCmpt(pmc='pmc', pair='unpair',
2329
- # data=rehab_table, dv='Time', factor='Fitness')
2330
- # # print(out2['res_tab'])
2331
- # # print(out2['APA'])
2332
- # out2['res_posthoc']
2333
- # out2['res_posthoc']['p-unc'][0]
2334
- # out2['res_posthoc']['p-adjust'][0]
2335
- # out2['res_posthoc']['p-corr'][0]
2336
-
2337
-
2338
- # =============================================================================
2339
- # # Interactions and ANOVA
2340
- # https://www.statsmodels.org/dev/examples/notebooks/generated/interactions_anova.html
2341
- # url = "http://stats191.stanford.edu/data/salary.table"
2342
- # fh = urlopen(url)
2343
- # df = pd.read_table(fh)
2344
- # out1 = FuncMultiCmpt(pmc='pmc', pair='unpaired', data=df,
2345
- # dv='S', factor=['X', 'E', 'M'], group='M')
2346
- # # # two-way anova
2347
- # # https://www.statology.org/two-way-anova-python/
2348
- # # =============================================================================
2349
- # # df = pd.DataFrame({'water': np.repeat(['daily', 'weekly'], 15),
2350
- # # 'sun': np.tile(np.repeat(['low', 'med', 'high'], 5), 2),
2351
- # # 'height': [6, 6, 6, 5, 6, 5, 5, 6, 4, 5,
2352
- # # 6, 6, 7, 8, 7, 3, 4, 4, 4, 5,
2353
- # # 4, 4, 4, 4, 4, 5, 6, 6, 7, 8]})
2354
- # # out1 = FuncMultiCmpt(pmc='pmc', pair='unpaired', data=df,
2355
- # # dv='height', factor=['water','sun'],group='water')
2356
-
2357
-
2358
- # =============================================================================
2359
- # # two way anova
2360
- # https://www.geeksforgeeks.org/how-to-perform-a-two-way-anova-in-python/
2361
- # =============================================================================
2362
- # df1=pd.DataFrame({'Fertilizer': np.repeat(['daily', 'weekly'], 15),
2363
- # 'Watering': np.repeat(['daily', 'weekly'], 15),
2364
- # 'height': [14, 16, 15, 15, 16, 13, 12, 11,
2365
- # 14, 15, 16, 16, 17, 18, 14, 13,
2366
- # 14, 14, 14, 15, 16, 16, 17, 18,
2367
- # 14, 13, 14, 14, 14, 15]})
2368
-
2369
- # df1['subject'] = np.tile(range(0, 15), (1, 2)).T
2370
- # out1 = FuncMultiCmpt(pmc='pmc', pair='unpaired', data=df1,
2371
- # dv='height', factor=['Fertilizer','Watering'],group='Watering')
2372
- # # print(out1['stat'])
2373
- # # print(out1['res_tab'])
2374
-
2375
- # =============================================================================
2376
- # # welch anova
2377
- # https://www.geeksforgeeks.org/how-to-perform-welchs-anova-in-python/
2378
- # =============================================================================
2379
- # df = pd.DataFrame({'score': [64, 66, 68, 75, 78, 94, 98, 79, 71, 80,
2380
- # 91, 92, 93, 90, 97, 94, 82, 88, 95, 96,
2381
- # 79, 78, 88, 94, 92, 85, 83, 85, 82, 81],
2382
- # 'group': np.repeat(['strat1', 'strat2', 'strat3'],repeats=10)})
2383
- # out1 = FuncMultiCmpt(pmc='auto',pair='unpaired',data=df, dv='score', factor='group', group='group')
2384
- # =============================================================================
2385
- # # two way anova
2386
- # https://www.statology.org/two-way-anova-python/
2387
- # =============================================================================
2388
- # df = pd.DataFrame({'water': np.repeat(['daily', 'weekly'], 15),
2389
- # 'sun': np.tile(np.repeat(['low', 'med', 'high'], 5), 2),
2390
- # 'height': [6, 6, 6, 5, 6, 5, 5, 6, 4, 5,
2391
- # 6, 6, 7, 8, 7, 3, 4, 4, 4, 5,
2392
- # 4, 4, 4, 4, 4, 5, 6, 6, 7, 8]})
2393
- # df['subject'] = np.tile(range(0, 15), (1, 2)).T
2394
- # out1 = FuncMultiCmpt(pmc='pmc', pair='unpaired', data=df,
2395
- # dv='height', factor=['water', 'sun'], subject='subject', group='water')
2396
- # # print(out1['stat'])
2397
- # # print(out1['res_tab'])
2398
-
2399
- # =============================================================================
2400
- # # 3-way ANOVA
2401
- # =============================================================================
2402
- # df = pd.DataFrame({'program': np.repeat([1, 2], 20),
2403
- # 'gender': np.tile(np.repeat(['M', 'F'], 10), 2),
2404
- # 'division': np.tile(np.repeat([1, 2], 5), 4),
2405
- # 'height': [7, 7, 8, 8, 7, 6, 6, 5, 6, 5,
2406
- # 5, 5, 4, 5, 4, 3, 3, 4, 3, 3,
2407
- # 6, 6, 5, 4, 5, 4, 5, 4, 4, 3,
2408
- # 2, 2, 1, 4, 4, 2, 1, 1, 2, 1]})
2409
- # df['subject'] = np.tile(range(0, 20), (1, 2)).T
2410
- # out1 = FuncMultiCmpt(pmc='pmc', pair='unpaired', data=df,
2411
- # dv='height', factor=['gender', 'program', 'division'], subject='subject', group='program')
2412
- # # print(out1['stat'])
2413
- # # print(out1['res_tab'])
2414
-
2415
- # =============================================================================
2416
- # # Repeated Measures ANOVA in Python
2417
- # =============================================================================
2418
- # df = pd.DataFrame({'patient': np.repeat([1, 2, 3, 4, 5], 4),
2419
- # 'drug': np.tile([1, 2, 3, 4], 5),
2420
- # 'response': [30, 28, 16, 34,
2421
- # 14, 18, 10, 22,
2422
- # 24, 20, 18, 30,
2423
- # 38, 34, 20, 44,
2424
- # 26, 28, 14, 30]})
2425
- # # df['subject'] = np.tile(range(0, 20), (1, 2)).T
2426
- # out1 = FuncMultiCmpt(pmc='pmc', pair='paired', data=df,
2427
- # dv='response', factor=['drug'], subject='patient', group='drug')
2428
- # print(out1['stat'])
2429
- # print(out1['res_tab'])
2430
- # print(out1['APA'])
2431
-
2432
- # =============================================================================
2433
- # # repeated anova
2434
- # https://www.geeksforgeeks.org/how-to-perform-a-repeated-measures-anova-in-python/
2435
- # =============================================================================
2436
- # df = pd.DataFrame({'Cars': np.repeat([1, 2, 3, 4, 5], 4),
2437
- # 'Engine Oil': np.tile([1, 2, 3, 4], 5),
2438
- # 'Mileage': [36, 38, 30, 29,
2439
- # 34, 38, 30, 29,
2440
- # 34, 28, 38, 32,
2441
- # 38, 34, 20, 44,
2442
- # 26, 28, 34, 50]})
2443
- # out1 = FuncMultiCmpt(pmc='pmc', pair='paired', data=df,
2444
- # dv='Mileage', factor=['Engine Oil'], subject='Cars', group='Cars')
2445
- # =============================================================================
2446
- # #two-way repeated anova
2447
- # =============================================================================
2448
- # df = pd.read_csv(
2449
- # "https://reneshbedre.github.io/assets/posts/anova/plants_leaves_two_within.csv")
2450
- # df
2451
- # # df['subject'] = np.tile(range(0, 20), (1, 2)).T
2452
- # out1 = FuncMultiCmpt(pmc='pmc', pair='paired', data=df,
2453
- # dv='num_leaves', factor=['year', 'time'], subject='plants', group='year')
2454
- # print(out1['stat'])
2455
- # print(out1['res_tab'])
2456
- # print(out1['APA'])
2457
-
2458
- # =============================================================================
2459
- # # repeated anova
2460
- # =============================================================================
2461
- # df = pd.read_csv('/Users/macjianfeng/Desktop/test.csv')
2462
- # df.head()
2463
- # df.loc[df['animal'].str.contains('Sleep'), 'experiment'] = 'sleep'
2464
- # df.loc[df['animal'].str.contains('Wake'), 'experiment'] = 'wake'
2465
- # df.loc[df['variable'].str.contains('hypo'), 'region'] = 'hypo'
2466
- # df.loc[df['variable'].str.contains('cort'), 'region'] = 'cort'
2467
- # df
2468
- # for i in range(4):
2469
- # match i:
2470
- # case 0:
2471
- # prot_name = 'A1'
2472
- # case 1:
2473
- # prot_name = 'A2'
2474
- # case 2:
2475
- # prot_name = '845'
2476
- # case 3:
2477
- # prot_name = '831'
2478
- # df_tmp = df[df["variable"].str.contains(prot_name)]
2479
- # df_tmp['protein'] = prot_name
2480
- # df_tmp = df_tmp.reset_index()
2481
- # print(df_tmp)
2482
-
2483
- # out1 = FuncMultiCmpt(pmc='pmc', pair='mix', data=df_tmp,
2484
- # dv='value', between='experiment', within='region', subject='animal', group='experiment')
2485
- # print(out1['stat'])
2486
- # print(out1['res_tab'])
2487
- # # =============================================================================
2488
- # One-way ANOVA
2489
- # df1 = pd.read_csv('/Users/macjianfeng/Desktop/Book2.csv')
2490
- # df2 = df1.melt()
2491
- # out1 = FuncMultiCmpt(pmc='npmc', pair='unpaired', data=df2,
2492
- # dv='libido', factor=['brand x', 'brand y', 'brand z'], subject='participant')
2493
- # print(out1['stat'])
2494
- # print(out1['res_tab'])
2495
- # =============================================================================
2496
-
2497
-
2498
- # =============================================================================
2499
- # # #One-way ANOVA new example: https://www.pythonfordatascience.org/anova-python/
2500
- # =============================================================================
2501
- # df1 = pd.read_csv(
2502
- # "https://raw.githubusercontent.com/researchpy/Data-sets/master/difficile.csv")
2503
- # df1.drop('person', axis=1, inplace=True)
2504
- # # Recoding value from numeric to string
2505
- # df1['dose'].replace({1: 'placebo', 2: 'low', 3: 'high'}, inplace=True)
2506
- # df1.head(10)
2507
-
2508
- # out3= FuncMultiCmpt(pmc='pmc', data=df1, dv='libido', factor='dose')
2509
- # # print(out3['res_tab'])
2510
- # # # print(out3['res_posthoc'])
2511
- # # print(out3['APA'])
2512
-
2513
- # =============================================================================
2514
- # https://lifewithdata.com/2023/06/08/how-to-perform-a-two-way-anova-in-python/
2515
- # =============================================================================
2516
- # data = {
2517
- # 'Diet': ['A', 'A', 'A', 'A', 'B', 'B', 'B', 'B', 'C', 'C', 'C', 'C'],
2518
- # 'Workout': ['Low', 'Medium', 'High', 'Low', 'Medium', 'High', 'Low', 'Medium', 'High', 'Low', 'Medium', 'High'],
2519
- # 'WeightLoss': [3, 4, 5, 3.2, 5, 6, 5.2, 6, 5.5, 4, 5.5, 6.2]
2520
- # }
2521
- # df = pd.DataFrame(data)
2522
- # out4= FuncMultiCmpt(pmc='pmc', pair='unpaired',data=df, dv='WeightLoss', factor=['Diet','Workout'],group='Diet')
2523
-
2524
- # =============================================================================
2525
- # # convert to list to string
2526
- # =============================================================================
2527
1569
  def list2str(x_str):
2528
1570
  s = ''.join(str(x) for x in x_str)
2529
1571
  return s