py2ls 0.1.4.9__py3-none-any.whl → 0.1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
py2ls/ips.py
CHANGED
@@ -1,8 +1,3 @@
|
|
1
|
-
from scipy.ndimage import convolve1d
|
2
|
-
from scipy.signal import savgol_filter
|
3
|
-
import pingouin as pg
|
4
|
-
from scipy import stats
|
5
|
-
|
6
1
|
import numpy as np
|
7
2
|
import pandas as pd
|
8
3
|
|
@@ -11,7 +6,7 @@ import matplotlib
|
|
11
6
|
import matplotlib.pyplot as plt
|
12
7
|
import matplotlib.ticker as tck
|
13
8
|
from mpl_toolkits.mplot3d import Axes3D
|
14
|
-
import seaborn as sns
|
9
|
+
# import seaborn as sns
|
15
10
|
|
16
11
|
import sys, os,shutil,re, yaml,json
|
17
12
|
from cycler import cycler
|
@@ -1483,120 +1478,6 @@ def figsave(*args,dpi=300):
|
|
1483
1478
|
print(f'\nSaved @: dpi={dpi}\n{fname}')
|
1484
1479
|
|
1485
1480
|
|
1486
|
-
# ==============FuncStars(ax,x1=1,x2=2, yscale=0.9, pval=0.01)====================================================
|
1487
|
-
# Usage:
|
1488
|
-
# FuncStars(ax, x1=2, x2=3, yscale=0.99, pval=0.02)
|
1489
|
-
# =============================================================================
|
1490
|
-
|
1491
|
-
# FuncStars --v 0.1.1
|
1492
|
-
def FuncStars(ax,
              pval=None,
              Ylim=None,
              Xlim=None,
              symbol='*',
              yscale=0.95,
              x1=0,
              x2=1,
              alpha=0.05,
              fontsize=14,
              fontsize_note=6,
              rotation=0,
              fontname='Arial',
              values_below=None,
              linego=True,
              linestyle='-',
              linecolor='k',
              linewidth=.8,
              nsshow='off',
              symbolcolor='k',
              tailindicator=(0.06, 0.06),
              report=None,
              report_scale=-0.1,
              report_loc=None):
    """Annotate a two-group comparison on ``ax`` with significance symbols.

    Parameters
    ----------
    ax : matplotlib Axes or None
        Axes to draw on; ``None`` falls back to ``plt.gca()``.
    pval : float
        P-value to display. Required.
    Ylim, Xlim : sequence of two numbers, optional
        Axis limits used to scale the annotation; default to the limits of
        ``ax``.
    symbol : str
        Symbol repeated 1x (``0.01 < p <= alpha``), 2x (``0.001 < p <= 0.01``)
        or 3x (``p <= 0.001``).
    yscale : float
        Vertical position of the label as a fraction of the y-range.
    x1, x2 : number
        X positions of the two compared groups.
    alpha : float
        Significance threshold.
    fontsize, fontsize_note, rotation, fontname
        Text styling; ``rotation`` only affects the "ns" label.
    values_below : str, optional
        Extra text drawn at ``y = y_loc * (-0.1)``.
    linego : bool
        Draw the bracket (horizontal bar plus two tails) between x1 and x2.
    linestyle, linecolor, linewidth
        Bracket styling.
    nsshow : str
        ``'on'`` prints ``p=...`` (or ``ns`` when ``p >= 0.9``) for
        non-significant results; any other value prints nothing.
    symbolcolor : str
        Colour of the significance symbols.
    tailindicator : sequence of two floats
        Length of the left/right bracket tails as a fraction of the y-range.
    report : str, optional
        Free-text note drawn at ``report_loc``.
    report_scale : float
        Offset of the note below the lower y-limit as a fraction of the
        y-range; positive values are flipped to negative.
    report_loc : float, optional
        Explicit y position of the note; overrides ``report_scale``.

    Raises
    ------
    TypeError
        If ``pval`` is ``None``.
    """
    if pval is None:
        raise TypeError("FuncStars() requires a numeric 'pval'")
    if ax is None:
        ax = plt.gca()
    # Always query the target axes, not whatever axes happens to be current.
    if Ylim is None:
        Ylim = ax.get_ylim()
    if Xlim is None:
        Xlim = ax.get_xlim()

    # Scalar spans (the limits may be inverted, hence min/max).
    y_min = float(np.min(Ylim))
    y_span = float(np.max(Ylim)) - y_min
    x_span = float(np.max(Xlim)) - float(np.min(Xlim))

    # Normalise the sign BEFORE the scale is used, otherwise the correction
    # has no effect on the computed location.
    if report_scale > 0:
        report_scale = -abs(report_scale)
    if report is not None and report_loc is None:
        report_loc = y_min + report_scale * y_span

    yscale = float(yscale)
    y_loc = y_min + yscale * y_span
    xcenter = np.mean([x1, x2])

    # ns / *
    if alpha < pval:
        if nsshow == 'on':
            ns_str = f'p={round(pval, 3)}' if pval < 0.9 else 'ns'
            ax.text(xcenter, y_loc, ns_str,
                    ha='center', va='bottom',
                    fontsize=fontsize - 6 if fontsize > 6 else fontsize,
                    fontname=fontname,
                    color='m' if pval < 0.1 else 'k',
                    rotation=rotation)
    elif pval <= alpha:
        # p-values can underflow to exactly 0; those are still "***".
        if pval <= 0.001:
            n_symbols = 3
        elif pval <= 0.01:
            n_symbols = 2
        else:
            n_symbols = 1
        ax.text(xcenter, y_loc, symbol * n_symbols,
                ha='center', va='center_baseline',
                fontsize=fontsize, fontname=fontname, color=symbolcolor)

    # lines indicators
    if linego:
        inset = x_span * 0.01
        left, right = x1 + inset, x2 - inset
        if yscale < 0.99:
            tail_base = y_loc
            bar_y = y_loc - y_span * .03
        else:
            # Near the top of the axes the bracket is pinned at 95 % height.
            tail_base = y_min + 0.95 * y_span
            bar_y = tail_base - y_span * 0.002
        line_style = dict(linestyle=linestyle, color=linecolor,
                          linewidth=linewidth)
        # horizontal line
        ax.plot([left, right], [bar_y, bar_y], **line_style)
        # vertical lines (tails)
        ax.plot([left, left],
                [tail_base - y_span * tailindicator[0], bar_y], **line_style)
        ax.plot([right, right],
                [tail_base - y_span * tailindicator[1], bar_y], **line_style)

    if values_below is not None:
        ax.text(xcenter, y_loc * (-0.1), values_below,
                ha='center', va='bottom',
                fontsize=fontsize_note, fontname=fontname, color='k')

    # report / comments
    if report is not None:
        ax.text(xcenter, report_loc, report,
                ha='left', va='bottom',
                fontsize=fontsize_note, fontname=fontname, color='.7')
|
1600
1481
|
def is_str_color(s):
|
1601
1482
|
# Regular expression pattern for hexadecimal color codes
|
1602
1483
|
color_code_pattern = r"^#([A-Fa-f0-9]{6}|[A-Fa-f0-9]{8})$"
|
@@ -1643,163 +1524,6 @@ def is_zip(fpath):
|
|
1643
1524
|
return True
|
1644
1525
|
else:
|
1645
1526
|
return False
|
1646
|
-
|
1647
|
-
def stdshade(ax=None, *args, **kwargs):
    """Plot the column-wise mean of ``y`` with a shaded SEM/STD band.

    Positional arguments are recognised by type, in any order:

    * 2-D ``numpy.ndarray`` with more than one row and column -> ``y``
      (it may also be passed in place of ``ax``).
    * 1-D ``numpy.ndarray`` or ``range`` -> ``x``.
    * number in ``[0, 1]`` -> band transparency; number ``>= 1`` -> window
      length passed to ``savgol_filter`` (polyorder 1). The value ``1``
      matches both rules.
    * list/tuple of size 3 -> colour.
    * ``"sem"`` / ``"std"`` -> what the band shows (default ``"sem"``).
    * other strings -> colour / marker / line-style shorthand, e.g. ``"r"``,
      ``"r-"``, ``"r--"``; strings starting with ``#`` are converted with
      ``hue2rgb``.

    Keyword arguments
    -----------------
    fill_kws : dict
        Extra keyword arguments for ``ax.fill_between``; they override the
        defaults chosen here.
    line_kws : dict
        Extra keyword arguments for ``ax.plot``; they override the defaults
        chosen here.

    Returns
    -------
    (line, fill)
        The first object returned by ``ax.plot`` and the object returned by
        ``ax.fill_between``.

    Raises
    ------
    ValueError
        If no 2-D data array was supplied.
    """
    y = None
    x = None
    if isinstance(ax, np.ndarray) and ax.ndim == 2 and min(ax.shape) > 1:
        # The data array was passed in the ``ax`` slot.
        y = ax
        ax = plt.gca()
    if ax is None:
        ax = plt.gca()

    alpha = 0.5
    acolor = "k"
    paraStdSem = "sem"
    plotStyle = "-"
    plotMarker = "none"
    smth = 1
    l_c_one = ["r", "g", "b", "m", "c", "y", "k", "w"]
    l_style2 = ["--", "-."]
    l_style1 = ["-", ":"]
    l_mark = ["o", "+", "*", ".", "x", "_", "|", "s", "d", "^", "v", ">", "<", "p", "h"]

    # Check each argument
    for arg in args:
        if isinstance(arg, np.ndarray) and arg.ndim == 2 and min(arg.shape) > 1:
            y = arg
        if (isinstance(arg, np.ndarray) and arg.ndim == 1) or isinstance(arg, range):
            x = arg
            if isinstance(x, range):
                x = np.arange(start=x.start, stop=x.stop, step=x.step)
        if isinstance(arg, (int, float)):
            if 0 <= arg <= 1:
                alpha = arg
            if arg >= 1:
                smth = arg
        if isinstance(arg, (list, tuple)) and np.size(arg) == 3:
            acolor = tuple(arg) if isinstance(arg, list) else arg
        if not isinstance(arg, str):
            continue

        # Color / plotStyle / marker shorthand
        if len(arg) == 1 and arg in l_c_one:
            acolor = arg
            continue
        if arg in ["sem", "std"]:
            paraStdSem = arg
        if arg.startswith("#"):
            acolor = hue2rgb(arg)
        # NOTE(review): str2list is defined elsewhere in the file; it is
        # presumably a per-character split - confirm.
        chars = str2list(arg)
        if chars[0] in l_c_one:
            if len(arg) == 3:
                k = [c for c in chars if c in l_c_one]
                if k:
                    acolor = k[0]
                st = [s for s in l_style2 if s in arg]
                if st:
                    plotStyle = st[0]
            elif len(arg) == 2:
                k = [c for c in chars if c in l_c_one]
                if k:
                    acolor = k[0]
                mk = [c for c in chars if c in l_mark]
                if mk:
                    plotMarker = mk[0]
                st = [s for s in l_style1 if s in arg]
                if st:
                    plotStyle = st[0]
        if len(arg) == 1:
            k = [c for c in chars if c in l_c_one]
            if k:
                acolor = k[0]
            mk = [c for c in chars if c in l_mark]
            if mk:
                plotMarker = mk[0]
            st = [s for s in l_style1 if s in arg]
            if st:
                plotStyle = st[0]
        if len(arg) == 2:
            st = [s for s in l_style2 if s in arg]
            if st:
                plotStyle = st[0]

    if y is None:
        raise ValueError(
            "stdshade() needs a 2-D numpy array (rows x columns) as data")

    n_cols = y.shape[1]
    if x is None or len(x) > n_cols:
        # No usable x supplied: fall back to 1..n.
        x = np.arange(1, n_cols + 1)
    elif len(x) < n_cols:
        # A shorter x is used as column indices to sub-select y.
        y = y[:, x]
        nRow = y.shape[0]
        nCol = y.shape[1]
        print(f"y was corrected, please confirm that {nRow} row, {nCol} col")
    # len(x) == n_cols: keep the caller's x (previously it was overwritten).

    yMean = np.nanmean(y, axis=0)
    wings = np.nanstd(y, axis=0)
    if paraStdSem == "sem":
        wings = wings / np.sqrt(y.shape[0])
    if smth > 1:
        yMean = savgol_filter(yMean, smth, 1)
        wings = savgol_filter(wings, smth, 1)

    fill_kws = kwargs.get('fill_kws', {})
    line_kws = kwargs.get('line_kws', {})

    # Caller-supplied keys win over the defaults instead of raising a
    # duplicate-keyword TypeError.
    fill_opts = {"color": acolor, "alpha": alpha, "lw": 0, **fill_kws}
    fill = ax.fill_between(x, yMean + wings, yMean - wings, **fill_opts)

    line_opts = {"color": acolor, "ls": plotStyle, "marker": plotMarker}
    # Same rule as before: lw=1.5 is only forced when line_kws is non-empty
    # and does not set a line width itself.
    if line_kws != {} and not any(
            key.lower() in ['lw', 'linewidth'] for key in line_kws.keys()):
        line_opts["lw"] = 1.5
    line_opts.update(line_kws)
    line = ax.plot(x, yMean, **line_opts)
    return line[0], fill
|
1780
|
-
# =============================================================================
|
1781
|
-
# # for plot figures {Qiu et al.2023}
|
1782
|
-
# =============================================================================
|
1783
|
-
# =============================================================================
|
1784
|
-
# plt.rcParams.update({'figure.max_open_warning': 0})
|
1785
|
-
# # Output matplotlib figure to SVG with text as text, not curves
|
1786
|
-
# plt.rcParams['svg.fonttype'] = 'none'
|
1787
|
-
# plt.rcParams['pdf.fonttype'] = 42
|
1788
|
-
#
|
1789
|
-
# plt.rc('text', usetex=False)
|
1790
|
-
# # plt.style.use('ggplot')
|
1791
|
-
# plt.style.use('science')
|
1792
|
-
# plt.rc('font', family='serif')
|
1793
|
-
# plt.rcParams.update({
|
1794
|
-
# "font.family": "serif", # specify font family here
|
1795
|
-
# "font.serif": ["Arial"], # specify font here
|
1796
|
-
# "font.size": 11})
|
1797
|
-
# # plt.tight_layout()
|
1798
|
-
# =============================================================================
|
1799
|
-
# =============================================================================
|
1800
|
-
# # axis spine
|
1801
|
-
# # use it like: adjust_spines(ax, ['left', 'bottom'])
|
1802
|
-
# =============================================================================
|
1803
1527
|
|
1804
1528
|
|
1805
1529
|
def adjust_spines(ax=None, spines=['left', 'bottom'],distance=2):
|
@@ -1842,688 +1566,6 @@ def add_colorbar(im, width=None, pad=None, **kwargs):
|
|
1842
1566
|
# =============================================================================
|
1843
1567
|
|
1844
1568
|
|
1845
|
-
def FuncCmpt(X1, X2, pmc='auto', pair='unpaired'):
    """Compare two samples and summarise the result in an APA-like string.

    Parameters
    ----------
    X1, X2 : array-like of numbers
        The two samples.
    pmc : str
        ``'pmc'``/``'parametric'`` or ``'npmc'``/``'nonparametric'``/
        ``'non-parametric'``; anything else is treated as ``'auto'``.
        NOTE(review): ``'auto'`` currently follows the parametric branch
        (Student's or Welch's t-test); it never selects a rank-based test.
    pair : str
        Contains ``'np'`` (e.g. ``'unpaired'``) for independent samples, or
        ``'pa'`` without ``'np'`` (e.g. ``'paired'``) for paired samples.

    Returns
    -------
    (p, output)
        ``p`` is the p-value; ``output`` is a dict with the keys ``'stat'``,
        ``'pval'``, ``'method'`` and ``'APA'``.

    Raises
    ------
    ValueError
        If ``pair`` cannot be interpreted as paired or unpaired.
    """

    # pmc correction: 'parametric'/'non-parametric'/'auto'
    def corr_pmc(pmc):
        key = pmc.lower()
        if key in {'pmc', 'parametric'}:
            return 'parametric'
        if key in {'npmc', 'nonparametric', 'non-parametric'}:
            return 'non-parametric'
        return 'auto'

    def corr_pair(pair):
        key = pair.lower()
        # 'unpaired' contains both 'np' and 'pa', so test 'np' first.
        if 'np' in key:
            return 'unpaired'
        if 'pa' in key:
            return 'paired'
        return None

    def check_normality(data):
        _, pval_shapiro = stats.shapiro(data)
        Normality = pval_shapiro > 0.05
        print(f'\n normally distributed\n') if Normality else print(
            f'\n NOT normally distributed\n')
        return Normality

    def welch_df(a, b):
        # Welch-Satterthwaite degrees of freedom, computed on non-NaN values
        # to match nan_policy='omit' in the test itself.
        a = np.asarray(a, dtype=float)
        b = np.asarray(b, dtype=float)
        a = a[~np.isnan(a)]
        b = b[~np.isnan(b)]
        with np.errstate(divide='ignore', invalid='ignore'):
            va = a.var(ddof=1) / a.size
            vb = b.var(ddof=1) / b.size
            df = (va + vb) ** 2 / (va ** 2 / (a.size - 1) + vb ** 2 / (b.size - 1))
        return float(df)

    def sub_cmpt_2group(X1, X2, cfg_pmc='pmc', pair='unpaired', Normality=True):
        output = {}
        nX1 = np.sum(~np.isnan(X1))
        nX2 = np.sum(~np.isnan(X2))
        unpaired = 'np' in pair
        if cfg_pmc == 'parametric' or cfg_pmc == 'auto':
            # VarType correction by checking variance Type via "levene"
            _, pval_lev = stats.levene(
                X1, X2, center='median', proportiontocut=0.05)
            VarType = bool(pval_lev > 0.05 and nX1 == nX2)

            if unpaired:
                if VarType and Normality:
                    # Student's t-test: equal variances, both groups normal.
                    stat_value, pval = stats.ttest_ind(
                        X1, X2, axis=0, equal_var=True, nan_policy='omit',
                        alternative='two-sided')
                    notes_stat = 'unpaired t test'
                    notes_APA = f't({nX1+nX2-2})={round(stat_value, 5)},p={round(pval, 5)}'
                else:
                    # Unequal variances (or non-normal data): do not pool the
                    # variance; the degrees of freedom follow the
                    # Welch-Satterthwaite approximation, not n1+n2-2.
                    stat_value, pval = stats.ttest_ind(
                        X1, X2, axis=0, equal_var=False, nan_policy='omit',
                        alternative='two-sided')
                    notes_stat = 'Welchs t-test'
                    notes_APA = f't({round(welch_df(X1, X2), 2)})={round(stat_value, 5)},p={round(pval, 5)}'
            else:  # 'paired'
                stat_value, pval = stats.ttest_rel(
                    X1, X2, axis=0, nan_policy='omit', alternative='two-sided')
                notes_stat = 'paired t test'
                notes_APA = f't({nX1-1})={round(stat_value, 5)},p={round(pval, 5)}'
        else:  # 'non-parametric'
            if unpaired:  # Perform Mann-Whitney
                stat_value, pval = stats.mannwhitneyu(
                    X1, X2, method='exact', nan_policy='omit')
                notes_stat = 'Mann-Whitney U'
                label = 'U'
            else:  # Wilcoxon signed-rank test
                stat_value, pval = stats.wilcoxon(
                    X1, X2, method='exact', nan_policy='omit')
                notes_stat = 'Wilcoxon signed-rank'
                label = 'Z'
            if nX1 == nX2:
                notes_APA = f'{label}(n={nX1})={round(stat_value, 5)},p={round(pval, 5)}'
            else:
                notes_APA = f'{label}(n1={nX1},n2={nX2})={round(stat_value, 5)},p={round(pval, 5)}'

        # filling output
        output['stat'] = stat_value
        output['pval'] = pval
        output['method'] = notes_stat
        output['APA'] = notes_APA

        print(f"{output['method']}\n {notes_APA}\n\n")

        return output, pval

    cfg_pmc = corr_pmc(pmc)
    cfg_pair = corr_pair(pair)
    if cfg_pair is None:
        # Fail early with a clear message instead of a cryptic error from
        # "'np' in None" further down.
        raise ValueError(
            f"pair={pair!r} is not recognised; use 'paired' or 'unpaired'")

    Normality1 = check_normality(X1)
    Normality2 = check_normality(X2)
    Normality = bool(Normality1 and Normality2)

    output, p = sub_cmpt_2group(
        X1, X2, cfg_pmc=cfg_pmc, pair=cfg_pair, Normality=Normality)
    return p, output
|
1959
|
-
|
1960
|
-
# ======compare 2 group test===================================================
|
1961
|
-
# # Example
|
1962
|
-
# X1 = [19, 22, 16, 29, 24]
|
1963
|
-
# X2 = [20, 11, 17, 12, 22]
|
1964
|
-
|
1965
|
-
# p, res= FuncCmpt(X1, X2, pmc='pmc', pair='unparrr')
|
1966
|
-
|
1967
|
-
# =============================================================================
|
1968
|
-
|
1969
|
-
# =============================================================================
|
1970
|
-
# # method = ['anova', # 'One-way and N-way ANOVA',
|
1971
|
-
# # 'rm_anova', # 'One-way and two-way repeated measures ANOVA',
|
1972
|
-
# # 'mixed_anova', # 'Two way mixed ANOVA',
|
1973
|
-
# # 'welch_anova', # 'One-way Welch ANOVA',
|
1974
|
-
# # 'kruskal', # 'Non-parametric one-way ANOVA'
|
1975
|
-
# # 'friedman', # Non-parametric one-way repeated measures ANOVA
|
1976
|
-
# # ]
|
1977
|
-
# =============================================================================
|
1978
|
-
|
1979
|
-
|
1980
|
-
# =============================================================================
|
1981
|
-
# # method = ['anova', # 'One-way and N-way ANOVA',
|
1982
|
-
# # 'rm_anova', # 'One-way and two-way repeated measures ANOVA',
|
1983
|
-
# # 'mixed_anova', # 'Two way mixed ANOVA',
|
1984
|
-
# # 'welch_anova', # 'One-way Welch ANOVA',
|
1985
|
-
# # 'kruskal', # 'Non-parametric one-way ANOVA'
|
1986
|
-
# # 'friedman', # Non-parametric one-way repeated measures ANOVA
|
1987
|
-
# # ]
|
1988
|
-
# =============================================================================
|
1989
|
-
def df_wide_long(df):
    """Guess whether ``df`` is in wide or long layout from its shape.

    Returns ``"Wide"`` when there are more columns than rows and ``"Long"``
    otherwise. A square frame is reported as ``"Long"`` so that the function
    always returns a string; callers test the result with ``in``, which
    would fail on ``None``.
    """
    rows, columns = df.shape
    return "Wide" if columns > rows else "Long"
|
1995
|
-
|
1996
|
-
def FuncMultiCmpt(pmc='pmc', pair='unpair', data=None, dv=None, factor=None,
|
1997
|
-
ss_type=2, detailed=True, effsize='np2',
|
1998
|
-
correction='auto', between=None, within=None,
|
1999
|
-
subject=None, group=None
|
2000
|
-
):
|
2001
|
-
|
2002
|
-
def corr_pair(pair):
|
2003
|
-
cfg_pair = None
|
2004
|
-
if 'pa' in pair.lower() and 'np' not in pair.lower():
|
2005
|
-
cfg_pair = 'paired'
|
2006
|
-
elif 'np' in pair.lower():
|
2007
|
-
cfg_pair = 'unpaired'
|
2008
|
-
elif 'mix' in pair.lower():
|
2009
|
-
cfg_pair = 'mix'
|
2010
|
-
return cfg_pair
|
2011
|
-
|
2012
|
-
def check_normality(data):
|
2013
|
-
stat_shapiro, pval_shapiro = stats.shapiro(data)
|
2014
|
-
if pval_shapiro > 0.05:
|
2015
|
-
Normality = True
|
2016
|
-
else:
|
2017
|
-
Normality = False
|
2018
|
-
print(f'\n normally distributed\n') if Normality else print(
|
2019
|
-
f'\n NOT normally distributed\n')
|
2020
|
-
return Normality
|
2021
|
-
|
2022
|
-
def corr_pmc(pmc):
|
2023
|
-
cfg_pmc = None
|
2024
|
-
if pmc.lower() in {'pmc', 'parametric'} and pmc.lower() not in {'upmc', 'npmc', 'nonparametric', 'non-parametric'}:
|
2025
|
-
cfg_pmc = 'parametric'
|
2026
|
-
elif pmc.lower() in {'upmc', 'npmc', 'nonparametric', 'non-parametric'} and pmc.lower() not in {'pmc', 'parametric'}:
|
2027
|
-
cfg_pmc = 'non-parametric'
|
2028
|
-
else:
|
2029
|
-
cfg_pmc = 'auto'
|
2030
|
-
return cfg_pmc
|
2031
|
-
|
2032
|
-
def extract_apa(res_tab):
|
2033
|
-
notes_APA = []
|
2034
|
-
if "ddof1" in res_tab:
|
2035
|
-
for irow in range(res_tab.shape[0]):
|
2036
|
-
note_tmp = f'{res_tab.Source[irow]}:F{round(res_tab.ddof1[irow]),round(res_tab.ddof2[irow])}={round(res_tab.F[irow], 5)},p={round(res_tab["p-unc"][irow], 5)}'
|
2037
|
-
notes_APA.append([note_tmp])
|
2038
|
-
elif "DF" in res_tab:
|
2039
|
-
print(res_tab.shape[0])
|
2040
|
-
for irow in range(res_tab.shape[0]-1):
|
2041
|
-
note_tmp = f'{res_tab.Source[irow]}:F{round(res_tab.DF[irow]),round(res_tab.DF[res_tab.shape[0]-1])}={round(res_tab.F[irow], 5)},p={round(res_tab["p-unc"][irow], 5)}'
|
2042
|
-
notes_APA.append([note_tmp])
|
2043
|
-
notes_APA.append(['NaN'])
|
2044
|
-
elif "DF1" in res_tab: # in 'mix' case
|
2045
|
-
for irow in range(res_tab.shape[0]):
|
2046
|
-
note_tmp = f'{res_tab.Source[irow]}:F{round(res_tab.DF1[irow]),round(res_tab.DF2[irow])}={round(res_tab.F[irow], 5)},p={round(res_tab["p-unc"][irow], 5)}'
|
2047
|
-
notes_APA.append([note_tmp])
|
2048
|
-
return notes_APA
|
2049
|
-
|
2050
|
-
def anovatable(res_tab):
|
2051
|
-
if 'df' in res_tab: # statsmodels
|
2052
|
-
res_tab['mean_sq'] = res_tab[:]['sum_sq']/res_tab[:]['df']
|
2053
|
-
res_tab['est_sq'] = res_tab[:-1]['sum_sq'] / \
|
2054
|
-
sum(res_tab['sum_sq'])
|
2055
|
-
res_tab['omega_sq'] = (res_tab[:-1]['sum_sq']-(res_tab[:-1]['df'] *
|
2056
|
-
res_tab['mean_sq'][-1]))/(sum(res_tab['sum_sq'])+res_tab['mean_sq'][-1])
|
2057
|
-
elif 'DF' in res_tab:
|
2058
|
-
res_tab['MS'] = res_tab[:]['SS']/res_tab[:]['DF']
|
2059
|
-
res_tab['est_sq'] = res_tab[:-1]['SS']/sum(res_tab['SS'])
|
2060
|
-
res_tab['omega_sq'] = (res_tab[:-1]['SS']-(res_tab[:-1]['DF'] *
|
2061
|
-
res_tab['MS'][1]))/(sum(res_tab['SS'])+res_tab['MS'][1])
|
2062
|
-
if 'p-unc' in res_tab:
|
2063
|
-
if 'np2' in res_tab:
|
2064
|
-
res_tab['est_sq'] = res_tab['np2']
|
2065
|
-
if 'p-unc' in res_tab:
|
2066
|
-
res_tab['PR(>F)'] = res_tab['p-unc']
|
2067
|
-
return res_tab
|
2068
|
-
|
2069
|
-
def run_anova(data, dv, factor, ss_type=2, detailed=True, effsize='np2'):
|
2070
|
-
# perform ANOVA
|
2071
|
-
# =============================================================================
|
2072
|
-
# # # ANOVA (input: formula, dataset)
|
2073
|
-
# =============================================================================
|
2074
|
-
# # note: if the data is balanced (equal sample size for each group), Type 1, 2, and 3 sums of squares
|
2075
|
-
# # (typ parameter) will produce similar results.
|
2076
|
-
# lm = ols("values ~ C(group)", data=df).fit()
|
2077
|
-
# res_tab = anova_lm(lm, typ=ss_type)
|
2078
|
-
|
2079
|
-
# # however, it does not provide any effect size measures to tell if the
|
2080
|
-
# # statistical significance is meaningful. The function below calculates
|
2081
|
-
# # eta-squared (η²) and omega-squared (ω²). A quick note, η² is the exact same
|
2082
|
-
# # thing as R² except when coming from the ANOVA framework people call it η²;
|
2083
|
-
# # ω² is considered a better measure of effect size since it is unbiased in
|
2084
|
-
# # its calculation by accounting for the degrees of freedom in the model.
|
2085
|
-
# # note: No effect sizes are calculated when using statsmodels.
|
2086
|
-
# # to calculate eta squared, use the sum of squares from the table
|
2087
|
-
# res_tab = anovatable(res_tab)
|
2088
|
-
|
2089
|
-
# =============================================================================
|
2090
|
-
# # alternative for ANOVA
|
2091
|
-
# =============================================================================
|
2092
|
-
res_tab = pg.anova(dv=dv, between=factor, data=data,
|
2093
|
-
detailed=detailed, ss_type=ss_type, effsize=effsize)
|
2094
|
-
res_tab = anovatable(res_tab)
|
2095
|
-
return res_tab
|
2096
|
-
|
2097
|
-
def run_rmanova(data, dv, factor, subject, correction='auto', detailed=True, effsize='ng2'):
|
2098
|
-
# One-way repeated-measures ANOVA using a long-format dataset.
|
2099
|
-
res_tab = pg.rm_anova(data=data, dv=dv, within=factor,
|
2100
|
-
subject=subject, detailed=detailed, effsize=effsize)
|
2101
|
-
return res_tab
|
2102
|
-
|
2103
|
-
def run_welchanova(data, dv, factor):
|
2104
|
-
# When the groups are balanced and have equal variances, the optimal
|
2105
|
-
# post-hoc test is the Tukey-HSD test (pingouin.pairwise_tukey()). If the
|
2106
|
-
# groups have unequal variances, the Games-Howell test is more adequate
|
2107
|
-
# (pingouin.pairwise_gameshowell()). Results have been tested against R.
|
2108
|
-
res_tab = pg.welch_anova(data=data, dv=dv, between=factor)
|
2109
|
-
res_tab = anovatable(res_tab)
|
2110
|
-
return res_tab
|
2111
|
-
|
2112
|
-
def run_mixedanova(data, dv, between, within, subject, correction='auto', effsize='np2'):
|
2113
|
-
# Notes
|
2114
|
-
# Data are expected to be in long-format (even the repeated measures).
|
2115
|
-
# If your data is in wide-format, you can use the pandas.melt() function
|
2116
|
-
# to convert from wide to long format.
|
2117
|
-
|
2118
|
-
# Warning
|
2119
|
-
# If the between-subject groups are unbalanced(=unequal sample sizes), a
|
2120
|
-
# type II ANOVA will be computed. Note however that SPSS, JAMOVI and JASP
|
2121
|
-
# by default return a type III ANOVA, which may lead to slightly different
|
2122
|
-
# results.
|
2123
|
-
res_tab = pg.mixed_anova(data=data, dv=dv, within=within, subject=subject,
|
2124
|
-
between=between, correction=correction, effsize=effsize)
|
2125
|
-
res_tab = anovatable(res_tab)
|
2126
|
-
return res_tab
|
2127
|
-
|
2128
|
-
def run_friedman(data, dv, factor, subject, method='chisq'):
|
2129
|
-
# Friedman test for repeated measurements
|
2130
|
-
# The Friedman test is used for non-parametric (rank-based) one-way
|
2131
|
-
# repeated measures ANOVA
|
2132
|
-
|
2133
|
-
# check df form ('long' or 'wide')
|
2134
|
-
# df_long = data.melt(ignore_index=False).reset_index()
|
2135
|
-
# if data.describe().shape[1] >= df_long.describe().shape[1]:
|
2136
|
-
# res_tab = pg.friedman(data, method=method)
|
2137
|
-
# else:
|
2138
|
-
# res_tab = pg.friedman(data=df_long, dv='value',
|
2139
|
-
# within="variable", subject="index", method=method)
|
2140
|
-
if "Wide" in df_wide_long(data):
|
2141
|
-
df_long = data.melt(ignore_index=False).reset_index()
|
2142
|
-
res_tab = pg.friedman(data=df_long, dv='value',
|
2143
|
-
within="variable", subject="index", method=method)
|
2144
|
-
else:
|
2145
|
-
res_tab = pg.friedman(data, dv=dv, within=factor, subject=subject,method=method)
|
2146
|
-
res_tab = anovatable(res_tab)
|
2147
|
-
return res_tab
|
2148
|
-
|
2149
|
-
def run_kruskal(data, dv, factor):
    """Kruskal-Wallis H-test for independent samples.

    ``factor`` is handed to ``pg.kruskal`` as the ``between`` argument; the
    resulting table is passed through ``anovatable`` before being returned.
    """
    kruskal_result = pg.kruskal(data=data, dv=dv, between=factor)
    return anovatable(kruskal_result)
|
2154
|
-
|
2155
|
-
# Normality Check:
|
2156
|
-
# Conduct normality tests (Shapiro-Wilk) for each group.
|
2157
|
-
# If the data is approximately normally distributed, ANOVA is robust to
|
2158
|
-
# moderate departures from normality, especially with larger sample sizes.
|
2159
|
-
|
2160
|
-
# print(data[factor])
|
2161
|
-
# print(type(data[factor]))
|
2162
|
-
# print(len(data[factor].columns))
|
2163
|
-
# print(data[factor].nunique())
|
2164
|
-
# print(data[factor[0]])
|
2165
|
-
# print(data[factor[0]].unique())
|
2166
|
-
if group is None:
|
2167
|
-
group = factor
|
2168
|
-
|
2169
|
-
# print(f'\ngroup is :\n{data[group]},\ndv is :\n{dv}\n')
|
2170
|
-
norm_array = []
|
2171
|
-
for sub_group in data[group].unique():
|
2172
|
-
norm_curr = check_normality(
|
2173
|
-
data.loc[data[group] == sub_group, dv])
|
2174
|
-
norm_array.append(norm_curr)
|
2175
|
-
norm_all = True if all(norm_array) else False
|
2176
|
-
|
2177
|
-
# Homogeneity of Variances:
|
2178
|
-
# Check for homogeneity of variances (homoscedasticity) among groups.
|
2179
|
-
# Levene's test or Bartlett's test can be used for this purpose.
|
2180
|
-
# If variances are significantly different, consider transformations or use a
|
2181
|
-
# robust ANOVA method.
|
2182
|
-
|
2183
|
-
# # =============================================================================
|
2184
|
-
# # # method1: stats.levene
|
2185
|
-
# # =============================================================================
|
2186
|
-
# # data_array = []
|
2187
|
-
# # for sub_group in df["group"].unique():
|
2188
|
-
# # data_array.append(df.loc[df['group'] == sub_group, 'values'].values)
|
2189
|
-
# # print(data_array)
|
2190
|
-
# # variance_all = stats.levene(data_array[0],data_array[1],data_array[2])
|
2191
|
-
|
2192
|
-
# =============================================================================
|
2193
|
-
# # method2: pingouin.homoscedasticity
|
2194
|
-
# =============================================================================
|
2195
|
-
res_levene = None
|
2196
|
-
variance_all = pg.homoscedasticity(
|
2197
|
-
data, dv=dv, group=group, method='levene', alpha=0.05)
|
2198
|
-
res_levene = True if variance_all.iloc[0,1] > 0.05 else False
|
2199
|
-
# =============================================================================
|
2200
|
-
# # ANOVA Assumptions:
|
2201
|
-
# # Ensure that the assumptions of independence, homogeneity of variances, and
|
2202
|
-
# # normality are reasonably met before proceeding.
|
2203
|
-
# =============================================================================
|
2204
|
-
notes_norm = 'normally' if norm_all else 'NOT-normally'
|
2205
|
-
notes_variance = 'equal' if res_levene else 'unequal'
|
2206
|
-
print(f'Data is {notes_norm} distributed, shows {notes_variance} variance')
|
2207
|
-
|
2208
|
-
cfg_pmc = corr_pmc(pmc)
|
2209
|
-
cfg_pair = corr_pair(pair)
|
2210
|
-
output = {}
|
2211
|
-
if (cfg_pmc == 'parametric') or (cfg_pmc == 'auto'):
|
2212
|
-
if 'np' in cfg_pair: # 'unpaired'
|
2213
|
-
if cfg_pmc == 'auto':
|
2214
|
-
if norm_all:
|
2215
|
-
if res_levene:
|
2216
|
-
res_tab = run_anova(data, dv, factor, ss_type=ss_type,
|
2217
|
-
detailed=True, effsize='np2')
|
2218
|
-
notes_stat = f'{data[factor].nunique()} Way ANOVA'
|
2219
|
-
notes_APA = extract_apa(res_tab)
|
2220
|
-
|
2221
|
-
else:
|
2222
|
-
res_tab = run_welchanova(data, dv, factor)
|
2223
|
-
notes_stat = f'{data[factor].nunique()} Way Welch ANOVA'
|
2224
|
-
notes_APA = extract_apa(res_tab)
|
2225
|
-
|
2226
|
-
else:
|
2227
|
-
|
2228
|
-
res_tab = run_kruskal(data, dv, factor)
|
2229
|
-
notes_stat = f'Non-parametric Kruskal: {data[factor].nunique()} Way ANOVA'
|
2230
|
-
notes_APA = extract_apa(res_tab)
|
2231
|
-
|
2232
|
-
elif cfg_pmc == 'parametric':
|
2233
|
-
res_tab = run_anova(data, dv, factor, ss_type=ss_type,
|
2234
|
-
detailed=True, effsize='np2')
|
2235
|
-
notes_stat = f'{data[factor].nunique()} Way ANOVA'
|
2236
|
-
notes_APA = extract_apa(res_tab)
|
2237
|
-
|
2238
|
-
elif 'pa' in cfg_pair and 'np' not in cfg_pair: # 'paired'
|
2239
|
-
res_tab = run_rmanova(data, dv, factor, subject, correction='auto',
|
2240
|
-
detailed=True, effsize='ng2')
|
2241
|
-
notes_stat = f'{data[factor].nunique()} Way Repeated measures ANOVA'
|
2242
|
-
notes_APA = extract_apa(res_tab)
|
2243
|
-
|
2244
|
-
elif 'mix' in cfg_pair or 'both' in cfg_pair:
|
2245
|
-
res_tab = run_mixedanova(data, dv, between, within, subject)
|
2246
|
-
# notes_stat = f'{len(sum(len(between)+sum(len(within))))} Way Mixed ANOVA'
|
2247
|
-
notes_stat = ""
|
2248
|
-
# n_inter = res_tab.loc(res_tab["Source"] == "Interaction")
|
2249
|
-
# print(n_inter)
|
2250
|
-
notes_APA = extract_apa(res_tab)
|
2251
|
-
|
2252
|
-
elif cfg_pmc == 'non-parametric':
|
2253
|
-
if 'np' in cfg_pair: # 'unpaired'
|
2254
|
-
res_tab = run_kruskal(data, dv, factor)
|
2255
|
-
notes_stat = f'Non-parametric Kruskal: {data[factor].nunique()} Way ANOVA'
|
2256
|
-
notes_APA = f'H({res_tab.ddof1[0]},n={data.shape[0]})={round(res_tab.H[0], 5)},p={round(res_tab["p-unc"][0], 5)}'
|
2257
|
-
|
2258
|
-
elif 'pa' in cfg_pair and 'np' not in cfg_pair: # 'paired'
|
2259
|
-
res_tab = run_friedman(data, dv, factor, subject, method='chisq')
|
2260
|
-
notes_stat = f'Non-parametric {data[factor].nunique()} Way Friedman repeated measures ANOVA'
|
2261
|
-
notes_APA = f'X^2({res_tab.ddof1[0]})={round(res_tab.Q[0], 5)},p={round(res_tab["p-unc"][0], 5)}'
|
2262
|
-
|
2263
|
-
# =============================================================================
|
2264
|
-
# # Post-hoc
|
2265
|
-
# Post-Hoc Tests (if significant):
|
2266
|
-
# If ANOVA indicates significant differences, perform post-hoc tests (e.g.,
|
2267
|
-
# Tukey's HSD, Bonferroni, or Scheffé) to identify which groups differ from each other.
|
2268
|
-
# # https://pingouin-stats.org/build/html/generated/pingouin.pairwise_tests.html
|
2269
|
-
# =============================================================================
|
2270
|
-
go_pmc = True if cfg_pmc == 'parametric' else False
|
2271
|
-
go_subject = subject if ('pa' in cfg_pair) and (
|
2272
|
-
'np' not in cfg_pair) else None
|
2273
|
-
go_mix_between = between if ('mix' in cfg_pair) or (
|
2274
|
-
'both' in cfg_pair) else None
|
2275
|
-
go_mix_between = None if ('pa' in cfg_pair) or (
|
2276
|
-
'np' not in cfg_pair) else factor
|
2277
|
-
go_mix_within = within if ('mix' in cfg_pair) or (
|
2278
|
-
'both' in cfg_pair) else None
|
2279
|
-
go_mix_within = factor if ('pa' in cfg_pair) or (
|
2280
|
-
'np' not in cfg_pair) else None
|
2281
|
-
if res_tab['p-unc'][0] <= .05:
|
2282
|
-
# Pairwise Comparisons
|
2283
|
-
method_post_hoc = [
|
2284
|
-
"bonf", # 'bonferroni', # : one-step correction
|
2285
|
-
"sidak", # one-step correction
|
2286
|
-
"holm", # step-down method using Bonferroni adjustments
|
2287
|
-
"fdr_bh", # Benjamini/Hochberg (non-negative)
|
2288
|
-
"fdr_by", # Benjamini/Yekutieli (negative)
|
2289
|
-
]
|
2290
|
-
res_posthoc = pd.DataFrame()
|
2291
|
-
for met in method_post_hoc:
|
2292
|
-
post_curr = pg.pairwise_tests(data=data, dv=dv, between=go_mix_between, within=go_mix_within, subject=go_subject, parametric=go_pmc, marginal=True, alpha=0.05, alternative='two-sided',
|
2293
|
-
padjust=met)
|
2294
|
-
res_posthoc = pd.concat([res_posthoc, post_curr],
|
2295
|
-
ignore_index=True)
|
2296
|
-
else:
|
2297
|
-
res_posthoc = None
|
2298
|
-
output['res_posthoc'] = res_posthoc
|
2299
|
-
# =============================================================================
|
2300
|
-
# # filling output
|
2301
|
-
# =============================================================================
|
2302
|
-
|
2303
|
-
pd.set_option('display.max_columns', None)
|
2304
|
-
output['stat'] = notes_stat
|
2305
|
-
# print(output['APA'])
|
2306
|
-
output['APA'] = notes_APA
|
2307
|
-
output['pval'] = res_tab['p-unc']
|
2308
|
-
output['res_tab'] = res_tab
|
2309
|
-
if res_tab.shape[0] == len(notes_APA):
|
2310
|
-
output['res_tab']['APA'] = output['APA'] # note APA in the table
|
2311
|
-
# print(output['stat'])
|
2312
|
-
# print(output['res_tab'])
|
2313
|
-
return output
|
2314
|
-
|
2315
|
-
|
2316
|
-
# =============================================================================
|
2317
|
-
# # One-way ANOVA
|
2318
|
-
# =============================================================================
|
2319
|
-
# url = "http://stats191.stanford.edu/data/rehab.csv"
|
2320
|
-
# rehab_table = pd.read_table(url, delimiter=",")
|
2321
|
-
# rehab_table.to_csv("rehab.table")
|
2322
|
-
# fig, ax = plt.subplots(figsize=(8, 6))
|
2323
|
-
# fig = rehab_table.boxplot("Time", "Fitness", ax=ax, grid=False)
|
2324
|
-
# # fig, ax = plt.subplots(figsize=(8, 6))
|
2325
|
-
# # set_pub()
|
2326
|
-
# # sns.boxenplot(x="Time",y="Fitness",data = rehab_table)
|
2327
|
-
|
2328
|
-
# out2 = FuncMultiCmpt(pmc='pmc', pair='unpair',
|
2329
|
-
# data=rehab_table, dv='Time', factor='Fitness')
|
2330
|
-
# # print(out2['res_tab'])
|
2331
|
-
# # print(out2['APA'])
|
2332
|
-
# out2['res_posthoc']
|
2333
|
-
# out2['res_posthoc']['p-unc'][0]
|
2334
|
-
# out2['res_posthoc']['p-adjust'][0]
|
2335
|
-
# out2['res_posthoc']['p-corr'][0]
|
2336
|
-
|
2337
|
-
|
2338
|
-
# =============================================================================
|
2339
|
-
# # Interactions and ANOVA
|
2340
|
-
# https://www.statsmodels.org/dev/examples/notebooks/generated/interactions_anova.html
|
2341
|
-
# url = "http://stats191.stanford.edu/data/salary.table"
|
2342
|
-
# fh = urlopen(url)
|
2343
|
-
# df = pd.read_table(fh)
|
2344
|
-
# out1 = FuncMultiCmpt(pmc='pmc', pair='unpaired', data=df,
|
2345
|
-
# dv='S', factor=['X', 'E', 'M'], group='M')
|
2346
|
-
# # # two-way anova
|
2347
|
-
# # https://www.statology.org/two-way-anova-python/
|
2348
|
-
# # =============================================================================
|
2349
|
-
# # df = pd.DataFrame({'water': np.repeat(['daily', 'weekly'], 15),
|
2350
|
-
# # 'sun': np.tile(np.repeat(['low', 'med', 'high'], 5), 2),
|
2351
|
-
# # 'height': [6, 6, 6, 5, 6, 5, 5, 6, 4, 5,
|
2352
|
-
# # 6, 6, 7, 8, 7, 3, 4, 4, 4, 5,
|
2353
|
-
# # 4, 4, 4, 4, 4, 5, 6, 6, 7, 8]})
|
2354
|
-
# # out1 = FuncMultiCmpt(pmc='pmc', pair='unpaired', data=df,
|
2355
|
-
# # dv='height', factor=['water','sun'],group='water')
|
2356
|
-
|
2357
|
-
|
2358
|
-
# =============================================================================
|
2359
|
-
# # two way anova
|
2360
|
-
# https://www.geeksforgeeks.org/how-to-perform-a-two-way-anova-in-python/
|
2361
|
-
# =============================================================================
|
2362
|
-
# df1=pd.DataFrame({'Fertilizer': np.repeat(['daily', 'weekly'], 15),
|
2363
|
-
# 'Watering': np.repeat(['daily', 'weekly'], 15),
|
2364
|
-
# 'height': [14, 16, 15, 15, 16, 13, 12, 11,
|
2365
|
-
# 14, 15, 16, 16, 17, 18, 14, 13,
|
2366
|
-
# 14, 14, 14, 15, 16, 16, 17, 18,
|
2367
|
-
# 14, 13, 14, 14, 14, 15]})
|
2368
|
-
|
2369
|
-
# df1['subject'] = np.tile(range(0, 15), (1, 2)).T
|
2370
|
-
# out1 = FuncMultiCmpt(pmc='pmc', pair='unpaired', data=df1,
|
2371
|
-
# dv='height', factor=['Fertilizer','Watering'],group='Watering')
|
2372
|
-
# # print(out1['stat'])
|
2373
|
-
# # print(out1['res_tab'])
|
2374
|
-
|
2375
|
-
# =============================================================================
|
2376
|
-
# # welch anova
|
2377
|
-
# https://www.geeksforgeeks.org/how-to-perform-welchs-anova-in-python/
|
2378
|
-
# =============================================================================
|
2379
|
-
# df = pd.DataFrame({'score': [64, 66, 68, 75, 78, 94, 98, 79, 71, 80,
|
2380
|
-
# 91, 92, 93, 90, 97, 94, 82, 88, 95, 96,
|
2381
|
-
# 79, 78, 88, 94, 92, 85, 83, 85, 82, 81],
|
2382
|
-
# 'group': np.repeat(['strat1', 'strat2', 'strat3'],repeats=10)})
|
2383
|
-
# out1 = FuncMultiCmpt(pmc='auto',pair='unpaired',data=df, dv='score', factor='group', group='group')
|
2384
|
-
# =============================================================================
|
2385
|
-
# # two way anova
|
2386
|
-
# https://www.statology.org/two-way-anova-python/
|
2387
|
-
# =============================================================================
|
2388
|
-
# df = pd.DataFrame({'water': np.repeat(['daily', 'weekly'], 15),
|
2389
|
-
# 'sun': np.tile(np.repeat(['low', 'med', 'high'], 5), 2),
|
2390
|
-
# 'height': [6, 6, 6, 5, 6, 5, 5, 6, 4, 5,
|
2391
|
-
# 6, 6, 7, 8, 7, 3, 4, 4, 4, 5,
|
2392
|
-
# 4, 4, 4, 4, 4, 5, 6, 6, 7, 8]})
|
2393
|
-
# df['subject'] = np.tile(range(0, 15), (1, 2)).T
|
2394
|
-
# out1 = FuncMultiCmpt(pmc='pmc', pair='unpaired', data=df,
|
2395
|
-
# dv='height', factor=['water', 'sun'], subject='subject', group='water')
|
2396
|
-
# # print(out1['stat'])
|
2397
|
-
# # print(out1['res_tab'])
|
2398
|
-
|
2399
|
-
# =============================================================================
|
2400
|
-
# # 3-way ANOVA
|
2401
|
-
# =============================================================================
|
2402
|
-
# df = pd.DataFrame({'program': np.repeat([1, 2], 20),
|
2403
|
-
# 'gender': np.tile(np.repeat(['M', 'F'], 10), 2),
|
2404
|
-
# 'division': np.tile(np.repeat([1, 2], 5), 4),
|
2405
|
-
# 'height': [7, 7, 8, 8, 7, 6, 6, 5, 6, 5,
|
2406
|
-
# 5, 5, 4, 5, 4, 3, 3, 4, 3, 3,
|
2407
|
-
# 6, 6, 5, 4, 5, 4, 5, 4, 4, 3,
|
2408
|
-
# 2, 2, 1, 4, 4, 2, 1, 1, 2, 1]})
|
2409
|
-
# df['subject'] = np.tile(range(0, 20), (1, 2)).T
|
2410
|
-
# out1 = FuncMultiCmpt(pmc='pmc', pair='unpaired', data=df,
|
2411
|
-
# dv='height', factor=['gender', 'program', 'division'], subject='subject', group='program')
|
2412
|
-
# # print(out1['stat'])
|
2413
|
-
# # print(out1['res_tab'])
|
2414
|
-
|
2415
|
-
# =============================================================================
|
2416
|
-
# # Repeated Measures ANOVA in Python
|
2417
|
-
# =============================================================================
|
2418
|
-
# df = pd.DataFrame({'patient': np.repeat([1, 2, 3, 4, 5], 4),
|
2419
|
-
# 'drug': np.tile([1, 2, 3, 4], 5),
|
2420
|
-
# 'response': [30, 28, 16, 34,
|
2421
|
-
# 14, 18, 10, 22,
|
2422
|
-
# 24, 20, 18, 30,
|
2423
|
-
# 38, 34, 20, 44,
|
2424
|
-
# 26, 28, 14, 30]})
|
2425
|
-
# # df['subject'] = np.tile(range(0, 20), (1, 2)).T
|
2426
|
-
# out1 = FuncMultiCmpt(pmc='pmc', pair='paired', data=df,
|
2427
|
-
# dv='response', factor=['drug'], subject='patient', group='drug')
|
2428
|
-
# print(out1['stat'])
|
2429
|
-
# print(out1['res_tab'])
|
2430
|
-
# print(out1['APA'])
|
2431
|
-
|
2432
|
-
# =============================================================================
|
2433
|
-
# # repeated anova
|
2434
|
-
# https://www.geeksforgeeks.org/how-to-perform-a-repeated-measures-anova-in-python/
|
2435
|
-
# =============================================================================
|
2436
|
-
# df = pd.DataFrame({'Cars': np.repeat([1, 2, 3, 4, 5], 4),
|
2437
|
-
# 'Engine Oil': np.tile([1, 2, 3, 4], 5),
|
2438
|
-
# 'Mileage': [36, 38, 30, 29,
|
2439
|
-
# 34, 38, 30, 29,
|
2440
|
-
# 34, 28, 38, 32,
|
2441
|
-
# 38, 34, 20, 44,
|
2442
|
-
# 26, 28, 34, 50]})
|
2443
|
-
# out1 = FuncMultiCmpt(pmc='pmc', pair='paired', data=df,
|
2444
|
-
# dv='Mileage', factor=['Engine Oil'], subject='Cars', group='Cars')
|
2445
|
-
# =============================================================================
|
2446
|
-
# #two-way repeated anova
|
2447
|
-
# =============================================================================
|
2448
|
-
# df = pd.read_csv(
|
2449
|
-
# "https://reneshbedre.github.io/assets/posts/anova/plants_leaves_two_within.csv")
|
2450
|
-
# df
|
2451
|
-
# # df['subject'] = np.tile(range(0, 20), (1, 2)).T
|
2452
|
-
# out1 = FuncMultiCmpt(pmc='pmc', pair='paired', data=df,
|
2453
|
-
# dv='num_leaves', factor=['year', 'time'], subject='plants', group='year')
|
2454
|
-
# print(out1['stat'])
|
2455
|
-
# print(out1['res_tab'])
|
2456
|
-
# print(out1['APA'])
|
2457
|
-
|
2458
|
-
# =============================================================================
|
2459
|
-
# # repeated anova
|
2460
|
-
# =============================================================================
|
2461
|
-
# df = pd.read_csv('/Users/macjianfeng/Desktop/test.csv')
|
2462
|
-
# df.head()
|
2463
|
-
# df.loc[df['animal'].str.contains('Sleep'), 'experiment'] = 'sleep'
|
2464
|
-
# df.loc[df['animal'].str.contains('Wake'), 'experiment'] = 'wake'
|
2465
|
-
# df.loc[df['variable'].str.contains('hypo'), 'region'] = 'hypo'
|
2466
|
-
# df.loc[df['variable'].str.contains('cort'), 'region'] = 'cort'
|
2467
|
-
# df
|
2468
|
-
# for i in range(4):
|
2469
|
-
# match i:
|
2470
|
-
# case 0:
|
2471
|
-
# prot_name = 'A1'
|
2472
|
-
# case 1:
|
2473
|
-
# prot_name = 'A2'
|
2474
|
-
# case 2:
|
2475
|
-
# prot_name = '845'
|
2476
|
-
# case 3:
|
2477
|
-
# prot_name = '831'
|
2478
|
-
# df_tmp = df[df["variable"].str.contains(prot_name)]
|
2479
|
-
# df_tmp['protein'] = prot_name
|
2480
|
-
# df_tmp = df_tmp.reset_index()
|
2481
|
-
# print(df_tmp)
|
2482
|
-
|
2483
|
-
# out1 = FuncMultiCmpt(pmc='pmc', pair='mix', data=df_tmp,
|
2484
|
-
# dv='value', between='experiment', within='region', subject='animal', group='experiment')
|
2485
|
-
# print(out1['stat'])
|
2486
|
-
# print(out1['res_tab'])
|
2487
|
-
# # =============================================================================
|
2488
|
-
# One-way ANOVA
|
2489
|
-
# df1 = pd.read_csv('/Users/macjianfeng/Desktop/Book2.csv')
|
2490
|
-
# df2 = df1.melt()
|
2491
|
-
# out1 = FuncMultiCmpt(pmc='npmc', pair='unpaired', data=df2,
|
2492
|
-
# dv='libido', factor=['brand x', 'brand y', 'brand z'], subject='participant')
|
2493
|
-
# print(out1['stat'])
|
2494
|
-
# print(out1['res_tab'])
|
2495
|
-
# =============================================================================
|
2496
|
-
|
2497
|
-
|
2498
|
-
# =============================================================================
|
2499
|
-
# # #One-way ANOVA new example: https://www.pythonfordatascience.org/anova-python/
|
2500
|
-
# =============================================================================
|
2501
|
-
# df1 = pd.read_csv(
|
2502
|
-
# "https://raw.githubusercontent.com/researchpy/Data-sets/master/difficile.csv")
|
2503
|
-
# df1.drop('person', axis=1, inplace=True)
|
2504
|
-
# # Recoding value from numeric to string
|
2505
|
-
# df1['dose'].replace({1: 'placebo', 2: 'low', 3: 'high'}, inplace=True)
|
2506
|
-
# df1.head(10)
|
2507
|
-
|
2508
|
-
# out3= FuncMultiCmpt(pmc='pmc', data=df1, dv='libido', factor='dose')
|
2509
|
-
# # print(out3['res_tab'])
|
2510
|
-
# # # print(out3['res_posthoc'])
|
2511
|
-
# # print(out3['APA'])
|
2512
|
-
|
2513
|
-
# =============================================================================
|
2514
|
-
# https://lifewithdata.com/2023/06/08/how-to-perform-a-two-way-anova-in-python/
|
2515
|
-
# =============================================================================
|
2516
|
-
# data = {
|
2517
|
-
# 'Diet': ['A', 'A', 'A', 'A', 'B', 'B', 'B', 'B', 'C', 'C', 'C', 'C'],
|
2518
|
-
# 'Workout': ['Low', 'Medium', 'High', 'Low', 'Medium', 'High', 'Low', 'Medium', 'High', 'Low', 'Medium', 'High'],
|
2519
|
-
# 'WeightLoss': [3, 4, 5, 3.2, 5, 6, 5.2, 6, 5.5, 4, 5.5, 6.2]
|
2520
|
-
# }
|
2521
|
-
# df = pd.DataFrame(data)
|
2522
|
-
# out4= FuncMultiCmpt(pmc='pmc', pair='unpaired',data=df, dv='WeightLoss', factor=['Diet','Workout'],group='Diet')
|
2523
|
-
|
2524
|
-
# =============================================================================
|
2525
|
-
# # convert a list to a string
|
2526
|
-
# =============================================================================
|
2527
1569
|
def list2str(x_str):
    """Concatenate the ``str()`` form of every item in ``x_str``, with no separator."""
    return ''.join(map(str, x_str))
|