py2ls 0.1.9.1__py3-none-any.whl → 0.1.9.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
py2ls/stats.py CHANGED
@@ -2,251 +2,362 @@ from scipy.ndimage import convolve1d
2
2
  from scipy.signal import savgol_filter
3
3
  import pingouin as pg
4
4
  from scipy import stats
5
-
6
5
  import numpy as np
7
6
  import pandas as pd
8
7
  import matplotlib.pyplot as plt
8
+ import warnings
9
9
 
10
+ warnings.filterwarnings("ignore", category=RuntimeWarning)
10
11
 
11
- # ==============FuncStars(ax,x1=1,x2=2, yscale=0.9, pval=0.01)====================================================
12
- # Usage:
13
- # FuncStars(ax, x1=2, x2=3, yscale=0.99, pval=0.02)
14
- # =============================================================================
15
12
 
16
13
  # FuncStars --v 0.1.1
17
- def FuncStars(ax,
18
- pval=None,
19
- Ylim=None,
20
- Xlim=None,
21
- symbol='*',
22
- yscale=0.95,
23
- x1=0,
24
- x2=1,
25
- alpha=0.05,
26
- fontsize=14,
27
- fontsize_note=6,
28
- rotation=0,
29
- fontname='Arial',
30
- values_below=None,
31
- linego=True,
32
- linestyle='-',
33
- linecolor='k',
34
- linewidth=.8,
35
- nsshow='off',
36
- symbolcolor='k',
37
- tailindicator=[0.06, 0.06],
38
- report=None,
39
- report_scale=-0.1,
40
- report_loc=None):
14
+ def FuncStars(
15
+ ax,
16
+ pval=None,
17
+ ylim=None,
18
+ xlim=None,
19
+ symbol="*",
20
+ yscale=0.95,
21
+ y_loc=None,
22
+ x1=0,
23
+ x2=1,
24
+ alpha=0.05,
25
+ fontsize=14,
26
+ fontsize_note=12,
27
+ rotation=0,
28
+ fontname="Arial",
29
+ values_below=None,
30
+ linego=True,
31
+ linestyle="-",
32
+ linecolor="k",
33
+ linewidth=0.8,
34
+ nsshow="off",
35
+ symbolcolor="k",
36
+ tailindicator=[0.06, 0.06],
37
+ report=None,
38
+ report_scale=-0.1,
39
+ report_loc=None,
40
+ ):
41
41
  if ax is None:
42
42
  ax = plt.gca()
43
- if Ylim is None:
44
- Ylim = plt.gca().get_ylim()
45
- if Xlim is None:
46
- Xlim = ax.get_xlim()
43
+ if ylim is None:
44
+ ylim = ax.get_ylim()
45
+ if xlim is None:
46
+ xlim = ax.get_xlim()
47
47
  if report_loc is None and report is not None:
48
- report_loc = np.min(Ylim) + report_scale*np.abs(np.diff(Ylim))
48
+ report_loc = np.min(ylim) + report_scale * np.abs(np.diff(ylim))
49
49
  if report_scale > 0:
50
50
  report_scale = -np.abs(report_scale)
51
51
  yscale = np.float64(yscale)
52
- y_loc = np.min(Ylim) + yscale*(np.max(Ylim)-np.min(Ylim))
52
+ if y_loc is None:
53
+ y_loc = np.min(ylim) + yscale * (np.max(ylim) - np.min(ylim))
53
54
  xcenter = np.mean([x1, x2])
54
55
  # ns / *
55
56
  if alpha < pval:
56
- if nsshow == 'on':
57
- ns_str = f'p={round(pval, 3)}' if pval < 0.9 else 'ns'
58
- color = 'm' if pval < 0.1 else 'k'
59
- plt.text(xcenter, y_loc, ns_str,
60
- ha='center', va='bottom', # 'center_baseline',
61
- fontsize=fontsize-6 if fontsize > 6 else fontsize,
62
- fontname=fontname, color=color, rotation=rotation
63
- # bbox=dict(facecolor=None, edgecolor=None, color=None, linewidth=None)
64
- )
57
+ if nsshow == "on":
58
+ ns_str = f"p={round(pval, 3)}" if pval < 0.9 else "ns"
59
+ color = "m" if pval < 0.1 else "k"
60
+ plt.text(
61
+ xcenter,
62
+ y_loc,
63
+ ns_str,
64
+ ha="center",
65
+ va="bottom", # 'center_baseline',
66
+ fontsize=fontsize - 6 if fontsize > 6 else fontsize,
67
+ fontname=fontname,
68
+ color=color,
69
+ rotation=rotation,
70
+ # bbox=dict(facecolor=None, edgecolor=None, color=None, linewidth=None)
71
+ )
65
72
  elif 0.01 < pval <= alpha:
66
- plt.text(xcenter, y_loc, symbol,
67
- ha='center', va='center_baseline',
68
- fontsize=fontsize, fontname=fontname, color=symbolcolor)
73
+ plt.text(
74
+ xcenter,
75
+ y_loc,
76
+ symbol,
77
+ ha="center",
78
+ va="center_baseline",
79
+ fontsize=fontsize,
80
+ fontname=fontname,
81
+ color=symbolcolor,
82
+ )
69
83
  elif 0.001 < pval <= 0.01:
70
- plt.text(xcenter, y_loc, symbol * 2,
71
- ha='center', va='center_baseline',
72
- fontsize=fontsize, fontname=fontname, color=symbolcolor)
84
+ plt.text(
85
+ xcenter,
86
+ y_loc,
87
+ symbol * 2,
88
+ ha="center",
89
+ va="center_baseline",
90
+ fontsize=fontsize,
91
+ fontname=fontname,
92
+ color=symbolcolor,
93
+ )
73
94
  elif 0 < pval <= 0.001:
74
- plt.text(xcenter, y_loc, symbol * 3,
75
- ha='center', va='center_baseline',
76
- fontsize=fontsize, fontname=fontname, color=symbolcolor)
95
+ plt.text(
96
+ xcenter,
97
+ y_loc,
98
+ symbol * 3,
99
+ ha="center",
100
+ va="center_baseline",
101
+ fontsize=fontsize,
102
+ fontname=fontname,
103
+ color=symbolcolor,
104
+ )
77
105
  # lines indicators
78
- if linego: # and 0 < pval <= 0.05:
79
- print(pval)
80
- print(linego)
106
+ if linego and 0 < pval <= 0.05:
81
107
  # horizontal line
82
- if yscale < 0.99:
83
- plt.plot([x1 + np.abs(np.diff(Xlim)) * 0.01,
84
- x2 - np.abs(np.diff(Xlim)) * 0.01],
85
- [y_loc - np.abs(np.diff(Ylim)) * .03,
86
- y_loc - np.abs(np.diff(Ylim)) * .03],
87
- linestyle=linestyle, color=linecolor, linewidth=linewidth)
108
+ if yscale <= 0.99:
109
+ plt.plot(
110
+ [x1 + np.abs(np.diff(xlim)) * 0.01, x2 - np.abs(np.diff(xlim)) * 0.01],
111
+ [
112
+ y_loc - np.abs(np.diff(ylim)) * 0.03,
113
+ y_loc - np.abs(np.diff(ylim)) * 0.03,
114
+ ],
115
+ linestyle=linestyle,
116
+ color=linecolor,
117
+ linewidth=linewidth,
118
+ )
88
119
  # vertical line
89
- plt.plot([x1 + np.abs(np.diff(Xlim)) * 0.01,
90
- x1 + np.abs(np.diff(Xlim)) * 0.01],
91
- [y_loc - np.abs(np.diff(Ylim)) * tailindicator[0],
92
- y_loc - np.abs(np.diff(Ylim)) * .03],
93
- linestyle=linestyle, color=linecolor, linewidth=linewidth)
94
- plt.plot([x2 - np.abs(np.diff(Xlim)) * 0.01,
95
- x2 - np.abs(np.diff(Xlim)) * 0.01],
96
- [y_loc - np.abs(np.diff(Ylim)) * tailindicator[1],
97
- y_loc - np.abs(np.diff(Ylim)) * .03],
98
- linestyle=linestyle, color=linecolor, linewidth=linewidth)
120
+ plt.plot(
121
+ [x1 + np.abs(np.diff(xlim)) * 0.01, x1 + np.abs(np.diff(xlim)) * 0.01],
122
+ [
123
+ y_loc - np.abs(np.diff(ylim)) * tailindicator[0],
124
+ y_loc - np.abs(np.diff(ylim)) * 0.03,
125
+ ],
126
+ linestyle=linestyle,
127
+ color=linecolor,
128
+ linewidth=linewidth,
129
+ )
130
+ plt.plot(
131
+ [x2 - np.abs(np.diff(xlim)) * 0.01, x2 - np.abs(np.diff(xlim)) * 0.01],
132
+ [
133
+ y_loc - np.abs(np.diff(ylim)) * tailindicator[1],
134
+ y_loc - np.abs(np.diff(ylim)) * 0.03,
135
+ ],
136
+ linestyle=linestyle,
137
+ color=linecolor,
138
+ linewidth=linewidth,
139
+ )
99
140
  else:
100
- plt.plot([x1 + np.abs(np.diff(Xlim)) * 0.01,
101
- x2 - np.abs(np.diff(Xlim)) * 0.01],
102
- [np.min(Ylim) + 0.95*(np.max(Ylim)-np.min(Ylim)) - np.abs(np.diff(Ylim)) * 0.002,
103
- np.min(Ylim) + 0.95*(np.max(Ylim)-np.min(Ylim)) - np.abs(np.diff(Ylim)) * 0.002],
104
- linestyle=linestyle, color=linecolor, linewidth=linewidth)
141
+ plt.plot(
142
+ [x1 + np.abs(np.diff(xlim)) * 0.01, x2 - np.abs(np.diff(xlim)) * 0.01],
143
+ [
144
+ np.min(ylim)
145
+ + 0.95 * (np.max(ylim) - np.min(ylim))
146
+ - np.abs(np.diff(ylim)) * 0.002,
147
+ np.min(ylim)
148
+ + 0.95 * (np.max(ylim) - np.min(ylim))
149
+ - np.abs(np.diff(ylim)) * 0.002,
150
+ ],
151
+ linestyle=linestyle,
152
+ color=linecolor,
153
+ linewidth=linewidth,
154
+ )
105
155
  # vertical line
106
- plt.plot([x1 + np.abs(np.diff(Xlim)) * 0.01,
107
- x1 + np.abs(np.diff(Xlim)) * 0.01],
108
- [np.min(Ylim) + 0.95*(np.max(Ylim)-np.min(Ylim)) - np.abs(np.diff(Ylim)) * tailindicator[0],
109
- np.min(Ylim) + 0.95*(np.max(Ylim)-np.min(Ylim)) - np.abs(np.diff(Ylim)) * 0.002],
110
- linestyle=linestyle, color=linecolor, linewidth=linewidth)
111
- plt.plot([x2 - np.abs(np.diff(Xlim)) * 0.01,
112
- x2 - np.abs(np.diff(Xlim)) * 0.01],
113
- [np.min(Ylim) + 0.95*(np.max(Ylim)-np.min(Ylim)) - np.abs(np.diff(Ylim)) * tailindicator[1],
114
- np.min(Ylim) + 0.95*(np.max(Ylim)-np.min(Ylim)) - np.abs(np.diff(Ylim)) * 0.002],
115
- linestyle=linestyle, color=linecolor, linewidth=linewidth)
156
+ plt.plot(
157
+ [x1 + np.abs(np.diff(xlim)) * 0.01, x1 + np.abs(np.diff(xlim)) * 0.01],
158
+ [
159
+ np.min(ylim)
160
+ + 0.95 * (np.max(ylim) - np.min(ylim))
161
+ - np.abs(np.diff(ylim)) * tailindicator[0],
162
+ np.min(ylim)
163
+ + 0.95 * (np.max(ylim) - np.min(ylim))
164
+ - np.abs(np.diff(ylim)) * 0.002,
165
+ ],
166
+ linestyle=linestyle,
167
+ color=linecolor,
168
+ linewidth=linewidth,
169
+ )
170
+ plt.plot(
171
+ [x2 - np.abs(np.diff(xlim)) * 0.01, x2 - np.abs(np.diff(xlim)) * 0.01],
172
+ [
173
+ np.min(ylim)
174
+ + 0.95 * (np.max(ylim) - np.min(ylim))
175
+ - np.abs(np.diff(ylim)) * tailindicator[1],
176
+ np.min(ylim)
177
+ + 0.95 * (np.max(ylim) - np.min(ylim))
178
+ - np.abs(np.diff(ylim)) * 0.002,
179
+ ],
180
+ linestyle=linestyle,
181
+ color=linecolor,
182
+ linewidth=linewidth,
183
+ )
116
184
  if values_below is not None:
117
- plt.text(xcenter, y_loc * (-0.1), values_below,
118
- ha='center', va='bottom', # 'center_baseline', rotation=rotation,
119
- fontsize=fontsize_note, fontname=fontname, color='k')
185
+ plt.text(
186
+ xcenter,
187
+ y_loc * (-0.1),
188
+ values_below,
189
+ ha="center",
190
+ va="bottom", # 'center_baseline', rotation=rotation,
191
+ fontsize=fontsize_note,
192
+ fontname=fontname,
193
+ color="k",
194
+ )
120
195
  # report / comments
121
196
  if report is not None:
122
- plt.text(xcenter, report_loc, report,
123
- ha='left', va='bottom', # 'center_baseline', rotation=rotation,
124
- fontsize=fontsize_note, fontname=fontname, color='.7')
125
-
126
-
127
-
128
-
129
- def FuncCmpt(X1, X2, pmc='auto', pair='unpaired'):
197
+ plt.text(
198
+ xcenter,
199
+ report_loc,
200
+ report,
201
+ ha="left",
202
+ va="bottom", # 'center_baseline', rotation=rotation,
203
+ fontsize=fontsize_note,
204
+ fontname=fontname,
205
+ color=".7",
206
+ )
207
+
208
+
209
+ def FuncCmpt(x1, x2, pmc="auto", pair="unpaired", verbose=True):
130
210
  # output = {}
131
211
 
132
212
  # pmc correction: 'parametric'/'non-parametric'/'auto'
133
213
  # meanwhile get the opposite setting (to compare the results)
134
- def corr_pmc(pmc):
135
- cfg_pmc = None
136
- if pmc.lower() in {'pmc', 'parametric'} and pmc.lower() not in {'npmc', 'nonparametric', 'non-parametric'}:
137
- cfg_pmc = 'parametric'
138
- elif pmc.lower() in {'npmc', 'nonparametric', 'non-parametric'} and pmc.lower() not in {'pmc', 'parametric'}:
139
- cfg_pmc = 'non-parametric'
140
- else:
141
- cfg_pmc = 'auto'
142
- return cfg_pmc
143
-
144
- def corr_pair(pair):
145
- cfg_pair = None
146
- if 'pa' in pair.lower() and 'np' not in pair.lower():
147
- cfg_pair = 'paired'
148
- elif 'np' in pair.lower():
149
- cfg_pair = 'unpaired'
150
- return cfg_pair
151
-
152
- def check_normality(data):
153
- stat_shapiro, pval_shapiro = stats.shapiro(data)
154
- if pval_shapiro > 0.05:
155
- Normality = True
156
- else:
157
- Normality = False
158
- print(f'\n normally distributed\n') if Normality else print(
159
- f'\n NOT normally distributed\n')
160
- return Normality
161
-
162
- def sub_cmpt_2group(X1, X2, cfg_pmc='pmc', pair='unpaired'):
214
+ # def corr_pmc(pmc):
215
+ # cfg_pmc = None
216
+ # if pmc.lower() in {"pmc", "parametric"} and pmc.lower() not in {
217
+ # "npmc",
218
+ # "nonparametric",
219
+ # "non-parametric",
220
+ # }:
221
+ # cfg_pmc = "parametric"
222
+ # elif pmc.lower() in {
223
+ # "npmc",
224
+ # "nonparametric",
225
+ # "non-parametric",
226
+ # } and pmc.lower() not in {"pmc", "parametric"}:
227
+ # cfg_pmc = "non-parametric"
228
+ # else:
229
+ # cfg_pmc = "auto"
230
+ # return cfg_pmc
231
+
232
+ # def corr_pair(pair):
233
+ # cfg_pair = None
234
+ # if "pa" in pair.lower() and "np" not in pair.lower():
235
+ # cfg_pair = "paired"
236
+ # elif "np" in pair.lower():
237
+ # cfg_pair = "unpaired"
238
+ # return cfg_pair
239
+
240
+ # def check_normality(data, verbose=True):
241
+ # stat_shapiro, pval_shapiro = stats.shapiro(data)
242
+ # if pval_shapiro > 0.05:
243
+ # Normality = True
244
+ # else:
245
+ # Normality = False
246
+ # if verbose:
247
+ # (
248
+ # print(f"\n normally distributed\n")
249
+ # if Normality
250
+ # else print(f"\n NOT normally distributed\n")
251
+ # )
252
+ # return Normality
253
+
254
+ def sub_cmpt_2group(x1, x2, cfg_pmc="pmc", pair="unpaired", verbose=True):
163
255
  output = {}
164
- nX1 = np.sum(~np.isnan(X1))
165
- nX2 = np.sum(~np.isnan(X2))
166
- if cfg_pmc == 'parametric' or cfg_pmc == 'auto':
256
+ nX1 = np.sum(~np.isnan(x1))
257
+ nX2 = np.sum(~np.isnan(x2))
258
+ if cfg_pmc == "parametric" or cfg_pmc == "auto":
167
259
  # VarType correction by checking variance Type via "levene"
168
260
  stat_lev, pval_lev = stats.levene(
169
- X1, X2, center='median', proportiontocut=0.05)
261
+ x1, x2, center="median", proportiontocut=0.05
262
+ )
170
263
  VarType = True if pval_lev > 0.05 and nX1 == nX2 else False
171
-
172
- if 'np' in pair: # 'unpaired'
264
+ print(pair)
265
+ if "np" in pair: # 'unpaired'
173
266
  if VarType and Normality:
174
267
  # The independent t-test requires that the dependent variable is approximately normally
175
268
  # distributed within each group
176
269
  # Note: Technically, it is the residuals that need to be normally distributed, but for
177
270
  # an independent t-test, both will give you the same result.
178
- stat_value, pval= stats.ttest_ind(
179
- X1, X2, axis=0, equal_var=True, nan_policy='omit', alternative='two-sided')
180
- notes_stat = 'unpaired t test'
181
- notes_APA = f't({nX1+nX2-2})={round(stat_value, 5)},p={round(pval, 5)}'
271
+ stat_value, pval = stats.ttest_ind(
272
+ x1,
273
+ x2,
274
+ axis=0,
275
+ equal_var=True,
276
+ nan_policy="omit",
277
+ alternative="two-sided",
278
+ )
279
+ notes_stat = "unpaired t test"
280
+ notes_APA = (
281
+ f"t({nX1+nX2-2})={round(stat_value, 5)},p={round(pval, 5)}"
282
+ )
182
283
  else:
183
284
  # If the Levene's Test for Equality of Variances is statistically significant,
184
285
  # which indicates that the group variances are unequal in the population, you
185
286
  # can correct for this violation by not using the pooled estimate for the error
186
287
  # term for the t-statistic, but instead using an adjustment to the degrees of
187
288
  # freedom using the Welch-Satterthwaite method
188
- stat_value, pval= stats.ttest_ind(
189
- X1, X2, axis=0, equal_var=False, nan_policy='omit', alternative='two-sided')
190
- notes_stat = 'Welchs t-test'
289
+ stat_value, pval = stats.ttest_ind(
290
+ x1,
291
+ x2,
292
+ axis=0,
293
+ equal_var=False,
294
+ nan_policy="omit",
295
+ alternative="two-sided",
296
+ )
297
+ notes_stat = "Welchs t-test"
191
298
  # note: APA FORMAT
192
- notes_APA = f't({nX1+nX2-2})={round(stat_value, 5)},p={round(pval, 5)}'
193
- elif 'pa' in pair and 'np' not in pair: # 'paired'
299
+ notes_APA = (
300
+ f"t({nX1+nX2-2})={round(stat_value, 5)},p={round(pval, 5)}"
301
+ )
302
+ elif "pa" in pair and "np" not in pair: # 'paired'
194
303
  # the paired-samples t-test is considered “robust” in handling violations of normality
195
304
  # to some extent. It can still yield valid results even if the data is not normally
196
305
  # distributed. Therefore, this test typically requires only approximately normal data
197
- stat_value, pval= stats.ttest_rel(
198
- X1, X2, axis=0, nan_policy='omit', alternative='two-sided')
199
- notes_stat = 'paired t test'
306
+ stat_value, pval = stats.ttest_rel(
307
+ x1, x2, axis=0, nan_policy="omit", alternative="two-sided"
308
+ )
309
+ notes_stat = "paired t test"
200
310
  # note: APA FORMAT
201
- notes_APA = f't({sum([nX1-1])})={round(stat_value, 5)},p={round(pval, 5)}'
202
- elif cfg_pmc == 'non-parametric':
203
- if 'np' in pair: # Perform Mann-Whitney
311
+ notes_APA = (
312
+ f"t({sum([nX1-1])})={round(stat_value, 5)},p={round(pval, 5)}"
313
+ )
314
+ elif cfg_pmc == "non-parametric":
315
+ if "np" in pair: # Perform Mann-Whitney
204
316
  stat_value, pval = stats.mannwhitneyu(
205
- X1, X2, method='exact', nan_policy='omit')
206
- notes_stat = 'Mann-Whitney U'
317
+ x1, x2, method="exact", nan_policy="omit"
318
+ )
319
+ notes_stat = "Mann-Whitney U"
207
320
  if nX1 == nX2:
208
- notes_APA = f'U(n={nX1})={round(stat_value, 5)},p={round(pval, 5)}'
321
+ notes_APA = f"U(n={nX1})={round(stat_value, 5)},p={round(pval, 5)}"
209
322
  else:
210
- notes_APA = f'U(n1={nX1},n2={nX2})={round(stat_value, 5)},p={round(pval, 5)}'
211
- elif 'pa' in pair and 'np' not in pair: # Wilcoxon signed-rank test
323
+ notes_APA = f"U(n1={nX1},n2={nX2})={round(stat_value, 5)},p={round(pval, 5)}"
324
+ elif "pa" in pair and "np" not in pair: # Wilcoxon signed-rank test
212
325
  stat_value, pval = stats.wilcoxon(
213
- X1, X2, method='exact', nan_policy='omit')
214
- notes_stat = 'Wilcoxon signed-rank'
326
+ x1, x2, method="exact", nan_policy="omit"
327
+ )
328
+ notes_stat = "Wilcoxon signed-rank"
215
329
  if nX1 == nX2:
216
- notes_APA = f'Z(n={nX1})={round(stat_value, 5)},p={round(pval, 5)}'
330
+ notes_APA = f"Z(n={nX1})={round(stat_value, 5)},p={round(pval, 5)}"
217
331
  else:
218
- notes_APA = f'Z(n1={nX1},n2={nX2})={round(stat_value, 5)},p={round(pval, 5)}'
332
+ notes_APA = f"Z(n1={nX1},n2={nX2})={round(stat_value, 5)},p={round(pval, 5)}"
219
333
 
220
334
  # filling output
221
- output['stat'] = stat_value
222
- output['pval'] = pval
223
- output['method'] = notes_stat
224
- output['APA'] = notes_APA
225
-
226
- print(f"{output['method']}\n {notes_APA}\n\n")
335
+ output["stat"] = stat_value
336
+ output["pval"] = pval
337
+ output["method"] = notes_stat
338
+ output["APA"] = notes_APA
339
+ if verbose:
340
+ print(f"{output['method']}\n {notes_APA}\n\n")
227
341
 
228
342
  return output, pval
229
343
 
230
- Normality1 = check_normality(X1)
231
- Normality2 = check_normality(X2)
344
+ Normality1 = check_normality(x1, verbose=verbose)
345
+ Normality2 = check_normality(x2, verbose=verbose)
232
346
  Normality = True if all([Normality1, Normality2]) else False
233
347
 
234
- nX1 = np.sum(~np.isnan(X1))
235
- nX2 = np.sum(~np.isnan(X2))
236
-
237
348
  cfg_pmc = corr_pmc(pmc)
238
349
  cfg_pair = corr_pair(pair)
239
350
 
240
- output, p = sub_cmpt_2group(
241
- X1, X2, cfg_pmc=cfg_pmc, pair=cfg_pair)
351
+ output, p = sub_cmpt_2group(x1, x2, cfg_pmc=cfg_pmc, pair=cfg_pair, verbose=verbose)
242
352
  return p, output
243
353
 
354
+
244
355
  # ======compare 2 group test===================================================
245
356
  # # Example
246
- # X1 = [19, 22, 16, 29, 24]
247
- # X2 = [20, 11, 17, 12, 22]
357
+ # x1 = [19, 22, 16, 29, 24]
358
+ # x2 = [20, 11, 17, 12, 22]
248
359
 
249
- # p, res= FuncCmpt(X1, X2, pmc='pmc', pair='unparrr')
360
+ # p, res= FuncCmpt(x1, x2, pmc='pmc', pair='unparrr')
250
361
 
251
362
  # =============================================================================
252
363
 
@@ -270,192 +381,40 @@ def FuncCmpt(X1, X2, pmc='auto', pair='unpaired'):
270
381
  # # 'friedman', # Non-parametric one-way repeated measures ANOVA
271
382
  # # ]
272
383
  # =============================================================================
273
- def df_wide_long(df):
274
- rows, columns = df.shape
275
- if columns > rows:
276
- return "Wide"
277
- elif rows > columns:
278
- return "Long"
279
384
 
280
- def FuncMultiCmpt(pmc='pmc', pair='unpair', data=None, dv=None, factor=None,
281
- ss_type=2, detailed=True, effsize='np2',
282
- correction='auto', between=None, within=None,
283
- subject=None, group=None
284
- ):
285
-
286
- def corr_pair(pair):
287
- cfg_pair = None
288
- if 'pa' in pair.lower() and 'np' not in pair.lower():
289
- cfg_pair = 'paired'
290
- elif 'np' in pair.lower():
291
- cfg_pair = 'unpaired'
292
- elif 'mix' in pair.lower():
293
- cfg_pair = 'mix'
294
- return cfg_pair
295
-
296
- def check_normality(data):
297
- stat_shapiro, pval_shapiro = stats.shapiro(data)
298
- if pval_shapiro > 0.05:
299
- Normality = True
300
- else:
301
- Normality = False
302
- print(f'\n normally distributed\n') if Normality else print(
303
- f'\n NOT normally distributed\n')
304
- return Normality
305
-
306
- def corr_pmc(pmc):
307
- cfg_pmc = None
308
- if pmc.lower() in {'pmc', 'parametric'} and pmc.lower() not in {'upmc', 'npmc', 'nonparametric', 'non-parametric'}:
309
- cfg_pmc = 'parametric'
310
- elif pmc.lower() in {'upmc', 'npmc', 'nonparametric', 'non-parametric'} and pmc.lower() not in {'pmc', 'parametric'}:
311
- cfg_pmc = 'non-parametric'
312
- else:
313
- cfg_pmc = 'auto'
314
- return cfg_pmc
315
-
316
- def extract_apa(res_tab):
317
- notes_APA = []
318
- if "ddof1" in res_tab:
319
- for irow in range(res_tab.shape[0]):
320
- note_tmp = f'{res_tab.Source[irow]}:F{round(res_tab.ddof1[irow]),round(res_tab.ddof2[irow])}={round(res_tab.F[irow], 5)},p={round(res_tab["p-unc"][irow], 5)}'
321
- notes_APA.append([note_tmp])
322
- elif "DF" in res_tab:
323
- print(res_tab.shape[0])
324
- for irow in range(res_tab.shape[0]-1):
325
- note_tmp = f'{res_tab.Source[irow]}:F{round(res_tab.DF[irow]),round(res_tab.DF[res_tab.shape[0]-1])}={round(res_tab.F[irow], 5)},p={round(res_tab["p-unc"][irow], 5)}'
326
- notes_APA.append([note_tmp])
327
- notes_APA.append(['NaN'])
328
- elif "DF1" in res_tab: # in 'mix' case
329
- for irow in range(res_tab.shape[0]):
330
- note_tmp = f'{res_tab.Source[irow]}:F{round(res_tab.DF1[irow]),round(res_tab.DF2[irow])}={round(res_tab.F[irow], 5)},p={round(res_tab["p-unc"][irow], 5)}'
331
- notes_APA.append([note_tmp])
332
- return notes_APA
333
-
334
- def anovatable(res_tab):
335
- if 'df' in res_tab: # statsmodels
336
- res_tab['mean_sq'] = res_tab[:]['sum_sq']/res_tab[:]['df']
337
- res_tab['est_sq'] = res_tab[:-1]['sum_sq'] / \
338
- sum(res_tab['sum_sq'])
339
- res_tab['omega_sq'] = (res_tab[:-1]['sum_sq']-(res_tab[:-1]['df'] *
340
- res_tab['mean_sq'][-1]))/(sum(res_tab['sum_sq'])+res_tab['mean_sq'][-1])
341
- elif 'DF' in res_tab:
342
- res_tab['MS'] = res_tab[:]['SS']/res_tab[:]['DF']
343
- res_tab['est_sq'] = res_tab[:-1]['SS']/sum(res_tab['SS'])
344
- res_tab['omega_sq'] = (res_tab[:-1]['SS']-(res_tab[:-1]['DF'] *
345
- res_tab['MS'][1]))/(sum(res_tab['SS'])+res_tab['MS'][1])
346
- if 'p-unc' in res_tab:
347
- if 'np2' in res_tab:
348
- res_tab['est_sq'] = res_tab['np2']
349
- if 'p-unc' in res_tab:
350
- res_tab['PR(>F)'] = res_tab['p-unc']
351
- return res_tab
352
-
353
- def run_anova(data, dv, factor, ss_type=2, detailed=True, effsize='np2'):
354
- # perform ANOVA
355
- # =============================================================================
356
- # # # ANOVA (input: formula, dataset)
357
- # =============================================================================
358
- # # note: if the data is balanced (equal sample size for each group), Type 1, 2, and 3 sums of squares
359
- # # (typ parameter) will produce similar results.
360
- # lm = ols("values ~ C(group)", data=df).fit()
361
- # res_tab = anova_lm(lm, typ=ss_type)
362
-
363
- # # however, it does not provide any effect size measures to tell if the
364
- # # statistical significance is meaningful. The function below calculates
365
- # # eta-squared (η²) and omega-squared (ω²). A quick note, η² is the exact same
366
- # # thing as R² except when coming from the ANOVA framework people call it η²;
367
- # # ω² is considered a better measure of effect size since it is unbiased in
368
- # # its calculation by accounting for the degrees of freedom in the model.
369
- # # note: No effect sizes are calculated when using statsmodels.
370
- # # to calculate eta squared, use the sum of squares from the table
371
- # res_tab = anovatable(res_tab)
372
-
373
- # =============================================================================
374
- # # alternative for ANOVA
375
- # =============================================================================
376
- res_tab = pg.anova(dv=dv, between=factor, data=data,
377
- detailed=detailed, ss_type=ss_type, effsize=effsize)
378
- res_tab = anovatable(res_tab)
379
- return res_tab
380
-
381
- def run_rmanova(data, dv, factor, subject, correction='auto', detailed=True, effsize='ng2'):
382
- # One-way repeated-measures ANOVA using a long-format dataset.
383
- res_tab = pg.rm_anova(data=data, dv=dv, within=factor,
384
- subject=subject, detailed=detailed, effsize=effsize)
385
- return res_tab
386
-
387
- def run_welchanova(data, dv, factor):
388
- # When the groups are balanced and have equal variances, the optimal
389
- # post-hoc test is the Tukey-HSD test (pingouin.pairwise_tukey()). If the
390
- # groups have unequal variances, the Games-Howell test is more adequate
391
- # (pingouin.pairwise_gameshowell()). Results have been tested against R.
392
- res_tab = pg.welch_anova(data=data, dv=dv, between=factor)
393
- res_tab = anovatable(res_tab)
394
- return res_tab
395
-
396
- def run_mixedanova(data, dv, between, within, subject, correction='auto', effsize='np2'):
397
- # Notes
398
- # Data are expected to be in long-format (even the repeated measures).
399
- # If your data is in wide-format, you can use the pandas.melt() function
400
- # to convert from wide to long format.
401
-
402
- # Warning
403
- # If the between-subject groups are unbalanced(=unequal sample sizes), a
404
- # type II ANOVA will be computed. Note however that SPSS, JAMOVI and JASP
405
- # by default return a type III ANOVA, which may lead to slightly different
406
- # results.
407
- res_tab = pg.mixed_anova(data=data, dv=dv, within=within, subject=subject,
408
- between=between, correction=correction, effsize=effsize)
409
- res_tab = anovatable(res_tab)
410
- return res_tab
411
-
412
- def run_friedman(data, dv, factor, subject, method='chisq'):
413
- # Friedman test for repeated measurements
414
- # The Friedman test is used for non-parametric (rank-based) one-way
415
- # repeated measures ANOVA
416
-
417
- # check df form ('long' or 'wide')
418
- # df_long = data.melt(ignore_index=False).reset_index()
419
- # if data.describe().shape[1] >= df_long.describe().shape[1]:
420
- # res_tab = pg.friedman(data, method=method)
421
- # else:
422
- # res_tab = pg.friedman(data=df_long, dv='value',
423
- # within="variable", subject="index", method=method)
424
- if "Wide" in df_wide_long(data):
425
- df_long = data.melt(ignore_index=False).reset_index()
426
- res_tab = pg.friedman(data=df_long, dv='value',
427
- within="variable", subject="index", method=method)
428
- else:
429
- res_tab = pg.friedman(data, dv=dv, within=factor, subject=subject,method=method)
430
- res_tab = anovatable(res_tab)
431
- return res_tab
432
-
433
- def run_kruskal(data, dv, factor):
434
- # Kruskal-Wallis H-test for independent samples
435
- res_tab = pg.kruskal(data=data, dv=dv, between=factor)
436
- res_tab = anovatable(res_tab)
437
- return res_tab
438
-
439
- # Normality Check:
440
- # Conduct normality tests (Shapiro-Wilk) for each group.
441
- # If the data is approximately normally distributed, ANOVA is robust to
442
- # moderate departures from normality, especially with larger sample sizes.
443
-
444
- # print(data[factor])
445
- # print(type(data[factor]))
446
- # print(len(data[factor].columns))
447
- # print(data[factor].nunique())
448
- # print(data[factor[0]])
449
- # print(data[factor[0]].unique())
385
+
386
+ def str_mean_sem(data: list, delimit=5):
387
+ mean_ = np.nanmean(data)
388
+ sem_ = np.nanstd(data, ddof=1) / np.sqrt(sum(~np.isnan(data)))
389
+ return str(round(mean_, delimit)) + "±" + str(round(sem_, delimit))
390
+
391
+
392
+ def FuncMultiCmpt(
393
+ pmc="pmc",
394
+ pair="unpair",
395
+ data=None,
396
+ dv=None,
397
+ factor=None,
398
+ ss_type=2,
399
+ detailed=True,
400
+ effsize="np2",
401
+ correction="auto",
402
+ between=None,
403
+ within=None,
404
+ subject=None,
405
+ group=None,
406
+ verbose=True,
407
+ ):
450
408
  if group is None:
451
409
  group = factor
452
410
 
453
- # print(f'\ngroup is :\n{data[group]},\ndv is :\n{dv}\n')
454
411
  norm_array = []
455
- for sub_group in data[group].unique():
456
- norm_curr = check_normality(
457
- data.loc[data[group] == sub_group, dv])
458
- norm_array.append(norm_curr)
412
+ if len(group) > 1:
413
+ pass
414
+ else:
415
+ for sub_group in data[group].unique():
416
+ norm_curr = check_normality(data.loc[data[group] == sub_group, dv])
417
+ norm_array.append(norm_curr)
459
418
  norm_all = True if all(norm_array) else False
460
419
 
461
420
  # Homogeneity of Variances:
@@ -477,55 +436,74 @@ def FuncMultiCmpt(pmc='pmc', pair='unpair', data=None, dv=None, factor=None,
477
436
  # # method2: pingouin.homoscedasticity
478
437
  # =============================================================================
479
438
  res_levene = None
480
- variance_all = pg.homoscedasticity(
481
- data, dv=dv, group=group, method='levene', alpha=0.05)
482
- res_levene = True if variance_all.iloc[0,1] > 0.05 else False
439
+ if len(group) > 1:
440
+ pass
441
+ else:
442
+ variance_all = pg.homoscedasticity(
443
+ data, dv=dv, group=group, method="levene", alpha=0.05
444
+ )
445
+ res_levene = True if variance_all.iloc[0, 1] > 0.05 else False
483
446
  # =============================================================================
484
447
  # # ANOVA Assumptions:
485
448
  # # Ensure that the assumptions of independence, homogeneity of variances, and
486
449
  # # normality are reasonably met before proceeding.
487
450
  # =============================================================================
488
- notes_norm = 'normally' if norm_all else 'NOT-normally'
489
- notes_variance = 'equal' if res_levene else 'unequal'
490
- print(f'Data is {notes_norm} distributed, shows {notes_variance} variance')
451
+ notes_norm = "normally" if norm_all else "NOT-normally"
452
+ notes_variance = "equal" if res_levene else "unequal"
453
+ print(f"Data is {notes_norm} distributed, shows {notes_variance} variance")
491
454
 
492
455
  cfg_pmc = corr_pmc(pmc)
493
456
  cfg_pair = corr_pair(pair)
494
457
  output = {}
495
- if (cfg_pmc == 'parametric') or (cfg_pmc == 'auto'):
496
- if 'np' in cfg_pair: # 'unpaired'
497
- if cfg_pmc == 'auto':
458
+ if (cfg_pmc == "parametric") or (cfg_pmc == "auto"):
459
+ if "np" in cfg_pair: # 'unpaired'
460
+ if cfg_pmc == "auto":
498
461
  if norm_all:
499
462
  if res_levene:
500
- res_tab = run_anova(data, dv, factor, ss_type=ss_type,
501
- detailed=True, effsize='np2')
502
- notes_stat = f'{data[factor].nunique()} Way ANOVA'
463
+ res_tab = run_anova(
464
+ data,
465
+ dv,
466
+ factor,
467
+ ss_type=ss_type,
468
+ detailed=True,
469
+ effsize="np2",
470
+ )
471
+ notes_stat = f"{data[factor].nunique()} Way ANOVA"
503
472
  notes_APA = extract_apa(res_tab)
504
473
 
505
474
  else:
506
475
  res_tab = run_welchanova(data, dv, factor)
507
- notes_stat = f'{data[factor].nunique()} Way Welch ANOVA'
476
+ notes_stat = f"{data[factor].nunique()} Way Welch ANOVA"
508
477
  notes_APA = extract_apa(res_tab)
509
-
510
478
  else:
511
-
512
479
  res_tab = run_kruskal(data, dv, factor)
513
- notes_stat = f'Non-parametric Kruskal: {data[factor].nunique()} Way ANOVA'
480
+ notes_stat = (
481
+ f"Non-parametric Kruskal: {data[factor].nunique()} Way ANOVA"
482
+ )
514
483
  notes_APA = extract_apa(res_tab)
515
484
 
516
- elif cfg_pmc == 'parametric':
517
- res_tab = run_anova(data, dv, factor, ss_type=ss_type,
518
- detailed=True, effsize='np2')
519
- notes_stat = f'{data[factor].nunique()} Way ANOVA'
485
+ elif cfg_pmc == "parametric":
486
+ res_tab = run_anova(
487
+ data, dv, factor, ss_type=ss_type, detailed=True, effsize="np2"
488
+ )
489
+ notes_stat = f"{data[factor].nunique()} Way ANOVA"
520
490
  notes_APA = extract_apa(res_tab)
521
491
 
522
- elif 'pa' in cfg_pair and 'np' not in cfg_pair: # 'paired'
523
- res_tab = run_rmanova(data, dv, factor, subject, correction='auto',
524
- detailed=True, effsize='ng2')
525
- notes_stat = f'{data[factor].nunique()} Way Repeated measures ANOVA'
492
+ elif "pa" in cfg_pair and "np" not in cfg_pair: # 'paired'
493
+ res_tab = run_rmanova(
494
+ data,
495
+ dv,
496
+ factor,
497
+ subject,
498
+ correction="auto",
499
+ detailed=True,
500
+ effsize="ng2",
501
+ )
502
+ notes_stat = f"{data[factor].nunique()} Way Repeated measures ANOVA"
526
503
  notes_APA = extract_apa(res_tab)
527
504
 
528
- elif 'mix' in cfg_pair or 'both' in cfg_pair:
505
+ elif "mix" in cfg_pair or "both" in cfg_pair:
506
+ print("mix")
529
507
  res_tab = run_mixedanova(data, dv, between, within, subject)
530
508
  # notes_stat = f'{len(sum(len(between)+sum(len(within))))} Way Mixed ANOVA'
531
509
  notes_stat = ""
@@ -533,15 +511,15 @@ def FuncMultiCmpt(pmc='pmc', pair='unpair', data=None, dv=None, factor=None,
533
511
  # print(n_inter)
534
512
  notes_APA = extract_apa(res_tab)
535
513
 
536
- elif cfg_pmc == 'non-parametric':
537
- if 'np' in cfg_pair: # 'unpaired'
514
+ elif cfg_pmc == "non-parametric":
515
+ if "np" in cfg_pair: # 'unpaired'
538
516
  res_tab = run_kruskal(data, dv, factor)
539
- notes_stat = f'Non-parametric Kruskal: {data[factor].nunique()} Way ANOVA'
517
+ notes_stat = f"Non-parametric Kruskal: {data[factor].nunique()} Way ANOVA"
540
518
  notes_APA = f'H({res_tab.ddof1[0]},n={data.shape[0]})={round(res_tab.H[0], 5)},p={round(res_tab["p-unc"][0], 5)}'
541
519
 
542
- elif 'pa' in cfg_pair and 'np' not in cfg_pair: # 'paired'
543
- res_tab = run_friedman(data, dv, factor, subject, method='chisq')
544
- notes_stat = f'Non-parametric {data[factor].nunique()} Way Friedman repeated measures ANOVA'
520
+ elif "pa" in cfg_pair and "np" not in cfg_pair: # 'paired'
521
+ res_tab = run_friedman(data, dv, factor, subject, method="chisq")
522
+ notes_stat = f"Non-parametric {data[factor].nunique()} Way Friedman repeated measures ANOVA"
545
523
  notes_APA = f'X^2({res_tab.ddof1[0]})={round(res_tab.Q[0], 5)},p={round(res_tab["p-unc"][0], 5)}'
546
524
 
547
525
  # =============================================================================
@@ -551,18 +529,13 @@ def FuncMultiCmpt(pmc='pmc', pair='unpair', data=None, dv=None, factor=None,
551
529
  # Tukey's HSD, Bonferroni, or Scheffé) to identify which groups differ from each other.
552
530
  # # https://pingouin-stats.org/build/html/generated/pingouin.pairwise_tests.html
553
531
  # =============================================================================
554
- go_pmc = True if cfg_pmc == 'parametric' else False
555
- go_subject = subject if ('pa' in cfg_pair) and (
556
- 'np' not in cfg_pair) else None
557
- go_mix_between = between if ('mix' in cfg_pair) or (
558
- 'both' in cfg_pair) else None
559
- go_mix_between = None if ('pa' in cfg_pair) or (
560
- 'np' not in cfg_pair) else factor
561
- go_mix_within = within if ('mix' in cfg_pair) or (
562
- 'both' in cfg_pair) else None
563
- go_mix_within = factor if ('pa' in cfg_pair) or (
564
- 'np' not in cfg_pair) else None
565
- if res_tab['p-unc'][0] <= .05:
532
+ go_pmc = True if cfg_pmc == "parametric" else False
533
+ go_subject = subject if ("pa" in cfg_pair) and ("np" not in cfg_pair) else None
534
+ go_mix_between = between if ("mix" in cfg_pair) or ("both" in cfg_pair) else None
535
+ go_mix_between = None if ("pa" in cfg_pair) or ("np" not in cfg_pair) else factor
536
+ go_mix_within = within if ("mix" in cfg_pair) or ("both" in cfg_pair) else None
537
+ go_mix_within = factor if ("pa" in cfg_pair) or ("np" not in cfg_pair) else None
538
+ if res_tab["p-unc"][0] <= 0.05:
566
539
  # Pairwise Comparisons
567
540
  method_post_hoc = [
568
541
  "bonf", # 'bonferroni', # : one-step correction
@@ -571,32 +544,319 @@ def FuncMultiCmpt(pmc='pmc', pair='unpair', data=None, dv=None, factor=None,
571
544
  "fdr_bh", # Benjamini/Hochberg (non-negative)
572
545
  "fdr_by", # Benjamini/Yekutieli (negative)
573
546
  ]
547
+ # *********? not work properly below*********
548
+ # res_posthoc = pd.DataFrame()
549
+
550
+ # for met in method_post_hoc:
551
+ # post_curr = pg.pairwise_tests(
552
+ # data=data,
553
+ # dv=dv,
554
+ # between=go_mix_between,
555
+ # within=go_mix_within,
556
+ # subject=go_subject,
557
+ # parametric=go_pmc,
558
+ # marginal=True,
559
+ # alpha=0.05,
560
+ # alternative="two-sided",
561
+ # padjust=met,
562
+ # nan_policy="listwise",#"pairwise"
563
+ # return_desc=True
564
+ # )
565
+ # res_posthoc = pd.concat([res_posthoc, post_curr], ignore_index=True)
566
+ # *********? not work properly above *********
567
+
568
+ # add ttest
569
+ data_within = df2array(data=data, x=factor, y=dv)
570
+ colname_within = data[factor].unique().tolist()
571
+ nrow, ncol = data_within.shape
574
572
  res_posthoc = pd.DataFrame()
575
- for met in method_post_hoc:
576
- post_curr = pg.pairwise_tests(data=data, dv=dv, between=go_mix_between, within=go_mix_within, subject=go_subject, parametric=go_pmc, marginal=True, alpha=0.05, alternative='two-sided',
577
- padjust=met)
578
- res_posthoc = pd.concat([res_posthoc, post_curr],
579
- ignore_index=True)
573
+ for icol in range(ncol):
574
+ for icol_ in range(1, ncol):
575
+ if icol_ > icol:
576
+ res_posthoc_ = pd.DataFrame()
577
+ _, res__ = FuncCmpt(
578
+ x1=data_within[:, icol],
579
+ x2=data_within[:, icol_],
580
+ pmc=pmc,
581
+ pair=pair,
582
+ verbose=False,
583
+ )
584
+ res_posthoc_["A"] = pd.Series(colname_within[icol])
585
+ res_posthoc_["B"] = pd.Series(colname_within[icol_])
586
+ res_posthoc_["mean(A)"] = pd.Series(
587
+ str_mean_sem(data_within[:, icol])
588
+ )
589
+ res_posthoc_["mean(B)"] = pd.Series(
590
+ str_mean_sem(data_within[:, icol_])
591
+ )
592
+ res_posthoc_["APA"] = pd.Series(res__["APA"])
593
+ res_posthoc_["p-unc"] = pd.Series(res__["pval"])
594
+ res_posthoc_["method"] = pd.Series(res__["method"])
595
+ res_posthoc = pd.concat(
596
+ [res_posthoc, res_posthoc_], ignore_index=True
597
+ )
580
598
  else:
581
599
  res_posthoc = None
582
- output['res_posthoc'] = res_posthoc
600
+ output["res_posthoc"] = res_posthoc
583
601
  # =============================================================================
584
602
  # # filling output
585
603
  # =============================================================================
586
604
 
587
- pd.set_option('display.max_columns', None)
588
- output['stat'] = notes_stat
605
+ pd.set_option("display.max_columns", None)
606
+ output["stat"] = notes_stat
589
607
  # print(output['APA'])
590
- output['APA'] = notes_APA
591
- output['pval'] = res_tab['p-unc']
592
- output['res_tab'] = res_tab
608
+ output["APA"] = notes_APA
609
+ output["pval"] = res_tab["p-unc"]
610
+ output["res_tab"] = res_tab
593
611
  if res_tab.shape[0] == len(notes_APA):
594
- output['res_tab']['APA'] = output['APA'] # note APA in the table
595
- # print(output['stat'])
596
- # print(output['res_tab'])
612
+ output["res_tab"]["APA"] = output["APA"] # note APA in the table
597
613
  return output
598
614
 
599
615
 
616
def display_output(output: dict):
    """Show the APA note, the results table and the post-hoc table of a result.

    Parameters:
        output : dict or pandas.DataFrame
            Result container with the optional keys ``"APA"``, ``"res_tab"``
            and ``"res_posthoc"``. A DataFrame is first converted with
            ``to_dict(orient="list")``.

    Returns:
        None. Each available entry is printed under its label; entries that
        are missing from ``output`` are skipped.
    """
    if isinstance(output, pd.DataFrame):
        output = output.to_dict(orient="list")

    # ``display`` is not defined by this module; presumably it is the rich
    # display hook that IPython/Jupyter makes available. Fall back to
    # ``print`` so the values are still shown in a plain interpreter instead
    # of being silently dropped.
    try:
        show = display
    except NameError:
        show = print

    sections = (
        ("APA:", "APA"),
        ("results table:", "res_tab"),
        ("posthoc:", "res_posthoc"),
    )
    for label, key in sections:
        # Only a failed lookup is tolerated (best-effort display); any other
        # error is a real problem and is allowed to propagate.
        try:
            value = output[key]
        except (KeyError, IndexError, TypeError):
            continue
        print(label)
        show(value)
637
+
638
+
639
def corr_pair(pair):
    """Normalise a pairing keyword.

    The match is a case-insensitive substring test:
    anything containing ``"np"`` maps to ``"unpaired"``; otherwise anything
    containing ``"pa"`` maps to ``"paired"``; otherwise anything containing
    ``"mix"`` maps to ``"mix"``. Unrecognised keywords yield ``None``.
    """
    keyword = pair.lower()
    # "unpaired" also contains "pa", so the "np" test has to win.
    if "np" in keyword:
        return "unpaired"
    if "pa" in keyword:
        return "paired"
    if "mix" in keyword:
        return "mix"
    return None
648
+
649
+
650
def check_normality(data, verbose=True):
    """Run a Shapiro-Wilk test and report whether ``data`` looks normal.

    Parameters:
        data : array-like
            Sample passed straight to ``scipy.stats.shapiro``.
        verbose : bool, optional
            When true, print a one-line verdict.

    Returns:
        bool
            ``True`` when the Shapiro-Wilk p-value is above 0.05.
    """
    _, p_value = stats.shapiro(data)
    is_normal = bool(p_value > 0.05)
    if verbose:
        verdict = (
            "\n normally distributed\n"
            if is_normal
            else "\n NOT normally distributed\n"
        )
        print(verdict)
    return is_normal
663
+
664
+
665
def corr_pmc(pmc):
    """Normalise a test-family keyword.

    The lower-cased keyword is matched exactly against a fixed alias table;
    anything not listed there maps to ``"auto"``.

    Returns:
        str
            One of ``"parametric"``, ``"non-parametric"``, ``"mix"`` or
            ``"auto"``.
    """
    aliases = {
        "pmc": "parametric",
        "parametric": "parametric",
        "upmc": "non-parametric",
        "npmc": "non-parametric",
        "nonparametric": "non-parametric",
        "non-parametric": "non-parametric",
        "mix": "mix",
        "both": "mix",
    }
    return aliases.get(pmc.lower(), "auto")
689
+
690
+
691
def extract_apa(res_tab):
    """Build APA-style result strings from a statistics table.

    The layout is picked from the columns that are present:

    * ``ddof1``/``ddof2``/``F``: one ``Source:F(df1, df2)=F,p=p`` note per row.
    * ``ddof1``/``H`` without ``F`` (Kruskal-style table): one
      ``Source:H(df)=H,p=p`` note per row.
    * ``DF``: the last row supplies the second degree of freedom; every
      other row gets an ``F`` note and the last row gets ``"NaN"``.
    * ``DF1``/``DF2``: one ``F`` note per row.

    Parameters:
        res_tab : pandas.DataFrame
            Table with a ``Source`` and a ``p-unc`` column plus one of the
            column sets above.

    Returns:
        list[list[str]]
            One single-element list per table row (empty when none of the
            layouts matches).
    """

    def cell(column, irow):
        # Positional access, so the row labels of the table do not matter.
        return res_tab[column].iloc[irow]

    def dof(value):
        # Plain int: NumPy scalars would otherwise leak their repr
        # (e.g. ``np.int64(2)``) into the note.
        return int(round(float(value)))

    def stat(value):
        return round(float(value), 5)

    def note(irow, symbol, dof_text, stat_column):
        return (
            f"{cell('Source', irow)}:{symbol}({dof_text})="
            f"{stat(cell(stat_column, irow))},p={stat(cell('p-unc', irow))}"
        )

    n_rows = res_tab.shape[0]
    notes_APA = []
    if "ddof1" in res_tab:
        # Kruskal-style tables carry H and a single degree of freedom.
        rank_test = "H" in res_tab and "F" not in res_tab
        for irow in range(n_rows):
            if rank_test:
                text = note(irow, "H", f"{dof(cell('ddof1', irow))}", "H")
            else:
                dof_text = (
                    f"{dof(cell('ddof1', irow))}, {dof(cell('ddof2', irow))}"
                )
                text = note(irow, "F", dof_text, "F")
            notes_APA.append([text])
    elif "DF" in res_tab:
        for irow in range(n_rows - 1):
            dof_text = f"{dof(cell('DF', irow))}, {dof(cell('DF', n_rows - 1))}"
            notes_APA.append([note(irow, "F", dof_text, "F")])
        # The last row has no F statistic of its own.
        notes_APA.append(["NaN"])
    elif "DF1" in res_tab:  # in 'mix' case
        for irow in range(n_rows):
            dof_text = f"{dof(cell('DF1', irow))}, {dof(cell('DF2', irow))}"
            notes_APA.append([note(irow, "F", dof_text, "F")])
    return notes_APA
707
+
708
+
709
def anovatable(res_tab):
    """Add mean-square and effect-size columns to an ANOVA-style table.

    The table is modified in place and also returned.

    * With a ``df`` column (statsmodels naming: ``sum_sq``/``df``) or a
      ``DF`` column (``SS``/``DF``), the mean squares, ``est_sq``
      (sum of squares / total sum of squares) and ``omega_sq`` are added.
      The last row is taken to be the error/residual row, so both effect
      sizes are NaN there.
    * With a ``p-unc`` column, ``PR(>F)`` is added as a copy of it and, when
      ``np2`` is present, ``est_sq`` is overwritten with ``np2``.

    Parameters:
        res_tab : pandas.DataFrame

    Returns:
        pandas.DataFrame
            The same object that was passed in.
    """
    if "df" in res_tab:  # statsmodels
        ss_col, dof_col, ms_col = "sum_sq", "df", "mean_sq"
    elif "DF" in res_tab:
        ss_col, dof_col, ms_col = "SS", "DF", "MS"
    else:
        ss_col = None

    if ss_col is not None and res_tab.shape[0] > 0:
        res_tab[ms_col] = res_tab[ss_col] / res_tab[dof_col]
        ss_total = sum(res_tab[ss_col])
        # Error mean square = last row, addressed by position so that it is
        # right for any number of effect rows and any row labels.
        ms_error = res_tab[ms_col].iloc[-1]
        effects = res_tab[:-1]
        # Index alignment on assignment leaves the error row as NaN.
        res_tab["est_sq"] = effects[ss_col] / ss_total
        res_tab["omega_sq"] = (
            effects[ss_col] - effects[dof_col] * ms_error
        ) / (ss_total + ms_error)

    if "p-unc" in res_tab:
        if "np2" in res_tab:
            res_tab["est_sq"] = res_tab["np2"]
        res_tab["PR(>F)"] = res_tab["p-unc"]
    return res_tab
728
+
729
+
730
def run_anova(data, dv, factor, ss_type=2, detailed=True, effsize="np2"):
    """Run a between-subject ANOVA with pingouin and post-process the table.

    Parameters:
        data : pandas.DataFrame
        dv : str
            Name of the dependent-variable column.
        factor : str or list of str
            Forwarded to ``pg.anova`` as ``between``.
        ss_type : int, optional
            Sum-of-squares type forwarded to ``pg.anova``.
        detailed : bool, optional
            Forwarded to ``pg.anova``.
        effsize : str, optional
            Effect-size measure forwarded to ``pg.anova``.

    Returns:
        The ``pg.anova`` table after it has been passed through
        ``anovatable``.
    """
    # pingouin is used here rather than the statsmodels ``ols`` / ``anova_lm``
    # route because the latter reports no effect sizes; ``anovatable`` then
    # adds the extra columns.
    anova_kwargs = {
        "dv": dv,
        "between": factor,
        "data": data,
        "detailed": detailed,
        "ss_type": ss_type,
        "effsize": effsize,
    }
    return anovatable(pg.anova(**anova_kwargs))
763
+
764
+
765
def run_rmanova(
    data, dv, factor, subject, correction="auto", detailed=True, effsize="ng2"
):
    """Run a one-way repeated-measures ANOVA on a long-format dataset.

    Parameters:
        data : pandas.DataFrame
        dv : str
            Name of the dependent-variable column.
        factor : str
            Within-subject factor, forwarded to ``pg.rm_anova`` as ``within``.
        subject : str
            Name of the subject-identifier column.
        correction : str or bool, optional
            Sphericity-correction setting forwarded to ``pg.rm_anova``.
        detailed : bool, optional
            Forwarded to ``pg.rm_anova``.
        effsize : str, optional
            Effect-size measure forwarded to ``pg.rm_anova``.

    Returns:
        The table returned by ``pg.rm_anova`` (not passed through
        ``anovatable``).
    """
    res_tab = pg.rm_anova(
        data=data,
        dv=dv,
        within=factor,
        subject=subject,
        # Previously accepted by this wrapper but never handed on.
        correction=correction,
        detailed=detailed,
        effsize=effsize,
    )
    return res_tab
778
+
779
+
780
def run_welchanova(data, dv, factor):
    """Run Welch's ANOVA with pingouin and post-process the table.

    ``factor`` is forwarded to ``pg.welch_anova`` as ``between``; the
    resulting table is passed through ``anovatable`` before it is returned.

    Note on follow-up tests (carried over from the original comments):
    Tukey-HSD (``pingouin.pairwise_tukey``) suits balanced groups with equal
    variances, Games-Howell (``pingouin.pairwise_gameshowell``) suits groups
    with unequal variances.
    """
    welch_table = pg.welch_anova(data=data, dv=dv, between=factor)
    return anovatable(welch_table)
788
+
789
+
790
def run_mixedanova(
    data, dv, between, within, subject, correction="auto", effsize="np2"
):
    """Run a mixed-design ANOVA with pingouin and post-process the table.

    All arguments are forwarded by name to ``pg.mixed_anova``; the resulting
    table is passed through ``anovatable`` before it is returned.

    Notes (carried over from the original comments):
        * Data are expected in long format, repeated measures included;
          ``pandas.melt`` converts wide data to long.
        * With unbalanced between-subject groups a type II ANOVA is
          computed, whereas SPSS, JAMOVI and JASP default to type III, so
          results may differ slightly.
    """
    mixed_kwargs = {
        "data": data,
        "dv": dv,
        "within": within,
        "subject": subject,
        "between": between,
        "correction": correction,
        "effsize": effsize,
    }
    return anovatable(pg.mixed_anova(**mixed_kwargs))
814
+
815
+
816
def run_friedman(data, dv, factor, subject, method="chisq"):
    """Run a Friedman test (rank-based one-way repeated-measures ANOVA).

    The frame is classified with ``df_wide_long``. A frame reported as
    ``"Wide"`` is melted to long format first (its index becomes the subject
    and the former column names become the within factor); any other frame
    is analysed directly with the given ``dv``, ``factor`` and ``subject``.

    Parameters:
        data : pandas.DataFrame
        dv : str
            Dependent-variable column (long-format path only).
        factor : str
            Within-subject factor column (long-format path only).
        subject : str
            Subject-identifier column (long-format path only).
        method : str, optional
            Forwarded to ``pg.friedman``.

    Returns:
        The ``pg.friedman`` table after it has been passed through
        ``anovatable``.
    """
    # Equality instead of ``"Wide" in ...``: the classifier may return None
    # (frame with as many rows as columns), and ``in None`` raises TypeError.
    if df_wide_long(data) == "Wide":
        df_long = data.melt(ignore_index=False).reset_index()
        res_tab = pg.friedman(
            data=df_long,
            dv="value",
            within="variable",
            subject="index",
            method=method,
        )
    else:
        res_tab = pg.friedman(
            data, dv=dv, within=factor, subject=subject, method=method
        )
    res_tab = anovatable(res_tab)
    return res_tab
843
+
844
+
845
def run_kruskal(data, dv, factor):
    """Run a Kruskal-Wallis H-test for independent samples.

    ``factor`` is forwarded to ``pg.kruskal`` as ``between``; the resulting
    table is passed through ``anovatable`` before it is returned.
    """
    return anovatable(pg.kruskal(data=data, dv=dv, between=factor))
850
+
851
+
852
def df_wide_long(df):
    """Classify a 2-D table as ``"Wide"`` or ``"Long"`` from its shape.

    Parameters:
        df : object with a two-element ``shape`` (e.g. a pandas DataFrame)

    Returns:
        str
            ``"Wide"`` when there are more columns than rows, ``"Long"``
            otherwise. A square table counts as ``"Long"`` so that callers
            always receive a string.
    """
    rows, columns = df.shape
    return "Wide" if columns > rows else "Long"
858
+
859
+
600
860
  # =============================================================================
601
861
  # # One-way ANOVA
602
862
  # =============================================================================
@@ -807,4 +1067,138 @@ def FuncMultiCmpt(pmc='pmc', pair='unpair', data=None, dv=None, factor=None,
807
1067
 
808
1068
  # =============================================================================
809
1069
  # # convert to list to string
810
- # =============================================================================
1070
+ # =============================================================================
1071
+
1072
+
1073
+ def sort_rows_move_nan(arr, sort=False):
1074
+ # Handle edge cases where all values are NaN
1075
+ if np.all(np.isnan(arr)):
1076
+ return arr # Return unchanged if the entire array is NaN
1077
+
1078
+ if sort:
1079
+ # Replace NaNs with a temporary large value for sorting
1080
+ temp_value = (
1081
+ np.nanmax(arr[np.isfinite(arr)]) + 1 if np.any(np.isfinite(arr)) else np.inf
1082
+ )
1083
+ arr_no_nan = np.where(np.isnan(arr), temp_value, arr)
1084
+
1085
+ # Sort each row
1086
+ sorted_arr = np.sort(arr_no_nan, axis=1)
1087
+
1088
+ # Move NaNs to the end
1089
+ result_arr = np.where(sorted_arr == temp_value, np.nan, sorted_arr)
1090
+ else:
1091
+ result_rows = []
1092
+ for row in arr:
1093
+ # Separate non-NaN and NaN values
1094
+ non_nan_values = row[~np.isnan(row)]
1095
+ nan_count = np.isnan(row).sum()
1096
+ # Create a new row with non-NaN values followed by NaNs
1097
+ new_row = np.concatenate([non_nan_values, [np.nan] * nan_count])
1098
+ result_rows.append(new_row)
1099
+ # Convert the list of rows back into a 2D NumPy array
1100
+ result_arr = np.array(result_rows)
1101
+
1102
+ # Remove rows/columns that contain only NaNs
1103
+ clean_arr = result_arr[~np.isnan(result_arr).all(axis=1)]
1104
+ clean_arr_ = clean_arr[:, ~np.isnan(clean_arr).all(axis=0)]
1105
+
1106
+ return clean_arr_
1107
+
1108
+
1109
def df2array(data: pd.DataFrame, x, y, hue=None, sort=False):
    """Convert long-format data into a 2-D array with one column per group.

    Groups are the levels of ``x`` or, when ``hue`` is given, every
    (``x`` level, ``hue`` level) combination with ``hue`` varying fastest.
    The ``y`` values of each group are padded with NaN to a common length.

    Parameters:
        data : pandas.DataFrame
        x : str
            Column holding the group labels.
        y : str
            Column holding the values.
        hue : str, optional
            Second grouping column.
        sort : bool, optional
            Sort the group levels; otherwise they are used in order of first
            appearance.

    Returns:
        np.ndarray
            Array of shape (observations, groups). NOTE(review): the clean-up
            in ``sort_rows_move_nan`` drops groups that contain no non-NaN
            value, so the columns may not line up one-to-one with the levels.
    """

    def levels(column):
        found = data[column].unique().tolist()
        return np.sort(found).tolist() if sort else found

    if hue is None:
        groups = [data.loc[data[x] == x_, y].to_list() for x_ in levels(x)]
    else:
        cat_x = levels(x)
        cat_hue = levels(hue)
        groups = [
            data.loc[(data[x] == x_) & (data[hue] == hue_), y].to_list()
            for x_ in cat_x
            for hue_ in cat_hue
        ]

    if not groups:
        return np.empty((0, 0))  # no levels at all: nothing to stack

    # One padded row per group, cleaned and then transposed to columns.
    return sort_rows_move_nan(padcat(*groups, axis=0)).T
1133
+
1134
+
1135
def padcat(*args, fill_value=np.nan, axis=1, order="row"):
    """
    Concatenate vectors with padding.

    Parameters:
    *args : variable number of list or 1D/2D arrays
        Input arrays to concatenate. A 2D input is split into its rows
        (axis=0) or its columns (axis=1) before concatenation.
    fill_value : scalar, optional
        The value to use for padding the shorter vectors (default is np.nan).
    axis : int, optional
        0 stacks every vector as a row, 1 stacks every vector as a column
        (default is 1).
    order : str, optional
        Accepted for backward compatibility; it has no effect on the result.

    Returns:
    np.ndarray
        A 2D array with the input vectors concatenated along the specified
        axis, padded with fill_value where necessary. For numeric input the
        dtype is wide enough for both the data and fill_value.

    Raises:
    ValueError
        If axis is not 0 or 1, an input has more than two dimensions, or
        there is nothing to concatenate.
    """
    if axis not in (0, 1):
        raise ValueError("axis must be 0 or 1")

    # Break every input down into 1D vectors.
    vectors = []
    for arg in args:
        arr = np.asarray(arg)
        if arr.ndim == 1:
            vectors.append(arr)
        elif arr.ndim == 2:
            vectors.extend(arr if axis == 0 else arr.T)
        else:
            raise ValueError("Input arrays must be 1D or 2D")
    if not vectors:
        raise ValueError("padcat() requires at least one non-empty input")

    max_len = max(vec.size for vec in vectors)

    # Promote over data and fill value so that e.g. an integer fill_value
    # does not truncate float data. Non-numeric input keeps NumPy's default
    # inference from fill_value.
    fill_dtype = np.asarray(fill_value).dtype
    kinds = {fill_dtype.kind, *(vec.dtype.kind for vec in vectors)}
    if kinds <= set("biufc"):
        dtype = np.result_type(fill_dtype, *(vec.dtype for vec in vectors))
    else:
        dtype = None

    if axis == 0:
        # One vector per row.
        result = np.full((len(vectors), max_len), fill_value, dtype=dtype)
        for i, vec in enumerate(vectors):
            result[i, : vec.size] = vec
    else:
        # One vector per column.
        result = np.full((max_len, len(vectors)), fill_value, dtype=dtype)
        for i, vec in enumerate(vectors):
            result[: vec.size, i] = vec
    return result
1194
+
1195
+
1196
+ # # Example usage:
1197
+ # a = [1, np.nan]
1198
+ # b = [1, 3, 4, np.nan, 2, np.nan]
1199
+ # c = [1, 2, 3, 4, 5, 6, 7, 8, 10]
1200
+ # d = padcat(a, b)
1201
+ # result1 = padcat(d, c)
1202
+ # result2 = padcat(a, b, c)
1203
+ # print("Result of padcat(d, c):\n", result1)
1204
+ # print("Result of padcat(a, b, c):\n", result2)