AutoStatLib 0.2.2__py3-none-any.whl → 0.2.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of AutoStatLib might be problematic. Click here for more details.
- AutoStatLib/AutoStatLib.py +27 -442
- AutoStatLib/_version.py +1 -1
- AutoStatLib/helpers.py +80 -0
- AutoStatLib/normality_tests.py +83 -0
- AutoStatLib/statistical_tests.py +184 -0
- AutoStatLib/text_formatting.py +106 -0
- {AutoStatLib-0.2.2.dist-info → autostatlib-0.2.6.dist-info}/METADATA +34 -26
- autostatlib-0.2.6.dist-info/RECORD +13 -0
- {AutoStatLib-0.2.2.dist-info → autostatlib-0.2.6.dist-info}/WHEEL +1 -1
- AutoStatLib-0.2.2.dist-info/RECORD +0 -9
- {AutoStatLib-0.2.2.dist-info → autostatlib-0.2.6.dist-info}/LICENSE +0 -0
- {AutoStatLib-0.2.2.dist-info → autostatlib-0.2.6.dist-info}/top_level.txt +0 -0
AutoStatLib/AutoStatLib.py
CHANGED
|
@@ -1,428 +1,10 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
from
|
|
4
|
-
from
|
|
5
|
-
from scipy.stats import ttest_rel, ttest_ind, ttest_1samp, wilcoxon, mannwhitneyu, f_oneway, kruskal, friedmanchisquare, shapiro, anderson, normaltest
|
|
1
|
+
from AutoStatLib.statistical_tests import StatisticalTests
|
|
2
|
+
from AutoStatLib.normality_tests import NormalityTests
|
|
3
|
+
from AutoStatLib.helpers import Helpers
|
|
4
|
+
from AutoStatLib.text_formatting import TextFormatting
|
|
6
5
|
|
|
7
6
|
|
|
8
|
-
class __StatisticalTests():
    '''
    Statistical tests mixin

    The host class is expected to provide the attributes read here:
    data, n_groups, parametric, paired, tails, popmean, test_ids_dependent,
    plus the matrix_to_dataframe() and AddWarning() helpers.
    '''

    def run_test_auto(self):
        '''Choose a test from group count, pairing and normality, then run it.'''
        if self.n_groups == 1:
            test_id = ('t_test_single_sample' if self.parametric
                       else 'wilcoxon_single_sample')
        elif self.n_groups == 2:
            if self.paired:
                test_id = 't_test_paired' if self.parametric else 'wilcoxon'
            else:
                test_id = ('t_test_independent' if self.parametric
                           else 'mann_whitney')
        elif self.n_groups >= 3:
            if self.paired:
                test_id = 'anova_1w_rm' if self.parametric else 'friedman'
            else:
                test_id = ('anova_1w_ordinary' if self.parametric
                           else 'kruskal_wallis')
        else:
            # No groups: nothing to run, result attributes stay untouched.
            return

        self.run_test_by_id(test_id)

    def run_test_by_id(self, test_id):
        '''
        Run the test named by test_id and store its outcome on the instance
        (test_name, test_id, test_stat, p_value, paired).

        Raises KeyError for an unknown test_id, before any state is changed.
        '''
        test_names_dict = {
            'anova_1w_ordinary': 'Ordinary One-Way ANOVA',
            'anova_1w_rm': 'Repeated Measures One-Way ANOVA',
            'friedman': 'Friedman test',
            'kruskal_wallis': 'Kruskal-Wallis test',
            'mann_whitney': 'Mann-Whitney U test',
            't_test_independent': 't-test for independent samples',
            't_test_paired': 't-test for paired samples',
            't_test_single_sample': 'Single-sample t-test',
            'wilcoxon': 'Wilcoxon signed-rank test',
            'wilcoxon_single_sample': 'Wilcoxon signed-rank test for single sample',
        }

        # Validate first so a bad id cannot leave self.paired half-updated.
        if test_id not in test_names_dict:
            raise KeyError(test_id)

        # Every test id is also the name of the method implementing it.
        stat, p_value = getattr(self, test_id)()

        self.paired = test_id in self.test_ids_dependent
        self.test_name = test_names_dict[test_id]
        self.test_id = test_id
        self.test_stat = stat
        self.p_value = p_value

    def _one_tailed(self, p_value):
        '''Halve a two-sided p-value when a one-tailed test was requested.'''
        return p_value / 2 if self.tails == 1 else p_value

    def anova_1w_ordinary(self):
        '''Ordinary one-way ANOVA over all groups; always reported as two-tailed.'''
        stat, p_value = f_oneway(*self.data)
        self.tails = 2
        return stat, p_value

    def anova_1w_rm(self):
        """
        Perform repeated measures one-way ANOVA test.

        self.data is a list of groups (conditions); position j inside every
        group is treated as subject j, so all groups must list the subjects
        in the same order.
        """
        df = self.matrix_to_dataframe(self.data)
        res = AnovaRM(df, 'Value', 'Row', within=['Col']).fit()
        # The table is indexed by factor name, so take the first row by
        # position explicitly instead of relying on Series[int] fallback.
        stat = res.anova_table['F Value'].iloc[0]
        p_value = res.anova_table['Pr > F'].iloc[0]

        self.tails = 2
        return stat, p_value

    def friedman(self):
        '''Friedman test over all groups; always reported as two-tailed.'''
        stat, p_value = friedmanchisquare(*self.data)
        self.tails = 2
        return stat, p_value

    def kruskal_wallis(self):
        '''Kruskal-Wallis test over all groups; always reported as two-tailed.'''
        stat, p_value = kruskal(*self.data)
        # Omnibus test: the p-value is never halved, so report two tails
        # like the ANOVA and Friedman tests do.
        self.tails = 2
        return stat, p_value

    def mann_whitney(self):
        '''Mann-Whitney U test on the first two groups.'''
        stat, p_value = mannwhitneyu(
            self.data[0], self.data[1], alternative='two-sided')
        return stat, self._one_tailed(p_value)

    def t_test_independent(self):
        '''Independent-samples t-test on the first two groups.'''
        stat, p_value = ttest_ind(self.data[0], self.data[1])
        return stat, self._one_tailed(p_value)

    def t_test_paired(self):
        '''Paired-samples t-test on the first two groups.'''
        stat, p_value = ttest_rel(self.data[0], self.data[1])
        return stat, self._one_tailed(p_value)

    def _ensure_popmean(self):
        '''Default the population mean to 0 (with a warning) when unset.'''
        if self.popmean is None:
            self.popmean = 0
            self.AddWarning('no_pop_mean_set')

    def t_test_single_sample(self):
        '''Single-sample t-test of the first group against self.popmean.'''
        self._ensure_popmean()
        stat, p_value = ttest_1samp(self.data[0], self.popmean)
        return stat, self._one_tailed(p_value)

    def wilcoxon(self):
        '''Wilcoxon signed-rank test on the first two (paired) groups.'''
        # Inside the method body `wilcoxon` resolves to the scipy function.
        stat, p_value = wilcoxon(self.data[0], self.data[1])
        return stat, self._one_tailed(p_value)

    def wilcoxon_single_sample(self):
        '''Wilcoxon signed-rank test of the first group against self.popmean.'''
        self._ensure_popmean()
        data = [i - self.popmean for i in self.data[0]]
        stat, p_value = wilcoxon(data)
        return stat, self._one_tailed(p_value)
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
class __NormalityTests():
    '''
    Normality tests mixin

    see the article about minimal sample size for tests:
    Power comparisons of Shapiro-Wilk, Kolmogorov-Smirnov,
    Lilliefors and Anderson-Darling tests, Nornadiah Mohd Razali1, Yap Bee Wah1
    '''

    def check_normality(self, data, alpha=0.05):
        '''
        Run four normality tests on one sample.

        Returns a tuple (sw, lf, ad, ap) for Shapiro-Wilk, Lilliefors,
        Anderson-Darling and D'Agostino-Pearson. Each entry is True when the
        test's p-value exceeds alpha, False otherwise; ad and ap stay None
        for samples shorter than 20, where those two tests are skipped.
        '''
        def passed(p_value):
            # A missing p-value counts as "not normal" instead of raising.
            return bool(p_value is not None and p_value > alpha)

        n = len(data)

        # Shapiro-Wilk test
        sw = passed(shapiro(data)[1])

        # Lilliefors test
        lf = passed(lilliefors(data, dist='norm')[1])

        ad = None
        ap = None
        # test result is skewed if n<20
        if n >= 20:
            # Anderson-Darling test
            ad = passed(self.anderson_get_p(data, dist='norm')[1])
            # D'Agostino-Pearson test
            ap = passed(normaltest(data)[1])

        return (sw, lf, ad, ap)

    @staticmethod
    def _ad_p_value(s):
        '''Map an adjusted Anderson-Darling statistic to a p-value in [0, 1].'''
        import math

        if math.isnan(s):
            return None

        if s >= 0.6:
            # The fitted quadratic turns upward beyond its vertex, which would
            # give growing (eventually > 1, then overflowing) values for very
            # large statistics; hold it at the minimum, where p is ~0.
            s = min(s, 5.709 / (2 * 0.0186))
            p = math.exp(1.2937 - 5.709*s + 0.0186*s**2)
        elif s > 0.34:
            p = math.exp(0.9177 - 4.279*s - 1.38*s**2)
        elif s > 0.2:
            p = 1 - math.exp(-8.318 + 42.796*s - 59.938*s**2)
        else:
            p = 1 - math.exp(-13.436 + 101.14*s - 223.73*s**2)

        return min(max(p, 0.0), 1.0)

    def anderson_get_p(self, data, dist='norm'):
        '''
        calculating p-value for Anderson-Darling test using the method described here:
        Computation of Probability Associated with Anderson-Darling Statistic
        Lorentz Jantschi and Sorana D. Bolboaca, 2018 - Mathematics

        Returns (statistic, p); p is None when the statistic is NaN.
        '''
        n = len(data)

        # Read the statistic by name so extra result fields cannot break us.
        ad = anderson(data, dist=dist).statistic

        # adjust ad_stat for small sample sizes:
        s = ad*(1 + 0.75/n + 2.25/(n**2))

        return ad, self._ad_p_value(s)
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
class __Helpers():
    '''
    Helper mixin: data reshaping, results assembly, logging and warnings.
    '''

    def matrix_to_dataframe(self, matrix):
        '''
        Flatten a list of groups into a long-format DataFrame with columns
        'Row' (position inside the group), 'Col' (group index) and 'Value'.
        '''
        rows = []
        cols = []
        values = []
        for col_idx, group in enumerate(matrix):
            for row_idx, value in enumerate(group):
                values.append(value)
                cols.append(col_idx)
                rows.append(row_idx)

        return pd.DataFrame({'Row': rows, 'Col': cols, 'Value': values})

    def create_results_dict(self) -> dict:
        '''
        Build the results dictionary for the last test run; also stores
        stars_int and stars_str on the instance.
        '''
        self.stars_int = self.make_stars()
        self.stars_str = '*' * self.stars_int if self.stars_int else 'ns'

        p_exact = self.p_value.item()

        return {
            'p-value': self.make_p_value_printed(),
            'Significance(p<0.05)': p_exact < 0.05,
            'Stars_Printed': self.stars_str,
            'Test_Name': self.test_name,
            'Groups_Compared': self.n_groups,
            'Population_Mean': self.popmean if self.n_groups == 1 else 'N/A',
            'Data_Normaly_Distributed': self.parametric,
            'Parametric_Test_Applied': self.test_id in self.test_ids_parametric,
            'Paired_Test_Applied': self.paired,
            'Tails': self.tails,
            'p-value_exact': p_exact,
            'Stars': self.stars_int,
            # 'Stat_Value': self.test_stat.item(),
            'Warnings': self.warnings,
            'Groups_N': [len(group) for group in self.data],
            'Groups_Median': [np.median(group).item() for group in self.data],
            'Groups_Mean': [np.mean(group).item() for group in self.data],
            # np.std is called with its default ddof.
            'Groups_SD': [np.std(group).item() for group in self.data],
            # Standard error uses the size of each group, not the number of
            # groups; the division stays in numpy so an empty group gives nan.
            'Groups_SE': [(np.std(group) / np.sqrt(len(group))).item()
                          for group in self.data],
            # actually returns list of lists of numpy dtypes of float64, next make it return regular floats:
            'Samples': self.data,
        }

    def log(self, *args, **kwargs):
        '''Append the space-joined arguments to self.summary on a new line.'''
        message = ' '.join(map(str, args))
        # kwargs are accepted for print()-like call sites but are not used.
        self.summary += '\n' + message

    def AddWarning(self, warning_id):
        '''Look up a warning message by id, log it and record it in self.warnings.'''
        message = self.warning_ids_all[warning_id]
        self.log(message)
        self.warnings.append(message)
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
class __TextFormatting():
    '''
    Text formatting mixin
    '''

    def autospace(self, elements_list, space, delimiter=' ') -> str:
        '''
        Join strings into fixed-width columns: every element except the last
        is padded with delimiter up to `space` characters.
        '''
        last = len(elements_list) - 1
        parts = []
        for i, element in enumerate(elements_list):
            if i == last:
                # The final column needs no trailing padding.
                parts.append(element)
            else:
                parts.append(element + (space - len(element)) * delimiter)
        return ''.join(parts)

    def print_groups(self, space=24, max_length=15):
        '''
        Log the groups side by side, one column per group. Groups longer
        than max_length are cut after max_length values and followed by a
        "[k more]" placeholder.
        '''
        self.log('')
        data = self.data
        num_groups = len(data)
        # default=0 keeps an empty data list from raising in max().
        group_longest = max((len(row) for row in data), default=0)

        # Print the header
        header = [f'Group {i+1}' for i in range(num_groups)]
        # NOTE(review): the original expression was `''*7`, which is just an
        # empty string; a visible separator may have been intended - confirm.
        line = ['']
        self.log(self.autospace(header, space))
        self.log(self.autospace(line, space))

        # Print each column with a placeholder if longer than max_length
        for i in range(group_longest):
            row_values = []
            all_values_empty = True
            for row in data:
                if len(row) > max_length:
                    if i < max_length:
                        row_values.append(str(row[i]))
                        all_values_empty = False
                    elif i == max_length:
                        row_values.append(f'[{len(row) - max_length} more]')
                        all_values_empty = False
                    else:
                        continue
                elif i < len(row):
                    row_values.append(str(row[i]))
                    all_values_empty = False
                else:
                    row_values.append('')
            if all_values_empty:
                break
            self.log(self.autospace(row_values, space))

    def make_stars(self) -> int:
        '''Return the significance level of self.p_value as 0-4 stars.'''
        p = self.p_value.item()
        # Thresholds are checked from strictest to loosest; NaN matches none.
        for threshold, stars in ((0.0001, 4), (0.001, 3), (0.01, 2), (0.05, 1)):
            if p < threshold:
                return stars
        return 0

    def make_p_value_printed(self) -> str:
        '''Format self.p_value for display, e.g. 'p=0.03' or 'p<0.0001'.'''
        p = self.p_value.item()
        if p > 0.99:
            return 'p>0.99'
        if p >= 0.001:
            return f'p={p:.2g}'
        if p >= 0.0001:
            return f'p={p:.1g}'
        if p < 0.0001:
            return 'p<0.0001'
        # Only reached when p is NaN.
        return 'N/A'

    def print_results(self):
        '''Log every entry of self.results except the raw samples.'''
        self.log('\n\nResults: \n')
        for i in self.results:
            shift = 27 - len(i)
            if i == 'Warnings':
                # Only the number of warnings is shown here.
                self.log(i, ':', ' ' * shift, len(self.results[i]))
            elif i == 'Samples':
                pass
            else:
                self.log(i, ':', ' ' * shift, self.results[i])
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
class __InputFormatting():
    '''
    Input formatting mixin
    '''

    def floatify_recursive(self, data):
        '''
        Convert a (possibly nested) list to np.float64 values.

        Items for which np.float64() raises ValueError or TypeError are
        dropped from their list and self.warning_flag_non_numeric_data is
        set to True. A non-list argument is converted directly and yields
        None when the conversion fails.
        '''
        if not isinstance(data, list):
            try:
                number = np.float64(data)
            except (ValueError, TypeError):
                # Remember that something had to be discarded.
                self.warning_flag_non_numeric_data = True
                return None
            return number

        cleaned = []
        for entry in data:
            converted = self.floatify_recursive(entry)
            if converted is not None:
                cleaned.append(converted)
        return cleaned
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting, __InputFormatting, __Helpers):
|
|
7
|
+
class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Helpers):
|
|
426
8
|
'''
|
|
427
9
|
The main class
|
|
428
10
|
*documentation placeholder*
|
|
@@ -434,6 +16,7 @@ class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting
|
|
|
434
16
|
paired=False,
|
|
435
17
|
tails=2,
|
|
436
18
|
popmean=None,
|
|
19
|
+
posthoc=True,
|
|
437
20
|
verbose=True):
|
|
438
21
|
self.results = None
|
|
439
22
|
self.error = False
|
|
@@ -441,6 +24,7 @@ class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting
|
|
|
441
24
|
self.paired = paired
|
|
442
25
|
self.tails = tails
|
|
443
26
|
self.popmean = popmean
|
|
27
|
+
self.posthoc = posthoc
|
|
444
28
|
self.verbose = verbose
|
|
445
29
|
self.n_groups = len(self.groups_list)
|
|
446
30
|
self.warning_flag_non_numeric_data = False
|
|
@@ -495,7 +79,7 @@ class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting
|
|
|
495
79
|
'no_pop_mean_set': '\nWarning: No Population Mean was set up for single-sample test, used default 0 value.\n The results might be skewed. \n Please, set the Population Mean and run the test again.\n',
|
|
496
80
|
}
|
|
497
81
|
|
|
498
|
-
def
|
|
82
|
+
def run_test(self, test='auto'):
|
|
499
83
|
|
|
500
84
|
# reset values from previous tests
|
|
501
85
|
self.results = None
|
|
@@ -506,9 +90,12 @@ class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting
|
|
|
506
90
|
self.test_id = None
|
|
507
91
|
self.test_stat = None
|
|
508
92
|
self.p_value = None
|
|
93
|
+
self.posthoc_matrix_df = None
|
|
94
|
+
self.posthoc_matrix = []
|
|
95
|
+
self.posthoc_name = None
|
|
509
96
|
|
|
510
97
|
self.log('\n' + '-'*67)
|
|
511
|
-
self.log('Statistical analysis
|
|
98
|
+
self.log('Statistical analysis __init__iated for data in {} groups\n'.format(
|
|
512
99
|
len(self.groups_list)))
|
|
513
100
|
|
|
514
101
|
# adjusting input data type
|
|
@@ -588,7 +175,6 @@ class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting
|
|
|
588
175
|
else:
|
|
589
176
|
self.run_test_auto()
|
|
590
177
|
|
|
591
|
-
|
|
592
178
|
# print the results
|
|
593
179
|
self.results = self.create_results_dict()
|
|
594
180
|
self.print_results()
|
|
@@ -600,49 +186,48 @@ class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting
|
|
|
600
186
|
if self.verbose == True:
|
|
601
187
|
print(self.summary)
|
|
602
188
|
|
|
603
|
-
|
|
604
|
-
|
|
605
189
|
# public methods:
|
|
190
|
+
|
|
606
191
|
def RunAuto(self):
|
|
607
|
-
self.
|
|
192
|
+
self.run_test(test='auto')
|
|
608
193
|
|
|
609
194
|
def RunManual(self, test):
|
|
610
|
-
self.
|
|
195
|
+
self.run_test(test)
|
|
611
196
|
|
|
612
197
|
def RunOnewayAnova(self):
|
|
613
|
-
self.
|
|
198
|
+
self.run_test(test='anova_1w_ordinary')
|
|
614
199
|
|
|
615
200
|
def RunOnewayAnovaRM(self):
|
|
616
|
-
self.
|
|
201
|
+
self.run_test(test='anova_1w_rm')
|
|
617
202
|
|
|
618
203
|
def RunFriedman(self):
|
|
619
|
-
self.
|
|
204
|
+
self.run_test(test='friedman')
|
|
620
205
|
|
|
621
206
|
def RunKruskalWallis(self):
|
|
622
|
-
self.
|
|
207
|
+
self.run_test(test='kruskal_wallis')
|
|
623
208
|
|
|
624
209
|
def RunMannWhitney(self):
|
|
625
|
-
self.
|
|
210
|
+
self.run_test(test='mann_whitney')
|
|
626
211
|
|
|
627
212
|
def RunTtest(self):
|
|
628
|
-
self.
|
|
213
|
+
self.run_test(test='t_test_independent')
|
|
629
214
|
|
|
630
215
|
def RunTtestPaired(self):
|
|
631
|
-
self.
|
|
216
|
+
self.run_test(test='t_test_paired')
|
|
632
217
|
|
|
633
218
|
def RunTtestSingleSample(self):
|
|
634
|
-
self.
|
|
219
|
+
self.run_test(test='t_test_single_sample')
|
|
635
220
|
|
|
636
221
|
def RunWilcoxonSingleSample(self):
|
|
637
|
-
self.
|
|
222
|
+
self.run_test(test='wilcoxon_single_sample')
|
|
638
223
|
|
|
639
224
|
def RunWilcoxon(self):
|
|
640
|
-
self.
|
|
225
|
+
self.run_test(test='wilcoxon')
|
|
641
226
|
|
|
642
227
|
def GetResult(self):
|
|
643
228
|
if not self.results and not self.error:
|
|
644
229
|
print('No test chosen, no results to output')
|
|
645
|
-
# self.
|
|
230
|
+
# self.run_test(test='auto')
|
|
646
231
|
return self.results
|
|
647
232
|
if not self.results and self.error:
|
|
648
233
|
print('Error occured, no results to output')
|
|
@@ -653,7 +238,7 @@ class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting
|
|
|
653
238
|
def GetSummary(self):
|
|
654
239
|
if not self.results and not self.error:
|
|
655
240
|
print('No test chosen, no summary to output')
|
|
656
|
-
# self.
|
|
241
|
+
# self.run_test(test='auto')
|
|
657
242
|
return self.summary
|
|
658
243
|
else:
|
|
659
244
|
return self.summary
|
AutoStatLib/_version.py
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
# AutoStatLib package version:
|
|
2
|
-
__version__ = "0.2.
|
|
2
|
+
__version__ = "0.2.6"
|
AutoStatLib/helpers.py
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import pandas as pd
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class Helpers():
    '''
    Helper mixin: data reshaping, input clean-up, results assembly,
    logging and warnings.
    '''

    def matrix_to_dataframe(self, matrix):
        '''
        Flatten a list of groups into a long-format DataFrame with columns
        'Row' (position inside the group), 'Col' (group index) and 'Value'.
        '''
        rows = []
        cols = []
        values = []
        for col_idx, group in enumerate(matrix):
            for row_idx, value in enumerate(group):
                values.append(value)
                cols.append(col_idx)
                rows.append(row_idx)

        return pd.DataFrame({'Row': rows, 'Col': cols, 'Value': values})

    def floatify_recursive(self, data):
        '''
        Convert a (possibly nested) list to np.float64 values.

        Items for which np.float64() raises ValueError or TypeError are
        dropped from their list and self.warning_flag_non_numeric_data is
        set to True.
        '''
        if isinstance(data, list):
            # Recursively process sublists and filter out None values
            processed_list = [self.floatify_recursive(item) for item in data]
            return [item for item in processed_list if item is not None]

        try:
            # Try to convert the item to float
            return np.float64(data)
        except (ValueError, TypeError):
            # If conversion fails, replace with None
            self.warning_flag_non_numeric_data = True
            return None

    def create_results_dict(self) -> dict:
        '''
        Build the results dictionary for the last test run; also stores
        stars_int and stars_str on the instance.
        '''
        p_exact = self.p_value.item()
        self.stars_int = self.make_stars(p_exact)
        self.stars_str = self.make_stars_printed(self.stars_int)

        # An unset or empty post-hoc matrix is reported as empty lists.
        posthoc = self.posthoc_matrix if self.posthoc_matrix else []

        return {
            'p-value': self.make_p_value_printed(p_exact),
            'Significance(p<0.05)': p_exact < 0.05,
            'Stars_Printed': self.stars_str,
            'Test_Name': self.test_name,
            'Groups_Compared': self.n_groups,
            'Population_Mean': self.popmean if self.n_groups == 1 else 'N/A',
            'Data_Normaly_Distributed': self.parametric,
            'Parametric_Test_Applied': self.test_id in self.test_ids_parametric,
            'Paired_Test_Applied': self.paired,
            'Tails': self.tails,
            'p-value_exact': p_exact,
            'Stars': self.stars_int,
            # 'Stat_Value': self.test_stat.item(),
            'Warnings': self.warnings,
            'Groups_N': [len(group) for group in self.data],
            'Groups_Median': [np.median(group).item() for group in self.data],
            'Groups_Mean': [np.mean(group).item() for group in self.data],
            # np.std is called with its default ddof.
            'Groups_SD': [np.std(group).item() for group in self.data],
            # Standard error uses the size of each group, not the number of
            # groups; the division stays in numpy so an empty group gives nan.
            'Groups_SE': [(np.std(group) / np.sqrt(len(group))).item()
                          for group in self.data],
            # actually returns list of lists of numpy dtypes of float64, next make it return regular floats:
            'Samples': self.data,
            'Posthoc_Tests_Name': self.posthoc_name if self.posthoc_name is not None else '',
            'Posthoc_Matrix': posthoc,
            # NOTE(review): bool(p) is True for every non-zero p-value; if this
            # is meant to flag significant pairs it probably should compare
            # against 0.05 - confirm intent before changing.
            'Posthoc_Matrix_bool': [[bool(element) for element in row] for row in posthoc],
            'Posthoc_Matrix_printed': [[self.make_p_value_printed(element) for element in row] for row in posthoc],
            'Posthoc_Matrix_stars': [[self.make_stars_printed(self.make_stars(element)) for element in row] for row in posthoc],
        }

    def log(self, *args, **kwargs):
        '''Append the space-joined arguments to self.summary on a new line.'''
        message = ' '.join(map(str, args))
        # kwargs are accepted for print()-like call sites but are not used.
        self.summary += '\n' + message

    def AddWarning(self, warning_id):
        '''Look up a warning message by id, log it and record it in self.warnings.'''
        message = self.warning_ids_all[warning_id]
        self.log(message)
        self.warnings.append(message)
|
AutoStatLib/normality_tests.py
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
from statsmodels.stats.diagnostic import lilliefors
|
|
2
|
+
from scipy.stats import shapiro, normaltest, anderson
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class NormalityTests():
    '''
    Normality tests mixin

    see the article about minimal sample size for tests:
    Power comparisons of Shapiro-Wilk, Kolmogorov-Smirnov,
    Lilliefors and Anderson-Darling tests, Nornadiah Mohd Razali1, Yap Bee Wah1
    '''

    def check_normality(self, data, alpha=0.05):
        '''
        Run four normality tests on one sample.

        Returns a tuple (sw, lf, ad, ap) for Shapiro-Wilk, Lilliefors,
        Anderson-Darling and D'Agostino-Pearson. Each entry is True when the
        test's p-value exceeds alpha, False otherwise; ad and ap stay None
        for samples shorter than 20, where those two tests are skipped.
        '''
        def passed(p_value):
            # A missing p-value counts as "not normal" instead of raising.
            return bool(p_value is not None and p_value > alpha)

        n = len(data)

        # Shapiro-Wilk test
        sw = passed(shapiro(data)[1])

        # Lilliefors test
        lf = passed(lilliefors(data, dist='norm')[1])

        ad = None
        ap = None
        # test result is skewed if n<20
        if n >= 20:
            # Anderson-Darling test
            ad = passed(self.anderson_get_p(data, dist='norm')[1])
            # D'Agostino-Pearson test
            ap = passed(normaltest(data)[1])

        return (sw, lf, ad, ap)

    @staticmethod
    def _ad_p_value(s):
        '''Map an adjusted Anderson-Darling statistic to a p-value in [0, 1].'''
        import math

        if math.isnan(s):
            return None

        if s >= 0.6:
            # The fitted quadratic turns upward beyond its vertex, which would
            # give growing (eventually > 1, then overflowing) values for very
            # large statistics; hold it at the minimum, where p is ~0.
            s = min(s, 5.709 / (2 * 0.0186))
            p = math.exp(1.2937 - 5.709*s + 0.0186*s**2)
        elif s > 0.34:
            p = math.exp(0.9177 - 4.279*s - 1.38*s**2)
        elif s > 0.2:
            p = 1 - math.exp(-8.318 + 42.796*s - 59.938*s**2)
        else:
            p = 1 - math.exp(-13.436 + 101.14*s - 223.73*s**2)

        return min(max(p, 0.0), 1.0)

    def anderson_get_p(self, data, dist='norm'):
        '''
        calculating p-value for Anderson-Darling test using the method described here:
        Computation of Probability Associated with Anderson-Darling Statistic
        Lorentz Jantschi and Sorana D. Bolboaca, 2018 - Mathematics

        Returns (statistic, p); p is None when the statistic is NaN.
        '''
        n = len(data)

        # Read the statistic by name so extra result fields cannot break us.
        ad = anderson(data, dist=dist).statistic

        # adjust ad_stat for small sample sizes:
        s = ad*(1 + 0.75/n + 2.25/(n**2))

        return ad, self._ad_p_value(s)
|
AutoStatLib/statistical_tests.py
ADDED
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import scikit_posthocs as sp
|
|
3
|
+
from statsmodels.stats.anova import AnovaRM
|
|
4
|
+
from statsmodels.stats.multicomp import pairwise_tukeyhsd
|
|
5
|
+
from scipy.stats import ttest_rel, ttest_ind, ttest_1samp, wilcoxon, mannwhitneyu, f_oneway, kruskal, friedmanchisquare
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class StatisticalTests():
|
|
9
|
+
'''
|
|
10
|
+
Statistical tests mixin
|
|
11
|
+
'''
|
|
12
|
+
|
|
13
|
+
def run_test_auto(self):
    '''
    Choose a test from the number of groups, the pairing flag and the
    normality flag, then run it through run_test_by_id().
    Does nothing when n_groups matches none of the handled cases.
    '''
    # (paired, parametric) -> test id
    for_two_groups = {
        (True, True): 't_test_paired',
        (True, False): 'wilcoxon',
        (False, True): 't_test_independent',
        (False, False): 'mann_whitney',
    }
    for_three_or_more = {
        (True, True): 'anova_1w_rm',
        (True, False): 'friedman',
        (False, True): 'anova_1w_ordinary',
        (False, False): 'kruskal_wallis',
    }

    if self.n_groups == 1:
        chosen = ('t_test_single_sample' if self.parametric
                  else 'wilcoxon_single_sample')
    elif self.n_groups == 2:
        chosen = for_two_groups[(bool(self.paired), bool(self.parametric))]
    elif self.n_groups >= 3:
        chosen = for_three_or_more[(bool(self.paired), bool(self.parametric))]
    else:
        return

    self.run_test_by_id(chosen)
|
|
47
|
+
|
|
48
|
+
def run_test_by_id(self, test_id):
    '''
    Run the test named by test_id and store its outcome on the instance
    (test_name, test_id, test_stat, p_value, paired).

    Raises KeyError for an unknown test_id, before any state is changed.
    '''
    test_names_dict = {
        'anova_1w_ordinary': 'Ordinary One-Way ANOVA',
        'anova_1w_rm': 'Repeated Measures One-Way ANOVA',
        'friedman': 'Friedman test',
        'kruskal_wallis': 'Kruskal-Wallis test',
        'mann_whitney': 'Mann-Whitney U test',
        't_test_independent': 't-test for independent samples',
        't_test_paired': 't-test for paired samples',
        't_test_single_sample': 'Single-sample t-test',
        'wilcoxon': 'Wilcoxon signed-rank test',
        'wilcoxon_single_sample': 'Wilcoxon signed-rank test for single sample',
    }

    # Validate first so a bad id cannot leave self.paired half-updated.
    if test_id not in test_names_dict:
        raise KeyError(test_id)

    # Every test id is also the name of the method implementing it.
    stat, p_value = getattr(self, test_id)()

    self.paired = test_id in self.test_ids_dependent
    self.test_name = test_names_dict[test_id]
    self.test_id = test_id
    self.test_stat = stat
    self.p_value = p_value
|
|
84
|
+
|
|
85
|
+
def anova_1w_ordinary(self):
    '''
    Ordinary one-way ANOVA over all groups in self.data.

    Forces self.tails to 2 and returns (statistic, p-value).
    '''
    # TODO: a Tukey post-hoc step (pairwise_tukeyhsd on the flattened data,
    # run when p < 0.05 and self.posthoc is set) was sketched here but is
    # not implemented yet.
    outcome = f_oneway(*self.data)
    self.tails = 2
    statistic, p_value = outcome
    return statistic, p_value
|
|
102
|
+
|
|
103
|
+
def anova_1w_rm(self):
    """
    Perform a repeated-measures one-way ANOVA on self.data.

    self.data is converted by self.matrix_to_dataframe() into a data frame;
    the AnovaRM call below relies on that frame having the columns
    'Value' (dependent variable), 'Row' (subject) and 'Col' (within factor).
    After the test has run, self.tails is overwritten with 2.

    Returns:
        (stat, p_value): the 'F Value' and 'Pr > F' entries of the first
        row of the fitted model's anova_table.
    """
    df = self.matrix_to_dataframe(self.data)
    res = AnovaRM(df, 'Value', 'Row', within=['Col']).fit()

    # Take the first row by position explicitly. The table rows are
    # presumably labelled by factor name rather than by integers, so a bare
    # [0] would depend on pandas' deprecated positional fallback for
    # Series.__getitem__.
    stat = res.anova_table['F Value'].iloc[0]
    p_value = res.anova_table['Pr > F'].iloc[0]

    self.tails = 2
    return stat, p_value
|
|
118
|
+
|
|
119
|
+
def friedman(self):
    """
    Run the Friedman test over the groups in self.data.

    Every element of self.data is passed to friedmanchisquare as one group.
    After the test has run, self.tails is overwritten with 2.

    Returns:
        (stat, p_value) exactly as produced by friedmanchisquare.
    """
    chi2_stat, p = friedmanchisquare(*self.data)
    self.tails = 2
    return chi2_stat, p
|
|
123
|
+
|
|
124
|
+
def kruskal_wallis(self):
    """
    Run the Kruskal-Wallis test over the groups in self.data.

    Every element of self.data is passed to kruskal as one group.
    self.tails is set to 2, matching the other n-sample tests in this mixin
    (both ANOVAs and Friedman), so the stored tails value agrees with the
    p-value that is returned.

    When the result is significant (p < 0.05) and self.posthoc is truthy,
    Dunn's multiple comparisons with Bonferroni adjustment are computed via
    sp.posthoc_dunn (sp is presumably scikit_posthocs); the matrix is stored
    as nested lists in self.posthoc_matrix and its label in
    self.posthoc_name.

    Returns:
        (stat, p_value) exactly as produced by kruskal.
    """
    stat, p_value = kruskal(*self.data)
    self.tails = 2

    # Perform Dunn's multiple comparisons if Kruskal-Wallis is significant
    if p_value < 0.05 and self.posthoc:
        self.posthoc_matrix = sp.posthoc_dunn(
            self.data, p_adjust='bonferroni').values.tolist()
        self.posthoc_name = 'Dunn`s multiple comparisons'
    return stat, p_value
|
|
133
|
+
|
|
134
|
+
def mann_whitney(self):
    """
    Run the Mann-Whitney U test on the first two groups of self.data.

    The test is always computed two-sided; when self.tails == 1 the
    resulting p-value is halved.

    Returns:
        (stat, p_value): the statistic from mannwhitneyu and the
        (possibly halved) p-value.
    """
    first, second = self.data[0], self.data[1]
    u_stat, p = mannwhitneyu(first, second, alternative='two-sided')

    # One-tailed request: halve the two-sided p-value.
    # Author's note carried over: calling mannwhitneyu with
    # alternative='less' and replacing p > 0.5 by 1 - p gives the same result.
    if self.tails == 1:
        p = p / 2
    return u_stat, p
|
|
146
|
+
|
|
147
|
+
def t_test_independent(self):
    """
    Run an independent two-sample t-test on the first two groups of
    self.data.

    ttest_ind is called with its default arguments; when self.tails == 1
    the resulting p-value is halved.

    Returns:
        (stat, p_value): the statistic from ttest_ind and the
        (possibly halved) p-value.
    """
    first, second = self.data[0], self.data[1]
    t_stat, p = ttest_ind(first, second)
    if self.tails == 1:
        # One-tailed request: halve the p-value reported by ttest_ind.
        p = p / 2
    return t_stat, p
|
|
153
|
+
|
|
154
|
+
def t_test_paired(self):
    """
    Run a paired t-test on the first two groups of self.data.

    ttest_rel is called with its default arguments; when self.tails == 1
    the resulting p-value is halved.

    Returns:
        (stat, p_value): the statistic from ttest_rel and the
        (possibly halved) p-value.
    """
    first, second = self.data[0], self.data[1]
    t_stat, p = ttest_rel(first, second)
    if self.tails == 1:
        # One-tailed request: halve the p-value reported by ttest_rel.
        p = p / 2
    return t_stat, p
|
|
160
|
+
|
|
161
|
+
def t_test_single_sample(self):
    """
    Run a single-sample t-test of self.data[0] against self.popmean.

    If self.popmean is None it is set to 0 and the warning id
    'no_pop_mean_set' is passed to self.AddWarning(). When
    self.tails == 1 the p-value returned by ttest_1samp is halved.

    Returns:
        (stat, p_value): the statistic from ttest_1samp and the
        (possibly halved) p-value.
    """
    # Identity check: `== None` would go through the operand's __eq__,
    # which array-like population means may overload.
    if self.popmean is None:
        self.popmean = 0
        self.AddWarning('no_pop_mean_set')
    stat, p_value = ttest_1samp(self.data[0], self.popmean)
    if self.tails == 1:
        p_value /= 2
    return stat, p_value
|
|
169
|
+
|
|
170
|
+
def wilcoxon(self):
    """
    Run the Wilcoxon signed-rank test on the first two groups of
    self.data.

    When self.tails == 1 the resulting p-value is halved.

    Returns:
        (stat, p_value): the statistic from the test function and the
        (possibly halved) p-value.
    """
    first, second = self.data[0], self.data[1]
    # As a method, the bare name `wilcoxon` below resolves to the
    # module-level function (presumably scipy.stats.wilcoxon), not to this
    # method: class attributes are not in scope inside a method body.
    w_stat, p = wilcoxon(first, second)
    if self.tails == 1:
        # One-tailed request: halve the p-value.
        p = p / 2
    return w_stat, p
|
|
175
|
+
|
|
176
|
+
def wilcoxon_single_sample(self):
    """
    Run a single-sample Wilcoxon signed-rank test of self.data[0] against
    self.popmean.

    If self.popmean is None it is set to 0 and the warning id
    'no_pop_mean_set' is passed to self.AddWarning(). The test is run on
    the differences between each value and self.popmean. When
    self.tails == 1 the resulting p-value is halved.

    Returns:
        (stat, p_value): the statistic from wilcoxon and the
        (possibly halved) p-value.
    """
    # Identity check: `== None` would go through the operand's __eq__,
    # which array-like population means may overload.
    if self.popmean is None:
        self.popmean = 0
        self.AddWarning('no_pop_mean_set')
    # Shift the sample so the test is run against zero.
    data = [i - self.popmean for i in self.data[0]]
    stat, p_value = wilcoxon(data)
    if self.tails == 1:
        p_value /= 2
    return stat, p_value
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
|
|
2
|
+
|
|
3
|
+
class TextFormatting():
    '''
    Text formatting mixin

    The methods below use self.log(), self.data and self.results, which
    are expected to be provided by the class this mixin is combined with.
    '''

    def autospace(self, elements_list, space, delimiter=' ') -> str:
        '''
        Join strings into a single line of fixed-width columns.

        Every element except the last is followed by
        (space - len(element)) copies of `delimiter`; an element that is
        already `space` characters or longer gets no padding. The last
        element is appended as-is, so the line has no trailing padding.
        '''
        last_index = len(elements_list) - 1
        parts = []
        for i, element in enumerate(elements_list):
            if i == last_index:
                parts.append(element)
            else:
                parts.append(element + (space - len(element)) * delimiter)
        return ''.join(parts)

    def print_groups(self, space=24, max_length=15):
        '''
        Log self.data as a table with one column per group.

        space      -- column width handed to autospace()
        max_length -- number of values shown per group; a longer group is
                      cut after max_length values and followed by a
                      '[N more]' placeholder
        '''
        self.log('')
        # Get the number of groups (rows) and the maximum length of rows
        data = self.data
        num_groups = len(data)
        # default=0 keeps an empty data set from raising in max()
        group_longest = max((len(row) for row in data), default=0)

        # Print the header
        header = [f'Group {i+1}' for i in range(num_groups)]
        # Blank separator line under the header.
        # NOTE(review): this was written as ''*7, which is just ''; possibly
        # an underline was intended -- confirm the intended character.
        line = ['']
        self.log(self.autospace(header, space))
        self.log(self.autospace(line, space))

        # Print each column with a placeholder if longer than max_length
        for i in range(group_longest):
            row_values = []
            all_values_empty = True
            for row in data:
                if len(row) > max_length:
                    if i < max_length:
                        cell = str(row[i])
                    elif i == max_length:
                        cell = f'[{len(row) - max_length} more]'
                    else:
                        # Past the placeholder of a truncated group
                        continue
                elif i < len(row):
                    cell = str(row[i])
                else:
                    # Shorter group: keep the column aligned with a blank
                    row_values.append('')
                    continue
                row_values.append(cell)
                all_values_empty = False
            if all_values_empty:
                break
            self.log(self.autospace(row_values, space))

    def print_results(self):
        '''
        Log every entry of self.results as "key : value".

        'Warnings' is shown as the number of warnings, an empty
        'Posthoc_Tests_Name' as 'N/A', and 'Posthoc_Matrix' as its
        dimensions (or 'N/A' when empty). 'Samples' and the derived
        posthoc matrices are not logged.
        '''
        hidden_keys = (
            'Samples',
            'Posthoc_Matrix_bool',
            'Posthoc_Matrix_printed',
            'Posthoc_Matrix_stars',
        )
        self.log('\n\nResults: \n')
        for key in self.results:
            if key in hidden_keys:
                continue
            value = self.results[key]
            shift = 27 - len(key)
            if key == 'Warnings':
                shown = len(value)
            elif key == 'Posthoc_Tests_Name':
                # The fallback belongs to the logged value; wrapping the
                # whole log call in the conditional logged nothing at all
                # for an empty name.
                shown = value if value != '' else 'N/A'
            elif key == 'Posthoc_Matrix':
                shown = '{0}x{0} matrix'.format(len(value)) if value else 'N/A'
            else:
                shown = value
            self.log(key, ':', ' ' * shift, shown)

    def make_p_value_printed(self, p) -> str:
        '''
        Format a p-value for display.

        Returns 'p>0.99' above 0.99, 'p<0.0001' below 0.0001, otherwise
        'p=<value>' with two significant digits (one digit below 0.001).
        None, and any value for which every comparison is false (such as
        NaN), gives 'N/A'.
        '''
        if p is None:
            return 'N/A'
        if p > 0.99:
            return 'p>0.99'
        if p >= 0.001:
            return f'p={p:.2g}'
        if p >= 0.0001:
            return f'p={p:.1g}'
        if p < 0.0001:
            return 'p<0.0001'
        return 'N/A'

    def make_stars(self, p) -> int:
        '''
        Map a p-value to a number of significance stars:
        4 for p < 0.0001, 3 for p < 0.001, 2 for p < 0.01, 1 for p < 0.05,
        otherwise 0 (also for None).
        '''
        if p is None:
            return 0
        if p < 0.0001:
            return 4
        if p < 0.001:
            return 3
        if p < 0.01:
            return 2
        if p < 0.05:
            return 1
        return 0

    def make_stars_printed(self, n) -> str:
        '''
        Return n asterisks, or 'ns' when n is zero/falsy.
        '''
        return '*' * n if n else 'ns'
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: AutoStatLib
|
|
3
|
-
Version: 0.2.
|
|
3
|
+
Version: 0.2.6
|
|
4
4
|
Summary: AutoStatLib - a simple statistical analysis tool
|
|
5
5
|
Author: Stemonitis, SciWare LLC
|
|
6
6
|
Author-email: konung-yaropolk <yaropolk1995@gmail.com>
|
|
@@ -509,15 +509,16 @@ License: GNU LESSER GENERAL PUBLIC LICENSE
|
|
|
509
509
|
|
|
510
510
|
That's all there is to it!
|
|
511
511
|
|
|
512
|
-
Project-URL: Homepage, https://github.com/konung-yaropolk/
|
|
513
|
-
Project-URL:
|
|
512
|
+
Project-URL: Homepage, https://github.com/konung-yaropolk/AutoStatLib
|
|
513
|
+
Project-URL: Repository, https://github.com/konung-yaropolk/AutoStatLib.git
|
|
514
|
+
Project-URL: Issues, https://github.com/konung-yaropolk/AutoStatLib/issues
|
|
514
515
|
Keywords: Science,Statistics
|
|
515
516
|
Classifier: Programming Language :: Python
|
|
516
517
|
Classifier: Programming Language :: Python :: 3
|
|
517
518
|
Classifier: Programming Language :: Python :: 3.12
|
|
518
519
|
Classifier: License :: OSI Approved :: GNU Lesser General Public License v2 or later (LGPLv2+)
|
|
519
520
|
Classifier: Operating System :: OS Independent
|
|
520
|
-
Classifier: Development Status ::
|
|
521
|
+
Classifier: Development Status :: 4 - Beta
|
|
521
522
|
Classifier: Intended Audience :: Developers
|
|
522
523
|
Classifier: Intended Audience :: Science/Research
|
|
523
524
|
Classifier: Natural Language :: English
|
|
@@ -531,6 +532,7 @@ License-File: LICENSE
|
|
|
531
532
|
Requires-Dist: numpy
|
|
532
533
|
Requires-Dist: scipy
|
|
533
534
|
Requires-Dist: statsmodels
|
|
535
|
+
Requires-Dist: scikit-posthocs
|
|
534
536
|
Requires-Dist: pandas
|
|
535
537
|
|
|
536
538
|
# AutoStatLib - python library for automated statistical analysis
|
|
@@ -619,26 +621,30 @@ results = analysis.GetResult()
|
|
|
619
621
|
The results dictionary keys with representing value types:
|
|
620
622
|
```
|
|
621
623
|
{
|
|
622
|
-
'p-value':
|
|
623
|
-
'Significance(p<0.05)':
|
|
624
|
-
'Stars_Printed':
|
|
625
|
-
'Test_Name':
|
|
626
|
-
'Groups_Compared':
|
|
627
|
-
'Population_Mean':
|
|
628
|
-
'Data_Normaly_Distributed':
|
|
629
|
-
'Parametric_Test_Applied':
|
|
630
|
-
'Paired_Test_Applied':
|
|
631
|
-
'Tails':
|
|
632
|
-
'p-value_exact':
|
|
633
|
-
'Stars':
|
|
634
|
-
'Warnings':
|
|
635
|
-
'Groups_N':
|
|
636
|
-
'Groups_Median':
|
|
637
|
-
'Groups_Mean':
|
|
638
|
-
'Groups_SD':
|
|
639
|
-
'Groups_SE':
|
|
640
|
-
'Samples':
|
|
624
|
+
'p-value' : String
|
|
625
|
+
'Significance(p<0.05)' : Boolean
|
|
626
|
+
'Stars_Printed' : String
|
|
627
|
+
'Test_Name' : String
|
|
628
|
+
'Groups_Compared' : Integer
|
|
629
|
+
'Population_Mean' : Float (taken from the input)
|
|
630
|
+
'Data_Normaly_Distributed' : Boolean
|
|
631
|
+
'Parametric_Test_Applied' : Boolean
|
|
632
|
+
'Paired_Test_Applied' : Boolean
|
|
633
|
+
'Tails' : Integer (taken from the input)
|
|
634
|
+
'p-value_exact' : Float
|
|
635
|
+
'Stars' : Integer
|
|
636
|
+
'Warnings' : String
|
|
637
|
+
'Groups_N' : List of integers
|
|
638
|
+
'Groups_Median' : List of floats
|
|
639
|
+
'Groups_Mean' : List of floats
|
|
640
|
+
'Groups_SD' : List of floats
|
|
641
|
+
'Groups_SE' : List of floats
|
|
642
|
+
'Samples' : List of input values by groups
|
|
641
643
|
(taken from the input)
|
|
644
|
+
'Posthoc_Matrix' : 2D List of floats
|
|
645
|
+
'Posthoc_Matrix_bool' : 2D List of Boolean
|
|
646
|
+
'Posthoc_Matrix_printed': 2D List of String
|
|
647
|
+
'Posthoc_Matrix_stars': 2D List of String
|
|
642
648
|
}
|
|
643
649
|
```
|
|
644
650
|
If errors occurred, *GetResult()* returns an empty dictionary
|
|
@@ -653,7 +659,7 @@ If errors occured, *GetResult()* returns an empty dictionary
|
|
|
653
659
|
|
|
654
660
|
### TODO:
|
|
655
661
|
|
|
656
|
-
--
|
|
662
|
+
-- Anova: posthocs
|
|
657
663
|
-- Anova: add 2-way anova and 3-way anova
|
|
658
664
|
-- one-way ANOVA: add repeated measures (for normal dependent values) with and without Geisser-Greenhouse correction
|
|
659
665
|
-- one-way ANOVA: add Brown-Forsythe and Welch (for normal independent values with unequal SDs between groups)
|
|
@@ -666,10 +672,11 @@ If errors occured, *GetResult()* returns an empty dictionary
|
|
|
666
672
|
-- add QQ plot
|
|
667
673
|
-- n-sample tests: add onetail option
|
|
668
674
|
|
|
669
|
-
✅ done -- detailed normality test results
|
|
675
|
+
✅ done -- detailed normality test results
|
|
676
|
+
✅ done -- added posthoc: Kruskal-Wallis Dunn's multiple comparisons
|
|
670
677
|
|
|
671
678
|
|
|
672
|
-
|
|
679
|
+
tests check:
|
|
673
680
|
1-sample:
|
|
674
681
|
--Wilcoxon 2,1 tails - ok
|
|
675
682
|
--t-tests 2,1 tails -ok
|
|
@@ -681,6 +688,7 @@ checked tests:
|
|
|
681
688
|
|
|
682
689
|
n-sample:
|
|
683
690
|
--Kruskal-Wallis 2 tail - ok
|
|
691
|
+
--Dunn's multiple comparisons - ??
|
|
684
692
|
--Friedman 2 tail - ok
|
|
685
693
|
--one-way ANOVA 2 tail - ok
|
|
686
694
|
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
AutoStatLib/AutoStatLib.py,sha256=KJM2x-fChnxVinnCFsAKpoacKeoIJcJw_r8FYqPCljk,9677
|
|
2
|
+
AutoStatLib/__init__.py,sha256=0wHYnglzKRPqSHtZlfbMEA2Bj5rDR4LLaXbOrJi-sqM,101
|
|
3
|
+
AutoStatLib/__main__.py,sha256=ROKWensrxDh3Gl-yhexJ-BYFohDSh9y-CuMkaLpmnnQ,247
|
|
4
|
+
AutoStatLib/_version.py,sha256=dXJmKxrIARBAzN_ILPim1iDgRdZ6HKMR3FqtamGwNUk,53
|
|
5
|
+
AutoStatLib/helpers.py,sha256=d8P6_q706rjuc6N4WBbdOqNQFuAIjCHfmrhgJABFxqE,3646
|
|
6
|
+
AutoStatLib/normality_tests.py,sha256=TYeKpfpJRzOHvDZucObuZhPktjiZpSZwh381eJ8ENC4,2381
|
|
7
|
+
AutoStatLib/statistical_tests.py,sha256=xfHdTtN5Es_qoVMUwX8VFsl-FLpF3zd56S9ya7dPXVo,6566
|
|
8
|
+
AutoStatLib/text_formatting.py,sha256=rWDsrlZdquook7lUg8t2mb3az8nR12BDprxfy_NwE2o,3576
|
|
9
|
+
autostatlib-0.2.6.dist-info/LICENSE,sha256=IMF9i4xIpgCADf0U-V1cuf9HBmqWQd3qtI3FSuyW4zE,26526
|
|
10
|
+
autostatlib-0.2.6.dist-info/METADATA,sha256=BDNKvdfcHaPlSF2q3YuDpOPFK_K7_hKcDH2NgjdCYtQ,36872
|
|
11
|
+
autostatlib-0.2.6.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
|
|
12
|
+
autostatlib-0.2.6.dist-info/top_level.txt,sha256=BuHzVyE2andc7RwD_UPmDjLl9CUAyBH6WHZGjaIReUI,12
|
|
13
|
+
autostatlib-0.2.6.dist-info/RECORD,,
|
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
AutoStatLib/AutoStatLib.py,sha256=lUDNdzH2NdsyGm1jgLvQ1b-PXIyo8SfMApEK4uOQxSg,23479
|
|
2
|
-
AutoStatLib/__init__.py,sha256=0wHYnglzKRPqSHtZlfbMEA2Bj5rDR4LLaXbOrJi-sqM,101
|
|
3
|
-
AutoStatLib/__main__.py,sha256=ROKWensrxDh3Gl-yhexJ-BYFohDSh9y-CuMkaLpmnnQ,247
|
|
4
|
-
AutoStatLib/_version.py,sha256=WbLB15iApm4FvkoTxz3n4t20nHfs58LNdIBr1m1YbxU,53
|
|
5
|
-
AutoStatLib-0.2.2.dist-info/LICENSE,sha256=IMF9i4xIpgCADf0U-V1cuf9HBmqWQd3qtI3FSuyW4zE,26526
|
|
6
|
-
AutoStatLib-0.2.2.dist-info/METADATA,sha256=4Ro1Bo6FsklfwMo-G5N9C--n-7HJA4nMNns6qivu90k,36473
|
|
7
|
-
AutoStatLib-0.2.2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
8
|
-
AutoStatLib-0.2.2.dist-info/top_level.txt,sha256=BuHzVyE2andc7RwD_UPmDjLl9CUAyBH6WHZGjaIReUI,12
|
|
9
|
-
AutoStatLib-0.2.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|