AutoStatLib 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of AutoStatLib might be problematic. Click here for more details.
- AutoStatLib/AutoStatLib.py +238 -180
- AutoStatLib/_version.py +1 -1
- {AutoStatLib-0.2.0.dist-info → AutoStatLib-0.2.2.dist-info}/METADATA +40 -18
- AutoStatLib-0.2.2.dist-info/RECORD +9 -0
- AutoStatLib-0.2.0.dist-info/RECORD +0 -9
- {AutoStatLib-0.2.0.dist-info → AutoStatLib-0.2.2.dist-info}/LICENSE +0 -0
- {AutoStatLib-0.2.0.dist-info → AutoStatLib-0.2.2.dist-info}/WHEEL +0 -0
- {AutoStatLib-0.2.0.dist-info → AutoStatLib-0.2.2.dist-info}/top_level.txt +0 -0
AutoStatLib/AutoStatLib.py
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import numpy as np
|
|
2
|
+
import pandas as pd
|
|
2
3
|
from statsmodels.stats.diagnostic import lilliefors
|
|
3
|
-
from
|
|
4
|
+
from statsmodels.stats.anova import AnovaRM
|
|
5
|
+
from scipy.stats import ttest_rel, ttest_ind, ttest_1samp, wilcoxon, mannwhitneyu, f_oneway, kruskal, friedmanchisquare, shapiro, anderson, normaltest
|
|
4
6
|
|
|
5
7
|
|
|
6
8
|
class __StatisticalTests():
|
|
@@ -8,37 +10,113 @@ class __StatisticalTests():
|
|
|
8
10
|
Statistical tests mixin
|
|
9
11
|
'''
|
|
10
12
|
|
|
11
|
-
def
|
|
13
|
+
def run_test_auto(self):
    '''
    Select the test that fits the data layout and run it.

    The choice is driven by three attributes:
        self.n_groups   - number of data groups
        self.paired     - truthy when the groups are dependent
                          (not consulted for a single group)
        self.parametric - truthy selects the parametric test of the
                          pair, otherwise the non-parametric one

    The selected id is handed to self.run_test_by_id().
    Nothing is run when n_groups is neither 1, 2 nor >= 3.
    '''
    # (parametric id, non-parametric id) for every supported layout
    single = ('t_test_single_sample', 'wilcoxon_single_sample')
    two_paired = ('t_test_paired', 'wilcoxon')
    two_independent = ('t_test_independent', 'mann_whitney')
    many_paired = ('anova_1w_rm', 'friedman')
    many_independent = ('anova_1w_ordinary', 'kruskal_wallis')

    if self.n_groups == 1:
        candidates = single
    elif self.n_groups == 2:
        candidates = two_paired if self.paired else two_independent
    elif self.n_groups >= 3:
        candidates = many_paired if self.paired else many_independent
    else:
        # unsupported group count: stay a no-op, as before
        return

    parametric_id, nonparametric_id = candidates
    self.run_test_by_id(
        parametric_id if self.parametric else nonparametric_id)
|
|
47
|
+
|
|
48
|
+
def run_test_by_id(self, test_id):
    '''
    Run the statistical test with the given id and store its outcome.

    Parameters:
        test_id: a key of the name table below; every id is also the
                 name of the method that implements the test

    On success the following attributes are written:
        self.paired    - True when test_id is in self.test_ids_dependent
                         (this overwrites the previous value)
        self.test_name - human readable name of the test
        self.test_id   - the id that was run
        self.test_stat - test statistic returned by the test method
        self.p_value   - p-value returned by the test method

    Raises:
        KeyError: for an unknown test_id; raised before any attribute
                  is modified
    '''

    test_names_dict = {
        'anova_1w_ordinary': 'Ordinary One-Way ANOVA',
        'anova_1w_rm': 'Repeated Measures One-Way ANOVA',
        'friedman': 'Friedman test',
        'kruskal_wallis': 'Kruskal-Wallis test',
        'mann_whitney': 'Mann-Whitney U test',
        't_test_independent': 't-test for independent samples',
        't_test_paired': 't-test for paired samples',
        't_test_single_sample': 'Single-sample t-test',
        'wilcoxon': 'Wilcoxon signed-rank test',
        'wilcoxon_single_sample': 'Wilcoxon signed-rank test for single sample',
    }

    # reject unknown ids up front so a failed call leaves the object untouched
    if test_id not in test_names_dict:
        raise KeyError(f'Unknown test id: {test_id!r}')

    # the id doubles as the method name; the membership check above keeps
    # this lookup restricted to the known test methods
    stat, p_value = getattr(self, test_id)()

    self.paired = test_id in self.test_ids_dependent

    self.test_name = test_names_dict[test_id]
    self.test_id = test_id
    self.test_stat = stat
    self.p_value = p_value
|
|
84
|
+
|
|
85
|
+
def anova_1w_ordinary(self):
|
|
12
86
|
stat, p_value = f_oneway(*self.data)
|
|
13
87
|
self.tails = 2
|
|
14
88
|
# if self.tails == 1 and p_value > 0.5:
|
|
15
89
|
# p_value /= 2
|
|
16
90
|
# if self.tails == 1:
|
|
17
91
|
# p_value /= 2
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
92
|
+
return stat, p_value
|
|
93
|
+
|
|
94
|
+
def anova_1w_rm(self):
    """
    Perform repeated measures one-way ANOVA test.

    self.data is reshaped with self.matrix_to_dataframe(): the index of
    each sublist becomes the within-subject factor 'Col' and the
    position of a value inside its sublist becomes the subject id
    'Row'. Each sublist is therefore one condition, with the subjects
    in the same order in every sublist.

    Returns:
        (stat, p_value): the 'F Value' and 'Pr > F' entries of the
        first row of the fitted AnovaRM table.

    Side effects:
        self.tails is set to 2.
    """

    df = self.matrix_to_dataframe(self.data)
    res = AnovaRM(df, 'Value', 'Row', within=['Col']).fit()

    # pick the first row by position explicitly: a plain [0] on these
    # columns relies on the positional fallback of Series indexing,
    # which pandas has deprecated
    stat = res.anova_table['F Value'].iloc[0]
    p_value = res.anova_table['Pr > F'].iloc[0]

    self.tails = 2
    return stat, p_value
|
|
23
109
|
|
|
24
|
-
def
|
|
110
|
+
def friedman(self):
|
|
25
111
|
stat, p_value = friedmanchisquare(*self.data)
|
|
26
112
|
self.tails = 2
|
|
27
|
-
|
|
28
|
-
self.test_id = 'friedman'
|
|
29
|
-
self.paired = True
|
|
30
|
-
self.test_stat = stat
|
|
31
|
-
self.p_value = p_value
|
|
113
|
+
return stat, p_value
|
|
32
114
|
|
|
33
|
-
def
|
|
115
|
+
def kruskal_wallis(self):
|
|
34
116
|
stat, p_value = kruskal(*self.data)
|
|
35
|
-
|
|
36
|
-
self.test_id = 'kruskal_wallis'
|
|
37
|
-
self.paired = False
|
|
38
|
-
self.test_stat = stat
|
|
39
|
-
self.p_value = p_value
|
|
117
|
+
return stat, p_value
|
|
40
118
|
|
|
41
|
-
def
|
|
119
|
+
def mann_whitney(self):
|
|
42
120
|
stat, p_value = mannwhitneyu(
|
|
43
121
|
self.data[0], self.data[1], alternative='two-sided')
|
|
44
122
|
if self.tails == 1:
|
|
@@ -49,78 +127,53 @@ class __StatisticalTests():
|
|
|
49
127
|
# self.data[0], self.data[1], alternative='two-sided' if self.tails == 2 else 'less')
|
|
50
128
|
# if self.tails == 1 and p_value > 0.5:
|
|
51
129
|
# p_value = 1-p_value
|
|
130
|
+
return stat, p_value
|
|
52
131
|
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
self.paired = False
|
|
56
|
-
self.test_stat = stat
|
|
57
|
-
self.p_value = p_value
|
|
58
|
-
|
|
59
|
-
def t_test_independend(self):
|
|
60
|
-
t_stat, t_p_value = ttest_ind(
|
|
132
|
+
def t_test_independent(self):
|
|
133
|
+
stat, p_value = ttest_ind(
|
|
61
134
|
self.data[0], self.data[1])
|
|
62
135
|
if self.tails == 1:
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
self.test_id = 't_test_independend'
|
|
66
|
-
self.paired = False
|
|
67
|
-
self.test_stat = t_stat
|
|
68
|
-
self.p_value = t_p_value
|
|
136
|
+
p_value /= 2
|
|
137
|
+
return stat, p_value
|
|
69
138
|
|
|
70
139
|
def t_test_paired(self):
|
|
71
|
-
|
|
140
|
+
stat, p_value = ttest_rel(
|
|
72
141
|
self.data[0], self.data[1])
|
|
73
142
|
if self.tails == 1:
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
self.test_id = 't_test_paired'
|
|
77
|
-
self.paired = True
|
|
78
|
-
self.test_stat = t_stat
|
|
79
|
-
self.p_value = t_p_value
|
|
143
|
+
p_value /= 2
|
|
144
|
+
return stat, p_value
|
|
80
145
|
|
|
81
146
|
def t_test_single_sample(self):
|
|
82
147
|
if self.popmean == None:
|
|
83
148
|
self.popmean = 0
|
|
84
149
|
self.AddWarning('no_pop_mean_set')
|
|
85
|
-
|
|
150
|
+
stat, p_value = ttest_1samp(self.data[0], self.popmean)
|
|
86
151
|
if self.tails == 1:
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
self.test_id = 't_test_single_sample'
|
|
90
|
-
self.paired = False
|
|
91
|
-
self.test_stat = t_stat
|
|
92
|
-
self.p_value = t_p_value
|
|
152
|
+
p_value /= 2
|
|
153
|
+
return stat, p_value
|
|
93
154
|
|
|
155
|
+
def wilcoxon(self):
|
|
156
|
+
stat, p_value = wilcoxon(self.data[0], self.data[1])
|
|
157
|
+
if self.tails == 1:
|
|
158
|
+
p_value /= 2
|
|
159
|
+
return stat, p_value
|
|
160
|
+
|
|
94
161
|
def wilcoxon_single_sample(self):
|
|
95
162
|
if self.popmean == None:
|
|
96
163
|
self.popmean = 0
|
|
97
164
|
self.AddWarning('no_pop_mean_set')
|
|
98
165
|
data = [i - self.popmean for i in self.data[0]]
|
|
99
|
-
|
|
166
|
+
stat, p_value = wilcoxon(data)
|
|
100
167
|
if self.tails == 1:
|
|
101
168
|
p_value /= 2
|
|
102
|
-
|
|
103
|
-
self.test_id = 'wilcoxon_single_sample'
|
|
104
|
-
self.paired = False
|
|
105
|
-
self.test_stat = w_stat
|
|
106
|
-
self.p_value = p_value
|
|
107
|
-
|
|
108
|
-
def wilcoxon(self):
|
|
109
|
-
stat, p_value = wilcoxon(self.data[0], self.data[1])
|
|
110
|
-
if self.tails == 1:
|
|
111
|
-
p_value /= 2
|
|
112
|
-
self.test_name = 'Wilcoxon signed-rank test'
|
|
113
|
-
self.test_id = 'wilcoxon'
|
|
114
|
-
self.paired = True
|
|
115
|
-
self.test_stat = stat
|
|
116
|
-
self.p_value = p_value
|
|
169
|
+
return stat, p_value
|
|
117
170
|
|
|
118
171
|
|
|
119
172
|
class __NormalityTests():
|
|
120
173
|
'''
|
|
121
174
|
Normality tests mixin
|
|
122
175
|
|
|
123
|
-
see the article about
|
|
176
|
+
see the article about minimal sample size for tests:
|
|
124
177
|
Power comparisons of Shapiro-Wilk, Kolmogorov-Smirnov,
|
|
125
178
|
Lilliefors and Anderson-Darling tests, Nornadiah Mohd Razali1, Yap Bee Wah1
|
|
126
179
|
'''
|
|
@@ -171,7 +224,7 @@ class __NormalityTests():
|
|
|
171
224
|
|
|
172
225
|
def anderson_get_p(self, data, dist='norm'):
|
|
173
226
|
'''
|
|
174
|
-
calculating p-value for Anderson-Darling test using the method described here:
|
|
227
|
+
calculating p-value for Anderson-Darling test using the method described here:
|
|
175
228
|
Computation of Probability Associated with Anderson-Darling Statistic
|
|
176
229
|
Lorentz Jantschi and Sorana D. Bolboaca, 2018 - Mathematics
|
|
177
230
|
|
|
@@ -199,6 +252,65 @@ class __NormalityTests():
|
|
|
199
252
|
return ad, p
|
|
200
253
|
|
|
201
254
|
|
|
255
|
+
class __Helpers():
    '''
    Helpers mixin

    Data reshaping, results dictionary assembly, summary logging
    and warnings collection.
    '''

    def matrix_to_dataframe(self, matrix):
        '''
        Flatten a list of groups into a long-format DataFrame.

        Parameters:
            matrix: iterable of groups, each an iterable of values

        Returns:
            DataFrame with one line per value and the columns
            'Row'   - position of the value inside its group
            'Col'   - index of the group
            'Value' - the value itself
        '''
        rows, cols, values = [], [], []

        for group_index, group in enumerate(matrix):
            for position, value in enumerate(group):
                rows.append(position)
                cols.append(group_index)
                values.append(value)

        return pd.DataFrame({'Row': rows, 'Col': cols, 'Value': values})

    def create_results_dict(self) -> dict:
        '''
        Collect the outcome of the last test into a dictionary.

        Calls self.make_stars() and self.make_p_value_printed(), which
        are not defined in this mixin and must be available on the
        instance. Stores the star count in self.stars_int and its
        printed form ('ns' for zero stars) in self.stars_str.

        Per-group statistics are computed for every group in self.data.
        'Groups_SD' uses np.std with its default settings, and
        'Groups_SE' divides that value by the square root of the
        size of the same group.
        '''

        self.stars_int = self.make_stars()
        self.stars_str = '*' * self.stars_int if self.stars_int else 'ns'

        p_value_printed = self.make_p_value_printed()
        p_value_exact = self.p_value.item()

        groups_n = [len(group) for group in self.data]
        groups_sd = [np.std(group).item() for group in self.data]
        # standard error of a group depends on that group's own size,
        # not on how many groups there are
        groups_se = [sd / np.sqrt(n).item()
                     for sd, n in zip(groups_sd, groups_n)]

        return {
            'p-value': p_value_printed,
            'Significance(p<0.05)': p_value_exact < 0.05,
            'Stars_Printed': self.stars_str,
            'Test_Name': self.test_name,
            'Groups_Compared': self.n_groups,
            'Population_Mean': self.popmean if self.n_groups == 1 else 'N/A',
            'Data_Normaly_Distributed': self.parametric,
            'Parametric_Test_Applied': self.test_id in self.test_ids_parametric,
            'Paired_Test_Applied': self.paired,
            'Tails': self.tails,
            'p-value_exact': p_value_exact,
            'Stars': self.stars_int,
            # 'Stat_Value': self.test_stat.item(),
            'Warnings': self.warnings,
            'Groups_N': groups_n,
            'Groups_Median': [np.median(group).item() for group in self.data],
            'Groups_Mean': [np.mean(group).item() for group in self.data],
            'Groups_SD': groups_sd,
            'Groups_SE': groups_se,
            # actually returns list of lists of numpy dtypes of float64, next make it return regular floats:
            'Samples': self.data,
        }

    def log(self, *args, **kwargs):
        '''
        Append a line to self.summary.

        Positional arguments are converted to str and joined with
        single spaces; keyword arguments are accepted but not used.
        '''
        message = ' '.join(map(str, args))
        # print(message, **kwargs)
        self.summary += '\n' + message

    def AddWarning(self, warning_id):
        '''
        Look up the warning text for warning_id in self.warning_ids_all,
        write it to the summary and append it to self.warnings.
        '''
        message = self.warning_ids_all[warning_id]
        self.log(message)
        self.warnings.append(message)
|
|
312
|
+
|
|
313
|
+
|
|
202
314
|
class __TextFormatting():
|
|
203
315
|
'''
|
|
204
316
|
Text formatting mixin
|
|
@@ -293,45 +405,6 @@ class __TextFormatting():
|
|
|
293
405
|
else:
|
|
294
406
|
self.log(i, ':', ' ' * shift, self.results[i])
|
|
295
407
|
|
|
296
|
-
def create_results_dict(self) -> dict:
|
|
297
|
-
|
|
298
|
-
self.stars_int = self.make_stars()
|
|
299
|
-
self.stars_str = '*' * self.stars_int if self.stars_int else 'ns'
|
|
300
|
-
|
|
301
|
-
return {
|
|
302
|
-
'p-value': self.make_p_value_printed(),
|
|
303
|
-
'Significance(p<0.05)': True if self.p_value.item() < 0.05 else False,
|
|
304
|
-
'Stars_Printed': self.stars_str,
|
|
305
|
-
'Test_Name': self.test_name,
|
|
306
|
-
'Groups_Compared': self.n_groups,
|
|
307
|
-
'Population_Mean': self.popmean if self.n_groups == 1 else 'N/A',
|
|
308
|
-
'Data_Normaly_Distributed': self.parametric,
|
|
309
|
-
'Parametric_Test_Applied': True if self.test_id in self.test_ids_parametric else False,
|
|
310
|
-
'Paired_Test_Applied': self.paired,
|
|
311
|
-
'Tails': self.tails,
|
|
312
|
-
'p-value_exact': self.p_value.item(),
|
|
313
|
-
'Stars': self.stars_int,
|
|
314
|
-
# 'Stat_Value': self.test_stat.item(),
|
|
315
|
-
'Warnings': self.warnings,
|
|
316
|
-
'Groups_N': [len(self.data[i]) for i in range(len(self.data))],
|
|
317
|
-
'Groups_Median': [np.median(self.data[i]).item() for i in range(len(self.data))],
|
|
318
|
-
'Groups_Mean': [np.mean(self.data[i]).item() for i in range(len(self.data))],
|
|
319
|
-
'Groups_SD': [np.std(self.data[i]).item() for i in range(len(self.data))],
|
|
320
|
-
'Groups_SE': [np.std(self.data[i]).item() / np.sqrt(len(self.data)).item() for i in range(len(self.data))],
|
|
321
|
-
# actually returns list of lists of numpy dtypes of float64, next make it return regular floats:
|
|
322
|
-
'Samples': self.data,
|
|
323
|
-
}
|
|
324
|
-
|
|
325
|
-
def log(self, *args, **kwargs):
|
|
326
|
-
message = ' '.join(map(str, args))
|
|
327
|
-
# print(message, **kwargs)
|
|
328
|
-
self.summary += '\n' + message
|
|
329
|
-
|
|
330
|
-
def AddWarning(self, warning_id):
|
|
331
|
-
message = self.warning_ids_all[warning_id]
|
|
332
|
-
self.log(message)
|
|
333
|
-
self.warnings.append(message)
|
|
334
|
-
|
|
335
408
|
|
|
336
409
|
class __InputFormatting():
|
|
337
410
|
def floatify_recursive(self, data):
|
|
@@ -349,7 +422,7 @@ class __InputFormatting():
|
|
|
349
422
|
return None
|
|
350
423
|
|
|
351
424
|
|
|
352
|
-
class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting, __InputFormatting):
|
|
425
|
+
class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting, __InputFormatting, __Helpers):
|
|
353
426
|
'''
|
|
354
427
|
The main class
|
|
355
428
|
*documentation placeholder*
|
|
@@ -372,21 +445,49 @@ class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting
|
|
|
372
445
|
self.n_groups = len(self.groups_list)
|
|
373
446
|
self.warning_flag_non_numeric_data = False
|
|
374
447
|
self.summary = ''
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
't_test_paired',
|
|
378
|
-
't_test_single_sample',]
|
|
448
|
+
|
|
449
|
+
# test IDs classification:
|
|
379
450
|
self.test_ids_all = [ # in aplhabetical order
|
|
380
|
-
'
|
|
451
|
+
'anova_1w_ordinary',
|
|
452
|
+
'anova_1w_rm',
|
|
381
453
|
'friedman',
|
|
382
454
|
'kruskal_wallis',
|
|
383
455
|
'mann_whitney',
|
|
384
|
-
'
|
|
456
|
+
't_test_independent',
|
|
385
457
|
't_test_paired',
|
|
386
458
|
't_test_single_sample',
|
|
387
459
|
'wilcoxon',
|
|
388
460
|
'wilcoxon_single_sample',
|
|
389
461
|
]
|
|
462
|
+
# ids of the parametric tests; every entry needs its own trailing comma,
# otherwise adjacent string literals are silently joined into one bogus id
self.test_ids_parametric = [
    'anova_1w_ordinary',
    'anova_1w_rm',
    't_test_independent',
    't_test_paired',
    't_test_single_sample',
]
|
|
469
|
+
self.test_ids_dependent = [
|
|
470
|
+
'anova_1w_rm',
|
|
471
|
+
'friedman',
|
|
472
|
+
't_test_paired',
|
|
473
|
+
'wilcoxon',
|
|
474
|
+
]
|
|
475
|
+
self.test_ids_3sample = [
|
|
476
|
+
'anova_1w_ordinary',
|
|
477
|
+
'anova_1w_rm',
|
|
478
|
+
'friedman',
|
|
479
|
+
'kruskal_wallis',
|
|
480
|
+
]
|
|
481
|
+
self.test_ids_2sample = [
|
|
482
|
+
'mann_whitney',
|
|
483
|
+
't_test_independent',
|
|
484
|
+
't_test_paired',
|
|
485
|
+
'wilcoxon',
|
|
486
|
+
]
|
|
487
|
+
self.test_ids_1sample = [
|
|
488
|
+
't_test_single_sample',
|
|
489
|
+
'wilcoxon_single_sample',
|
|
490
|
+
]
|
|
390
491
|
self.warning_ids_all = {
|
|
391
492
|
# 'not-numeric': '\nWarning: Non-numeric data was found in input and ignored.\n Make sure the input data is correct to get the correct results\n',
|
|
392
493
|
'param_test_with_non-normal_data': '\nWarning: Parametric test was manualy chosen for Not-Normaly distributed data.\n The results might be skewed. \n Please, run non-parametric test or preform automatic test selection.\n',
|
|
@@ -425,28 +526,18 @@ class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting
|
|
|
425
526
|
assert self.data, 'There is no input data'
|
|
426
527
|
assert self.tails in [1, 2], 'Tails parameter can be 1 or 2 only'
|
|
427
528
|
assert test in self.test_ids_all or test == 'auto', 'Wrong test id choosen, ensure you called correct function'
|
|
428
|
-
assert not (self.n_groups > 1
|
|
429
|
-
and (test == 't_test_single_sample'
|
|
430
|
-
or test == 'wilcoxon_single_sample')), 'Only one group of data must be given for single-group tests'
|
|
431
529
|
assert all(len(
|
|
432
530
|
group) >= 4 for group in self.data), 'Each group must contain at least four values'
|
|
433
|
-
assert not (self.paired == True
|
|
434
|
-
|
|
435
|
-
assert not (test
|
|
436
|
-
|
|
437
|
-
assert not (test
|
|
438
|
-
|
|
439
|
-
assert not (test
|
|
440
|
-
|
|
441
|
-
assert not (test
|
|
442
|
-
3), 'At least three groups of data must be given for
|
|
443
|
-
assert not ((test == 'anova'
|
|
444
|
-
or test == 'kruskal_wallis') and self.n_groups < 2), 'At least two groups of data must be given for ANOVA or Kruskal Wallis tests'
|
|
445
|
-
assert not ((test == 'wilcoxon'
|
|
446
|
-
or test == 't_test_independend'
|
|
447
|
-
or test == 't_test_paired'
|
|
448
|
-
or test == 'mann_whitney')
|
|
449
|
-
and self.n_groups != 2), 'Only two groups of data must be given for 2-groups tests'
|
|
531
|
+
assert not (self.paired == True
|
|
532
|
+
and not all(len(lst) == len(self.data[0]) for lst in self.data)), 'Paired groups must have the same length'
|
|
533
|
+
assert not (test in self.test_ids_dependent
|
|
534
|
+
and not all(len(lst) == len(self.data[0]) for lst in self.data)), 'Groups must have the same length for dependent groups test'
|
|
535
|
+
assert not (test in self.test_ids_2sample
|
|
536
|
+
and self.n_groups != 2), f'Only two groups of data must be given for 2-groups tests, got {self.n_groups}'
|
|
537
|
+
assert not (test in self.test_ids_1sample
|
|
538
|
+
and self.n_groups > 1), f'Only one group of data must be given for single-group tests, got {self.n_groups}'
|
|
539
|
+
assert not (test in self.test_ids_3sample
|
|
540
|
+
and self.n_groups < 3), f'At least three groups of data must be given for multi-groups tests, got {self.n_groups}'
|
|
450
541
|
except AssertionError as error:
|
|
451
542
|
self.log('\nTest :', test)
|
|
452
543
|
self.log('Error :', error)
|
|
@@ -490,27 +581,13 @@ class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting
|
|
|
490
581
|
if not test == 'auto' and self.parametric and not test in self.test_ids_parametric:
|
|
491
582
|
self.AddWarning('non-param_test_with_normal_data')
|
|
492
583
|
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
self.
|
|
497
|
-
elif test == 'kruskal_wallis':
|
|
498
|
-
self.kruskal_wallis_test()
|
|
499
|
-
elif test == 'mann_whitney':
|
|
500
|
-
self.mann_whitney_u_test()
|
|
501
|
-
elif test == 't_test_independend':
|
|
502
|
-
self.t_test_independend()
|
|
503
|
-
elif test == 't_test_paired':
|
|
504
|
-
self.t_test_paired()
|
|
505
|
-
elif test == 't_test_single_sample':
|
|
506
|
-
self.t_test_single_sample()
|
|
507
|
-
elif test == 'wilcoxon':
|
|
508
|
-
self.wilcoxon()
|
|
509
|
-
elif test == 'wilcoxon_single_sample':
|
|
510
|
-
self.wilcoxon_single_sample()
|
|
584
|
+
# run the test
|
|
585
|
+
|
|
586
|
+
if test in self.test_ids_all:
|
|
587
|
+
self.run_test_by_id(test)
|
|
511
588
|
else:
|
|
512
|
-
self.
|
|
513
|
-
|
|
589
|
+
self.run_test_auto()
|
|
590
|
+
|
|
514
591
|
|
|
515
592
|
# print the results
|
|
516
593
|
self.results = self.create_results_dict()
|
|
@@ -523,32 +600,7 @@ class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting
|
|
|
523
600
|
if self.verbose == True:
|
|
524
601
|
print(self.summary)
|
|
525
602
|
|
|
526
|
-
def __auto(self):
|
|
527
603
|
|
|
528
|
-
if self.n_groups == 2:
|
|
529
|
-
if self.paired:
|
|
530
|
-
if self.parametric:
|
|
531
|
-
return self.t_test_paired()
|
|
532
|
-
else:
|
|
533
|
-
return self.wilcoxon()
|
|
534
|
-
else:
|
|
535
|
-
if self.parametric:
|
|
536
|
-
return self.t_test_independend()
|
|
537
|
-
else:
|
|
538
|
-
return self.mann_whitney_u_test()
|
|
539
|
-
elif self.n_groups == 1:
|
|
540
|
-
if self.parametric:
|
|
541
|
-
return self.t_test_single_sample()
|
|
542
|
-
else:
|
|
543
|
-
return self.wilcoxon_single_sample()
|
|
544
|
-
else:
|
|
545
|
-
if self.paired:
|
|
546
|
-
return self.friedman_test()
|
|
547
|
-
else:
|
|
548
|
-
if self.parametric:
|
|
549
|
-
return self.anova()
|
|
550
|
-
else:
|
|
551
|
-
return self.kruskal_wallis_test()
|
|
552
604
|
|
|
553
605
|
# public methods:
|
|
554
606
|
def RunAuto(self):
|
|
@@ -557,8 +609,11 @@ class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting
|
|
|
557
609
|
def RunManual(self, test):
|
|
558
610
|
self.__run_test(test)
|
|
559
611
|
|
|
560
|
-
def
|
|
561
|
-
self.__run_test(test='
|
|
612
|
+
def RunOnewayAnova(self):
    '''
    Run the ordinary one-way ANOVA via the common test runner.
    '''
    test_id = 'anova_1w_ordinary'
    self.__run_test(test=test_id)
|
|
614
|
+
|
|
615
|
+
def RunOnewayAnovaRM(self):
    '''
    Run the repeated measures one-way ANOVA via the common test runner.
    '''
    test_id = 'anova_1w_rm'
    self.__run_test(test=test_id)
|
|
562
617
|
|
|
563
618
|
def RunFriedman(self):
|
|
564
619
|
self.__run_test(test='friedman')
|
|
@@ -570,7 +625,7 @@ class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting
|
|
|
570
625
|
self.__run_test(test='mann_whitney')
|
|
571
626
|
|
|
572
627
|
def RunTtest(self):
|
|
573
|
-
self.__run_test(test='
|
|
628
|
+
self.__run_test(test='t_test_independent')
|
|
574
629
|
|
|
575
630
|
def RunTtestPaired(self):
|
|
576
631
|
self.__run_test(test='t_test_paired')
|
|
@@ -603,6 +658,9 @@ class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting
|
|
|
603
658
|
else:
|
|
604
659
|
return self.summary
|
|
605
660
|
|
|
661
|
+
def GetTestIDs(self):
    '''
    Return the list of all supported test ids (self.test_ids_all).
    '''
    all_ids = self.test_ids_all
    return all_ids
|
|
663
|
+
|
|
606
664
|
def PrintSummary(self):
|
|
607
665
|
print(self.summary)
|
|
608
666
|
|
AutoStatLib/_version.py
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
# AutoStatLib package version:
|
|
2
|
-
__version__ = "0.2.
|
|
2
|
+
__version__ = "0.2.2"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: AutoStatLib
|
|
3
|
-
Version: 0.2.
|
|
3
|
+
Version: 0.2.2
|
|
4
4
|
Summary: AutoStatLib - a simple statistical analysis tool
|
|
5
5
|
Author: Stemonitis, SciWare LLC
|
|
6
6
|
Author-email: konung-yaropolk <yaropolk1995@gmail.com>
|
|
@@ -531,6 +531,7 @@ License-File: LICENSE
|
|
|
531
531
|
Requires-Dist: numpy
|
|
532
532
|
Requires-Dist: scipy
|
|
533
533
|
Requires-Dist: statsmodels
|
|
534
|
+
Requires-Dist: pandas
|
|
534
535
|
|
|
535
536
|
# AutoStatLib - python library for automated statistical analysis
|
|
536
537
|
|
|
@@ -569,7 +570,7 @@ data_uniform = [list(np.random.uniform(i+3, i+1, n)) for i in range(groups)]
|
|
|
569
570
|
|
|
570
571
|
|
|
571
572
|
# set the parameters:
|
|
572
|
-
paired = False # is groups
|
|
573
|
+
paired = False # are the groups dependent or not
|
|
573
574
|
tails = 2 # two-tailed or one-tailed result
|
|
574
575
|
popmean = 0 # population mean - only for single-sample tests needed
|
|
575
576
|
|
|
@@ -585,7 +586,7 @@ analysis.RunAuto()
|
|
|
585
586
|
|
|
586
587
|
or you can choose specific tests:
|
|
587
588
|
```python
|
|
588
|
-
# 2 groups
|
|
589
|
+
# 2 groups independent:
|
|
589
590
|
analysis.RunTtest()
|
|
590
591
|
analysis.RunMannWhitney()
|
|
591
592
|
|
|
@@ -594,10 +595,11 @@ analysis.RunTtestPaired()
|
|
|
594
595
|
analysis.RunWilcoxon()
|
|
595
596
|
|
|
596
597
|
# 3 and more independent groups comparison:
|
|
597
|
-
analysis.
|
|
598
|
+
analysis.RunOnewayAnova()
|
|
598
599
|
analysis.RunKruskalWallis()
|
|
599
600
|
|
|
600
601
|
# 3 and more dependent groups comparison:
|
|
602
|
+
analysis.RunOnewayAnovaRM()
|
|
601
603
|
analysis.RunFriedman()
|
|
602
604
|
|
|
603
605
|
# single group tests:
|
|
@@ -647,20 +649,40 @@ If errors occured, *GetResult()* returns an empty dictionary
|
|
|
647
649
|
|
|
648
650
|
|
|
649
651
|
---
|
|
650
|
-
## Pre-Alpha dev status.
|
|
651
|
-
|
|
652
|
-
### TODO:
|
|
653
|
-
|
|
654
|
-
--Kruskal-Wallis test - add Dunn's multiple comparisons
|
|
655
|
-
--Anova: add 2-way anova and 3-way
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
--
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
--
|
|
663
|
-
--
|
|
652
|
+
## Pre-Alpha dev status.
|
|
653
|
+
|
|
654
|
+
### TODO:
|
|
655
|
+
|
|
656
|
+
-- Kruskal-Wallis test - add Dunn's multiple comparisons
|
|
657
|
+
-- Anova: add 2-way anova and 3-way anova
|
|
658
|
+
-- one-way ANOVA: add repeated measures (for normal dependent values) with and without Geisser-Greenhouse correction
|
|
659
|
+
-- one-way ANOVA: add Brown-Forsythe and Welch (for normal independent values with unequal SDs between groups)
|
|
660
|
+
-- paired T-test: add ratio-paired t-test (ratios of paired values are consistent)
|
|
661
|
+
-- add Welch test (for norm data unequal variances)
|
|
662
|
+
-- add Kolmogorov-Smirnov test (unpaired nonparametric 2 sample, compare cumulative distributions)
|
|
663
|
+
-- add independent t-test with Welch correction (do not assume equal SDs in groups)
|
|
664
|
+
-- add correlation test, correlation diagram
|
|
665
|
+
-- add linear regression, regression diagram
|
|
666
|
+
-- add QQ plot
|
|
667
|
+
-- n-sample tests: add onetail option
|
|
668
|
+
|
|
669
|
+
✅ done -- detailed normality test results
|
|
670
|
+
|
|
671
|
+
|
|
672
|
+
checked tests:
|
|
673
|
+
1-sample:
|
|
674
|
+
--Wilcoxon 2,1 tails - ok
|
|
675
|
+
--t-tests 2,1 tails -ok
|
|
676
|
+
|
|
677
|
+
2-sample:
|
|
678
|
+
--Wilcoxon 2,1 tails - ok
|
|
679
|
+
--Mann-Whitney 2,1 tails - ok
|
|
680
|
+
--t-tests 2,1 tails -ok
|
|
681
|
+
|
|
682
|
+
n-sample:
|
|
683
|
+
--Kruskal-Wallis 2 tail - ok
|
|
684
|
+
--Friedman 2 tail - ok
|
|
685
|
+
--one-way ANOVA 2 tail - ok
|
|
664
686
|
|
|
665
687
|
|
|
666
688
|
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
AutoStatLib/AutoStatLib.py,sha256=lUDNdzH2NdsyGm1jgLvQ1b-PXIyo8SfMApEK4uOQxSg,23479
|
|
2
|
+
AutoStatLib/__init__.py,sha256=0wHYnglzKRPqSHtZlfbMEA2Bj5rDR4LLaXbOrJi-sqM,101
|
|
3
|
+
AutoStatLib/__main__.py,sha256=ROKWensrxDh3Gl-yhexJ-BYFohDSh9y-CuMkaLpmnnQ,247
|
|
4
|
+
AutoStatLib/_version.py,sha256=WbLB15iApm4FvkoTxz3n4t20nHfs58LNdIBr1m1YbxU,53
|
|
5
|
+
AutoStatLib-0.2.2.dist-info/LICENSE,sha256=IMF9i4xIpgCADf0U-V1cuf9HBmqWQd3qtI3FSuyW4zE,26526
|
|
6
|
+
AutoStatLib-0.2.2.dist-info/METADATA,sha256=4Ro1Bo6FsklfwMo-G5N9C--n-7HJA4nMNns6qivu90k,36473
|
|
7
|
+
AutoStatLib-0.2.2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
8
|
+
AutoStatLib-0.2.2.dist-info/top_level.txt,sha256=BuHzVyE2andc7RwD_UPmDjLl9CUAyBH6WHZGjaIReUI,12
|
|
9
|
+
AutoStatLib-0.2.2.dist-info/RECORD,,
|
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
AutoStatLib/AutoStatLib.py,sha256=fSDVReNCcidK7PmKN5r3pUcu-5ZHxf5nVlWiIi44Moo,22414
|
|
2
|
-
AutoStatLib/__init__.py,sha256=0wHYnglzKRPqSHtZlfbMEA2Bj5rDR4LLaXbOrJi-sqM,101
|
|
3
|
-
AutoStatLib/__main__.py,sha256=ROKWensrxDh3Gl-yhexJ-BYFohDSh9y-CuMkaLpmnnQ,247
|
|
4
|
-
AutoStatLib/_version.py,sha256=3NP9JpFOaSsAhLIlSo_w7f117z3XyFjM1ZwmEoVUPl0,53
|
|
5
|
-
AutoStatLib-0.2.0.dist-info/LICENSE,sha256=IMF9i4xIpgCADf0U-V1cuf9HBmqWQd3qtI3FSuyW4zE,26526
|
|
6
|
-
AutoStatLib-0.2.0.dist-info/METADATA,sha256=h7sUItDh2vtQTdjubf9rfMWL5mQazlkSy3gHIold3Ts,35547
|
|
7
|
-
AutoStatLib-0.2.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
8
|
-
AutoStatLib-0.2.0.dist-info/top_level.txt,sha256=BuHzVyE2andc7RwD_UPmDjLl9CUAyBH6WHZGjaIReUI,12
|
|
9
|
-
AutoStatLib-0.2.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|