AutoStatLib 0.2.1__py3-none-any.whl → 0.2.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of AutoStatLib might be problematic. Click here for more details.
- AutoStatLib/AutoStatLib.py +31 -463
- AutoStatLib/_version.py +1 -1
- AutoStatLib/helpers.py +77 -0
- AutoStatLib/normality_tests.py +85 -0
- AutoStatLib/statistical_tests.py +173 -0
- AutoStatLib/text_formatting.py +98 -0
- {AutoStatLib-0.2.1.dist-info → AutoStatLib-0.2.5.dist-info}/METADATA +40 -16
- AutoStatLib-0.2.5.dist-info/RECORD +13 -0
- AutoStatLib-0.2.1.dist-info/RECORD +0 -9
- {AutoStatLib-0.2.1.dist-info → AutoStatLib-0.2.5.dist-info}/LICENSE +0 -0
- {AutoStatLib-0.2.1.dist-info → AutoStatLib-0.2.5.dist-info}/WHEEL +0 -0
- {AutoStatLib-0.2.1.dist-info → AutoStatLib-0.2.5.dist-info}/top_level.txt +0 -0
AutoStatLib/AutoStatLib.py
CHANGED
|
@@ -1,397 +1,10 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
from
|
|
4
|
-
from
|
|
5
|
-
from scipy.stats import ttest_rel, ttest_ind, ttest_1samp, wilcoxon, mannwhitneyu, f_oneway, kruskal, friedmanchisquare, shapiro, anderson, normaltest
|
|
1
|
+
from AutoStatLib.statistical_tests import StatisticalTests
|
|
2
|
+
from AutoStatLib.normality_tests import NormalityTests
|
|
3
|
+
from AutoStatLib.helpers import Helpers
|
|
4
|
+
from AutoStatLib.text_formatting import TextFormatting
|
|
6
5
|
|
|
7
6
|
|
|
8
|
-
class
|
|
9
|
-
'''
|
|
10
|
-
Statistical tests mixin
|
|
11
|
-
'''
|
|
12
|
-
|
|
13
|
-
def anova_1w_ordinary(self):
|
|
14
|
-
stat, p_value = f_oneway(*self.data)
|
|
15
|
-
self.tails = 2
|
|
16
|
-
# if self.tails == 1 and p_value > 0.5:
|
|
17
|
-
# p_value /= 2
|
|
18
|
-
# if self.tails == 1:
|
|
19
|
-
# p_value /= 2
|
|
20
|
-
self.test_name = 'Ordinary One-Way ANOVA'
|
|
21
|
-
self.test_id = 'anova_1w_ordinary'
|
|
22
|
-
self.paired = False
|
|
23
|
-
self.test_stat = stat
|
|
24
|
-
self.p_value = p_value
|
|
25
|
-
|
|
26
|
-
def anova_1w_rm(self):
|
|
27
|
-
"""
|
|
28
|
-
Perform repeated measures one-way ANOVA test.
|
|
29
|
-
|
|
30
|
-
Parameters:
|
|
31
|
-
data: list of lists, where each sublist represents repeated measures for a subject
|
|
32
|
-
"""
|
|
33
|
-
|
|
34
|
-
df = self.matrix_to_dataframe(self.data)
|
|
35
|
-
res = AnovaRM(df, 'Value', 'Row', within=['Col']).fit()
|
|
36
|
-
f_stat = res.anova_table['F Value'][0]
|
|
37
|
-
p_value = res.anova_table['Pr > F'][0]
|
|
38
|
-
|
|
39
|
-
self.tails = 2
|
|
40
|
-
self.test_name = 'Repeated Measures One-Way ANOVA'
|
|
41
|
-
self.test_id = 'anova_1w_rm'
|
|
42
|
-
self.paired = True
|
|
43
|
-
self.test_stat = f_stat
|
|
44
|
-
self.p_value = p_value
|
|
45
|
-
|
|
46
|
-
def friedman_test(self):
|
|
47
|
-
stat, p_value = friedmanchisquare(*self.data)
|
|
48
|
-
self.tails = 2
|
|
49
|
-
self.test_name = 'Friedman test'
|
|
50
|
-
self.test_id = 'friedman'
|
|
51
|
-
self.paired = True
|
|
52
|
-
self.test_stat = stat
|
|
53
|
-
self.p_value = p_value
|
|
54
|
-
|
|
55
|
-
def kruskal_wallis_test(self):
|
|
56
|
-
stat, p_value = kruskal(*self.data)
|
|
57
|
-
self.test_name = 'Kruskal-Wallis test'
|
|
58
|
-
self.test_id = 'kruskal_wallis'
|
|
59
|
-
self.paired = False
|
|
60
|
-
self.test_stat = stat
|
|
61
|
-
self.p_value = p_value
|
|
62
|
-
|
|
63
|
-
def mann_whitney_u_test(self):
|
|
64
|
-
stat, p_value = mannwhitneyu(
|
|
65
|
-
self.data[0], self.data[1], alternative='two-sided')
|
|
66
|
-
if self.tails == 1:
|
|
67
|
-
p_value /= 2
|
|
68
|
-
# alternative method of one-tailed calculation
|
|
69
|
-
# gives the same result:
|
|
70
|
-
# stat, p_value = mannwhitneyu(
|
|
71
|
-
# self.data[0], self.data[1], alternative='two-sided' if self.tails == 2 else 'less')
|
|
72
|
-
# if self.tails == 1 and p_value > 0.5:
|
|
73
|
-
# p_value = 1-p_value
|
|
74
|
-
|
|
75
|
-
self.test_name = 'Mann-Whitney U test'
|
|
76
|
-
self.test_id = 'mann_whitney'
|
|
77
|
-
self.paired = False
|
|
78
|
-
self.test_stat = stat
|
|
79
|
-
self.p_value = p_value
|
|
80
|
-
|
|
81
|
-
def t_test_independent(self):
|
|
82
|
-
t_stat, t_p_value = ttest_ind(
|
|
83
|
-
self.data[0], self.data[1])
|
|
84
|
-
if self.tails == 1:
|
|
85
|
-
t_p_value /= 2
|
|
86
|
-
self.test_name = 't-test for independent samples'
|
|
87
|
-
self.test_id = 't_test_independent'
|
|
88
|
-
self.paired = False
|
|
89
|
-
self.test_stat = t_stat
|
|
90
|
-
self.p_value = t_p_value
|
|
91
|
-
|
|
92
|
-
def t_test_paired(self):
|
|
93
|
-
t_stat, t_p_value = ttest_rel(
|
|
94
|
-
self.data[0], self.data[1])
|
|
95
|
-
if self.tails == 1:
|
|
96
|
-
t_p_value /= 2
|
|
97
|
-
self.test_name = 't-test for paired samples'
|
|
98
|
-
self.test_id = 't_test_paired'
|
|
99
|
-
self.paired = True
|
|
100
|
-
self.test_stat = t_stat
|
|
101
|
-
self.p_value = t_p_value
|
|
102
|
-
|
|
103
|
-
def t_test_single_sample(self):
|
|
104
|
-
if self.popmean == None:
|
|
105
|
-
self.popmean = 0
|
|
106
|
-
self.AddWarning('no_pop_mean_set')
|
|
107
|
-
t_stat, t_p_value = ttest_1samp(self.data[0], self.popmean)
|
|
108
|
-
if self.tails == 1:
|
|
109
|
-
t_p_value /= 2
|
|
110
|
-
self.test_name = 'Single-sample t-test'
|
|
111
|
-
self.test_id = 't_test_single_sample'
|
|
112
|
-
self.paired = False
|
|
113
|
-
self.test_stat = t_stat
|
|
114
|
-
self.p_value = t_p_value
|
|
115
|
-
|
|
116
|
-
def wilcoxon_single_sample(self):
|
|
117
|
-
if self.popmean == None:
|
|
118
|
-
self.popmean = 0
|
|
119
|
-
self.AddWarning('no_pop_mean_set')
|
|
120
|
-
data = [i - self.popmean for i in self.data[0]]
|
|
121
|
-
w_stat, p_value = wilcoxon(data)
|
|
122
|
-
if self.tails == 1:
|
|
123
|
-
p_value /= 2
|
|
124
|
-
self.test_name = 'Wilcoxon signed-rank test for single sample'
|
|
125
|
-
self.test_id = 'wilcoxon_single_sample'
|
|
126
|
-
self.paired = False
|
|
127
|
-
self.test_stat = w_stat
|
|
128
|
-
self.p_value = p_value
|
|
129
|
-
|
|
130
|
-
def wilcoxon(self):
|
|
131
|
-
stat, p_value = wilcoxon(self.data[0], self.data[1])
|
|
132
|
-
if self.tails == 1:
|
|
133
|
-
p_value /= 2
|
|
134
|
-
self.test_name = 'Wilcoxon signed-rank test'
|
|
135
|
-
self.test_id = 'wilcoxon'
|
|
136
|
-
self.paired = True
|
|
137
|
-
self.test_stat = stat
|
|
138
|
-
self.p_value = p_value
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
class __NormalityTests():
|
|
142
|
-
'''
|
|
143
|
-
Normality tests mixin
|
|
144
|
-
|
|
145
|
-
see the article about minimal sample size for tests:
|
|
146
|
-
Power comparisons of Shapiro-Wilk, Kolmogorov-Smirnov,
|
|
147
|
-
Lilliefors and Anderson-Darling tests, Nornadiah Mohd Razali1, Yap Bee Wah1
|
|
148
|
-
'''
|
|
149
|
-
|
|
150
|
-
def check_normality(self, data):
|
|
151
|
-
sw = None
|
|
152
|
-
lf = None
|
|
153
|
-
ad = None
|
|
154
|
-
ap = None
|
|
155
|
-
n = len(data)
|
|
156
|
-
|
|
157
|
-
# Shapiro-Wilk test
|
|
158
|
-
sw_stat, sw_p_value = shapiro(data)
|
|
159
|
-
if sw_p_value > 0.05:
|
|
160
|
-
sw = True
|
|
161
|
-
else:
|
|
162
|
-
sw = False
|
|
163
|
-
|
|
164
|
-
# Lilliefors test
|
|
165
|
-
lf_stat, lf_p_value = lilliefors(
|
|
166
|
-
data, dist='norm')
|
|
167
|
-
if lf_p_value > 0.05:
|
|
168
|
-
lf = True
|
|
169
|
-
else:
|
|
170
|
-
lf = False
|
|
171
|
-
|
|
172
|
-
# Anderson-Darling test
|
|
173
|
-
if n >= 20:
|
|
174
|
-
ad_stat, ad_p_value = self.anderson_get_p(
|
|
175
|
-
data, dist='norm')
|
|
176
|
-
if ad_p_value > 0.05:
|
|
177
|
-
ad = True
|
|
178
|
-
else:
|
|
179
|
-
ad = False
|
|
180
|
-
|
|
181
|
-
# D'Agostino-Pearson test
|
|
182
|
-
# test result is skewed if n<20
|
|
183
|
-
if n >= 20:
|
|
184
|
-
ap_stat, ap_p_value = normaltest(data)
|
|
185
|
-
if ap_p_value > 0.05:
|
|
186
|
-
ap = True
|
|
187
|
-
else:
|
|
188
|
-
ap = False
|
|
189
|
-
|
|
190
|
-
# print(ap_p_value, ad_p_value, sw_p_value, lf_p_value)
|
|
191
|
-
|
|
192
|
-
return (sw, lf, ad, ap)
|
|
193
|
-
|
|
194
|
-
def anderson_get_p(self, data, dist='norm'):
|
|
195
|
-
'''
|
|
196
|
-
calculating p-value for Anderson-Darling test using the method described here:
|
|
197
|
-
Computation of Probability Associated with Anderson-Darling Statistic
|
|
198
|
-
Lorentz Jantschi and Sorana D. Bolboaca, 2018 - Mathematics
|
|
199
|
-
|
|
200
|
-
'''
|
|
201
|
-
e = 2.718281828459045
|
|
202
|
-
n = len(data)
|
|
203
|
-
|
|
204
|
-
ad, critical_values, significance_levels = anderson(
|
|
205
|
-
data, dist=dist)
|
|
206
|
-
|
|
207
|
-
# adjust ad_stat for small sample sizes:
|
|
208
|
-
s = ad*(1 + 0.75/n + 2.25/(n**2))
|
|
209
|
-
|
|
210
|
-
if s >= 0.6:
|
|
211
|
-
p = e**(1.2937 - 5.709*s + 0.0186*s**2)
|
|
212
|
-
elif s > 0.34:
|
|
213
|
-
p = e**(0.9177 - 4.279*s - 1.38*s**2)
|
|
214
|
-
elif s > 0.2:
|
|
215
|
-
p = 1 - e**(-8.318 + 42.796*s - 59.938*s**2)
|
|
216
|
-
elif s <= 0.2:
|
|
217
|
-
p = 1 - e**(-13.436 + 101.14*s - 223.73*s**2)
|
|
218
|
-
else:
|
|
219
|
-
p = None
|
|
220
|
-
|
|
221
|
-
return ad, p
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
class __Helpers():
|
|
225
|
-
|
|
226
|
-
def matrix_to_dataframe(self, matrix):
|
|
227
|
-
data = []
|
|
228
|
-
cols = []
|
|
229
|
-
rows = []
|
|
230
|
-
|
|
231
|
-
order_number = 1
|
|
232
|
-
for i, row in enumerate(matrix):
|
|
233
|
-
for j, value in enumerate(row):
|
|
234
|
-
data.append(value)
|
|
235
|
-
cols.append(i)
|
|
236
|
-
rows.append(j)
|
|
237
|
-
order_number += 1
|
|
238
|
-
|
|
239
|
-
df = pd.DataFrame(
|
|
240
|
-
{'Row': rows, 'Col': cols, 'Value': data})
|
|
241
|
-
return df
|
|
242
|
-
|
|
243
|
-
def create_results_dict(self) -> dict:
|
|
244
|
-
|
|
245
|
-
self.stars_int = self.make_stars()
|
|
246
|
-
self.stars_str = '*' * self.stars_int if self.stars_int else 'ns'
|
|
247
|
-
|
|
248
|
-
return {
|
|
249
|
-
'p-value': self.make_p_value_printed(),
|
|
250
|
-
'Significance(p<0.05)': True if self.p_value.item() < 0.05 else False,
|
|
251
|
-
'Stars_Printed': self.stars_str,
|
|
252
|
-
'Test_Name': self.test_name,
|
|
253
|
-
'Groups_Compared': self.n_groups,
|
|
254
|
-
'Population_Mean': self.popmean if self.n_groups == 1 else 'N/A',
|
|
255
|
-
'Data_Normaly_Distributed': self.parametric,
|
|
256
|
-
'Parametric_Test_Applied': True if self.test_id in self.test_ids_parametric else False,
|
|
257
|
-
'Paired_Test_Applied': self.paired,
|
|
258
|
-
'Tails': self.tails,
|
|
259
|
-
'p-value_exact': self.p_value.item(),
|
|
260
|
-
'Stars': self.stars_int,
|
|
261
|
-
# 'Stat_Value': self.test_stat.item(),
|
|
262
|
-
'Warnings': self.warnings,
|
|
263
|
-
'Groups_N': [len(self.data[i]) for i in range(len(self.data))],
|
|
264
|
-
'Groups_Median': [np.median(self.data[i]).item() for i in range(len(self.data))],
|
|
265
|
-
'Groups_Mean': [np.mean(self.data[i]).item() for i in range(len(self.data))],
|
|
266
|
-
'Groups_SD': [np.std(self.data[i]).item() for i in range(len(self.data))],
|
|
267
|
-
'Groups_SE': [np.std(self.data[i]).item() / np.sqrt(len(self.data)).item() for i in range(len(self.data))],
|
|
268
|
-
# actually returns list of lists of numpy dtypes of float64, next make it return regular floats:
|
|
269
|
-
'Samples': self.data,
|
|
270
|
-
}
|
|
271
|
-
|
|
272
|
-
def log(self, *args, **kwargs):
|
|
273
|
-
message = ' '.join(map(str, args))
|
|
274
|
-
# print(message, **kwargs)
|
|
275
|
-
self.summary += '\n' + message
|
|
276
|
-
|
|
277
|
-
def AddWarning(self, warning_id):
|
|
278
|
-
message = self.warning_ids_all[warning_id]
|
|
279
|
-
self.log(message)
|
|
280
|
-
self.warnings.append(message)
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
class __TextFormatting():
|
|
284
|
-
'''
|
|
285
|
-
Text formatting mixin
|
|
286
|
-
'''
|
|
287
|
-
|
|
288
|
-
def autospace(self, elements_list, space, delimiter=' ') -> str:
|
|
289
|
-
output = ''
|
|
290
|
-
for i, element in enumerate(elements_list):
|
|
291
|
-
if i == len(elements_list):
|
|
292
|
-
output += element
|
|
293
|
-
else:
|
|
294
|
-
output += element + (space-len(element))*delimiter
|
|
295
|
-
return output
|
|
296
|
-
|
|
297
|
-
def print_groups(self, space=24, max_length=15):
|
|
298
|
-
self.log('')
|
|
299
|
-
# Get the number of groups (rows) and the maximum length of rows
|
|
300
|
-
data = self.data
|
|
301
|
-
num_groups = len(data)
|
|
302
|
-
group_longest = max(len(row) for row in data)
|
|
303
|
-
|
|
304
|
-
# Print the header
|
|
305
|
-
header = [f'Group {i+1}' for i in range(num_groups)]
|
|
306
|
-
line = [''*7]
|
|
307
|
-
self.log(self.autospace(header, space))
|
|
308
|
-
self.log(self.autospace(line, space))
|
|
309
|
-
|
|
310
|
-
# Print each column with a placeholder if longer than max_length
|
|
311
|
-
for i in range(group_longest):
|
|
312
|
-
row_values = []
|
|
313
|
-
all_values_empty = True
|
|
314
|
-
for row in data:
|
|
315
|
-
if len(row) > max_length:
|
|
316
|
-
if i < max_length:
|
|
317
|
-
row_values.append(str(row[i]))
|
|
318
|
-
all_values_empty = False
|
|
319
|
-
elif i == max_length:
|
|
320
|
-
row_values.append(f'[{len(row) - max_length} more]')
|
|
321
|
-
all_values_empty = False
|
|
322
|
-
else:
|
|
323
|
-
continue
|
|
324
|
-
else:
|
|
325
|
-
if i < len(row):
|
|
326
|
-
row_values.append(str(row[i]))
|
|
327
|
-
all_values_empty = False
|
|
328
|
-
else:
|
|
329
|
-
row_values.append('')
|
|
330
|
-
if all_values_empty:
|
|
331
|
-
break
|
|
332
|
-
self.log(self.autospace(row_values, space))
|
|
333
|
-
|
|
334
|
-
def make_stars(self) -> int:
|
|
335
|
-
p = self.p_value.item()
|
|
336
|
-
if p is not None:
|
|
337
|
-
if p < 0.0001:
|
|
338
|
-
return 4
|
|
339
|
-
if p < 0.001:
|
|
340
|
-
return 3
|
|
341
|
-
elif p < 0.01:
|
|
342
|
-
return 2
|
|
343
|
-
elif p < 0.05:
|
|
344
|
-
return 1
|
|
345
|
-
else:
|
|
346
|
-
return 0
|
|
347
|
-
return 0
|
|
348
|
-
|
|
349
|
-
def make_p_value_printed(self) -> str:
|
|
350
|
-
p = self.p_value.item()
|
|
351
|
-
if p is not None:
|
|
352
|
-
if p > 0.99:
|
|
353
|
-
return 'p>0.99'
|
|
354
|
-
elif p >= 0.01:
|
|
355
|
-
return f'p={p:.2g}'
|
|
356
|
-
elif p >= 0.001:
|
|
357
|
-
return f'p={p:.2g}'
|
|
358
|
-
elif p >= 0.0001:
|
|
359
|
-
return f'p={p:.1g}'
|
|
360
|
-
elif p < 0.0001:
|
|
361
|
-
return 'p<0.0001'
|
|
362
|
-
else:
|
|
363
|
-
return 'N/A'
|
|
364
|
-
return 'N/A'
|
|
365
|
-
|
|
366
|
-
def print_results(self):
|
|
367
|
-
self.log('\n\nResults: \n')
|
|
368
|
-
for i in self.results:
|
|
369
|
-
shift = 27 - len(i)
|
|
370
|
-
if i == 'Warnings':
|
|
371
|
-
self.log(i, ':', ' ' * shift, len(self.results[i]))
|
|
372
|
-
elif i == 'Samples':
|
|
373
|
-
pass
|
|
374
|
-
else:
|
|
375
|
-
self.log(i, ':', ' ' * shift, self.results[i])
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
class __InputFormatting():
|
|
379
|
-
def floatify_recursive(self, data):
|
|
380
|
-
if isinstance(data, list):
|
|
381
|
-
# Recursively process sublists and filter out None values
|
|
382
|
-
processed_list = [self.floatify_recursive(item) for item in data]
|
|
383
|
-
return [item for item in processed_list if item is not None]
|
|
384
|
-
else:
|
|
385
|
-
try:
|
|
386
|
-
# Try to convert the item to float
|
|
387
|
-
return np.float64(data)
|
|
388
|
-
except (ValueError, TypeError):
|
|
389
|
-
# If conversion fails, replace with None
|
|
390
|
-
self.warning_flag_non_numeric_data = True
|
|
391
|
-
return None
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting, __InputFormatting, __Helpers):
|
|
7
|
+
class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Helpers):
|
|
395
8
|
'''
|
|
396
9
|
The main class
|
|
397
10
|
*documentation placeholder*
|
|
@@ -403,6 +16,7 @@ class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting
|
|
|
403
16
|
paired=False,
|
|
404
17
|
tails=2,
|
|
405
18
|
popmean=None,
|
|
19
|
+
posthoc=False,
|
|
406
20
|
verbose=True):
|
|
407
21
|
self.results = None
|
|
408
22
|
self.error = False
|
|
@@ -410,6 +24,7 @@ class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting
|
|
|
410
24
|
self.paired = paired
|
|
411
25
|
self.tails = tails
|
|
412
26
|
self.popmean = popmean
|
|
27
|
+
self.posthoc = posthoc
|
|
413
28
|
self.verbose = verbose
|
|
414
29
|
self.n_groups = len(self.groups_list)
|
|
415
30
|
self.warning_flag_non_numeric_data = False
|
|
@@ -464,7 +79,7 @@ class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting
|
|
|
464
79
|
'no_pop_mean_set': '\nWarning: No Population Mean was set up for single-sample test, used default 0 value.\n The results might be skewed. \n Please, set the Population Mean and run the test again.\n',
|
|
465
80
|
}
|
|
466
81
|
|
|
467
|
-
def
|
|
82
|
+
def run_test(self, test='auto'):
|
|
468
83
|
|
|
469
84
|
# reset values from previous tests
|
|
470
85
|
self.results = None
|
|
@@ -475,9 +90,11 @@ class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting
|
|
|
475
90
|
self.test_id = None
|
|
476
91
|
self.test_stat = None
|
|
477
92
|
self.p_value = None
|
|
93
|
+
self.posthoc_matrix_df = None
|
|
94
|
+
self.posthoc_matrix = []
|
|
478
95
|
|
|
479
96
|
self.log('\n' + '-'*67)
|
|
480
|
-
self.log('Statistical analysis
|
|
97
|
+
self.log('Statistical analysis __init__iated for data in {} groups\n'.format(
|
|
481
98
|
len(self.groups_list)))
|
|
482
99
|
|
|
483
100
|
# adjusting input data type
|
|
@@ -550,29 +167,13 @@ class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting
|
|
|
550
167
|
if not test == 'auto' and self.parametric and not test in self.test_ids_parametric:
|
|
551
168
|
self.AddWarning('non-param_test_with_normal_data')
|
|
552
169
|
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
self.
|
|
557
|
-
elif test == 'friedman':
|
|
558
|
-
self.friedman_test()
|
|
559
|
-
elif test == 'kruskal_wallis':
|
|
560
|
-
self.kruskal_wallis_test()
|
|
561
|
-
elif test == 'mann_whitney':
|
|
562
|
-
self.mann_whitney_u_test()
|
|
563
|
-
elif test == 't_test_independent':
|
|
564
|
-
self.t_test_independent()
|
|
565
|
-
elif test == 't_test_paired':
|
|
566
|
-
self.t_test_paired()
|
|
567
|
-
elif test == 't_test_single_sample':
|
|
568
|
-
self.t_test_single_sample()
|
|
569
|
-
elif test == 'wilcoxon':
|
|
570
|
-
self.wilcoxon()
|
|
571
|
-
elif test == 'wilcoxon_single_sample':
|
|
572
|
-
self.wilcoxon_single_sample()
|
|
170
|
+
# run the test
|
|
171
|
+
|
|
172
|
+
if test in self.test_ids_all:
|
|
173
|
+
self.run_test_by_id(test)
|
|
573
174
|
else:
|
|
574
|
-
self.
|
|
575
|
-
|
|
175
|
+
self.run_test_auto()
|
|
176
|
+
|
|
576
177
|
|
|
577
178
|
# print the results
|
|
578
179
|
self.results = self.create_results_dict()
|
|
@@ -585,82 +186,49 @@ class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting
|
|
|
585
186
|
if self.verbose == True:
|
|
586
187
|
print(self.summary)
|
|
587
188
|
|
|
588
|
-
def __auto(self):
|
|
589
|
-
|
|
590
|
-
if self.n_groups == 1:
|
|
591
|
-
if self.parametric:
|
|
592
|
-
return self.t_test_single_sample()
|
|
593
|
-
else:
|
|
594
|
-
return self.wilcoxon_single_sample()
|
|
595
|
-
|
|
596
|
-
elif self.n_groups == 2:
|
|
597
|
-
if self.paired:
|
|
598
|
-
if self.parametric:
|
|
599
|
-
return self.t_test_paired()
|
|
600
|
-
else:
|
|
601
|
-
return self.wilcoxon()
|
|
602
|
-
else:
|
|
603
|
-
if self.parametric:
|
|
604
|
-
return self.t_test_independent()
|
|
605
|
-
else:
|
|
606
|
-
return self.mann_whitney_u_test()
|
|
607
|
-
|
|
608
|
-
elif self.n_groups >= 3:
|
|
609
|
-
if self.paired:
|
|
610
|
-
if self.parametric:
|
|
611
|
-
return self.anova_1w_rm()
|
|
612
|
-
else:
|
|
613
|
-
return self.friedman_test()
|
|
614
|
-
else:
|
|
615
|
-
if self.parametric:
|
|
616
|
-
return self.anova_1w_ordinary()
|
|
617
|
-
else:
|
|
618
|
-
return self.kruskal_wallis_test()
|
|
619
189
|
|
|
620
|
-
else:
|
|
621
|
-
pass
|
|
622
190
|
|
|
623
191
|
# public methods:
|
|
624
192
|
def RunAuto(self):
|
|
625
|
-
self.
|
|
193
|
+
self.run_test(test='auto')
|
|
626
194
|
|
|
627
195
|
def RunManual(self, test):
|
|
628
|
-
self.
|
|
196
|
+
self.run_test(test)
|
|
629
197
|
|
|
630
198
|
def RunOnewayAnova(self):
|
|
631
|
-
self.
|
|
199
|
+
self.run_test(test='anova_1w_ordinary')
|
|
632
200
|
|
|
633
201
|
def RunOnewayAnovaRM(self):
|
|
634
|
-
self.
|
|
202
|
+
self.run_test(test='anova_1w_rm')
|
|
635
203
|
|
|
636
204
|
def RunFriedman(self):
|
|
637
|
-
self.
|
|
205
|
+
self.run_test(test='friedman')
|
|
638
206
|
|
|
639
207
|
def RunKruskalWallis(self):
|
|
640
|
-
self.
|
|
208
|
+
self.run_test(test='kruskal_wallis')
|
|
641
209
|
|
|
642
210
|
def RunMannWhitney(self):
|
|
643
|
-
self.
|
|
211
|
+
self.run_test(test='mann_whitney')
|
|
644
212
|
|
|
645
213
|
def RunTtest(self):
|
|
646
|
-
self.
|
|
214
|
+
self.run_test(test='t_test_independent')
|
|
647
215
|
|
|
648
216
|
def RunTtestPaired(self):
|
|
649
|
-
self.
|
|
217
|
+
self.run_test(test='t_test_paired')
|
|
650
218
|
|
|
651
219
|
def RunTtestSingleSample(self):
|
|
652
|
-
self.
|
|
220
|
+
self.run_test(test='t_test_single_sample')
|
|
653
221
|
|
|
654
222
|
def RunWilcoxonSingleSample(self):
|
|
655
|
-
self.
|
|
223
|
+
self.run_test(test='wilcoxon_single_sample')
|
|
656
224
|
|
|
657
225
|
def RunWilcoxon(self):
|
|
658
|
-
self.
|
|
226
|
+
self.run_test(test='wilcoxon')
|
|
659
227
|
|
|
660
228
|
def GetResult(self):
|
|
661
229
|
if not self.results and not self.error:
|
|
662
230
|
print('No test chosen, no results to output')
|
|
663
|
-
# self.
|
|
231
|
+
# self.run_test(test='auto')
|
|
664
232
|
return self.results
|
|
665
233
|
if not self.results and self.error:
|
|
666
234
|
print('Error occured, no results to output')
|
|
@@ -671,7 +239,7 @@ class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting
|
|
|
671
239
|
def GetSummary(self):
|
|
672
240
|
if not self.results and not self.error:
|
|
673
241
|
print('No test chosen, no summary to output')
|
|
674
|
-
# self.
|
|
242
|
+
# self.run_test(test='auto')
|
|
675
243
|
return self.summary
|
|
676
244
|
else:
|
|
677
245
|
return self.summary
|
AutoStatLib/_version.py
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
# AutoStatLib package version:
|
|
2
|
-
__version__ = "0.2.
|
|
2
|
+
__version__ = "0.2.5"
|
AutoStatLib/helpers.py
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import pandas as pd
|
|
3
|
+
|
|
4
|
+
class Helpers():
|
|
5
|
+
|
|
6
|
+
def matrix_to_dataframe(self, matrix):
|
|
7
|
+
data = []
|
|
8
|
+
cols = []
|
|
9
|
+
rows = []
|
|
10
|
+
|
|
11
|
+
order_number = 1
|
|
12
|
+
for i, row in enumerate(matrix):
|
|
13
|
+
for j, value in enumerate(row):
|
|
14
|
+
data.append(value)
|
|
15
|
+
cols.append(i)
|
|
16
|
+
rows.append(j)
|
|
17
|
+
order_number += 1
|
|
18
|
+
|
|
19
|
+
df = pd.DataFrame(
|
|
20
|
+
{'Row': rows, 'Col': cols, 'Value': data})
|
|
21
|
+
return df
|
|
22
|
+
|
|
23
|
+
def floatify_recursive(self, data):
|
|
24
|
+
if isinstance(data, list):
|
|
25
|
+
# Recursively process sublists and filter out None values
|
|
26
|
+
processed_list = [self.floatify_recursive(item) for item in data]
|
|
27
|
+
return [item for item in processed_list if item is not None]
|
|
28
|
+
else:
|
|
29
|
+
try:
|
|
30
|
+
# Try to convert the item to float
|
|
31
|
+
return np.float64(data)
|
|
32
|
+
except (ValueError, TypeError):
|
|
33
|
+
# If conversion fails, replace with None
|
|
34
|
+
self.warning_flag_non_numeric_data = True
|
|
35
|
+
return None
|
|
36
|
+
|
|
37
|
+
def create_results_dict(self) -> dict:
|
|
38
|
+
|
|
39
|
+
self.stars_int = self.make_stars(self.p_value.item())
|
|
40
|
+
self.stars_str = self.make_stars_printed(self.stars_int)
|
|
41
|
+
|
|
42
|
+
return {
|
|
43
|
+
'p-value': self.make_p_value_printed(self.p_value.item()),
|
|
44
|
+
'Significance(p<0.05)': True if self.p_value.item() < 0.05 else False,
|
|
45
|
+
'Stars_Printed': self.stars_str,
|
|
46
|
+
'Test_Name': self.test_name,
|
|
47
|
+
'Groups_Compared': self.n_groups,
|
|
48
|
+
'Population_Mean': self.popmean if self.n_groups == 1 else 'N/A',
|
|
49
|
+
'Data_Normaly_Distributed': self.parametric,
|
|
50
|
+
'Parametric_Test_Applied': True if self.test_id in self.test_ids_parametric else False,
|
|
51
|
+
'Paired_Test_Applied': self.paired,
|
|
52
|
+
'Tails': self.tails,
|
|
53
|
+
'p-value_exact': self.p_value.item(),
|
|
54
|
+
'Stars': self.stars_int,
|
|
55
|
+
# 'Stat_Value': self.test_stat.item(),
|
|
56
|
+
'Warnings': self.warnings,
|
|
57
|
+
'Groups_N': [len(self.data[i]) for i in range(len(self.data))],
|
|
58
|
+
'Groups_Median': [np.median(self.data[i]).item() for i in range(len(self.data))],
|
|
59
|
+
'Groups_Mean': [np.mean(self.data[i]).item() for i in range(len(self.data))],
|
|
60
|
+
'Groups_SD': [np.std(self.data[i]).item() for i in range(len(self.data))],
|
|
61
|
+
'Groups_SE': [np.std(self.data[i]).item() / np.sqrt(len(self.data)).item() for i in range(len(self.data))],
|
|
62
|
+
# actually returns list of lists of numpy dtypes of float64, next make it return regular floats:
|
|
63
|
+
'Samples': self.data,
|
|
64
|
+
'Posthoc_Matrix': self.posthoc_matrix if self.posthoc_matrix else 'N/A',
|
|
65
|
+
'Posthoc_Matrix_printed': [[self.make_p_value_printed(element) for element in row] for row in self.posthoc_matrix] if self.posthoc_matrix else 'N/A',
|
|
66
|
+
'Posthoc_Matrix_stars': [[self.make_stars_printed(self.make_stars(element)) for element in row] for row in self.posthoc_matrix] if self.posthoc_matrix else 'N/A',
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
def log(self, *args, **kwargs):
|
|
70
|
+
message = ' '.join(map(str, args))
|
|
71
|
+
# print(message, **kwargs)
|
|
72
|
+
self.summary += '\n' + message
|
|
73
|
+
|
|
74
|
+
def AddWarning(self, warning_id):
|
|
75
|
+
message = self.warning_ids_all[warning_id]
|
|
76
|
+
self.log(message)
|
|
77
|
+
self.warnings.append(message)
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
from statsmodels.stats.diagnostic import lilliefors
|
|
2
|
+
from scipy.stats import shapiro, normaltest, anderson
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class NormalityTests():
|
|
6
|
+
'''
|
|
7
|
+
Normality tests mixin
|
|
8
|
+
|
|
9
|
+
see the article about minimal sample size for tests:
|
|
10
|
+
Power comparisons of Shapiro-Wilk, Kolmogorov-Smirnov,
|
|
11
|
+
Lilliefors and Anderson-Darling tests, Nornadiah Mohd Razali1, Yap Bee Wah1
|
|
12
|
+
'''
|
|
13
|
+
|
|
14
|
+
def check_normality(self, data):
|
|
15
|
+
sw = None
|
|
16
|
+
lf = None
|
|
17
|
+
ad = None
|
|
18
|
+
ap = None
|
|
19
|
+
n = len(data)
|
|
20
|
+
|
|
21
|
+
# Shapiro-Wilk test
|
|
22
|
+
sw_stat, sw_p_value = shapiro(data)
|
|
23
|
+
if sw_p_value > 0.05:
|
|
24
|
+
sw = True
|
|
25
|
+
else:
|
|
26
|
+
sw = False
|
|
27
|
+
|
|
28
|
+
# Lilliefors test
|
|
29
|
+
lf_stat, lf_p_value = lilliefors(
|
|
30
|
+
data, dist='norm')
|
|
31
|
+
if lf_p_value > 0.05:
|
|
32
|
+
lf = True
|
|
33
|
+
else:
|
|
34
|
+
lf = False
|
|
35
|
+
|
|
36
|
+
# Anderson-Darling test
|
|
37
|
+
if n >= 20:
|
|
38
|
+
ad_stat, ad_p_value = self.anderson_get_p(
|
|
39
|
+
data, dist='norm')
|
|
40
|
+
if ad_p_value > 0.05:
|
|
41
|
+
ad = True
|
|
42
|
+
else:
|
|
43
|
+
ad = False
|
|
44
|
+
|
|
45
|
+
# D'Agostino-Pearson test
|
|
46
|
+
# test result is skewed if n<20
|
|
47
|
+
if n >= 20:
|
|
48
|
+
ap_stat, ap_p_value = normaltest(data)
|
|
49
|
+
if ap_p_value > 0.05:
|
|
50
|
+
ap = True
|
|
51
|
+
else:
|
|
52
|
+
ap = False
|
|
53
|
+
|
|
54
|
+
# print(ap_p_value, ad_p_value, sw_p_value, lf_p_value)
|
|
55
|
+
|
|
56
|
+
return (sw, lf, ad, ap)
|
|
57
|
+
|
|
58
|
+
def anderson_get_p(self, data, dist='norm'):
|
|
59
|
+
'''
|
|
60
|
+
calculating p-value for Anderson-Darling test using the method described here:
|
|
61
|
+
Computation of Probability Associated with Anderson-Darling Statistic
|
|
62
|
+
Lorentz Jantschi and Sorana D. Bolboaca, 2018 - Mathematics
|
|
63
|
+
|
|
64
|
+
'''
|
|
65
|
+
e = 2.718281828459045
|
|
66
|
+
n = len(data)
|
|
67
|
+
|
|
68
|
+
ad, critical_values, significance_levels = anderson(
|
|
69
|
+
data, dist=dist)
|
|
70
|
+
|
|
71
|
+
# adjust ad_stat for small sample sizes:
|
|
72
|
+
s = ad*(1 + 0.75/n + 2.25/(n**2))
|
|
73
|
+
|
|
74
|
+
if s >= 0.6:
|
|
75
|
+
p = e**(1.2937 - 5.709*s + 0.0186*s**2)
|
|
76
|
+
elif s > 0.34:
|
|
77
|
+
p = e**(0.9177 - 4.279*s - 1.38*s**2)
|
|
78
|
+
elif s > 0.2:
|
|
79
|
+
p = 1 - e**(-8.318 + 42.796*s - 59.938*s**2)
|
|
80
|
+
elif s <= 0.2:
|
|
81
|
+
p = 1 - e**(-13.436 + 101.14*s - 223.73*s**2)
|
|
82
|
+
else:
|
|
83
|
+
p = None
|
|
84
|
+
|
|
85
|
+
return ad, p
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import scikit_posthocs as sp
|
|
3
|
+
from statsmodels.stats.anova import AnovaRM
|
|
4
|
+
from scipy.stats import ttest_rel, ttest_ind, ttest_1samp, wilcoxon, mannwhitneyu, f_oneway, kruskal, friedmanchisquare
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class StatisticalTests():
|
|
9
|
+
'''
|
|
10
|
+
Statistical tests mixin
|
|
11
|
+
'''
|
|
12
|
+
|
|
13
|
+
def run_test_auto(self):
|
|
14
|
+
|
|
15
|
+
if self.n_groups == 1:
|
|
16
|
+
if self.parametric:
|
|
17
|
+
self.run_test_by_id('t_test_single_sample')
|
|
18
|
+
else:
|
|
19
|
+
self.run_test_by_id('wilcoxon_single_sample')
|
|
20
|
+
|
|
21
|
+
elif self.n_groups == 2:
|
|
22
|
+
if self.paired:
|
|
23
|
+
if self.parametric:
|
|
24
|
+
self.run_test_by_id('t_test_paired')
|
|
25
|
+
else:
|
|
26
|
+
self.run_test_by_id('wilcoxon')
|
|
27
|
+
else:
|
|
28
|
+
if self.parametric:
|
|
29
|
+
self.run_test_by_id('t_test_independent')
|
|
30
|
+
else:
|
|
31
|
+
self.run_test_by_id('mann_whitney')
|
|
32
|
+
|
|
33
|
+
elif self.n_groups >= 3:
|
|
34
|
+
if self.paired:
|
|
35
|
+
if self.parametric:
|
|
36
|
+
self.run_test_by_id('anova_1w_rm')
|
|
37
|
+
else:
|
|
38
|
+
self.run_test_by_id('friedman')
|
|
39
|
+
else:
|
|
40
|
+
if self.parametric:
|
|
41
|
+
self.run_test_by_id('anova_1w_ordinary')
|
|
42
|
+
else:
|
|
43
|
+
self.run_test_by_id('kruskal_wallis')
|
|
44
|
+
|
|
45
|
+
else:
|
|
46
|
+
pass
|
|
47
|
+
|
|
48
|
+
def run_test_by_id(self, test_id):
|
|
49
|
+
|
|
50
|
+
test_names_dict = {
|
|
51
|
+
'anova_1w_ordinary': 'Ordinary One-Way ANOVA',
|
|
52
|
+
'anova_1w_rm': 'Repeated Measures One-Way ANOVA',
|
|
53
|
+
'friedman': 'Friedman test',
|
|
54
|
+
'kruskal_wallis': 'Kruskal-Wallis test',
|
|
55
|
+
'mann_whitney': 'Mann-Whitney U test',
|
|
56
|
+
't_test_independent': 't-test for independent samples',
|
|
57
|
+
't_test_paired': 't-test for paired samples',
|
|
58
|
+
't_test_single_sample': 'Single-sample t-test',
|
|
59
|
+
'wilcoxon': 'Wilcoxon signed-rank test',
|
|
60
|
+
'wilcoxon_single_sample': 'Wilcoxon signed-rank test for single sample',
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
match test_id:
|
|
64
|
+
case 'anova_1w_ordinary': stat, p_value = self.anova_1w_ordinary()
|
|
65
|
+
case 'anova_1w_rm': stat, p_value = self.anova_1w_rm()
|
|
66
|
+
case 'friedman': stat, p_value = self.friedman()
|
|
67
|
+
case 'kruskal_wallis': stat, p_value = self.kruskal_wallis()
|
|
68
|
+
case 'mann_whitney': stat, p_value = self.mann_whitney()
|
|
69
|
+
case 't_test_independent': stat, p_value = self.t_test_independent()
|
|
70
|
+
case 't_test_paired': stat, p_value = self.t_test_paired()
|
|
71
|
+
case 't_test_single_sample': stat, p_value = self.t_test_single_sample()
|
|
72
|
+
case 'wilcoxon': stat, p_value = self.wilcoxon()
|
|
73
|
+
case 'wilcoxon_single_sample': stat, p_value = self.wilcoxon_single_sample()
|
|
74
|
+
|
|
75
|
+
if test_id in self.test_ids_dependent:
|
|
76
|
+
self.paired = True
|
|
77
|
+
else:
|
|
78
|
+
self.paired = False
|
|
79
|
+
|
|
80
|
+
self.test_name = test_names_dict[test_id]
|
|
81
|
+
self.test_id = test_id
|
|
82
|
+
self.test_stat = stat
|
|
83
|
+
self.p_value = p_value
|
|
84
|
+
|
|
85
|
+
def anova_1w_ordinary(self):
    """
    Ordinary one-way ANOVA over the groups in ``self.data``.

    Each element of ``self.data`` is handed to ``f_oneway`` as a separate
    sample. ``self.tails`` is set to 2 after the test has run.

    Returns:
        (stat, p_value) as produced by ``f_oneway``
    """
    outcome = f_oneway(*self.data)
    f_stat, p_value = outcome
    self.tails = 2
    return f_stat, p_value
|
|
93
|
+
|
|
94
|
+
def anova_1w_rm(self):
    """
    Perform repeated measures one-way ANOVA test.

    ``self.data`` is converted to a dataframe by
    ``self.matrix_to_dataframe`` and fitted with ``AnovaRM`` using 'Value'
    as the dependent variable, 'Row' as the subject column and 'Col' as
    the single within-subject factor. ``self.tails`` is set to 2.

    NOTE(review): how the sublists of self.data map onto 'Row' and 'Col'
    is decided inside matrix_to_dataframe, which is not visible here --
    confirm which axis is treated as the subject.

    Returns:
        (stat, p_value): the 'F Value' and 'Pr > F' entries of the first
        row of the fitted ANOVA table
    """
    df = self.matrix_to_dataframe(self.data)
    res = AnovaRM(df, 'Value', 'Row', within=['Col']).fit()

    # Select the first row by position explicitly: plain `[0]` on these
    # columns relies on pandas' deprecated positional fallback when the
    # index holds labels rather than integers.
    stat = res.anova_table['F Value'].iloc[0]
    p_value = res.anova_table['Pr > F'].iloc[0]

    self.tails = 2
    return stat, p_value
|
|
109
|
+
|
|
110
|
+
def friedman(self):
    """
    Friedman test over the groups in ``self.data``.

    Each element of ``self.data`` is passed to ``friedmanchisquare`` as a
    separate sample. ``self.tails`` is set to 2 after the test has run.

    Returns:
        (stat, p_value) as produced by ``friedmanchisquare``
    """
    samples = tuple(self.data)
    chi_square, p_value = friedmanchisquare(*samples)
    self.tails = 2
    return chi_square, p_value
|
|
114
|
+
|
|
115
|
+
def kruskal_wallis(self):
    """
    Kruskal-Wallis test over the groups in ``self.data``.

    Each element of ``self.data`` is passed to ``kruskal`` as a separate
    sample. When the result is significant at the 0.05 level and
    ``self.posthoc`` is truthy, Dunn's multiple comparisons with
    Bonferroni adjustment are computed and stored in
    ``self.posthoc_matrix`` as a nested list.

    ``self.tails`` is set to 2, as the other n-sample tests do, because the
    p-value is never halved here; otherwise a previously configured
    ``tails == 1`` would be kept alongside an unhalved p-value.

    Returns:
        (stat, p_value) as produced by ``kruskal``
    """
    stat, p_value = kruskal(*self.data)
    self.tails = 2

    # Perform Dunn's multiple comparisons if Kruskal-Wallis is significant
    if p_value < 0.05 and self.posthoc:
        self.posthoc_matrix = sp.posthoc_dunn(
            self.data, p_adjust='bonferroni').values.tolist()
    return stat, p_value
|
|
122
|
+
|
|
123
|
+
def mann_whitney(self):
    """
    Mann-Whitney U test between ``self.data[0]`` and ``self.data[1]``.

    The two-sided p-value is computed first; when ``self.tails == 1`` it is
    divided by two.

    Returns:
        (stat, p_value)
    """
    first, second = self.data[0], self.data[1]
    u_stat, p_value = mannwhitneyu(first, second, alternative='two-sided')
    if self.tails == 1:
        p_value = p_value / 2
    return u_stat, p_value
|
|
135
|
+
|
|
136
|
+
def t_test_independent(self):
    """
    t-test for two independent samples, ``self.data[0]`` and ``self.data[1]``.

    The p-value returned by ``ttest_ind`` is divided by two when
    ``self.tails == 1``.

    Returns:
        (stat, p_value)
    """
    group_a, group_b = self.data[0], self.data[1]
    t_stat, p_value = ttest_ind(group_a, group_b)
    return t_stat, (p_value / 2 if self.tails == 1 else p_value)
|
|
142
|
+
|
|
143
|
+
def t_test_paired(self):
    """
    t-test for two paired samples, ``self.data[0]`` and ``self.data[1]``.

    The p-value returned by ``ttest_rel`` is divided by two when
    ``self.tails == 1``.

    Returns:
        (stat, p_value)
    """
    before, after = self.data[0], self.data[1]
    t_stat, p_value = ttest_rel(before, after)
    return t_stat, (p_value / 2 if self.tails == 1 else p_value)
|
|
149
|
+
|
|
150
|
+
def t_test_single_sample(self):
    """
    Single-sample t-test of ``self.data[0]`` against ``self.popmean``.

    When no population mean has been set (``self.popmean is None``) it
    defaults to 0 and the 'no_pop_mean_set' warning is registered through
    ``self.AddWarning``. The p-value returned by ``ttest_1samp`` is divided
    by two when ``self.tails == 1``.

    Returns:
        (stat, p_value)
    """
    # Identity check: `== None` would defer to the operand's own __eq__.
    if self.popmean is None:
        self.popmean = 0
        self.AddWarning('no_pop_mean_set')
    stat, p_value = ttest_1samp(self.data[0], self.popmean)
    if self.tails == 1:
        p_value /= 2
    return stat, p_value
|
|
158
|
+
|
|
159
|
+
def wilcoxon(self):
    """
    Wilcoxon signed-rank test between ``self.data[0]`` and ``self.data[1]``.

    Inside this method the name ``wilcoxon`` refers to the module-level
    function of the same name, not to this method. The p-value is divided
    by two when ``self.tails == 1``.

    Returns:
        (stat, p_value)
    """
    first, second = self.data[0], self.data[1]
    w_stat, p_value = wilcoxon(first, second)
    return w_stat, (p_value / 2 if self.tails == 1 else p_value)
|
|
164
|
+
|
|
165
|
+
def wilcoxon_single_sample(self):
    """
    Single-sample Wilcoxon signed-rank test of ``self.data[0]`` against
    ``self.popmean``.

    When no population mean has been set (``self.popmean is None``) it
    defaults to 0 and the 'no_pop_mean_set' warning is registered through
    ``self.AddWarning``. The test is run on the differences between each
    value and ``self.popmean``; the p-value is divided by two when
    ``self.tails == 1``.

    Returns:
        (stat, p_value)
    """
    # Identity check: `== None` would defer to the operand's own __eq__.
    if self.popmean is None:
        self.popmean = 0
        self.AddWarning('no_pop_mean_set')
    data = [i - self.popmean for i in self.data[0]]
    stat, p_value = wilcoxon(data)
    if self.tails == 1:
        p_value /= 2
    return stat, p_value
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
|
|
2
|
+
|
|
3
|
+
class TextFormatting():
    '''
    Text formatting mixin

    Provides fixed-width table output, significance-star helpers and
    p-value formatting. The methods rely on attributes supplied by the
    class this is mixed into: ``self.log`` (called like ``print``),
    ``self.data`` (a list of groups) and ``self.results`` (a dict).
    '''

    def autospace(self, elements_list, space, delimiter=' ') -> str:
        '''
        Join strings into fixed-width columns.

        Every element except the last is right-padded with ``delimiter``
        up to ``space`` characters; the last element is appended as is, so
        the line carries no trailing padding. An element already longer
        than ``space`` receives no padding.
        '''
        # The last index is len - 1; comparing against len itself never
        # matched, which left the final column padded.
        last = len(elements_list) - 1
        return ''.join(
            element if i == last
            else element + (space - len(element)) * delimiter
            for i, element in enumerate(elements_list)
        )

    def print_groups(self, space=24, max_length=15):
        '''
        Log the groups of ``self.data`` side by side, one group per column.

        At most ``max_length`` values are shown per group; a longer group
        gets a ``[N more]`` placeholder in the row after its last shown
        value. Output stops at the first row in which no group has
        anything left to show.
        '''
        self.log('')
        # Get the number of groups (rows) and the maximum length of rows
        data = self.data
        num_groups = len(data)
        # default=0 keeps an empty data list from raising inside max()
        group_longest = max((len(row) for row in data), default=0)

        # Print the header
        header = [f'Group {i+1}' for i in range(num_groups)]
        # Blank spacer row under the header.
        # NOTE(review): the original built this as ''*7, which is just '';
        # an underline such as '-'*7 may have been intended -- confirm.
        line = ['']
        self.log(self.autospace(header, space))
        self.log(self.autospace(line, space))

        # Print each column with a placeholder if longer than max_length
        for i in range(group_longest):
            row_values = []
            all_values_empty = True
            for row in data:
                truncated = len(row) > max_length
                shown = max_length if truncated else len(row)
                if i < shown:
                    row_values.append(str(row[i]))
                    all_values_empty = False
                elif truncated and i == max_length:
                    row_values.append(f'[{len(row) - max_length} more]')
                    all_values_empty = False
                elif not truncated:
                    # exhausted short group: keep the column aligned
                    row_values.append('')
                # a truncated group past its placeholder contributes nothing
            if all_values_empty:
                break
            self.log(self.autospace(row_values, space))

    def make_stars(self, p) -> int:
        '''
        Map a p-value to a number of significance stars.

        Returns 4 for p < 0.0001, 3 for p < 0.001, 2 for p < 0.01,
        1 for p < 0.05 and 0 otherwise, including when ``p`` is None.
        '''
        if p is None:
            return 0
        for stars, threshold in ((4, 0.0001), (3, 0.001), (2, 0.01), (1, 0.05)):
            if p < threshold:
                return stars
        return 0

    def make_stars_printed(self, n) -> str:
        '''
        Render a star count as text: ``n`` asterisks, or 'ns' when ``n``
        is falsy.
        '''
        return '*' * n if n else 'ns'

    def make_p_value_printed(self, p) -> str:
        '''
        Format a p-value for display.

        Values above 0.99 and below 0.0001 are shown as bounds; values
        from 0.001 up are shown with two significant digits and values
        from 0.0001 up to 0.001 with one. None, and any value that matches
        none of the comparisons (e.g. NaN), gives 'N/A'.
        '''
        if p is None:
            return 'N/A'
        if p > 0.99:
            return 'p>0.99'
        if p >= 0.001:
            return f'p={p:.2g}'
        if p >= 0.0001:
            return f'p={p:.1g}'
        if p < 0.0001:
            return 'p<0.0001'
        return 'N/A'

    def print_results(self):
        '''
        Log a summary of ``self.results``, one entry per line.

        'Warnings' is summarised by its length and 'Posthoc_Matrix' by its
        dimensions; 'Samples', 'Posthoc_Matrix_printed' and
        'Posthoc_Matrix_stars' are skipped; every other entry is logged
        with its value.
        '''
        self.log('\n\nResults: \n')
        for i in self.results:
            shift = 27 - len(i)
            if i == 'Warnings':
                self.log(i, ':', ' ' * shift, len(self.results[i]))
            # elif (not a second `if`): otherwise 'Warnings' also fell
            # through to the final else and was logged a second time.
            elif i == 'Posthoc_Matrix':
                self.log(i, ':', ' ' * shift, '{0}x{0} matrix'.format(len(self.results[i])))
            elif i == 'Samples' or i == 'Posthoc_Matrix_printed' or i == 'Posthoc_Matrix_stars':
                pass
            else:
                self.log(i, ':', ' ' * shift, self.results[i])
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: AutoStatLib
|
|
3
|
-
Version: 0.2.1
|
|
3
|
+
Version: 0.2.5
|
|
4
4
|
Summary: AutoStatLib - a simple statistical analysis tool
|
|
5
5
|
Author: Stemonitis, SciWare LLC
|
|
6
6
|
Author-email: konung-yaropolk <yaropolk1995@gmail.com>
|
|
@@ -531,6 +531,7 @@ License-File: LICENSE
|
|
|
531
531
|
Requires-Dist: numpy
|
|
532
532
|
Requires-Dist: scipy
|
|
533
533
|
Requires-Dist: statsmodels
|
|
534
|
+
Requires-Dist: scikit-posthocs
|
|
534
535
|
Requires-Dist: pandas
|
|
535
536
|
|
|
536
537
|
# AutoStatLib - python library for automated statistical analysis
|
|
@@ -595,10 +596,11 @@ analysis.RunTtestPaired()
|
|
|
595
596
|
analysis.RunWilcoxon()
|
|
596
597
|
|
|
597
598
|
# 3 and more independent groups comparison:
|
|
598
|
-
analysis.
|
|
599
|
+
analysis.RunOnewayAnova()
|
|
599
600
|
analysis.RunKruskalWallis()
|
|
600
601
|
|
|
601
602
|
# 3 and more dependent groups comparison:
|
|
603
|
+
analysis.RunOnewayAnovaRM()
|
|
602
604
|
analysis.RunFriedman()
|
|
603
605
|
|
|
604
606
|
# single group tests
|
|
@@ -648,20 +650,42 @@ If errors occured, *GetResult()* returns an empty dictionary
|
|
|
648
650
|
|
|
649
651
|
|
|
650
652
|
---
|
|
651
|
-
## Pre-Alpha dev status.
|
|
652
|
-
|
|
653
|
-
### TODO:
|
|
654
|
-
|
|
655
|
-
--
|
|
656
|
-
--Anova: add 2-way anova and 3-way
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
--
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
--
|
|
664
|
-
--
|
|
653
|
+
## Pre-Alpha dev status.
|
|
654
|
+
|
|
655
|
+
### TODO:
|
|
656
|
+
|
|
657
|
+
-- Anova: posthocs
|
|
658
|
+
-- Anova: add 2-way anova and 3-way anova
|
|
659
|
+
-- one-way ANOVA: add repeated measures (for normal dependent values) with and without Geisser-Greenhouse correction
|
|
660
|
+
-- one-way ANOVA: add Brown-Forsythe and Welch (for normal independent values with unequal SDs between groups)
|
|
661
|
+
-- paired T-test: add ratio-paired t-test (ratios of paired values are consistent)
|
|
662
|
+
-- add Welch test (for norm data unequal variances)
|
|
663
|
+
-- add Kolmogorov-Smirnov test (unpaired nonparametric 2 sample, compare cumulative distributions)
|
|
664
|
+
-- add independent t-test with Welch correction (do not assume equal SDs in groups)
|
|
665
|
+
-- add correlation test, correlation diagram
|
|
666
|
+
-- add linear regression, regression diagram
|
|
667
|
+
-- add QQ plot
|
|
668
|
+
-- n-sample tests: add onetail option
|
|
669
|
+
|
|
670
|
+
✅ done -- detailed normality test results
|
|
671
|
+
✅ done -- added posthoc: Kruskal-Wallis Dunn's multiple comparisons
|
|
672
|
+
|
|
673
|
+
|
|
674
|
+
tests check:
|
|
675
|
+
1-sample:
|
|
676
|
+
--Wilcoxon 2,1 tails - ok
|
|
677
|
+
--t-tests 2,1 tails -ok
|
|
678
|
+
|
|
679
|
+
2-sample:
|
|
680
|
+
--Wilcoxon 2,1 tails - ok
|
|
681
|
+
--Mann-Whitney 2,1 tails - ok
|
|
682
|
+
--t-tests 2,1 tails -ok
|
|
683
|
+
|
|
684
|
+
n-sample:
|
|
685
|
+
--Kruskal-Wallis 2 tail - ok
|
|
686
|
+
--Dunn's multiple comparisons - ??
|
|
687
|
+
--Friedman 2 tail - ok
|
|
688
|
+
--one-way ANOVA 2 tail - ok
|
|
665
689
|
|
|
666
690
|
|
|
667
691
|
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
AutoStatLib/AutoStatLib.py,sha256=yPNnwCvHSSlEKQvtnoaLFDq6znPlXCz-CrzGInG-1Ys,9647
|
|
2
|
+
AutoStatLib/__init__.py,sha256=0wHYnglzKRPqSHtZlfbMEA2Bj5rDR4LLaXbOrJi-sqM,101
|
|
3
|
+
AutoStatLib/__main__.py,sha256=ROKWensrxDh3Gl-yhexJ-BYFohDSh9y-CuMkaLpmnnQ,247
|
|
4
|
+
AutoStatLib/_version.py,sha256=-QrGYOb9bx4vC_twSInOBJoijtj78lvUzV19y4-tH38,53
|
|
5
|
+
AutoStatLib/helpers.py,sha256=9Fj9pHlXSM3tGHF5L0-i6DilA9VZk6Re93ob_IRxsYg,3424
|
|
6
|
+
AutoStatLib/normality_tests.py,sha256=wvOmo6F7drnhhikoGltyQJC4OBk3PLCszY6ItJk1e0M,2385
|
|
7
|
+
AutoStatLib/statistical_tests.py,sha256=LDcBRkq56hepR23RZtbBnZOs9k9frVjmiB2EKiEkCYs,5990
|
|
8
|
+
AutoStatLib/text_formatting.py,sha256=ShE4BRO69lsC1VT3SsYrmPkuvW7QnyfHVPZEbjNQ_hI,3250
|
|
9
|
+
AutoStatLib-0.2.5.dist-info/LICENSE,sha256=IMF9i4xIpgCADf0U-V1cuf9HBmqWQd3qtI3FSuyW4zE,26526
|
|
10
|
+
AutoStatLib-0.2.5.dist-info/METADATA,sha256=qJxSrqHlL0wsqaH-ah6MAJa15ikH4NCco1dyVxuNlWs,36572
|
|
11
|
+
AutoStatLib-0.2.5.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
12
|
+
AutoStatLib-0.2.5.dist-info/top_level.txt,sha256=BuHzVyE2andc7RwD_UPmDjLl9CUAyBH6WHZGjaIReUI,12
|
|
13
|
+
AutoStatLib-0.2.5.dist-info/RECORD,,
|
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
AutoStatLib/AutoStatLib.py,sha256=_Id6bJb1OmGpUyfB0ho6-2F9S_8YO8euMB-prLjfpPI,23976
|
|
2
|
-
AutoStatLib/__init__.py,sha256=0wHYnglzKRPqSHtZlfbMEA2Bj5rDR4LLaXbOrJi-sqM,101
|
|
3
|
-
AutoStatLib/__main__.py,sha256=ROKWensrxDh3Gl-yhexJ-BYFohDSh9y-CuMkaLpmnnQ,247
|
|
4
|
-
AutoStatLib/_version.py,sha256=jkitUHmog4Z-O5_8BUMHBBb92A758Kea22juu9b2a2Q,53
|
|
5
|
-
AutoStatLib-0.2.1.dist-info/LICENSE,sha256=IMF9i4xIpgCADf0U-V1cuf9HBmqWQd3qtI3FSuyW4zE,26526
|
|
6
|
-
AutoStatLib-0.2.1.dist-info/METADATA,sha256=PFpKRRElBXYciMgOuMRI8wsoCKkX9iiwMVNJthvC_3A,35569
|
|
7
|
-
AutoStatLib-0.2.1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
8
|
-
AutoStatLib-0.2.1.dist-info/top_level.txt,sha256=BuHzVyE2andc7RwD_UPmDjLl9CUAyBH6WHZGjaIReUI,12
|
|
9
|
-
AutoStatLib-0.2.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|