AutoStatLib 0.1.8__tar.gz → 0.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of AutoStatLib might be problematic. Click here for more details.
- {autostatlib-0.1.8/src/AutoStatLib.egg-info → autostatlib-0.2.1}/PKG-INFO +4 -3
- {autostatlib-0.1.8 → autostatlib-0.2.1}/README.md +2 -2
- autostatlib-0.1.8/src/AutoStatLib.egg-info/requires.txt → autostatlib-0.2.1/requirements.txt +1 -0
- {autostatlib-0.1.8 → autostatlib-0.2.1}/src/AutoStatLib/AutoStatLib.py +188 -108
- autostatlib-0.2.1/src/AutoStatLib/__init__.py +2 -0
- autostatlib-0.2.1/src/AutoStatLib/__main__.py +6 -0
- {autostatlib-0.1.8 → autostatlib-0.2.1}/src/AutoStatLib/_version.py +1 -1
- {autostatlib-0.1.8 → autostatlib-0.2.1/src/AutoStatLib.egg-info}/PKG-INFO +4 -3
- autostatlib-0.2.1/src/AutoStatLib.egg-info/requires.txt +4 -0
- autostatlib-0.1.8/requirements.txt +0 -3
- autostatlib-0.1.8/src/AutoStatLib/__init__.py +0 -2
- autostatlib-0.1.8/src/AutoStatLib/__main__.py +0 -6
- {autostatlib-0.1.8 → autostatlib-0.2.1}/LICENSE +0 -0
- {autostatlib-0.1.8 → autostatlib-0.2.1}/MANIFEST.in +0 -0
- {autostatlib-0.1.8 → autostatlib-0.2.1}/pyproject.toml +0 -0
- {autostatlib-0.1.8 → autostatlib-0.2.1}/setup.cfg +0 -0
- {autostatlib-0.1.8 → autostatlib-0.2.1}/src/AutoStatLib.egg-info/SOURCES.txt +0 -0
- {autostatlib-0.1.8 → autostatlib-0.2.1}/src/AutoStatLib.egg-info/dependency_links.txt +0 -0
- {autostatlib-0.1.8 → autostatlib-0.2.1}/src/AutoStatLib.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: AutoStatLib
|
|
3
|
-
Version: 0.1
|
|
3
|
+
Version: 0.2.1
|
|
4
4
|
Summary: AutoStatLib - a simple statistical analysis tool
|
|
5
5
|
Author: Stemonitis, SciWare LLC
|
|
6
6
|
Author-email: konung-yaropolk <yaropolk1995@gmail.com>
|
|
@@ -531,6 +531,7 @@ License-File: LICENSE
|
|
|
531
531
|
Requires-Dist: numpy
|
|
532
532
|
Requires-Dist: scipy
|
|
533
533
|
Requires-Dist: statsmodels
|
|
534
|
+
Requires-Dist: pandas
|
|
534
535
|
|
|
535
536
|
# AutoStatLib - python library for automated statistical analysis
|
|
536
537
|
|
|
@@ -569,7 +570,7 @@ data_uniform = [list(np.random.uniform(i+3, i+1, n)) for i in range(groups)]
|
|
|
569
570
|
|
|
570
571
|
|
|
571
572
|
# set the parameters:
|
|
572
|
-
paired = False # is groups
|
|
573
|
+
paired = False # whether the groups are dependent (paired) or not
|
|
573
574
|
tails = 2 # two-tailed or one-tailed result
|
|
574
575
|
popmean = 0 # population mean - only needed for single-sample tests
|
|
575
576
|
|
|
@@ -585,7 +586,7 @@ analysis.RunAuto()
|
|
|
585
586
|
|
|
586
587
|
or you can choose specific tests:
|
|
587
588
|
```python
|
|
588
|
-
# 2 groups
|
|
589
|
+
# 2 groups independent:
|
|
589
590
|
analysis.RunTtest()
|
|
590
591
|
analysis.RunMannWhitney()
|
|
591
592
|
|
|
@@ -35,7 +35,7 @@ data_uniform = [list(np.random.uniform(i+3, i+1, n)) for i in range(groups)]
|
|
|
35
35
|
|
|
36
36
|
|
|
37
37
|
# set the parameters:
|
|
38
|
-
paired = False # is groups
|
|
38
|
+
paired = False # whether the groups are dependent (paired) or not
|
|
39
39
|
tails = 2 # two-tailed or one-tailed result
|
|
40
40
|
popmean = 0 # population mean - only needed for single-sample tests
|
|
41
41
|
|
|
@@ -51,7 +51,7 @@ analysis.RunAuto()
|
|
|
51
51
|
|
|
52
52
|
or you can choose specific tests:
|
|
53
53
|
```python
|
|
54
|
-
# 2 groups
|
|
54
|
+
# 2 groups independent:
|
|
55
55
|
analysis.RunTtest()
|
|
56
56
|
analysis.RunMannWhitney()
|
|
57
57
|
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import numpy as np
|
|
2
|
+
import pandas as pd
|
|
2
3
|
from statsmodels.stats.diagnostic import lilliefors
|
|
3
|
-
from
|
|
4
|
+
from statsmodels.stats.anova import AnovaRM
|
|
5
|
+
from scipy.stats import ttest_rel, ttest_ind, ttest_1samp, wilcoxon, mannwhitneyu, f_oneway, kruskal, friedmanchisquare, shapiro, anderson, normaltest
|
|
4
6
|
|
|
5
7
|
|
|
6
8
|
class __StatisticalTests():
|
|
@@ -8,19 +10,39 @@ class __StatisticalTests():
|
|
|
8
10
|
Statistical tests mixin
|
|
9
11
|
'''
|
|
10
12
|
|
|
11
|
-
def
|
|
13
|
+
def anova_1w_ordinary(self):
|
|
12
14
|
stat, p_value = f_oneway(*self.data)
|
|
13
15
|
self.tails = 2
|
|
14
16
|
# if self.tails == 1 and p_value > 0.5:
|
|
15
17
|
# p_value /= 2
|
|
16
18
|
# if self.tails == 1:
|
|
17
19
|
# p_value /= 2
|
|
18
|
-
self.test_name = 'ANOVA'
|
|
19
|
-
self.test_id = '
|
|
20
|
+
self.test_name = 'Ordinary One-Way ANOVA'
|
|
21
|
+
self.test_id = 'anova_1w_ordinary'
|
|
20
22
|
self.paired = False
|
|
21
23
|
self.test_stat = stat
|
|
22
24
|
self.p_value = p_value
|
|
23
25
|
|
|
26
|
+
def anova_1w_rm(self):
    """
    Perform a repeated measures one-way ANOVA on self.data.

    self.data is converted to long format with self.matrix_to_dataframe:
    the index of each sublist becomes the within-subject factor 'Col' and
    the position of a value inside its sublist becomes the subject id
    'Row'. Each sublist is therefore one group (condition), and values
    sharing a position across sublists are treated as the same subject.

    Sets self.tails, self.test_name, self.test_id, self.paired,
    self.test_stat and self.p_value.
    """
    df = self.matrix_to_dataframe(self.data)
    res = AnovaRM(df, 'Value', 'Row', within=['Col']).fit()

    # Only one within factor is passed, so the table has a single row.
    # Read it by position with .iloc: a bare integer key on a
    # label-indexed Series is a deprecated positional fallback in pandas.
    f_stat = res.anova_table['F Value'].iloc[0]
    p_value = res.anova_table['Pr > F'].iloc[0]

    self.tails = 2
    self.test_name = 'Repeated Measures One-Way ANOVA'
    self.test_id = 'anova_1w_rm'
    self.paired = True
    self.test_stat = f_stat
    self.p_value = p_value
|
|
45
|
+
|
|
24
46
|
def friedman_test(self):
|
|
25
47
|
stat, p_value = friedmanchisquare(*self.data)
|
|
26
48
|
self.tails = 2
|
|
@@ -56,13 +78,13 @@ class __StatisticalTests():
|
|
|
56
78
|
self.test_stat = stat
|
|
57
79
|
self.p_value = p_value
|
|
58
80
|
|
|
59
|
-
def
|
|
81
|
+
def t_test_independent(self):
|
|
60
82
|
t_stat, t_p_value = ttest_ind(
|
|
61
83
|
self.data[0], self.data[1])
|
|
62
84
|
if self.tails == 1:
|
|
63
85
|
t_p_value /= 2
|
|
64
|
-
self.test_name = 't-test for
|
|
65
|
-
self.test_id = '
|
|
86
|
+
self.test_name = 't-test for independent samples'
|
|
87
|
+
self.test_id = 't_test_independent'
|
|
66
88
|
self.paired = False
|
|
67
89
|
self.test_stat = t_stat
|
|
68
90
|
self.p_value = t_p_value
|
|
@@ -96,14 +118,14 @@ class __StatisticalTests():
|
|
|
96
118
|
self.popmean = 0
|
|
97
119
|
self.AddWarning('no_pop_mean_set')
|
|
98
120
|
data = [i - self.popmean for i in self.data[0]]
|
|
99
|
-
w_stat,
|
|
121
|
+
w_stat, p_value = wilcoxon(data)
|
|
100
122
|
if self.tails == 1:
|
|
101
123
|
p_value /= 2
|
|
102
124
|
self.test_name = 'Wilcoxon signed-rank test for single sample'
|
|
103
125
|
self.test_id = 'wilcoxon_single_sample'
|
|
104
126
|
self.paired = False
|
|
105
127
|
self.test_stat = w_stat
|
|
106
|
-
self.p_value =
|
|
128
|
+
self.p_value = p_value
|
|
107
129
|
|
|
108
130
|
def wilcoxon(self):
|
|
109
131
|
stat, p_value = wilcoxon(self.data[0], self.data[1])
|
|
@@ -120,7 +142,7 @@ class __NormalityTests():
|
|
|
120
142
|
'''
|
|
121
143
|
Normality tests mixin
|
|
122
144
|
|
|
123
|
-
see the article about
|
|
145
|
+
see the article about minimal sample size for tests:
|
|
124
146
|
Power comparisons of Shapiro-Wilk, Kolmogorov-Smirnov,
|
|
125
147
|
Lilliefors and Anderson-Darling tests, Nornadiah Mohd Razali1, Yap Bee Wah1
|
|
126
148
|
'''
|
|
@@ -148,20 +170,22 @@ class __NormalityTests():
|
|
|
148
170
|
lf = False
|
|
149
171
|
|
|
150
172
|
# Anderson-Darling test
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
173
|
+
if n >= 20:
|
|
174
|
+
ad_stat, ad_p_value = self.anderson_get_p(
|
|
175
|
+
data, dist='norm')
|
|
176
|
+
if ad_p_value > 0.05:
|
|
177
|
+
ad = True
|
|
178
|
+
else:
|
|
179
|
+
ad = False
|
|
157
180
|
|
|
158
181
|
# D'Agostino-Pearson test
|
|
159
|
-
ap_stat, ap_p_value = normaltest(data)
|
|
160
182
|
# test result is skewed if n<20
|
|
161
|
-
if
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
183
|
+
if n >= 20:
|
|
184
|
+
ap_stat, ap_p_value = normaltest(data)
|
|
185
|
+
if ap_p_value > 0.05:
|
|
186
|
+
ap = True
|
|
187
|
+
else:
|
|
188
|
+
ap = False
|
|
165
189
|
|
|
166
190
|
# print(ap_p_value, ad_p_value, sw_p_value, lf_p_value)
|
|
167
191
|
|
|
@@ -169,7 +193,7 @@ class __NormalityTests():
|
|
|
169
193
|
|
|
170
194
|
def anderson_get_p(self, data, dist='norm'):
|
|
171
195
|
'''
|
|
172
|
-
calculating p-value for Anderson-Darling test using the method described here:
|
|
196
|
+
calculating p-value for Anderson-Darling test using the method described here:
|
|
173
197
|
Computation of Probability Associated with Anderson-Darling Statistic
|
|
174
198
|
Lorentz Jantschi and Sorana D. Bolboaca, 2018 - Mathematics
|
|
175
199
|
|
|
@@ -197,6 +221,65 @@ class __NormalityTests():
|
|
|
197
221
|
return ad, p
|
|
198
222
|
|
|
199
223
|
|
|
224
|
+
class __Helpers():
|
|
225
|
+
|
|
226
|
+
def matrix_to_dataframe(self, matrix):
    '''
    Convert a list of lists into a long-format pandas DataFrame.

    Parameters:
        matrix: iterable of iterables of values

    Returns:
        DataFrame with one line per value and three columns:
        'Row'   - position of the value inside its sublist,
        'Col'   - index of the sublist the value came from,
        'Value' - the value itself.
    '''
    rows = []
    cols = []
    values = []

    for col_index, column in enumerate(matrix):
        for row_index, value in enumerate(column):
            rows.append(row_index)
            cols.append(col_index)
            values.append(value)

    return pd.DataFrame({'Row': rows, 'Col': cols, 'Value': values})
|
|
242
|
+
|
|
243
|
+
def create_results_dict(self) -> dict:
    '''
    Build the results dictionary for the test that has just been run.

    Stores the significance stars on the instance (self.stars_int,
    self.stars_str) and returns a dict with the printed and exact
    p-value, test metadata, collected warnings and per-group
    descriptive statistics computed from self.data.

    Groups_SD uses numpy's default np.std (ddof=0).
    Groups_SE is SD / sqrt(n), where n is the size of that group.
    '''
    self.stars_int = self.make_stars()
    self.stars_str = '*' * self.stars_int if self.stars_int else 'ns'

    return {
        'p-value': self.make_p_value_printed(),
        'Significance(p<0.05)': self.p_value.item() < 0.05,
        'Stars_Printed': self.stars_str,
        'Test_Name': self.test_name,
        'Groups_Compared': self.n_groups,
        'Population_Mean': self.popmean if self.n_groups == 1 else 'N/A',
        'Data_Normaly_Distributed': self.parametric,
        'Parametric_Test_Applied': self.test_id in self.test_ids_parametric,
        'Paired_Test_Applied': self.paired,
        'Tails': self.tails,
        'p-value_exact': self.p_value.item(),
        'Stars': self.stars_int,
        # 'Stat_Value': self.test_stat.item(),
        'Warnings': self.warnings,
        'Groups_N': [len(group) for group in self.data],
        'Groups_Median': [np.median(group).item() for group in self.data],
        'Groups_Mean': [np.mean(group).item() for group in self.data],
        'Groups_SD': [np.std(group).item() for group in self.data],
        # divide by the size of each group, not by the number of groups
        'Groups_SE': [(np.std(group) / np.sqrt(len(group))).item() for group in self.data],
        # TODO: the samples are returned as stored in self.data (numpy
        # float64 values); convert them to regular floats
        'Samples': self.data,
    }
|
|
271
|
+
|
|
272
|
+
def log(self, *args, **kwargs):
|
|
273
|
+
message = ' '.join(map(str, args))
|
|
274
|
+
# print(message, **kwargs)
|
|
275
|
+
self.summary += '\n' + message
|
|
276
|
+
|
|
277
|
+
def AddWarning(self, warning_id):
|
|
278
|
+
message = self.warning_ids_all[warning_id]
|
|
279
|
+
self.log(message)
|
|
280
|
+
self.warnings.append(message)
|
|
281
|
+
|
|
282
|
+
|
|
200
283
|
class __TextFormatting():
|
|
201
284
|
'''
|
|
202
285
|
Text formatting mixin
|
|
@@ -291,45 +374,6 @@ class __TextFormatting():
|
|
|
291
374
|
else:
|
|
292
375
|
self.log(i, ':', ' ' * shift, self.results[i])
|
|
293
376
|
|
|
294
|
-
def create_results_dict(self) -> dict:
|
|
295
|
-
|
|
296
|
-
self.stars_int = self.make_stars()
|
|
297
|
-
self.stars_str = '*' * self.stars_int if self.stars_int else 'ns'
|
|
298
|
-
|
|
299
|
-
return {
|
|
300
|
-
'p-value': self.make_p_value_printed(),
|
|
301
|
-
'Significance(p<0.05)': True if self.p_value.item() < 0.05 else False,
|
|
302
|
-
'Stars_Printed': self.stars_str,
|
|
303
|
-
'Test_Name': self.test_name,
|
|
304
|
-
'Groups_Compared': self.n_groups,
|
|
305
|
-
'Population_Mean': self.popmean if self.n_groups == 1 else 'N/A',
|
|
306
|
-
'Data_Normaly_Distributed': self.parametric,
|
|
307
|
-
'Parametric_Test_Applied': True if self.test_id in self.test_ids_parametric else False,
|
|
308
|
-
'Paired_Test_Applied': self.paired,
|
|
309
|
-
'Tails': self.tails,
|
|
310
|
-
'p-value_exact': self.p_value.item(),
|
|
311
|
-
'Stars': self.stars_int,
|
|
312
|
-
# 'Stat_Value': self.test_stat.item(),
|
|
313
|
-
'Warnings': self.warnings,
|
|
314
|
-
'Groups_N': [len(self.data[i]) for i in range(len(self.data))],
|
|
315
|
-
'Groups_Median': [np.median(self.data[i]).item() for i in range(len(self.data))],
|
|
316
|
-
'Groups_Mean': [np.mean(self.data[i]).item() for i in range(len(self.data))],
|
|
317
|
-
'Groups_SD': [np.std(self.data[i]).item() for i in range(len(self.data))],
|
|
318
|
-
'Groups_SE': [np.std(self.data[i]).item() / np.sqrt(len(self.data)).item() for i in range(len(self.data))],
|
|
319
|
-
# actually returns list of lists of numpy dtypes of float64, next make it return regular floats:
|
|
320
|
-
'Samples': self.data,
|
|
321
|
-
}
|
|
322
|
-
|
|
323
|
-
def log(self, *args, **kwargs):
|
|
324
|
-
message = ' '.join(map(str, args))
|
|
325
|
-
# print(message, **kwargs)
|
|
326
|
-
self.summary += '\n' + message
|
|
327
|
-
|
|
328
|
-
def AddWarning(self, warning_id):
|
|
329
|
-
message = self.warning_ids_all[warning_id]
|
|
330
|
-
self.log(message)
|
|
331
|
-
self.warnings.append(message)
|
|
332
|
-
|
|
333
377
|
|
|
334
378
|
class __InputFormatting():
|
|
335
379
|
def floatify_recursive(self, data):
|
|
@@ -347,7 +391,7 @@ class __InputFormatting():
|
|
|
347
391
|
return None
|
|
348
392
|
|
|
349
393
|
|
|
350
|
-
class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting, __InputFormatting):
|
|
394
|
+
class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting, __InputFormatting, __Helpers):
|
|
351
395
|
'''
|
|
352
396
|
The main class
|
|
353
397
|
*documentation placeholder*
|
|
@@ -370,23 +414,51 @@ class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting
|
|
|
370
414
|
self.n_groups = len(self.groups_list)
|
|
371
415
|
self.warning_flag_non_numeric_data = False
|
|
372
416
|
self.summary = ''
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
't_test_paired',
|
|
376
|
-
't_test_single_sample',]
|
|
417
|
+
|
|
418
|
+
# test IDs classification:
|
|
377
419
|
self.test_ids_all = [ # in aplhabetical order
|
|
378
|
-
'
|
|
420
|
+
'anova_1w_ordinary',
|
|
421
|
+
'anova_1w_rm',
|
|
379
422
|
'friedman',
|
|
380
423
|
'kruskal_wallis',
|
|
381
424
|
'mann_whitney',
|
|
382
|
-
'
|
|
425
|
+
't_test_independent',
|
|
383
426
|
't_test_paired',
|
|
384
427
|
't_test_single_sample',
|
|
385
428
|
'wilcoxon',
|
|
386
429
|
'wilcoxon_single_sample',
|
|
387
430
|
]
|
|
431
|
+
# IDs of the parametric tests. Every entry needs its own trailing comma:
# adjacent string literals are silently concatenated into one element.
self.test_ids_parametric = [
    'anova_1w_ordinary',
    'anova_1w_rm',
    't_test_independent',
    't_test_paired',
    't_test_single_sample',
]
|
|
438
|
+
self.test_ids_dependent = [
|
|
439
|
+
'anova_1w_rm',
|
|
440
|
+
'friedman',
|
|
441
|
+
't_test_paired',
|
|
442
|
+
'wilcoxon',
|
|
443
|
+
]
|
|
444
|
+
self.test_ids_3sample = [
|
|
445
|
+
'anova_1w_ordinary',
|
|
446
|
+
'anova_1w_rm',
|
|
447
|
+
'friedman',
|
|
448
|
+
'kruskal_wallis',
|
|
449
|
+
]
|
|
450
|
+
self.test_ids_2sample = [
|
|
451
|
+
'mann_whitney',
|
|
452
|
+
't_test_independent',
|
|
453
|
+
't_test_paired',
|
|
454
|
+
'wilcoxon',
|
|
455
|
+
]
|
|
456
|
+
self.test_ids_1sample = [
|
|
457
|
+
't_test_single_sample',
|
|
458
|
+
'wilcoxon_single_sample',
|
|
459
|
+
]
|
|
388
460
|
self.warning_ids_all = {
|
|
389
|
-
'not-numeric': '\nWarning: Non-numeric data was found in input and ignored.\n Make sure the input data is correct to get the correct results\n',
|
|
461
|
+
# 'not-numeric': '\nWarning: Non-numeric data was found in input and ignored.\n Make sure the input data is correct to get the correct results\n',
|
|
390
462
|
'param_test_with_non-normal_data': '\nWarning: Parametric test was manualy chosen for Not-Normaly distributed data.\n The results might be skewed. \n Please, run non-parametric test or preform automatic test selection.\n',
|
|
391
463
|
'non-param_test_with_normal_data': '\nWarning: Non-Parametric test was manualy chosen for Normaly distributed data.\n The results might be skewed. \n Please, run parametric test or preform automatic test selection.\n',
|
|
392
464
|
'no_pop_mean_set': '\nWarning: No Population Mean was set up for single-sample test, used default 0 value.\n The results might be skewed. \n Please, set the Population Mean and run the test again.\n',
|
|
@@ -411,7 +483,8 @@ class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting
|
|
|
411
483
|
# adjusting input data type
|
|
412
484
|
self.data = self.floatify_recursive(self.groups_list)
|
|
413
485
|
if self.warning_flag_non_numeric_data:
|
|
414
|
-
self.
|
|
486
|
+
self.log(
|
|
487
|
+
'Text or other non-numeric data in the input was ignored:')
|
|
415
488
|
|
|
416
489
|
# delete the empty cols from input
|
|
417
490
|
self.data = [col for col in self.data if any(
|
|
@@ -419,30 +492,21 @@ class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting
|
|
|
419
492
|
|
|
420
493
|
# User input assertion block
|
|
421
494
|
try:
|
|
495
|
+
assert self.data, 'There is no input data'
|
|
422
496
|
assert self.tails in [1, 2], 'Tails parameter can be 1 or 2 only'
|
|
423
497
|
assert test in self.test_ids_all or test == 'auto', 'Wrong test id choosen, ensure you called correct function'
|
|
424
|
-
assert not (self.n_groups > 1
|
|
425
|
-
and (test == 't_test_single_sample'
|
|
426
|
-
or test == 'wilcoxon_single_sample')), 'Only one group of data must be given for single-group tests'
|
|
427
498
|
assert all(len(
|
|
428
499
|
group) >= 4 for group in self.data), 'Each group must contain at least four values'
|
|
429
|
-
assert not (self.paired == True
|
|
430
|
-
|
|
431
|
-
assert not (test
|
|
432
|
-
|
|
433
|
-
assert not (test
|
|
434
|
-
|
|
435
|
-
assert not (test
|
|
436
|
-
|
|
437
|
-
assert not (test
|
|
438
|
-
3), 'At least three groups of data must be given for
|
|
439
|
-
assert not ((test == 'anova'
|
|
440
|
-
or test == 'kruskal_wallis') and self.n_groups < 2), 'At least two groups of data must be given for ANOVA or Kruskal Wallis tests'
|
|
441
|
-
assert not ((test == 'wilcoxon'
|
|
442
|
-
or test == 't_test_independend'
|
|
443
|
-
or test == 't_test_paired'
|
|
444
|
-
or test == 'mann_whitney')
|
|
445
|
-
and self.n_groups != 2), 'Only two groups of data must be given for 2-groups tests'
|
|
500
|
+
assert not (self.paired == True
|
|
501
|
+
and not all(len(lst) == len(self.data[0]) for lst in self.data)), 'Paired groups must have the same length'
|
|
502
|
+
assert not (test in self.test_ids_dependent
|
|
503
|
+
and not all(len(lst) == len(self.data[0]) for lst in self.data)), 'Groups must have the same length for dependent groups test'
|
|
504
|
+
assert not (test in self.test_ids_2sample
|
|
505
|
+
and self.n_groups != 2), f'Only two groups of data must be given for 2-groups tests, got {self.n_groups}'
|
|
506
|
+
assert not (test in self.test_ids_1sample
|
|
507
|
+
and self.n_groups > 1), f'Only one group of data must be given for single-group tests, got {self.n_groups}'
|
|
508
|
+
assert not (test in self.test_ids_3sample
|
|
509
|
+
and self.n_groups < 3), f'At least three groups of data must be given for multi-groups tests, got {self.n_groups}'
|
|
446
510
|
except AssertionError as error:
|
|
447
511
|
self.log('\nTest :', test)
|
|
448
512
|
self.log('Error :', error)
|
|
@@ -486,16 +550,18 @@ class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting
|
|
|
486
550
|
if not test == 'auto' and self.parametric and not test in self.test_ids_parametric:
|
|
487
551
|
self.AddWarning('non-param_test_with_normal_data')
|
|
488
552
|
|
|
489
|
-
if test == '
|
|
490
|
-
self.
|
|
553
|
+
if test == 'anova_1w_ordinary':
|
|
554
|
+
self.anova_1w_ordinary()
|
|
555
|
+
elif test == 'anova_1w_rm':
|
|
556
|
+
self.anova_1w_rm()
|
|
491
557
|
elif test == 'friedman':
|
|
492
558
|
self.friedman_test()
|
|
493
559
|
elif test == 'kruskal_wallis':
|
|
494
560
|
self.kruskal_wallis_test()
|
|
495
561
|
elif test == 'mann_whitney':
|
|
496
562
|
self.mann_whitney_u_test()
|
|
497
|
-
elif test == '
|
|
498
|
-
self.
|
|
563
|
+
elif test == 't_test_independent':
|
|
564
|
+
self.t_test_independent()
|
|
499
565
|
elif test == 't_test_paired':
|
|
500
566
|
self.t_test_paired()
|
|
501
567
|
elif test == 't_test_single_sample':
|
|
@@ -521,7 +587,13 @@ class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting
|
|
|
521
587
|
|
|
522
588
|
def __auto(self):
|
|
523
589
|
|
|
524
|
-
if self.n_groups ==
|
|
590
|
+
if self.n_groups == 1:
|
|
591
|
+
if self.parametric:
|
|
592
|
+
return self.t_test_single_sample()
|
|
593
|
+
else:
|
|
594
|
+
return self.wilcoxon_single_sample()
|
|
595
|
+
|
|
596
|
+
elif self.n_groups == 2:
|
|
525
597
|
if self.paired:
|
|
526
598
|
if self.parametric:
|
|
527
599
|
return self.t_test_paired()
|
|
@@ -529,23 +601,25 @@ class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting
|
|
|
529
601
|
return self.wilcoxon()
|
|
530
602
|
else:
|
|
531
603
|
if self.parametric:
|
|
532
|
-
return self.
|
|
604
|
+
return self.t_test_independent()
|
|
533
605
|
else:
|
|
534
606
|
return self.mann_whitney_u_test()
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
return self.t_test_single_sample()
|
|
538
|
-
else:
|
|
539
|
-
return self.wilcoxon_single_sample()
|
|
540
|
-
else:
|
|
607
|
+
|
|
608
|
+
elif self.n_groups >= 3:
|
|
541
609
|
if self.paired:
|
|
542
|
-
|
|
610
|
+
if self.parametric:
|
|
611
|
+
return self.anova_1w_rm()
|
|
612
|
+
else:
|
|
613
|
+
return self.friedman_test()
|
|
543
614
|
else:
|
|
544
615
|
if self.parametric:
|
|
545
|
-
return self.
|
|
616
|
+
return self.anova_1w_ordinary()
|
|
546
617
|
else:
|
|
547
618
|
return self.kruskal_wallis_test()
|
|
548
619
|
|
|
620
|
+
else:
|
|
621
|
+
pass
|
|
622
|
+
|
|
549
623
|
# public methods:
|
|
550
624
|
def RunAuto(self):
|
|
551
625
|
self.__run_test(test='auto')
|
|
@@ -553,8 +627,11 @@ class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting
|
|
|
553
627
|
def RunManual(self, test):
|
|
554
628
|
self.__run_test(test)
|
|
555
629
|
|
|
556
|
-
def
|
|
557
|
-
self.__run_test(test='
|
|
630
|
+
def RunOnewayAnova(self):
|
|
631
|
+
self.__run_test(test='anova_1w_ordinary')
|
|
632
|
+
|
|
633
|
+
def RunOnewayAnovaRM(self):
|
|
634
|
+
self.__run_test(test='anova_1w_rm')
|
|
558
635
|
|
|
559
636
|
def RunFriedman(self):
|
|
560
637
|
self.__run_test(test='friedman')
|
|
@@ -566,7 +643,7 @@ class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting
|
|
|
566
643
|
self.__run_test(test='mann_whitney')
|
|
567
644
|
|
|
568
645
|
def RunTtest(self):
|
|
569
|
-
self.__run_test(test='
|
|
646
|
+
self.__run_test(test='t_test_independent')
|
|
570
647
|
|
|
571
648
|
def RunTtestPaired(self):
|
|
572
649
|
self.__run_test(test='t_test_paired')
|
|
@@ -599,9 +676,12 @@ class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting
|
|
|
599
676
|
else:
|
|
600
677
|
return self.summary
|
|
601
678
|
|
|
679
|
+
def GetTestIDs(self):
|
|
680
|
+
return self.test_ids_all
|
|
681
|
+
|
|
602
682
|
def PrintSummary(self):
|
|
603
683
|
print(self.summary)
|
|
604
684
|
|
|
605
685
|
|
|
606
686
|
if __name__ == '__main__':
    # Import names are case-sensitive; the package directory is AutoStatLib.
    print('This package works as an imported module only.\nUse "import AutoStatLib" statement')
|
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
# AutoStatLib package version:
|
|
2
|
-
__version__ = "0.1
|
|
2
|
+
__version__ = "0.2.1"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: AutoStatLib
|
|
3
|
-
Version: 0.1
|
|
3
|
+
Version: 0.2.1
|
|
4
4
|
Summary: AutoStatLib - a simple statistical analysis tool
|
|
5
5
|
Author: Stemonitis, SciWare LLC
|
|
6
6
|
Author-email: konung-yaropolk <yaropolk1995@gmail.com>
|
|
@@ -531,6 +531,7 @@ License-File: LICENSE
|
|
|
531
531
|
Requires-Dist: numpy
|
|
532
532
|
Requires-Dist: scipy
|
|
533
533
|
Requires-Dist: statsmodels
|
|
534
|
+
Requires-Dist: pandas
|
|
534
535
|
|
|
535
536
|
# AutoStatLib - python library for automated statistical analysis
|
|
536
537
|
|
|
@@ -569,7 +570,7 @@ data_uniform = [list(np.random.uniform(i+3, i+1, n)) for i in range(groups)]
|
|
|
569
570
|
|
|
570
571
|
|
|
571
572
|
# set the parameters:
|
|
572
|
-
paired = False # is groups
|
|
573
|
+
paired = False # whether the groups are dependent (paired) or not
|
|
573
574
|
tails = 2 # two-tailed or one-tailed result
|
|
574
575
|
popmean = 0 # population mean - only needed for single-sample tests
|
|
575
576
|
|
|
@@ -585,7 +586,7 @@ analysis.RunAuto()
|
|
|
585
586
|
|
|
586
587
|
or you can choose specific tests:
|
|
587
588
|
```python
|
|
588
|
-
# 2 groups
|
|
589
|
+
# 2 groups independent:
|
|
589
590
|
analysis.RunTtest()
|
|
590
591
|
analysis.RunMannWhitney()
|
|
591
592
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|