AutoStatLib 0.2.2__tar.gz → 0.2.6__tar.gz
This diff shows the changes between package versions as published to a supported public registry and is provided for informational purposes only.
Potentially problematic release: this version of AutoStatLib might be problematic.
- {autostatlib-0.2.2/src/AutoStatLib.egg-info → autostatlib-0.2.6}/PKG-INFO +34 -26
- {autostatlib-0.2.2 → autostatlib-0.2.6}/README.md +28 -22
- {autostatlib-0.2.2 → autostatlib-0.2.6}/pyproject.toml +5 -4
- {autostatlib-0.2.2 → autostatlib-0.2.6}/requirements.txt +2 -1
- autostatlib-0.2.6/src/AutoStatLib/AutoStatLib.py +254 -0
- {autostatlib-0.2.2 → autostatlib-0.2.6}/src/AutoStatLib/_version.py +1 -1
- autostatlib-0.2.6/src/AutoStatLib/helpers.py +80 -0
- autostatlib-0.2.6/src/AutoStatLib/normality_tests.py +83 -0
- autostatlib-0.2.6/src/AutoStatLib/statistical_tests.py +184 -0
- autostatlib-0.2.6/src/AutoStatLib/text_formatting.py +106 -0
- {autostatlib-0.2.2 → autostatlib-0.2.6/src/AutoStatLib.egg-info}/PKG-INFO +34 -26
- {autostatlib-0.2.2 → autostatlib-0.2.6}/src/AutoStatLib.egg-info/SOURCES.txt +4 -0
- {autostatlib-0.2.2 → autostatlib-0.2.6}/src/AutoStatLib.egg-info/requires.txt +1 -0
- autostatlib-0.2.2/src/AutoStatLib/AutoStatLib.py +0 -669
- {autostatlib-0.2.2 → autostatlib-0.2.6}/LICENSE +0 -0
- {autostatlib-0.2.2 → autostatlib-0.2.6}/MANIFEST.in +0 -0
- {autostatlib-0.2.2 → autostatlib-0.2.6}/setup.cfg +0 -0
- {autostatlib-0.2.2 → autostatlib-0.2.6}/src/AutoStatLib/__init__.py +0 -0
- {autostatlib-0.2.2 → autostatlib-0.2.6}/src/AutoStatLib/__main__.py +0 -0
- {autostatlib-0.2.2 → autostatlib-0.2.6}/src/AutoStatLib.egg-info/dependency_links.txt +0 -0
- {autostatlib-0.2.2 → autostatlib-0.2.6}/src/AutoStatLib.egg-info/top_level.txt +0 -0
PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: AutoStatLib
-Version: 0.2.2
+Version: 0.2.6
 Summary: AutoStatLib - a simple statistical analysis tool
 Author: Stemonitis, SciWare LLC
 Author-email: konung-yaropolk <yaropolk1995@gmail.com>
@@ -509,15 +509,16 @@ License: GNU LESSER GENERAL PUBLIC LICENSE
 
 That's all there is to it!
 
-Project-URL: Homepage, https://github.com/konung-yaropolk/
-Project-URL:
+Project-URL: Homepage, https://github.com/konung-yaropolk/AutoStatLib
+Project-URL: Repository, https://github.com/konung-yaropolk/AutoStatLib.git
+Project-URL: Issues, https://github.com/konung-yaropolk/AutoStatLib/issues
 Keywords: Science,Statistics
 Classifier: Programming Language :: Python
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.12
 Classifier: License :: OSI Approved :: GNU Lesser General Public License v2 or later (LGPLv2+)
 Classifier: Operating System :: OS Independent
-Classifier: Development Status ::
+Classifier: Development Status :: 4 - Beta
 Classifier: Intended Audience :: Developers
 Classifier: Intended Audience :: Science/Research
 Classifier: Natural Language :: English
@@ -531,6 +532,7 @@ License-File: LICENSE
 Requires-Dist: numpy
 Requires-Dist: scipy
 Requires-Dist: statsmodels
+Requires-Dist: scikit-posthocs
 Requires-Dist: pandas
 
 # AutoStatLib - python library for automated statistical analysis
@@ -619,26 +621,30 @@ results = analysis.GetResult()
 The results dictionary keys with representing value types:
 ```
 {
-'p-value':
-'Significance(p<0.05)':
-'Stars_Printed':
-'Test_Name':
-'Groups_Compared':
-'Population_Mean':
-'Data_Normaly_Distributed':
-'Parametric_Test_Applied':
-'Paired_Test_Applied':
-'Tails':
-'p-value_exact':
-'Stars':
-'Warnings':
-'Groups_N':
-'Groups_Median':
-'Groups_Mean':
-'Groups_SD':
-'Groups_SE':
-'Samples':
+'p-value' : String
+'Significance(p<0.05)' : Boolean
+'Stars_Printed' : String
+'Test_Name' : String
+'Groups_Compared' : Integer
+'Population_Mean' : Float (taken from the input)
+'Data_Normaly_Distributed' : Boolean
+'Parametric_Test_Applied' : Boolean
+'Paired_Test_Applied' : Boolean
+'Tails' : Integer (taken from the input)
+'p-value_exact' : Float
+'Stars' : Integer
+'Warnings' : String
+'Groups_N' : List of integers
+'Groups_Median' : List of floats
+'Groups_Mean' : List of floats
+'Groups_SD' : List of floats
+'Groups_SE' : List of floats
+'Samples' : List of input values by groups
 (taken from the input)
+'Posthoc_Matrix' : 2D List of floats
+'Posthoc_Matrix_bool' : 2D List of Boolean
+'Posthoc_Matrix_printed': 2D List of String
+'Posthoc_Matrix_stars': 2D List of String
 }
 ```
 If errors occured, *GetResult()* returns an empty dictionary
@@ -653,7 +659,7 @@ If errors occured, *GetResult()* returns an empty dictionary
 
 ### TODO:
 
---
+-- Anova: posthocs
 -- Anova: add 2-way anova and 3-way anova
 -- onevay Anova: add repeated measures (for normal dependent values) with and without Gaisser-Greenhouse correction
 -- onevay Anova: add Brown-Forsithe and Welch (for normal independent values with unequal SDs between groups)
@@ -666,10 +672,11 @@ If errors occured, *GetResult()* returns an empty dictionary
 -- add QQ plot
 -- n-sample tests: add onetail option
 
-✅ done -- detailed normality test results
+✅ done -- detailed normality test results
+✅ done -- added posthoc: Kruskal-Wallis Dunn's multiple comparisons
 
 
-
+tests check:
 1-sample:
 --Wilcoxon 2,1 tails - ok
 --t-tests 2,1 tails -ok
@@ -681,6 +688,7 @@ checked tests:
 
 n-sample:
 --Kruskal-Wallis 2 tail - ok
+--Dunn's multiple comparisons - ??
 --Friedman 2 tail - ok
 --one-way ANOWA 2 tail - ok
 
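To make the new `Posthoc_Matrix*` keys concrete, here is a sketch of one way they could be read after a Kruskal-Wallis run. The data are invented, and whether the matrix is actually populated depends on the test that ran; the posthoc machinery itself lives in modules not shown in this excerpt.

```python
# Sketch only: reading the posthoc keys introduced in 0.2.6 (invented data).
from AutoStatLib.AutoStatLib import StatisticalAnalysis

groups = [
    [1.2, 1.9, 2.3, 2.8, 1.7, 2.1],
    [3.4, 3.1, 2.9, 3.8, 3.3, 3.6],
    [5.0, 4.7, 5.3, 4.9, 5.1, 4.6],
]

analysis = StatisticalAnalysis(groups, paired=False, tails=2, posthoc=True)
analysis.RunKruskalWallis()
results = analysis.GetResult()

print(results['Test_Name'], results['p-value'])
for row in results.get('Posthoc_Matrix_printed', []):   # pairwise p-values as printed strings
    print(row)
```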
README.md

@@ -84,26 +84,30 @@ results = analysis.GetResult()
 The results dictionary keys with representing value types:
 ```
 {
-'p-value':
-'Significance(p<0.05)':
-'Stars_Printed':
-'Test_Name':
-'Groups_Compared':
-'Population_Mean':
-'Data_Normaly_Distributed':
-'Parametric_Test_Applied':
-'Paired_Test_Applied':
-'Tails':
-'p-value_exact':
-'Stars':
-'Warnings':
-'Groups_N':
-'Groups_Median':
-'Groups_Mean':
-'Groups_SD':
-'Groups_SE':
-'Samples':
+'p-value' : String
+'Significance(p<0.05)' : Boolean
+'Stars_Printed' : String
+'Test_Name' : String
+'Groups_Compared' : Integer
+'Population_Mean' : Float (taken from the input)
+'Data_Normaly_Distributed' : Boolean
+'Parametric_Test_Applied' : Boolean
+'Paired_Test_Applied' : Boolean
+'Tails' : Integer (taken from the input)
+'p-value_exact' : Float
+'Stars' : Integer
+'Warnings' : String
+'Groups_N' : List of integers
+'Groups_Median' : List of floats
+'Groups_Mean' : List of floats
+'Groups_SD' : List of floats
+'Groups_SE' : List of floats
+'Samples' : List of input values by groups
 (taken from the input)
+'Posthoc_Matrix' : 2D List of floats
+'Posthoc_Matrix_bool' : 2D List of Boolean
+'Posthoc_Matrix_printed': 2D List of String
+'Posthoc_Matrix_stars': 2D List of String
 }
 ```
 If errors occured, *GetResult()* returns an empty dictionary
@@ -118,7 +122,7 @@ If errors occured, *GetResult()* returns an empty dictionary
 
 ### TODO:
 
---
+-- Anova: posthocs
 -- Anova: add 2-way anova and 3-way anova
 -- onevay Anova: add repeated measures (for normal dependent values) with and without Gaisser-Greenhouse correction
 -- onevay Anova: add Brown-Forsithe and Welch (for normal independent values with unequal SDs between groups)
@@ -131,10 +135,11 @@ If errors occured, *GetResult()* returns an empty dictionary
 -- add QQ plot
 -- n-sample tests: add onetail option
 
-✅ done -- detailed normality test results
+✅ done -- detailed normality test results
+✅ done -- added posthoc: Kruskal-Wallis Dunn's multiple comparisons
 
 
-
+tests check:
 1-sample:
 --Wilcoxon 2,1 tails - ok
 --t-tests 2,1 tails -ok
@@ -146,6 +151,7 @@ checked tests:
 
 n-sample:
 --Kruskal-Wallis 2 tail - ok
+--Dunn's multiple comparisons - ??
 --Friedman 2 tail - ok
 --one-way ANOWA 2 tail - ok
 
pyproject.toml

@@ -1,5 +1,5 @@
 [build-system]
-requires = ["setuptools>=62.6", "numpy", "scipy", "statsmodels"]
+requires = ["setuptools>=62.6", "numpy", "scipy", "statsmodels", "scikit-posthocs"]
 build-backend = "setuptools.build_meta"
 
 [project]
@@ -21,7 +21,7 @@ classifiers = [
 "Programming Language :: Python :: 3.12",
 "License :: OSI Approved :: GNU Lesser General Public License v2 or later (LGPLv2+)",
 "Operating System :: OS Independent",
-"Development Status ::
+"Development Status :: 4 - Beta",
 "Intended Audience :: Developers",
 "Intended Audience :: Science/Research",
 "Natural Language :: English",
@@ -32,8 +32,9 @@ classifiers = [
 ]
 
 [project.urls]
-Homepage = "https://github.com/konung-yaropolk/
-
+Homepage = "https://github.com/konung-yaropolk/AutoStatLib"
+Repository = "https://github.com/konung-yaropolk/AutoStatLib.git"
+Issues = "https://github.com/konung-yaropolk/AutoStatLib/issues"
 
 [tool.setuptools.dynamic]
 version = {attr = "AutoStatLib.__version__"}
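The only dependency change in this release is the addition of scikit-posthocs, the library that provides Dunn's test. The snippet below is a standalone illustration of that library on made-up groups, not a copy of AutoStatLib's internal call; the `p_adjust` choice is arbitrary.

```python
# Standalone illustration of the new scikit-posthocs dependency (not AutoStatLib's internal code).
# posthoc_dunn() returns a pandas DataFrame of pairwise p-values between the groups.
import scikit_posthocs as sp

groups = [
    [1.2, 1.9, 2.3, 2.8, 1.7, 2.1],   # invented example data
    [3.4, 3.1, 2.9, 3.8, 3.3, 3.6],
    [5.0, 4.7, 5.3, 4.9, 5.1, 4.6],
]

p_matrix = sp.posthoc_dunn(groups, p_adjust='holm')
print(p_matrix)
```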
src/AutoStatLib/AutoStatLib.py (new file)

@@ -0,0 +1,254 @@
+from AutoStatLib.statistical_tests import StatisticalTests
+from AutoStatLib.normality_tests import NormalityTests
+from AutoStatLib.helpers import Helpers
+from AutoStatLib.text_formatting import TextFormatting
+
+
+class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Helpers):
+    '''
+    The main class
+    *documentation placeholder*
+
+    '''
+
+    def __init__(self,
+                 groups_list,
+                 paired=False,
+                 tails=2,
+                 popmean=None,
+                 posthoc=True,
+                 verbose=True):
+        self.results = None
+        self.error = False
+        self.groups_list = groups_list
+        self.paired = paired
+        self.tails = tails
+        self.popmean = popmean
+        self.posthoc = posthoc
+        self.verbose = verbose
+        self.n_groups = len(self.groups_list)
+        self.warning_flag_non_numeric_data = False
+        self.summary = ''
+
+        # test IDs classification:
+        self.test_ids_all = [  # in aplhabetical order
+            'anova_1w_ordinary',
+            'anova_1w_rm',
+            'friedman',
+            'kruskal_wallis',
+            'mann_whitney',
+            't_test_independent',
+            't_test_paired',
+            't_test_single_sample',
+            'wilcoxon',
+            'wilcoxon_single_sample',
+        ]
+        self.test_ids_parametric = [
+            'anova_1w_ordinary',
+            'anova_1w_rm'
+            't_test_independent',
+            't_test_paired',
+            't_test_single_sample',
+        ]
+        self.test_ids_dependent = [
+            'anova_1w_rm',
+            'friedman',
+            't_test_paired',
+            'wilcoxon',
+        ]
+        self.test_ids_3sample = [
+            'anova_1w_ordinary',
+            'anova_1w_rm',
+            'friedman',
+            'kruskal_wallis',
+        ]
+        self.test_ids_2sample = [
+            'mann_whitney',
+            't_test_independent',
+            't_test_paired',
+            'wilcoxon',
+        ]
+        self.test_ids_1sample = [
+            't_test_single_sample',
+            'wilcoxon_single_sample',
+        ]
+        self.warning_ids_all = {
+            # 'not-numeric': '\nWarning: Non-numeric data was found in input and ignored.\n Make sure the input data is correct to get the correct results\n',
+            'param_test_with_non-normal_data': '\nWarning: Parametric test was manualy chosen for Not-Normaly distributed data.\n The results might be skewed. \n Please, run non-parametric test or preform automatic test selection.\n',
+            'non-param_test_with_normal_data': '\nWarning: Non-Parametric test was manualy chosen for Normaly distributed data.\n The results might be skewed. \n Please, run parametric test or preform automatic test selection.\n',
+            'no_pop_mean_set': '\nWarning: No Population Mean was set up for single-sample test, used default 0 value.\n The results might be skewed. \n Please, set the Population Mean and run the test again.\n',
+        }
+
+    def run_test(self, test='auto'):
+
+        # reset values from previous tests
+        self.results = None
+        self.error = False
+        self.warnings = []
+        self.normals = []
+        self.test_name = None
+        self.test_id = None
+        self.test_stat = None
+        self.p_value = None
+        self.posthoc_matrix_df = None
+        self.posthoc_matrix = []
+        self.posthoc_name = None
+
+        self.log('\n' + '-'*67)
+        self.log('Statistical analysis __init__iated for data in {} groups\n'.format(
+            len(self.groups_list)))
+
+        # adjusting input data type
+        self.data = self.floatify_recursive(self.groups_list)
+        if self.warning_flag_non_numeric_data:
+            self.log(
+                'Text or other non-numeric data in the input was ignored:')
+
+        # delete the empty cols from input
+        self.data = [col for col in self.data if any(
+            x is not None for x in col)]
+
+        # User input assertion block
+        try:
+            assert self.data, 'There is no input data'
+            assert self.tails in [1, 2], 'Tails parameter can be 1 or 2 only'
+            assert test in self.test_ids_all or test == 'auto', 'Wrong test id choosen, ensure you called correct function'
+            assert all(len(
+                group) >= 4 for group in self.data), 'Each group must contain at least four values'
+            assert not (self.paired == True
+                        and not all(len(lst) == len(self.data[0]) for lst in self.data)), 'Paired groups must have the same length'
+            assert not (test in self.test_ids_dependent
+                        and not all(len(lst) == len(self.data[0]) for lst in self.data)), 'Groups must have the same length for dependent groups test'
+            assert not (test in self.test_ids_2sample
+                        and self.n_groups != 2), f'Only two groups of data must be given for 2-groups tests, got {self.n_groups}'
+            assert not (test in self.test_ids_1sample
+                        and self.n_groups > 1), f'Only one group of data must be given for single-group tests, got {self.n_groups}'
+            assert not (test in self.test_ids_3sample
+                        and self.n_groups < 3), f'At least three groups of data must be given for multi-groups tests, got {self.n_groups}'
+        except AssertionError as error:
+            self.log('\nTest :', test)
+            self.log('Error :', error)
+            self.log('-'*67 + '\n')
+            self.error = True
+            print(self.summary)
+            return
+
+        # Print the data
+        self.print_groups()
+
+        # Normality tests
+        self.log(
+            '\n\nThe group is assumed to be normally distributed if at least one')
+        self.log(
+            'normality test result is positive. Normality checked by tests:')
+        self.log('Shapiro-Wilk, Lilliefors, Anderson-Darling, D\'Agostino-Pearson')
+        self.log(
+            '[+] -positive, [-] -negative, [ ] -too small group for the test\n')
+        self.log(' Test : SW LF AD AP ')
+        for i, data in enumerate(self.data):
+            poll = self.check_normality(data)
+            isnormal = any(poll)
+            poll_print = tuple(
+                '+' if x is True else '-' if x is False else ' ' if x is None else 'e' for x in poll)
+            self.normals.append(isnormal)
+            self.log(
+                f' Group {i+1}: {poll_print[0]} {poll_print[1]} {poll_print[2]} {poll_print[3]} so disrtibution seems {"normal" if isnormal else "not normal"}')
+        self.parametric = all(self.normals)
+
+        # print test choosen
+        self.log('\n\nInput:\n')
+        self.log('Data Normaly Distributed: ', self.parametric)
+        self.log('Paired Groups: ', self.paired)
+        self.log('Groups: ', self.n_groups)
+        self.log('Test chosen by user: ', test)
+
+        # Wrong test Warnings
+        if not test == 'auto' and not self.parametric and test in self.test_ids_parametric:
+            self.AddWarning('param_test_with_non-normal_data')
+        if not test == 'auto' and self.parametric and not test in self.test_ids_parametric:
+            self.AddWarning('non-param_test_with_normal_data')
+
+        # run the test
+
+        if test in self.test_ids_all:
+            self.run_test_by_id(test)
+        else:
+            self.run_test_auto()
+
+        # print the results
+        self.results = self.create_results_dict()
+        self.print_results()
+        self.log(
+            '\n\nResults above are accessible as a dictionary via GetResult() method')
+        self.log('-'*67 + '\n')
+
+        # print the results to console:
+        if self.verbose == True:
+            print(self.summary)
+
+    # public methods:
+
+    def RunAuto(self):
+        self.run_test(test='auto')
+
+    def RunManual(self, test):
+        self.run_test(test)
+
+    def RunOnewayAnova(self):
+        self.run_test(test='anova_1w_ordinary')
+
+    def RunOnewayAnovaRM(self):
+        self.run_test(test='anova_1w_rm')
+
+    def RunFriedman(self):
+        self.run_test(test='friedman')
+
+    def RunKruskalWallis(self):
+        self.run_test(test='kruskal_wallis')
+
+    def RunMannWhitney(self):
+        self.run_test(test='mann_whitney')
+
+    def RunTtest(self):
+        self.run_test(test='t_test_independent')
+
+    def RunTtestPaired(self):
+        self.run_test(test='t_test_paired')
+
+    def RunTtestSingleSample(self):
+        self.run_test(test='t_test_single_sample')
+
+    def RunWilcoxonSingleSample(self):
+        self.run_test(test='wilcoxon_single_sample')
+
+    def RunWilcoxon(self):
+        self.run_test(test='wilcoxon')
+
+    def GetResult(self):
+        if not self.results and not self.error:
+            print('No test chosen, no results to output')
+            # self.run_test(test='auto')
+            return self.results
+        if not self.results and self.error:
+            print('Error occured, no results to output')
+            return {}
+        else:
+            return self.results
+
+    def GetSummary(self):
+        if not self.results and not self.error:
+            print('No test chosen, no summary to output')
+            # self.run_test(test='auto')
+            return self.summary
+        else:
+            return self.summary
+
+    def GetTestIDs(self):
+        return self.test_ids_all
+
+    def PrintSummary(self):
+        print(self.summary)
+
+
+if __name__ == '__main__':
+    print('This package works as an imported module only.\nUse "import autostatlib" statement')
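For orientation, here is a minimal usage sketch based only on the public methods defined in the file above. The group values are invented, and the actual test selection and execution live in statistical_tests.py, which this diff excerpt does not show.

```python
# Minimal sketch (invented data); assumes the package is installed as described in pyproject.toml.
from AutoStatLib.AutoStatLib import StatisticalAnalysis

groups = [
    [12.1, 11.4, 13.0, 12.6, 11.9],   # each group needs at least four values
    [14.2, 13.8, 14.9, 15.1, 14.4],
]

analysis = StatisticalAnalysis(groups, paired=False, tails=2, verbose=False)
analysis.RunAuto()              # automatic test selection from normality, pairing and group count
print(analysis.GetTestIDs())    # the ids accepted by RunManual()
analysis.PrintSummary()         # the text log assembled via log()
```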
src/AutoStatLib/_version.py

@@ -1,2 +1,2 @@
 # AutoStatLib package version:
-__version__ = "0.2.2"
+__version__ = "0.2.6"
src/AutoStatLib/helpers.py (new file)

@@ -0,0 +1,80 @@
+import numpy as np
+import pandas as pd
+
+
+class Helpers():
+
+    def matrix_to_dataframe(self, matrix):
+        data = []
+        cols = []
+        rows = []
+
+        order_number = 1
+        for i, row in enumerate(matrix):
+            for j, value in enumerate(row):
+                data.append(value)
+                cols.append(i)
+                rows.append(j)
+                order_number += 1
+
+        df = pd.DataFrame(
+            {'Row': rows, 'Col': cols, 'Value': data})
+        return df
+
+    def floatify_recursive(self, data):
+        if isinstance(data, list):
+            # Recursively process sublists and filter out None values
+            processed_list = [self.floatify_recursive(item) for item in data]
+            return [item for item in processed_list if item is not None]
+        else:
+            try:
+                # Try to convert the item to float
+                return np.float64(data)
+            except (ValueError, TypeError):
+                # If conversion fails, replace with None
+                self.warning_flag_non_numeric_data = True
+                return None
+
+    def create_results_dict(self) -> dict:
+
+        self.stars_int = self.make_stars(self.p_value.item())
+        self.stars_str = self.make_stars_printed(self.stars_int)
+
+        return {
+            'p-value': self.make_p_value_printed(self.p_value.item()),
+            'Significance(p<0.05)': True if self.p_value.item() < 0.05 else False,
+            'Stars_Printed': self.stars_str,
+            'Test_Name': self.test_name,
+            'Groups_Compared': self.n_groups,
+            'Population_Mean': self.popmean if self.n_groups == 1 else 'N/A',
+            'Data_Normaly_Distributed': self.parametric,
+            'Parametric_Test_Applied': True if self.test_id in self.test_ids_parametric else False,
+            'Paired_Test_Applied': self.paired,
+            'Tails': self.tails,
+            'p-value_exact': self.p_value.item(),
+            'Stars': self.stars_int,
+            # 'Stat_Value': self.test_stat.item(),
+            'Warnings': self.warnings,
+            'Groups_N': [len(self.data[i]) for i in range(len(self.data))],
+            'Groups_Median': [np.median(self.data[i]).item() for i in range(len(self.data))],
+            'Groups_Mean': [np.mean(self.data[i]).item() for i in range(len(self.data))],
+            'Groups_SD': [np.std(self.data[i]).item() for i in range(len(self.data))],
+            'Groups_SE': [np.std(self.data[i]).item() / np.sqrt(len(self.data)).item() for i in range(len(self.data))],
+            # actually returns list of lists of numpy dtypes of float64, next make it return regular floats:
+            'Samples': self.data,
+            'Posthoc_Tests_Name': self.posthoc_name if self.posthoc_name is not None else '',
+            'Posthoc_Matrix': self.posthoc_matrix if self.posthoc_matrix else [],
+            'Posthoc_Matrix_bool': [[bool(element) for element in row] for row in self.posthoc_matrix] if self.posthoc_matrix else [],
+            'Posthoc_Matrix_printed': [[self.make_p_value_printed(element) for element in row] for row in self.posthoc_matrix] if self.posthoc_matrix else [],
+            'Posthoc_Matrix_stars': [[self.make_stars_printed(self.make_stars(element)) for element in row] for row in self.posthoc_matrix] if self.posthoc_matrix else [],
+        }
+
+    def log(self, *args, **kwargs):
+        message = ' '.join(map(str, args))
+        # print(message, **kwargs)
+        self.summary += '\n' + message
+
+    def AddWarning(self, warning_id):
+        message = self.warning_ids_all[warning_id]
+        self.log(message)
+        self.warnings.append(message)
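A quick sketch of how floatify_recursive behaves, run here on a bare Helpers instance purely for illustration; inside the package it is always called through StatisticalAnalysis, which initialises the warning flag.

```python
# Illustrative only: exercising the Helpers mixin directly.
from AutoStatLib.helpers import Helpers

h = Helpers()
h.warning_flag_non_numeric_data = False   # normally set in StatisticalAnalysis.__init__

cleaned = h.floatify_recursive([['1.5', 'n/a', 3], [2, None, '4.0']])
print(cleaned)                            # nested lists of numpy float64, non-numeric entries dropped
print(h.warning_flag_non_numeric_data)    # True, because 'n/a' and None could not be converted
```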
src/AutoStatLib/normality_tests.py (new file)

@@ -0,0 +1,83 @@
+from statsmodels.stats.diagnostic import lilliefors
+from scipy.stats import shapiro, normaltest, anderson
+
+
+class NormalityTests():
+    '''
+    Normality tests mixin
+
+    see the article about minimal sample size for tests:
+    Power comparisons of Shapiro-Wilk, Kolmogorov-Smirnov,
+    Lilliefors and Anderson-Darling tests, Nornadiah Mohd Razali1, Yap Bee Wah1
+    '''
+
+    def check_normality(self, data):
+        sw = None
+        lf = None
+        ad = None
+        ap = None
+        n = len(data)
+
+        # Shapiro-Wilk test
+        sw_stat, sw_p_value = shapiro(data)
+        if sw_p_value and sw_p_value > 0.05:
+            sw = True
+        else:
+            sw = False
+
+        # Lilliefors test
+        lf_stat, lf_p_value = lilliefors(
+            data, dist='norm')
+        if lf_p_value and lf_p_value > 0.05:
+            lf = True
+        else:
+            lf = False
+
+        # Anderson-Darling test
+        if n >= 20:
+            ad_stat, ad_p_value = self.anderson_get_p(
+                data, dist='norm')
+            if ad_p_value and ad_p_value > 0.05:
+                ad = True
+            else:
+                ad = False
+
+        # D'Agostino-Pearson test
+        # test result is skewed if n<20
+        if n >= 20:
+            ap_stat, ap_p_value = normaltest(data)
+            if ap_p_value and ap_p_value > 0.05:
+                ap = True
+            else:
+                ap = False
+
+        return (sw, lf, ad, ap)
+
+    def anderson_get_p(self, data, dist='norm'):
+        '''
+        calculating p-value for Anderson-Darling test using the method described here:
+        Computation of Probability Associated with Anderson-Darling Statistic
+        Lorentz Jantschi and Sorana D. Bolboaca, 2018 - Mathematics
+
+        '''
+        e = 2.718281828459045
+        n = len(data)
+
+        ad, critical_values, significance_levels = anderson(
+            data, dist=dist)
+
+        # adjust ad_stat for small sample sizes:
+        s = ad*(1 + 0.75/n + 2.25/(n**2))
+
+        if s >= 0.6:
+            p = e**(1.2937 - 5.709*s + 0.0186*s**2)
+        elif s > 0.34:
+            p = e**(0.9177 - 4.279*s - 1.38*s**2)
+        elif s > 0.2:
+            p = 1 - e**(-8.318 + 42.796*s - 59.938*s**2)
+        elif s <= 0.2:
+            p = 1 - e**(-13.436 + 101.14*s - 223.73*s**2)
+        else:
+            p = None
+
+        return ad, p
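As an illustration (not taken from the package documentation), the mixin can be exercised on its own, since check_normality only needs the data argument; with n >= 20 all four tests contribute to the poll.

```python
# Illustrative only: running the normality poll outside of StatisticalAnalysis.
import numpy as np
from AutoStatLib.normality_tests import NormalityTests

rng = np.random.default_rng(0)
sample = rng.normal(loc=0.0, scale=1.0, size=30)   # n >= 20, so all four tests run

nt = NormalityTests()
sw, lf, ad, ap = nt.check_normality(sample)
print(sw, lf, ad, ap)   # Shapiro-Wilk, Lilliefors, Anderson-Darling, D'Agostino-Pearson verdicts
```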