AutoStatLib 0.2.21__tar.gz → 0.2.22__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- autostatlib-0.2.22/PKG-INFO +192 -0
- {autostatlib-0.2.21 → autostatlib-0.2.22}/pyproject.toml +4 -3
- {autostatlib-0.2.21 → autostatlib-0.2.22}/src/AutoStatLib/AutoStatLib.py +20 -12
- {autostatlib-0.2.21 → autostatlib-0.2.22}/src/AutoStatLib/StatPlots.py +1 -1
- {autostatlib-0.2.21 → autostatlib-0.2.22}/src/AutoStatLib/_version.py +1 -1
- {autostatlib-0.2.21 → autostatlib-0.2.22}/src/AutoStatLib/helpers.py +9 -7
- {autostatlib-0.2.21 → autostatlib-0.2.22}/src/AutoStatLib/statistical_tests.py +2 -0
- autostatlib-0.2.22/src/AutoStatLib.egg-info/PKG-INFO +192 -0
- autostatlib-0.2.21/PKG-INFO +0 -697
- autostatlib-0.2.21/src/AutoStatLib.egg-info/PKG-INFO +0 -697
- {autostatlib-0.2.21 → autostatlib-0.2.22}/LICENSE +0 -0
- {autostatlib-0.2.21 → autostatlib-0.2.22}/MANIFEST.in +0 -0
- {autostatlib-0.2.21 → autostatlib-0.2.22}/README.md +0 -0
- {autostatlib-0.2.21 → autostatlib-0.2.22}/requirements.txt +0 -0
- {autostatlib-0.2.21 → autostatlib-0.2.22}/setup.cfg +0 -0
- {autostatlib-0.2.21 → autostatlib-0.2.22}/src/AutoStatLib/__init__.py +0 -0
- {autostatlib-0.2.21 → autostatlib-0.2.22}/src/AutoStatLib/__main__.py +0 -0
- {autostatlib-0.2.21 → autostatlib-0.2.22}/src/AutoStatLib/normality_tests.py +0 -0
- {autostatlib-0.2.21 → autostatlib-0.2.22}/src/AutoStatLib/text_formatting.py +0 -0
- {autostatlib-0.2.21 → autostatlib-0.2.22}/src/AutoStatLib.egg-info/SOURCES.txt +0 -0
- {autostatlib-0.2.21 → autostatlib-0.2.22}/src/AutoStatLib.egg-info/dependency_links.txt +0 -0
- {autostatlib-0.2.21 → autostatlib-0.2.22}/src/AutoStatLib.egg-info/requires.txt +0 -0
- {autostatlib-0.2.21 → autostatlib-0.2.22}/src/AutoStatLib.egg-info/top_level.txt +0 -0
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: AutoStatLib
|
|
3
|
+
Version: 0.2.22
|
|
4
|
+
Summary: AutoStatLib - a simple statistical analysis tool
|
|
5
|
+
Author: Stemonitis, SciWare LLC
|
|
6
|
+
Author-email: konung-yaropolk <yaropolk1995@gmail.com>
|
|
7
|
+
License-Expression: LGPL-2.1-or-later
|
|
8
|
+
Project-URL: Homepage, https://github.com/konung-yaropolk/AutoStatLib
|
|
9
|
+
Project-URL: Repository, https://github.com/konung-yaropolk/AutoStatLib.git
|
|
10
|
+
Project-URL: Issues, https://github.com/konung-yaropolk/AutoStatLib/issues
|
|
11
|
+
Keywords: Science,Statistics
|
|
12
|
+
Classifier: Programming Language :: Python
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Development Status :: 4 - Beta
|
|
17
|
+
Classifier: Intended Audience :: Developers
|
|
18
|
+
Classifier: Intended Audience :: Science/Research
|
|
19
|
+
Classifier: Natural Language :: English
|
|
20
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
21
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
22
|
+
Classifier: Topic :: Scientific/Engineering
|
|
23
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
24
|
+
Requires-Python: >=3.10
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
26
|
+
License-File: LICENSE
|
|
27
|
+
Requires-Dist: numpy
|
|
28
|
+
Requires-Dist: scipy
|
|
29
|
+
Requires-Dist: statsmodels
|
|
30
|
+
Requires-Dist: matplotlib
|
|
31
|
+
Requires-Dist: seaborn
|
|
32
|
+
Requires-Dist: scikit-posthocs
|
|
33
|
+
Requires-Dist: pandas
|
|
34
|
+
Dynamic: license-file
|
|
35
|
+
|
|
36
|
+
# AutoStatLib - python library for automated statistical analysis
|
|
37
|
+
|
|
38
|
+
[](https://pypi.org/project/AutoStatLib)
|
|
39
|
+
[](https://github.com/konung-yaropolk/AutoStatLib)
|
|
40
|
+
[](https://pypi.org/project/AutoStatLib)
|
|
41
|
+
[](https://pypi.org/project/AutoStatLib)
|
|
42
|
+
[](https://pypi.org/project/AutoStatLib)
|
|
43
|
+
|
|
44
|
+
### To install run the command:
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
pip install autostatlib
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
### Example use case:
|
|
51
|
+
|
|
52
|
+
See the /demo directory on Git repo or
|
|
53
|
+
use the following example:
|
|
54
|
+
|
|
55
|
+
```python
|
|
56
|
+
import numpy as np
|
|
57
|
+
import AutoStatLib
|
|
58
|
+
|
|
59
|
+
# generate random data:
|
|
60
|
+
groups = 2
|
|
61
|
+
n = 30
|
|
62
|
+
|
|
63
|
+
# normal data
|
|
64
|
+
data_norm = [list(np.random.normal(.5*i + 4, abs(1-.2*i), n))
|
|
65
|
+
for i in range(groups)]
|
|
66
|
+
|
|
67
|
+
# non-normal data
|
|
68
|
+
data_uniform = [list(np.random.uniform(i+3, i+1, n)) for i in range(groups)]
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
# set the parameters:
|
|
72
|
+
paired = False # is groups dependent or not
|
|
73
|
+
tails = 2 # two-tailed or one-tailed result
|
|
74
|
+
popmean = 0 # population mean - only for single-sample tests needed
|
|
75
|
+
|
|
76
|
+
# initiate the analysis
|
|
77
|
+
analysis = AutoStatLib.StatisticalAnalysis(
|
|
78
|
+
data_norm, paired=paired, tails=tails, popmean=popmean)
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
Now you can perform automated statistical test selection:
|
|
82
|
+
|
|
83
|
+
```python
|
|
84
|
+
analysis.RunAuto()
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
or you can choose specific tests:
|
|
88
|
+
|
|
89
|
+
```python
|
|
90
|
+
# 2 groups independent:
|
|
91
|
+
analysis.RunTtest()
|
|
92
|
+
analysis.RunMannWhitney()
|
|
93
|
+
|
|
94
|
+
# 2 groups paired:
|
|
95
|
+
analysis.RunTtestPaired()
|
|
96
|
+
analysis.RunWilcoxon()
|
|
97
|
+
|
|
98
|
+
# 3 or more independent groups comparison:
|
|
99
|
+
analysis.RunOnewayAnova()
|
|
100
|
+
analysis.RunKruskalWallis()
|
|
101
|
+
|
|
102
|
+
# 3 or more dependent groups comparison:
|
|
103
|
+
analysis.RunOnewayAnovaRM()
|
|
104
|
+
analysis.RunFriedman()
|
|
105
|
+
|
|
106
|
+
# single group tests:
|
|
107
|
+
analysis.RunTtestSingleSample()
|
|
108
|
+
analysis.RunWilcoxonSingleSample()
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
Test summary will be printed to the console.
|
|
112
|
+
You can also get it as a python string via *GetSummary()* method.
|
|
113
|
+
|
|
114
|
+
---
|
|
115
|
+
|
|
116
|
+
Test results are accessible as a dictionary via *GetResult()* method:
|
|
117
|
+
|
|
118
|
+
```python
|
|
119
|
+
results = analysis.GetResult()
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
The results dictionary keys with their corresponding value types:
|
|
123
|
+
|
|
124
|
+
```
|
|
125
|
+
{
|
|
126
|
+
'p_value' : String
|
|
127
|
+
'Significance(p<0.05)' : Boolean
|
|
128
|
+
'Stars_Printed' : String
|
|
129
|
+
'Test_Name' : String
|
|
130
|
+
'Groups_Compared' : Integer
|
|
131
|
+
'Population_Mean' : Float (taken from the input)
|
|
132
|
+
'Data_Normaly_Distributed' : Boolean
|
|
133
|
+
'Parametric_Test_Applied' : Boolean
|
|
134
|
+
'Paired_Test_Applied' : Boolean
|
|
135
|
+
'Tails' : Integer (taken from the input)
|
|
136
|
+
'p_value_exact' : Float
|
|
137
|
+
'Stars' : Integer
|
|
138
|
+
'Warnings' : String
|
|
139
|
+
'Groups_N' : List of integers
|
|
140
|
+
'Groups_Median' : List of floats
|
|
141
|
+
'Groups_Mean' : List of floats
|
|
142
|
+
'Groups_SD' : List of floats
|
|
143
|
+
'Groups_SE' : List of floats
|
|
144
|
+
'Samples' : List of input values by groups
|
|
145
|
+
(taken from the input)
|
|
146
|
+
'Posthoc_Matrix' : 2D List of floats
|
|
147
|
+
'Posthoc_Matrix_bool' : 2D List of Boolean
|
|
148
|
+
'Posthoc_Matrix_printed': 2D List of String
|
|
149
|
+
'Posthoc_Matrix_stars': 2D List of String
|
|
150
|
+
}
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
If errors occurred, *GetResult()* returns an empty dictionary
|
|
154
|
+
|
|
155
|
+
---
|
|
156
|
+
|
|
157
|
+
## Alpha dev status.
|
|
158
|
+
|
|
159
|
+
### TODO:
|
|
160
|
+
|
|
161
|
+
-- Anova: posthocs
|
|
162
|
+
-- Anova: add 2-way anova and 3-way anova
|
|
163
|
+
-- one-way Anova: add repeated measures (for normal dependent values) with and without Geisser-Greenhouse correction
|
|
164
|
+
-- one-way Anova: add Brown-Forsythe and Welch (for normal independent values with unequal SDs between groups)
|
|
165
|
+
-- paired T-test: add ratio-paired t-test (ratios of paired values are consistent)
|
|
166
|
+
-- add Welch test (for norm data unequal variances)
|
|
167
|
+
-- add Kolmogorov-Smirnov test (unpaired nonparametric 2 sample, compare cumulative distributions)
|
|
168
|
+
-- add independent t-test with Welch correction (do not assume equal SDs in groups)
|
|
169
|
+
-- add correlation test, correlation diagram
|
|
170
|
+
-- add linear regression, regression diagram
|
|
171
|
+
-- add QQ plot
|
|
172
|
+
-- n-sample tests: add onetail option
|
|
173
|
+
|
|
174
|
+
✅ done -- detailed normality test results
|
|
175
|
+
✅ done -- added posthoc: Kruskal-Wallis Dunn's multiple comparisons
|
|
176
|
+
|
|
177
|
+
tests check:
|
|
178
|
+
1-sample:
|
|
179
|
+
✅ok --Wilcoxon 2,1 tails
|
|
180
|
+
✅ok --t-tests 2,1 tails
|
|
181
|
+
|
|
182
|
+
2-sample:
|
|
183
|
+
✅ok --Wilcoxon 2,1 tails
|
|
184
|
+
✅ok --Mann-whitney 2,1 tails
|
|
185
|
+
✅ok --t-tests 2,1 tails
|
|
186
|
+
|
|
187
|
+
n-sample:
|
|
188
|
+
✅ok --Kruskal-Wallis 2 tail
|
|
189
|
+
✅ok --Dunn's multiple comparisons
|
|
190
|
+
✅ok --Friedman 2 tail
|
|
191
|
+
✅ok --one-way ANOVA 2-tailed
|
|
192
|
+
✅ok --Tukey's multiple comparisons
|
|
@@ -5,7 +5,8 @@ build-backend = "setuptools.build_meta"
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "AutoStatLib"
|
|
7
7
|
dynamic = ["version", "dependencies"]
|
|
8
|
-
license =
|
|
8
|
+
license = "LGPL-2.1-or-later"
|
|
9
|
+
# license = {file = "LICENSE"}
|
|
9
10
|
authors = [
|
|
10
11
|
{ name="konung-yaropolk", email="yaropolk1995@gmail.com" },
|
|
11
12
|
{ name="Stemonitis"},
|
|
@@ -18,8 +19,8 @@ requires-python = ">=3.10"
|
|
|
18
19
|
classifiers = [
|
|
19
20
|
"Programming Language :: Python",
|
|
20
21
|
"Programming Language :: Python :: 3",
|
|
21
|
-
"Programming Language :: Python :: 3.
|
|
22
|
-
"License :: OSI Approved :: GNU Lesser General Public License v2 or later (LGPLv2+)",
|
|
22
|
+
"Programming Language :: Python :: 3.10",
|
|
23
|
+
# "License :: OSI Approved :: GNU Lesser General Public License v2 or later (LGPLv2+)",
|
|
23
24
|
"Operating System :: OS Independent",
|
|
24
25
|
"Development Status :: 4 - Beta",
|
|
25
26
|
"Intended Audience :: Developers",
|
|
@@ -30,7 +30,7 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
|
|
|
30
30
|
self.verbose = verbose
|
|
31
31
|
self.n_groups = len(self.groups_list)
|
|
32
32
|
self.groups_name = [groups_name[i % len(groups_name)]
|
|
33
|
-
|
|
33
|
+
for i in range(self.n_groups)] if groups_name and groups_name != [''] else [f'Group {i+1}' for i in range(self.n_groups)]
|
|
34
34
|
|
|
35
35
|
self.warning_flag_non_numeric_data = False
|
|
36
36
|
self.summary = 'AutoStatLib v{}'.format(__version__)
|
|
@@ -99,7 +99,6 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
|
|
|
99
99
|
'no_pop_mean_set': '\nWarning: No Population Mean was set up for single-sample test, used default 0 value.\n The results might be skewed. \n Please, set the Population Mean and run the test again.\n',
|
|
100
100
|
}
|
|
101
101
|
|
|
102
|
-
|
|
103
102
|
def run_test(self, test='auto'):
|
|
104
103
|
|
|
105
104
|
# reset values from previous tests
|
|
@@ -111,6 +110,7 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
|
|
|
111
110
|
self.test_id = None
|
|
112
111
|
self.test_stat = None
|
|
113
112
|
self.p_value = None
|
|
113
|
+
self.parametric = None
|
|
114
114
|
self.posthoc_matrix_df = None
|
|
115
115
|
self.posthoc_matrix = []
|
|
116
116
|
self.posthoc_name = ''
|
|
@@ -128,7 +128,7 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
|
|
|
128
128
|
# delete the empty cols from input
|
|
129
129
|
self.data = [col for col in self.data if any(
|
|
130
130
|
x is not None for x in col)]
|
|
131
|
-
|
|
131
|
+
|
|
132
132
|
# User input assertion block
|
|
133
133
|
try:
|
|
134
134
|
assert self.data, 'There is no input data'
|
|
@@ -137,9 +137,9 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
|
|
|
137
137
|
assert all(len(
|
|
138
138
|
group) >= 4 for group in self.data), 'Each group must contain at least four values'
|
|
139
139
|
assert not (self.paired is True
|
|
140
|
-
and not all(len(lst) == len(self.data[0]) for lst in self.data)), 'Paired
|
|
140
|
+
and not all(len(lst) == len(self.data[0]) for lst in self.data)), 'Paired samples must have the same length'
|
|
141
141
|
assert not (test in self.test_ids_dependent
|
|
142
|
-
and not all(len(lst) == len(self.data[0]) for lst in self.data)), '
|
|
142
|
+
and not all(len(lst) == len(self.data[0]) for lst in self.data)), 'Samples must have the same length for the dependend statistics test'
|
|
143
143
|
assert not (test in self.test_ids_2sample
|
|
144
144
|
and self.n_groups != 2), f'Only two groups of data must be given for 2-groups tests, got {self.n_groups}'
|
|
145
145
|
assert not (test in self.test_ids_1sample
|
|
@@ -147,11 +147,19 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
|
|
|
147
147
|
assert not (test in self.test_ids_3sample
|
|
148
148
|
and self.n_groups < 3), f'At least three groups of data must be given for multi-groups tests, got {self.n_groups}'
|
|
149
149
|
except AssertionError as error:
|
|
150
|
-
self.
|
|
151
|
-
self.
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
150
|
+
self.run_test_by_id('none')
|
|
151
|
+
self.results = self.create_results_dict()
|
|
152
|
+
|
|
153
|
+
# Print errmessage:
|
|
154
|
+
if self.verbose:
|
|
155
|
+
self.log('\nTest :', test)
|
|
156
|
+
self.log('Error :', error)
|
|
157
|
+
self.log('-'*67 + '\n')
|
|
158
|
+
self.error = True
|
|
159
|
+
print(self.summary)
|
|
160
|
+
else:
|
|
161
|
+
print('AutoStatLib Error :', error)
|
|
162
|
+
|
|
155
163
|
return
|
|
156
164
|
|
|
157
165
|
# Print the data
|
|
@@ -165,7 +173,7 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
|
|
|
165
173
|
self.log('Shapiro-Wilk, Lilliefors, Anderson-Darling, D\'Agostino-Pearson')
|
|
166
174
|
self.log(
|
|
167
175
|
'[+] -positive, [-] -negative, [ ] -too small group for the test\n')
|
|
168
|
-
self.log('
|
|
176
|
+
self.log(' SW LF AD AP ')
|
|
169
177
|
for i, data in enumerate(self.data):
|
|
170
178
|
poll = self.check_normality(data)
|
|
171
179
|
isnormal = any(poll)
|
|
@@ -173,7 +181,7 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
|
|
|
173
181
|
'+' if x is True else '-' if x is False else ' ' if x is None else 'e' for x in poll)
|
|
174
182
|
self.normals.append(isnormal)
|
|
175
183
|
self.log(
|
|
176
|
-
f'
|
|
184
|
+
f' {self.groups_name[i].ljust(11, ' ')[:11]}: {poll_print[0]} {poll_print[1]} {poll_print[2]} {poll_print[3]} so disrtibution seems {"normal" if isnormal else "not normal"}')
|
|
177
185
|
self.parametric = all(self.normals)
|
|
178
186
|
|
|
179
187
|
# print test choosen
|
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
# AutoStatLib package version:
|
|
2
|
-
__version__ = "0.2.
|
|
2
|
+
__version__ = "0.2.22"
|
|
@@ -47,21 +47,23 @@ class Helpers():
|
|
|
47
47
|
|
|
48
48
|
def create_results_dict(self) -> dict:
|
|
49
49
|
|
|
50
|
-
self.stars_int = self.make_stars(
|
|
51
|
-
|
|
50
|
+
self.stars_int = self.make_stars(
|
|
51
|
+
self.p_value.item()) if self.p_value else None
|
|
52
|
+
self.stars_str = self.make_stars_printed(
|
|
53
|
+
self.stars_int) if self.p_value else ''
|
|
52
54
|
|
|
53
55
|
return {
|
|
54
|
-
'p_value': self.make_p_value_printed(self.p_value.item()),
|
|
55
|
-
'Significance(p<0.05)': True if self.p_value.item() < 0.05 else False,
|
|
56
|
+
'p_value': self.make_p_value_printed(self.p_value.item()) if self.p_value else None,
|
|
57
|
+
'Significance(p<0.05)': True if self.p_value and self.p_value.item() < 0.05 else False,
|
|
56
58
|
'Stars_Printed': self.stars_str,
|
|
57
59
|
'Test_Name': self.test_name,
|
|
58
60
|
'Groups_Compared': self.n_groups,
|
|
59
61
|
'Population_Mean': self.popmean if self.n_groups == 1 else 'N/A',
|
|
60
|
-
'Data_Normaly_Distributed': self.parametric,
|
|
62
|
+
'Data_Normaly_Distributed': self.parametric if self.p_value else None,
|
|
61
63
|
'Parametric_Test_Applied': True if self.test_id in self.test_ids_parametric else False,
|
|
62
|
-
'Paired_Test_Applied': self.paired,
|
|
64
|
+
'Paired_Test_Applied': self.paired if self.p_value else None,
|
|
63
65
|
'Tails': self.tails,
|
|
64
|
-
'p_value_exact': self.p_value.item(),
|
|
66
|
+
'p_value_exact': self.p_value.item() if self.p_value else None,
|
|
65
67
|
'Stars': self.stars_int,
|
|
66
68
|
# 'Stat_Value': self.test_stat.item(),
|
|
67
69
|
'Warnings': self.warnings,
|
|
@@ -60,6 +60,7 @@ class StatisticalTests():
|
|
|
60
60
|
't_test_single_sample': 'Single-sample t-test',
|
|
61
61
|
'wilcoxon': 'Wilcoxon signed-rank test',
|
|
62
62
|
'wilcoxon_single_sample': 'Wilcoxon signed-rank test for single sample',
|
|
63
|
+
'none': 'No statictical tests preformed'
|
|
63
64
|
}
|
|
64
65
|
|
|
65
66
|
match test_id:
|
|
@@ -73,6 +74,7 @@ class StatisticalTests():
|
|
|
73
74
|
case 't_test_single_sample': stat, p_value = self.t_test_single_sample()
|
|
74
75
|
case 'wilcoxon': stat, p_value = self.wilcoxon()
|
|
75
76
|
case 'wilcoxon_single_sample': stat, p_value = self.wilcoxon_single_sample()
|
|
77
|
+
case 'none': stat, p_value = (None, None)
|
|
76
78
|
|
|
77
79
|
if test_id in self.test_ids_dependent:
|
|
78
80
|
self.paired = True
|
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: AutoStatLib
|
|
3
|
+
Version: 0.2.22
|
|
4
|
+
Summary: AutoStatLib - a simple statistical analysis tool
|
|
5
|
+
Author: Stemonitis, SciWare LLC
|
|
6
|
+
Author-email: konung-yaropolk <yaropolk1995@gmail.com>
|
|
7
|
+
License-Expression: LGPL-2.1-or-later
|
|
8
|
+
Project-URL: Homepage, https://github.com/konung-yaropolk/AutoStatLib
|
|
9
|
+
Project-URL: Repository, https://github.com/konung-yaropolk/AutoStatLib.git
|
|
10
|
+
Project-URL: Issues, https://github.com/konung-yaropolk/AutoStatLib/issues
|
|
11
|
+
Keywords: Science,Statistics
|
|
12
|
+
Classifier: Programming Language :: Python
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Development Status :: 4 - Beta
|
|
17
|
+
Classifier: Intended Audience :: Developers
|
|
18
|
+
Classifier: Intended Audience :: Science/Research
|
|
19
|
+
Classifier: Natural Language :: English
|
|
20
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
21
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
22
|
+
Classifier: Topic :: Scientific/Engineering
|
|
23
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
24
|
+
Requires-Python: >=3.10
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
26
|
+
License-File: LICENSE
|
|
27
|
+
Requires-Dist: numpy
|
|
28
|
+
Requires-Dist: scipy
|
|
29
|
+
Requires-Dist: statsmodels
|
|
30
|
+
Requires-Dist: matplotlib
|
|
31
|
+
Requires-Dist: seaborn
|
|
32
|
+
Requires-Dist: scikit-posthocs
|
|
33
|
+
Requires-Dist: pandas
|
|
34
|
+
Dynamic: license-file
|
|
35
|
+
|
|
36
|
+
# AutoStatLib - python library for automated statistical analysis
|
|
37
|
+
|
|
38
|
+
[](https://pypi.org/project/AutoStatLib)
|
|
39
|
+
[](https://github.com/konung-yaropolk/AutoStatLib)
|
|
40
|
+
[](https://pypi.org/project/AutoStatLib)
|
|
41
|
+
[](https://pypi.org/project/AutoStatLib)
|
|
42
|
+
[](https://pypi.org/project/AutoStatLib)
|
|
43
|
+
|
|
44
|
+
### To install run the command:
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
pip install autostatlib
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
### Example use case:
|
|
51
|
+
|
|
52
|
+
See the /demo directory on Git repo or
|
|
53
|
+
use the following example:
|
|
54
|
+
|
|
55
|
+
```python
|
|
56
|
+
import numpy as np
|
|
57
|
+
import AutoStatLib
|
|
58
|
+
|
|
59
|
+
# generate random data:
|
|
60
|
+
groups = 2
|
|
61
|
+
n = 30
|
|
62
|
+
|
|
63
|
+
# normal data
|
|
64
|
+
data_norm = [list(np.random.normal(.5*i + 4, abs(1-.2*i), n))
|
|
65
|
+
for i in range(groups)]
|
|
66
|
+
|
|
67
|
+
# non-normal data
|
|
68
|
+
data_uniform = [list(np.random.uniform(i+3, i+1, n)) for i in range(groups)]
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
# set the parameters:
|
|
72
|
+
paired = False # is groups dependent or not
|
|
73
|
+
tails = 2 # two-tailed or one-tailed result
|
|
74
|
+
popmean = 0 # population mean - only for single-sample tests needed
|
|
75
|
+
|
|
76
|
+
# initiate the analysis
|
|
77
|
+
analysis = AutoStatLib.StatisticalAnalysis(
|
|
78
|
+
data_norm, paired=paired, tails=tails, popmean=popmean)
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
Now you can perform automated statistical test selection:
|
|
82
|
+
|
|
83
|
+
```python
|
|
84
|
+
analysis.RunAuto()
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
or you can choose specific tests:
|
|
88
|
+
|
|
89
|
+
```python
|
|
90
|
+
# 2 groups independent:
|
|
91
|
+
analysis.RunTtest()
|
|
92
|
+
analysis.RunMannWhitney()
|
|
93
|
+
|
|
94
|
+
# 2 groups paired:
|
|
95
|
+
analysis.RunTtestPaired()
|
|
96
|
+
analysis.RunWilcoxon()
|
|
97
|
+
|
|
98
|
+
# 3 or more independent groups comparison:
|
|
99
|
+
analysis.RunOnewayAnova()
|
|
100
|
+
analysis.RunKruskalWallis()
|
|
101
|
+
|
|
102
|
+
# 3 or more dependent groups comparison:
|
|
103
|
+
analysis.RunOnewayAnovaRM()
|
|
104
|
+
analysis.RunFriedman()
|
|
105
|
+
|
|
106
|
+
# single group tests:
|
|
107
|
+
analysis.RunTtestSingleSample()
|
|
108
|
+
analysis.RunWilcoxonSingleSample()
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
Test summary will be printed to the console.
|
|
112
|
+
You can also get it as a python string via *GetSummary()* method.
|
|
113
|
+
|
|
114
|
+
---
|
|
115
|
+
|
|
116
|
+
Test results are accessible as a dictionary via *GetResult()* method:
|
|
117
|
+
|
|
118
|
+
```python
|
|
119
|
+
results = analysis.GetResult()
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
The results dictionary keys with their corresponding value types:
|
|
123
|
+
|
|
124
|
+
```
|
|
125
|
+
{
|
|
126
|
+
'p_value' : String
|
|
127
|
+
'Significance(p<0.05)' : Boolean
|
|
128
|
+
'Stars_Printed' : String
|
|
129
|
+
'Test_Name' : String
|
|
130
|
+
'Groups_Compared' : Integer
|
|
131
|
+
'Population_Mean' : Float (taken from the input)
|
|
132
|
+
'Data_Normaly_Distributed' : Boolean
|
|
133
|
+
'Parametric_Test_Applied' : Boolean
|
|
134
|
+
'Paired_Test_Applied' : Boolean
|
|
135
|
+
'Tails' : Integer (taken from the input)
|
|
136
|
+
'p_value_exact' : Float
|
|
137
|
+
'Stars' : Integer
|
|
138
|
+
'Warnings' : String
|
|
139
|
+
'Groups_N' : List of integers
|
|
140
|
+
'Groups_Median' : List of floats
|
|
141
|
+
'Groups_Mean' : List of floats
|
|
142
|
+
'Groups_SD' : List of floats
|
|
143
|
+
'Groups_SE' : List of floats
|
|
144
|
+
'Samples' : List of input values by groups
|
|
145
|
+
(taken from the input)
|
|
146
|
+
'Posthoc_Matrix' : 2D List of floats
|
|
147
|
+
'Posthoc_Matrix_bool' : 2D List of Boolean
|
|
148
|
+
'Posthoc_Matrix_printed': 2D List of String
|
|
149
|
+
'Posthoc_Matrix_stars': 2D List of String
|
|
150
|
+
}
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
If errors occurred, *GetResult()* returns an empty dictionary
|
|
154
|
+
|
|
155
|
+
---
|
|
156
|
+
|
|
157
|
+
## Alpha dev status.
|
|
158
|
+
|
|
159
|
+
### TODO:
|
|
160
|
+
|
|
161
|
+
-- Anova: posthocs
|
|
162
|
+
-- Anova: add 2-way anova and 3-way anova
|
|
163
|
+
-- one-way Anova: add repeated measures (for normal dependent values) with and without Geisser-Greenhouse correction
|
|
164
|
+
-- one-way Anova: add Brown-Forsythe and Welch (for normal independent values with unequal SDs between groups)
|
|
165
|
+
-- paired T-test: add ratio-paired t-test (ratios of paired values are consistent)
|
|
166
|
+
-- add Welch test (for norm data unequal variances)
|
|
167
|
+
-- add Kolmogorov-Smirnov test (unpaired nonparametric 2 sample, compare cumulative distributions)
|
|
168
|
+
-- add independent t-test with Welch correction (do not assume equal SDs in groups)
|
|
169
|
+
-- add correlation test, correlation diagram
|
|
170
|
+
-- add linear regression, regression diagram
|
|
171
|
+
-- add QQ plot
|
|
172
|
+
-- n-sample tests: add onetail option
|
|
173
|
+
|
|
174
|
+
✅ done -- detailed normality test results
|
|
175
|
+
✅ done -- added posthoc: Kruskal-Wallis Dunn's multiple comparisons
|
|
176
|
+
|
|
177
|
+
tests check:
|
|
178
|
+
1-sample:
|
|
179
|
+
✅ok --Wilcoxon 2,1 tails
|
|
180
|
+
✅ok --t-tests 2,1 tails
|
|
181
|
+
|
|
182
|
+
2-sample:
|
|
183
|
+
✅ok --Wilcoxon 2,1 tails
|
|
184
|
+
✅ok --Mann-whitney 2,1 tails
|
|
185
|
+
✅ok --t-tests 2,1 tails
|
|
186
|
+
|
|
187
|
+
n-sample:
|
|
188
|
+
✅ok --Kruskal-Wallis 2 tail
|
|
189
|
+
✅ok --Dunn's multiple comparisons
|
|
190
|
+
✅ok --Friedman 2 tail
|
|
191
|
+
✅ok --one-way ANOVA 2-tailed
|
|
192
|
+
✅ok --Tukey's multiple comparisons
|