AutoStatLib 0.2.21__tar.gz → 0.2.23__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- autostatlib-0.2.23/PKG-INFO +192 -0
- {autostatlib-0.2.21 → autostatlib-0.2.23}/pyproject.toml +4 -3
- {autostatlib-0.2.21 → autostatlib-0.2.23}/src/AutoStatLib/AutoStatLib.py +28 -14
- {autostatlib-0.2.21 → autostatlib-0.2.23}/src/AutoStatLib/StatPlots.py +251 -71
- {autostatlib-0.2.21 → autostatlib-0.2.23}/src/AutoStatLib/_version.py +1 -1
- {autostatlib-0.2.21 → autostatlib-0.2.23}/src/AutoStatLib/helpers.py +10 -7
- {autostatlib-0.2.21 → autostatlib-0.2.23}/src/AutoStatLib/statistical_tests.py +2 -0
- autostatlib-0.2.23/src/AutoStatLib.egg-info/PKG-INFO +192 -0
- autostatlib-0.2.21/PKG-INFO +0 -697
- autostatlib-0.2.21/src/AutoStatLib.egg-info/PKG-INFO +0 -697
- {autostatlib-0.2.21 → autostatlib-0.2.23}/LICENSE +0 -0
- {autostatlib-0.2.21 → autostatlib-0.2.23}/MANIFEST.in +0 -0
- {autostatlib-0.2.21 → autostatlib-0.2.23}/README.md +0 -0
- {autostatlib-0.2.21 → autostatlib-0.2.23}/requirements.txt +0 -0
- {autostatlib-0.2.21 → autostatlib-0.2.23}/setup.cfg +0 -0
- {autostatlib-0.2.21 → autostatlib-0.2.23}/src/AutoStatLib/__init__.py +0 -0
- {autostatlib-0.2.21 → autostatlib-0.2.23}/src/AutoStatLib/__main__.py +0 -0
- {autostatlib-0.2.21 → autostatlib-0.2.23}/src/AutoStatLib/normality_tests.py +0 -0
- {autostatlib-0.2.21 → autostatlib-0.2.23}/src/AutoStatLib/text_formatting.py +0 -0
- {autostatlib-0.2.21 → autostatlib-0.2.23}/src/AutoStatLib.egg-info/SOURCES.txt +0 -0
- {autostatlib-0.2.21 → autostatlib-0.2.23}/src/AutoStatLib.egg-info/dependency_links.txt +0 -0
- {autostatlib-0.2.21 → autostatlib-0.2.23}/src/AutoStatLib.egg-info/requires.txt +0 -0
- {autostatlib-0.2.21 → autostatlib-0.2.23}/src/AutoStatLib.egg-info/top_level.txt +0 -0
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: AutoStatLib
|
|
3
|
+
Version: 0.2.23
|
|
4
|
+
Summary: AutoStatLib - a simple statistical analysis tool
|
|
5
|
+
Author: Stemonitis, SciWare LLC
|
|
6
|
+
Author-email: konung-yaropolk <yaropolk1995@gmail.com>
|
|
7
|
+
License-Expression: LGPL-2.1-or-later
|
|
8
|
+
Project-URL: Homepage, https://github.com/konung-yaropolk/AutoStatLib
|
|
9
|
+
Project-URL: Repository, https://github.com/konung-yaropolk/AutoStatLib.git
|
|
10
|
+
Project-URL: Issues, https://github.com/konung-yaropolk/AutoStatLib/issues
|
|
11
|
+
Keywords: Science,Statistics
|
|
12
|
+
Classifier: Programming Language :: Python
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Development Status :: 4 - Beta
|
|
17
|
+
Classifier: Intended Audience :: Developers
|
|
18
|
+
Classifier: Intended Audience :: Science/Research
|
|
19
|
+
Classifier: Natural Language :: English
|
|
20
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
21
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
22
|
+
Classifier: Topic :: Scientific/Engineering
|
|
23
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
24
|
+
Requires-Python: >=3.10
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
26
|
+
License-File: LICENSE
|
|
27
|
+
Requires-Dist: numpy
|
|
28
|
+
Requires-Dist: scipy
|
|
29
|
+
Requires-Dist: statsmodels
|
|
30
|
+
Requires-Dist: matplotlib
|
|
31
|
+
Requires-Dist: seaborn
|
|
32
|
+
Requires-Dist: scikit-posthocs
|
|
33
|
+
Requires-Dist: pandas
|
|
34
|
+
Dynamic: license-file
|
|
35
|
+
|
|
36
|
+
# AutoStatLib - python library for automated statistical analysis
|
|
37
|
+
|
|
38
|
+
[](https://pypi.org/project/AutoStatLib)
|
|
39
|
+
[](https://github.com/konung-yaropolk/AutoStatLib)
|
|
40
|
+
[](https://pypi.org/project/AutoStatLib)
|
|
41
|
+
[](https://pypi.org/project/AutoStatLib)
|
|
42
|
+
[](https://pypi.org/project/AutoStatLib)
|
|
43
|
+
|
|
44
|
+
### To install run the command:
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
pip install autostatlib
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
### Example use case:
|
|
51
|
+
|
|
52
|
+
See the /demo directory on Git repo or
|
|
53
|
+
use the following example:
|
|
54
|
+
|
|
55
|
+
```python
|
|
56
|
+
import numpy as np
|
|
57
|
+
import AutoStatLib
|
|
58
|
+
|
|
59
|
+
# generate random data:
|
|
60
|
+
groups = 2
|
|
61
|
+
n = 30
|
|
62
|
+
|
|
63
|
+
# normal data
|
|
64
|
+
data_norm = [list(np.random.normal(.5*i + 4, abs(1-.2*i), n))
|
|
65
|
+
for i in range(groups)]
|
|
66
|
+
|
|
67
|
+
# non-normal data
|
|
68
|
+
data_uniform = [list(np.random.uniform(i+3, i+1, n)) for i in range(groups)]
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
# set the parameters:
|
|
72
|
+
paired = False # are the groups dependent or not
|
|
73
|
+
tails = 2 # two-tailed or one-tailed result
|
|
74
|
+
popmean = 0 # population mean - needed only for single-sample tests
|
|
75
|
+
|
|
76
|
+
# initiate the analysis
|
|
77
|
+
analysis = AutoStatLib.StatisticalAnalysis(
|
|
78
|
+
data_norm, paired=paired, tails=tails, popmean=popmean)
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
Now you can perform automated statistical test selection:
|
|
82
|
+
|
|
83
|
+
```python
|
|
84
|
+
analysis.RunAuto()
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
or you can choose specific tests:
|
|
88
|
+
|
|
89
|
+
```python
|
|
90
|
+
# 2 groups independent:
|
|
91
|
+
analysis.RunTtest()
|
|
92
|
+
analysis.RunMannWhitney()
|
|
93
|
+
|
|
94
|
+
# 2 groups paired:
|
|
95
|
+
analysis.RunTtestPaired()
|
|
96
|
+
analysis.RunWilcoxon()
|
|
97
|
+
|
|
98
|
+
# 3 or more independent groups comparison:
|
|
99
|
+
analysis.RunOnewayAnova()
|
|
100
|
+
analysis.RunKruskalWallis()
|
|
101
|
+
|
|
102
|
+
# 3 or more dependent groups comparison:
|
|
103
|
+
analysis.RunOnewayAnovaRM()
|
|
104
|
+
analysis.RunFriedman()
|
|
105
|
+
|
|
106
|
+
# single group tests:
|
|
107
|
+
analysis.RunTtestSingleSample()
|
|
108
|
+
analysis.RunWilcoxonSingleSample()
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
Test summary will be printed to the console.
|
|
112
|
+
You can also get it as a Python string via the *GetSummary()* method.
|
|
113
|
+
|
|
114
|
+
---
|
|
115
|
+
|
|
116
|
+
Test results are accessible as a dictionary via the *GetResult()* method:
|
|
117
|
+
|
|
118
|
+
```python
|
|
119
|
+
results = analysis.GetResult()
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
The keys of the results dictionary and their corresponding value types:
|
|
123
|
+
|
|
124
|
+
```
|
|
125
|
+
{
|
|
126
|
+
'p_value' : String
|
|
127
|
+
'Significance(p<0.05)' : Boolean
|
|
128
|
+
'Stars_Printed' : String
|
|
129
|
+
'Test_Name' : String
|
|
130
|
+
'Groups_Compared' : Integer
|
|
131
|
+
'Population_Mean' : Float (taken from the input)
|
|
132
|
+
'Data_Normaly_Distributed' : Boolean
|
|
133
|
+
'Parametric_Test_Applied' : Boolean
|
|
134
|
+
'Paired_Test_Applied' : Boolean
|
|
135
|
+
'Tails' : Integer (taken from the input)
|
|
136
|
+
'p_value_exact' : Float
|
|
137
|
+
'Stars' : Integer
|
|
138
|
+
'Warnings' : String
|
|
139
|
+
'Groups_N' : List of integers
|
|
140
|
+
'Groups_Median' : List of floats
|
|
141
|
+
'Groups_Mean' : List of floats
|
|
142
|
+
'Groups_SD' : List of floats
|
|
143
|
+
'Groups_SE' : List of floats
|
|
144
|
+
'Samples' : List of input values by groups
|
|
145
|
+
(taken from the input)
|
|
146
|
+
'Posthoc_Matrix' : 2D List of floats
|
|
147
|
+
'Posthoc_Matrix_bool' : 2D List of Boolean
|
|
148
|
+
'Posthoc_Matrix_printed': 2D List of String
|
|
149
|
+
'Posthoc_Matrix_stars': 2D List of String
|
|
150
|
+
}
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
If errors occurred, *GetResult()* returns an empty dictionary.
|
|
154
|
+
|
|
155
|
+
---
|
|
156
|
+
|
|
157
|
+
## Alpha dev status.
|
|
158
|
+
|
|
159
|
+
### TODO:
|
|
160
|
+
|
|
161
|
+
-- Anova: posthocs
|
|
162
|
+
-- Anova: add 2-way anova and 3-way anova
|
|
163
|
+
-- one-way ANOVA: add repeated measures (for normal dependent values) with and without Geisser-Greenhouse correction
|
|
164
|
+
-- one-way ANOVA: add Brown-Forsythe and Welch (for normal independent values with unequal SDs between groups)
|
|
165
|
+
-- paired T-test: add ratio-paired t-test (ratios of paired values are consistent)
|
|
166
|
+
-- add Welch test (for norm data unequal variances)
|
|
167
|
+
-- add Kolmogorov-Smirnov test (unpaired nonparametric 2 sample, compare cumulative distributions)
|
|
168
|
+
-- add independent t-test with Welch correction (do not assume equal SDs in groups)
|
|
169
|
+
-- add correlation test, correlation diagram
|
|
170
|
+
-- add linear regression, regression diagram
|
|
171
|
+
-- add QQ plot
|
|
172
|
+
-- n-sample tests: add onetail option
|
|
173
|
+
|
|
174
|
+
✅ done -- detailed normality test results
|
|
175
|
+
✅ done -- added posthoc: Kruskal-Wallis Dunn's multiple comparisons
|
|
176
|
+
|
|
177
|
+
tests check:
|
|
178
|
+
1-sample:
|
|
179
|
+
✅ok --Wilcoxon 2,1 tails
|
|
180
|
+
✅ok --t-tests 2,1 tails
|
|
181
|
+
|
|
182
|
+
2-sample:
|
|
183
|
+
✅ok --Wilcoxon 2,1 tails
|
|
184
|
+
✅ok --Mann-Whitney 2,1 tails
|
|
185
|
+
✅ok --t-tests 2,1 tails
|
|
186
|
+
|
|
187
|
+
n-sample:
|
|
188
|
+
✅ok --Kruskal-Wallis 2 tail
|
|
189
|
+
✅ok --Dunn's multiple comparisons
|
|
190
|
+
✅ok --Friedman 2 tail
|
|
191
|
+
✅ok --one-way ANOVA 2-tailed
|
|
192
|
+
✅ok --Tukey's multiple comparisons
|
|
@@ -5,7 +5,8 @@ build-backend = "setuptools.build_meta"
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "AutoStatLib"
|
|
7
7
|
dynamic = ["version", "dependencies"]
|
|
8
|
-
license =
|
|
8
|
+
license = "LGPL-2.1-or-later"
|
|
9
|
+
# license = {file = "LICENSE"}
|
|
9
10
|
authors = [
|
|
10
11
|
{ name="konung-yaropolk", email="yaropolk1995@gmail.com" },
|
|
11
12
|
{ name="Stemonitis"},
|
|
@@ -18,8 +19,8 @@ requires-python = ">=3.10"
|
|
|
18
19
|
classifiers = [
|
|
19
20
|
"Programming Language :: Python",
|
|
20
21
|
"Programming Language :: Python :: 3",
|
|
21
|
-
"Programming Language :: Python :: 3.
|
|
22
|
-
"License :: OSI Approved :: GNU Lesser General Public License v2 or later (LGPLv2+)",
|
|
22
|
+
"Programming Language :: Python :: 3.10",
|
|
23
|
+
# "License :: OSI Approved :: GNU Lesser General Public License v2 or later (LGPLv2+)",
|
|
23
24
|
"Operating System :: OS Independent",
|
|
24
25
|
"Development Status :: 4 - Beta",
|
|
25
26
|
"Intended Audience :: Developers",
|
|
@@ -19,7 +19,9 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
|
|
|
19
19
|
popmean=None,
|
|
20
20
|
posthoc=False,
|
|
21
21
|
verbose=True,
|
|
22
|
-
|
|
22
|
+
raise_errors=False,
|
|
23
|
+
groups_name=[],
|
|
24
|
+
subgrouping=[]):
|
|
23
25
|
self.results = None
|
|
24
26
|
self.error = False
|
|
25
27
|
self.groups_list = groups_list
|
|
@@ -28,10 +30,11 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
|
|
|
28
30
|
self.popmean = popmean
|
|
29
31
|
self.posthoc = posthoc
|
|
30
32
|
self.verbose = verbose
|
|
33
|
+
self.raise_errors = raise_errors
|
|
31
34
|
self.n_groups = len(self.groups_list)
|
|
32
35
|
self.groups_name = [groups_name[i % len(groups_name)]
|
|
33
|
-
|
|
34
|
-
|
|
36
|
+
for i in range(self.n_groups)] if groups_name and groups_name != [''] else [f'Group {i+1}' for i in range(self.n_groups)]
|
|
37
|
+
self.subgrouping = subgrouping if subgrouping else [0]
|
|
35
38
|
self.warning_flag_non_numeric_data = False
|
|
36
39
|
self.summary = 'AutoStatLib v{}'.format(__version__)
|
|
37
40
|
|
|
@@ -99,7 +102,6 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
|
|
|
99
102
|
'no_pop_mean_set': '\nWarning: No Population Mean was set up for single-sample test, used default 0 value.\n The results might be skewed. \n Please, set the Population Mean and run the test again.\n',
|
|
100
103
|
}
|
|
101
104
|
|
|
102
|
-
|
|
103
105
|
def run_test(self, test='auto'):
|
|
104
106
|
|
|
105
107
|
# reset values from previous tests
|
|
@@ -111,6 +113,7 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
|
|
|
111
113
|
self.test_id = None
|
|
112
114
|
self.test_stat = None
|
|
113
115
|
self.p_value = None
|
|
116
|
+
self.parametric = None
|
|
114
117
|
self.posthoc_matrix_df = None
|
|
115
118
|
self.posthoc_matrix = []
|
|
116
119
|
self.posthoc_name = ''
|
|
@@ -128,7 +131,7 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
|
|
|
128
131
|
# delete the empty cols from input
|
|
129
132
|
self.data = [col for col in self.data if any(
|
|
130
133
|
x is not None for x in col)]
|
|
131
|
-
|
|
134
|
+
|
|
132
135
|
# User input assertion block
|
|
133
136
|
try:
|
|
134
137
|
assert self.data, 'There is no input data'
|
|
@@ -137,9 +140,9 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
|
|
|
137
140
|
assert all(len(
|
|
138
141
|
group) >= 4 for group in self.data), 'Each group must contain at least four values'
|
|
139
142
|
assert not (self.paired is True
|
|
140
|
-
and not all(len(lst) == len(self.data[0]) for lst in self.data)), 'Paired
|
|
143
|
+
and not all(len(lst) == len(self.data[0]) for lst in self.data)), 'Paired samples must have the same length'
|
|
141
144
|
assert not (test in self.test_ids_dependent
|
|
142
|
-
and not all(len(lst) == len(self.data[0]) for lst in self.data)), '
|
|
145
|
+
and not all(len(lst) == len(self.data[0]) for lst in self.data)), 'Samples must have the same length for the dependend statistics test'
|
|
143
146
|
assert not (test in self.test_ids_2sample
|
|
144
147
|
and self.n_groups != 2), f'Only two groups of data must be given for 2-groups tests, got {self.n_groups}'
|
|
145
148
|
assert not (test in self.test_ids_1sample
|
|
@@ -147,11 +150,22 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
|
|
|
147
150
|
assert not (test in self.test_ids_3sample
|
|
148
151
|
and self.n_groups < 3), f'At least three groups of data must be given for multi-groups tests, got {self.n_groups}'
|
|
149
152
|
except AssertionError as error:
|
|
150
|
-
self.
|
|
151
|
-
self.
|
|
152
|
-
|
|
153
|
-
self.
|
|
154
|
-
|
|
153
|
+
self.run_test_by_id('none')
|
|
154
|
+
self.results = self.create_results_dict()
|
|
155
|
+
|
|
156
|
+
if self.raise_errors:
|
|
157
|
+
raise ValueError(error)
|
|
158
|
+
|
|
159
|
+
# Print errmessage:
|
|
160
|
+
if self.verbose:
|
|
161
|
+
self.log('\nTest :', test)
|
|
162
|
+
self.log('Error :', error)
|
|
163
|
+
self.log('-'*67 + '\n')
|
|
164
|
+
self.error = True
|
|
165
|
+
print(self.summary)
|
|
166
|
+
else:
|
|
167
|
+
print('AutoStatLib Error :', error)
|
|
168
|
+
|
|
155
169
|
return
|
|
156
170
|
|
|
157
171
|
# Print the data
|
|
@@ -165,7 +179,7 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
|
|
|
165
179
|
self.log('Shapiro-Wilk, Lilliefors, Anderson-Darling, D\'Agostino-Pearson')
|
|
166
180
|
self.log(
|
|
167
181
|
'[+] -positive, [-] -negative, [ ] -too small group for the test\n')
|
|
168
|
-
self.log('
|
|
182
|
+
self.log(' SW LF AD AP ')
|
|
169
183
|
for i, data in enumerate(self.data):
|
|
170
184
|
poll = self.check_normality(data)
|
|
171
185
|
isnormal = any(poll)
|
|
@@ -173,7 +187,7 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
|
|
|
173
187
|
'+' if x is True else '-' if x is False else ' ' if x is None else 'e' for x in poll)
|
|
174
188
|
self.normals.append(isnormal)
|
|
175
189
|
self.log(
|
|
176
|
-
f'
|
|
190
|
+
f' {self.groups_name[i].ljust(11, ' ')[:11]}: {poll_print[0]} {poll_print[1]} {poll_print[2]} {poll_print[3]} so disrtibution seems {"normal" if isnormal else "not normal"}')
|
|
177
191
|
self.parametric = all(self.normals)
|
|
178
192
|
|
|
179
193
|
# print test choosen
|