AutoStatLib 0.2.2__tar.gz → 0.2.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of AutoStatLib might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: AutoStatLib
3
- Version: 0.2.2
3
+ Version: 0.2.6
4
4
  Summary: AutoStatLib - a simple statistical analysis tool
5
5
  Author: Stemonitis, SciWare LLC
6
6
  Author-email: konung-yaropolk <yaropolk1995@gmail.com>
@@ -509,15 +509,16 @@ License: GNU LESSER GENERAL PUBLIC LICENSE
509
509
 
510
510
  That's all there is to it!
511
511
 
512
- Project-URL: Homepage, https://github.com/konung-yaropolk/NPL
513
- Project-URL: Issues, https://github.com/konung-yaropolk/NPL/issues
512
+ Project-URL: Homepage, https://github.com/konung-yaropolk/AutoStatLib
513
+ Project-URL: Repository, https://github.com/konung-yaropolk/AutoStatLib.git
514
+ Project-URL: Issues, https://github.com/konung-yaropolk/AutoStatLib/issues
514
515
  Keywords: Science,Statistics
515
516
  Classifier: Programming Language :: Python
516
517
  Classifier: Programming Language :: Python :: 3
517
518
  Classifier: Programming Language :: Python :: 3.12
518
519
  Classifier: License :: OSI Approved :: GNU Lesser General Public License v2 or later (LGPLv2+)
519
520
  Classifier: Operating System :: OS Independent
520
- Classifier: Development Status :: 2 - Pre-Alpha
521
+ Classifier: Development Status :: 4 - Beta
521
522
  Classifier: Intended Audience :: Developers
522
523
  Classifier: Intended Audience :: Science/Research
523
524
  Classifier: Natural Language :: English
@@ -531,6 +532,7 @@ License-File: LICENSE
531
532
  Requires-Dist: numpy
532
533
  Requires-Dist: scipy
533
534
  Requires-Dist: statsmodels
535
+ Requires-Dist: scikit-posthocs
534
536
  Requires-Dist: pandas
535
537
 
536
538
  # AutoStatLib - python library for automated statistical analysis
@@ -619,26 +621,30 @@ results = analysis.GetResult()
619
621
  The results dictionary keys with representing value types:
620
622
  ```
621
623
  {
622
- 'p-value': String
623
- 'Significance(p<0.05)': Boolean
624
- 'Stars_Printed': String
625
- 'Test_Name': String
626
- 'Groups_Compared': Integer
627
- 'Population_Mean': Float (taken from the input)
628
- 'Data_Normaly_Distributed': Boolean
629
- 'Parametric_Test_Applied': Boolean
630
- 'Paired_Test_Applied': Boolean
631
- 'Tails': Integer (taken from the input)
632
- 'p-value_exact': Float
633
- 'Stars': Integer
634
- 'Warnings': String
635
- 'Groups_N': List of integers
636
- 'Groups_Median': List of floats
637
- 'Groups_Mean': List of floats
638
- 'Groups_SD': List of floats
639
- 'Groups_SE': List of floats
640
- 'Samples': List of input values by groups
624
+ 'p-value' : String
625
+ 'Significance(p<0.05)' : Boolean
626
+ 'Stars_Printed' : String
627
+ 'Test_Name' : String
628
+ 'Groups_Compared' : Integer
629
+ 'Population_Mean' : Float (taken from the input)
630
+ 'Data_Normaly_Distributed' : Boolean
631
+ 'Parametric_Test_Applied' : Boolean
632
+ 'Paired_Test_Applied' : Boolean
633
+ 'Tails' : Integer (taken from the input)
634
+ 'p-value_exact' : Float
635
+ 'Stars' : Integer
636
+ 'Warnings' : String
637
+ 'Groups_N' : List of integers
638
+ 'Groups_Median' : List of floats
639
+ 'Groups_Mean' : List of floats
640
+ 'Groups_SD' : List of floats
641
+ 'Groups_SE' : List of floats
642
+ 'Samples' : List of input values by groups
641
643
  (taken from the input)
644
+ 'Posthoc_Matrix' : 2D List of floats
645
+ 'Posthoc_Matrix_bool' : 2D List of Boolean
646
+ 'Posthoc_Matrix_printed': 2D List of String
647
+ 'Posthoc_Matrix_stars': 2D List of String
642
648
  }
643
649
  ```
644
650
  If errors occurred, *GetResult()* returns an empty dictionary
@@ -653,7 +659,7 @@ If errors occured, *GetResult()* returns an empty dictionary
653
659
 
654
660
  ### TODO:
655
661
 
656
- -- Kruskal-Wallis test - add Dunn's multiple comparisons
662
+ -- Anova: posthocs
657
663
  -- Anova: add 2-way anova and 3-way anova
658
664
  -- one-way Anova: add repeated measures (for normal dependent values) with and without Geisser-Greenhouse correction
659
665
  -- one-way Anova: add Brown-Forsythe and Welch (for normal independent values with unequal SDs between groups)
@@ -666,10 +672,11 @@ If errors occured, *GetResult()* returns an empty dictionary
666
672
  -- add QQ plot
667
673
  -- n-sample tests: add onetail option
668
674
 
669
- ✅ done -- detailed normality test results
675
+ ✅ done -- detailed normality test results
676
+ ✅ done -- added posthoc: Kruskal-Wallis Dunn's multiple comparisons
670
677
 
671
678
 
672
- checked tests:
679
+ tests check:
673
680
  1-sample:
674
681
  --Wilcoxon 2,1 tails - ok
675
682
  --t-tests 2,1 tails -ok
@@ -681,6 +688,7 @@ checked tests:
681
688
 
682
689
  n-sample:
683
690
  --Kruskal-Wallis 2 tail - ok
691
+ --Dunn's multiple comparisons - ??
684
692
  --Friedman 2 tail - ok
685
693
  --one-way ANOVA 2 tail - ok
686
694
 
@@ -84,26 +84,30 @@ results = analysis.GetResult()
84
84
  The results dictionary keys with representing value types:
85
85
  ```
86
86
  {
87
- 'p-value': String
88
- 'Significance(p<0.05)': Boolean
89
- 'Stars_Printed': String
90
- 'Test_Name': String
91
- 'Groups_Compared': Integer
92
- 'Population_Mean': Float (taken from the input)
93
- 'Data_Normaly_Distributed': Boolean
94
- 'Parametric_Test_Applied': Boolean
95
- 'Paired_Test_Applied': Boolean
96
- 'Tails': Integer (taken from the input)
97
- 'p-value_exact': Float
98
- 'Stars': Integer
99
- 'Warnings': String
100
- 'Groups_N': List of integers
101
- 'Groups_Median': List of floats
102
- 'Groups_Mean': List of floats
103
- 'Groups_SD': List of floats
104
- 'Groups_SE': List of floats
105
- 'Samples': List of input values by groups
87
+ 'p-value' : String
88
+ 'Significance(p<0.05)' : Boolean
89
+ 'Stars_Printed' : String
90
+ 'Test_Name' : String
91
+ 'Groups_Compared' : Integer
92
+ 'Population_Mean' : Float (taken from the input)
93
+ 'Data_Normaly_Distributed' : Boolean
94
+ 'Parametric_Test_Applied' : Boolean
95
+ 'Paired_Test_Applied' : Boolean
96
+ 'Tails' : Integer (taken from the input)
97
+ 'p-value_exact' : Float
98
+ 'Stars' : Integer
99
+ 'Warnings' : String
100
+ 'Groups_N' : List of integers
101
+ 'Groups_Median' : List of floats
102
+ 'Groups_Mean' : List of floats
103
+ 'Groups_SD' : List of floats
104
+ 'Groups_SE' : List of floats
105
+ 'Samples' : List of input values by groups
106
106
  (taken from the input)
107
+ 'Posthoc_Matrix' : 2D List of floats
108
+ 'Posthoc_Matrix_bool' : 2D List of Boolean
109
+ 'Posthoc_Matrix_printed': 2D List of String
110
+ 'Posthoc_Matrix_stars': 2D List of String
107
111
  }
108
112
  ```
109
113
  If errors occurred, *GetResult()* returns an empty dictionary
@@ -118,7 +122,7 @@ If errors occured, *GetResult()* returns an empty dictionary
118
122
 
119
123
  ### TODO:
120
124
 
121
- -- Kruskal-Wallis test - add Dunn's multiple comparisons
125
+ -- Anova: posthocs
122
126
  -- Anova: add 2-way anova and 3-way anova
123
127
  -- one-way Anova: add repeated measures (for normal dependent values) with and without Geisser-Greenhouse correction
124
128
  -- one-way Anova: add Brown-Forsythe and Welch (for normal independent values with unequal SDs between groups)
@@ -131,10 +135,11 @@ If errors occured, *GetResult()* returns an empty dictionary
131
135
  -- add QQ plot
132
136
  -- n-sample tests: add onetail option
133
137
 
134
- ✅ done -- detailed normality test results
138
+ ✅ done -- detailed normality test results
139
+ ✅ done -- added posthoc: Kruskal-Wallis Dunn's multiple comparisons
135
140
 
136
141
 
137
- checked tests:
142
+ tests check:
138
143
  1-sample:
139
144
  --Wilcoxon 2,1 tails - ok
140
145
  --t-tests 2,1 tails -ok
@@ -146,6 +151,7 @@ checked tests:
146
151
 
147
152
  n-sample:
148
153
  --Kruskal-Wallis 2 tail - ok
154
+ --Dunn's multiple comparisons - ??
149
155
  --Friedman 2 tail - ok
150
156
  --one-way ANOVA 2 tail - ok
151
157
 
@@ -1,5 +1,5 @@
1
1
  [build-system]
2
- requires = ["setuptools>=62.6", "numpy", "scipy", "statsmodels"]
2
+ requires = ["setuptools>=62.6", "numpy", "scipy", "statsmodels", "scikit-posthocs"]
3
3
  build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
@@ -21,7 +21,7 @@ classifiers = [
21
21
  "Programming Language :: Python :: 3.12",
22
22
  "License :: OSI Approved :: GNU Lesser General Public License v2 or later (LGPLv2+)",
23
23
  "Operating System :: OS Independent",
24
- "Development Status :: 2 - Pre-Alpha",
24
+ "Development Status :: 4 - Beta",
25
25
  "Intended Audience :: Developers",
26
26
  "Intended Audience :: Science/Research",
27
27
  "Natural Language :: English",
@@ -32,8 +32,9 @@ classifiers = [
32
32
  ]
33
33
 
34
34
  [project.urls]
35
- Homepage = "https://github.com/konung-yaropolk/NPL"
36
- Issues = "https://github.com/konung-yaropolk/NPL/issues"
35
+ Homepage = "https://github.com/konung-yaropolk/AutoStatLib"
36
+ Repository = "https://github.com/konung-yaropolk/AutoStatLib.git"
37
+ Issues = "https://github.com/konung-yaropolk/AutoStatLib/issues"
37
38
 
38
39
  [tool.setuptools.dynamic]
39
40
  version = {attr = "AutoStatLib.__version__"}
@@ -1,4 +1,5 @@
1
1
  numpy
2
2
  scipy
3
3
  statsmodels
4
- pandas
4
+ scikit-posthocs
5
+ pandas
@@ -0,0 +1,254 @@
1
+ from AutoStatLib.statistical_tests import StatisticalTests
2
+ from AutoStatLib.normality_tests import NormalityTests
3
+ from AutoStatLib.helpers import Helpers
4
+ from AutoStatLib.text_formatting import TextFormatting
5
+
6
+
7
class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Helpers):
    '''
    Main entry point of the library.

    Stores the input groups and options, validates them, checks every group
    for normality, runs a user-selected or automatically selected test and
    keeps the outcome in ``self.results`` plus a textual report in
    ``self.summary``.

    Methods that are called here but not defined here (``run_test_by_id``,
    ``run_test_auto``, ``print_groups``, ``print_results``, ``check_normality``,
    ``floatify_recursive``, ``create_results_dict``, ``log``, ``AddWarning``)
    are expected to come from the mixin base classes.
    '''

    def __init__(self,
                 groups_list,
                 paired=False,
                 tails=2,
                 popmean=None,
                 posthoc=True,
                 verbose=True):
        '''
        Store the input and build the test-id classification tables.

        Parameters:
            groups_list: list of groups, each group being a list of values.
            paired:      whether the groups are paired (same length required).
            tails:       1 or 2; validated in run_test().
            popmean:     population mean, stored for the single-sample tests.
            posthoc:     stored as-is; not read anywhere in this class.
            verbose:     when True, run_test() prints the summary on success.
        '''
        self.results = None
        self.error = False
        self.groups_list = groups_list
        self.paired = paired
        self.tails = tails
        self.popmean = popmean
        self.posthoc = posthoc
        self.verbose = verbose
        self.n_groups = len(self.groups_list)
        self.warning_flag_non_numeric_data = False
        self.summary = ''

        # test IDs classification:
        self.test_ids_all = [  # in alphabetical order
            'anova_1w_ordinary',
            'anova_1w_rm',
            'friedman',
            'kruskal_wallis',
            'mann_whitney',
            't_test_independent',
            't_test_paired',
            't_test_single_sample',
            'wilcoxon',
            'wilcoxon_single_sample',
        ]
        # NOTE: every entry needs its own trailing comma. Without it Python
        # concatenates adjacent string literals into a single bogus id.
        self.test_ids_parametric = [
            'anova_1w_ordinary',
            'anova_1w_rm',
            't_test_independent',
            't_test_paired',
            't_test_single_sample',
        ]
        self.test_ids_dependent = [
            'anova_1w_rm',
            'friedman',
            't_test_paired',
            'wilcoxon',
        ]
        self.test_ids_3sample = [
            'anova_1w_ordinary',
            'anova_1w_rm',
            'friedman',
            'kruskal_wallis',
        ]
        self.test_ids_2sample = [
            'mann_whitney',
            't_test_independent',
            't_test_paired',
            'wilcoxon',
        ]
        self.test_ids_1sample = [
            't_test_single_sample',
            'wilcoxon_single_sample',
        ]
        self.warning_ids_all = {
            # 'not-numeric': '\nWarning: Non-numeric data was found in input and ignored.\n Make sure the input data is correct to get the correct results\n',
            'param_test_with_non-normal_data': '\nWarning: Parametric test was manualy chosen for Not-Normaly distributed data.\n The results might be skewed. \n Please, run non-parametric test or preform automatic test selection.\n',
            'non-param_test_with_normal_data': '\nWarning: Non-Parametric test was manualy chosen for Normaly distributed data.\n The results might be skewed. \n Please, run parametric test or preform automatic test selection.\n',
            'no_pop_mean_set': '\nWarning: No Population Mean was set up for single-sample test, used default 0 value.\n The results might be skewed. \n Please, set the Population Mean and run the test again.\n',
        }

    def _find_input_error(self, test):
        '''
        Return the message of the first failed input check, or None.

        Explicit checks are used instead of ``assert`` because assertions are
        removed when Python runs with -O, which would silently disable the
        validation. Order and messages of the checks are significant: the
        length comparisons rely on ``self.data`` being non-empty.
        '''
        if not self.data:
            return 'There is no input data'
        if self.tails not in [1, 2]:
            return 'Tails parameter can be 1 or 2 only'
        if test not in self.test_ids_all and test != 'auto':
            return 'Wrong test id choosen, ensure you called correct function'
        if not all(len(group) >= 4 for group in self.data):
            return 'Each group must contain at least four values'

        same_length = all(
            len(group) == len(self.data[0]) for group in self.data)
        if self.paired == True and not same_length:
            return 'Paired groups must have the same length'
        if test in self.test_ids_dependent and not same_length:
            return 'Groups must have the same length for dependent groups test'

        if test in self.test_ids_2sample and self.n_groups != 2:
            return f'Only two groups of data must be given for 2-groups tests, got {self.n_groups}'
        if test in self.test_ids_1sample and self.n_groups > 1:
            return f'Only one group of data must be given for single-group tests, got {self.n_groups}'
        if test in self.test_ids_3sample and self.n_groups < 3:
            return f'At least three groups of data must be given for multi-groups tests, got {self.n_groups}'
        return None

    def _log_normality_report(self):
        '''
        Run the normality checks on every group, log one line per group and
        append the per-group verdict to ``self.normals``.
        '''
        self.log(
            '\n\nThe group is assumed to be normally distributed if at least one')
        self.log(
            'normality test result is positive. Normality checked by tests:')
        self.log('Shapiro-Wilk, Lilliefors, Anderson-Darling, D\'Agostino-Pearson')
        self.log(
            '[+] -positive, [-] -negative, [ ] -too small group for the test\n')
        self.log(' Test : SW LF AD AP ')
        for i, data in enumerate(self.data):
            poll = self.check_normality(data)
            # a single passed test is enough to call the group normal
            isnormal = any(poll)
            poll_print = tuple(
                '+' if x is True else '-' if x is False else ' ' if x is None else 'e' for x in poll)
            self.normals.append(isnormal)
            self.log(
                f' Group {i+1}: {poll_print[0]} {poll_print[1]} {poll_print[2]} {poll_print[3]} so disrtibution seems {"normal" if isnormal else "not normal"}')

    def run_test(self, test='auto'):
        '''
        Validate the input, check normality and run the requested test.

        Parameters:
            test: one of ``self.test_ids_all`` or 'auto' for automatic choice.

        On invalid input the error is logged, ``self.error`` is set, the
        summary is printed and the method returns without results. Otherwise
        ``self.results`` is filled and the summary is printed if ``verbose``.
        '''
        # reset values from previous tests
        # NOTE(review): self.summary is not reset here, so the report keeps
        # growing across consecutive runs - confirm this is intended.
        self.results = None
        self.error = False
        self.warnings = []
        self.normals = []
        self.test_name = None
        self.test_id = None
        self.test_stat = None
        self.p_value = None
        self.posthoc_matrix_df = None
        self.posthoc_matrix = []
        self.posthoc_name = None

        self.log('\n' + '-'*67)
        self.log('Statistical analysis initiated for data in {} groups\n'.format(
            len(self.groups_list)))

        # adjusting input data type
        self.data = self.floatify_recursive(self.groups_list)
        if self.warning_flag_non_numeric_data:
            self.log(
                'Text or other non-numeric data in the input was ignored:')

        # delete the empty cols from input
        # NOTE(review): self.n_groups was computed from the raw input in
        # __init__ and is not updated after empty groups are dropped here.
        self.data = [col for col in self.data if any(
            x is not None for x in col)]

        # User input validation block
        error_message = self._find_input_error(test)
        if error_message is not None:
            self.log('\nTest :', test)
            self.log('Error :', error_message)
            self.log('-'*67 + '\n')
            self.error = True
            # printed regardless of self.verbose, as before
            print(self.summary)
            return

        # Print the data
        self.print_groups()

        # Normality tests
        self._log_normality_report()
        self.parametric = all(self.normals)

        # print test chosen
        self.log('\n\nInput:\n')
        self.log('Data Normaly Distributed: ', self.parametric)
        self.log('Paired Groups: ', self.paired)
        self.log('Groups: ', self.n_groups)
        self.log('Test chosen by user: ', test)

        # Wrong test Warnings (only when the user picked the test manually)
        if test != 'auto':
            test_is_parametric = test in self.test_ids_parametric
            if test_is_parametric and not self.parametric:
                self.AddWarning('param_test_with_non-normal_data')
            if not test_is_parametric and self.parametric:
                self.AddWarning('non-param_test_with_normal_data')

        # run the test
        if test in self.test_ids_all:
            self.run_test_by_id(test)
        else:
            self.run_test_auto()

        # print the results
        self.results = self.create_results_dict()
        self.print_results()
        self.log(
            '\n\nResults above are accessible as a dictionary via GetResult() method')
        self.log('-'*67 + '\n')

        # print the results to console:
        if self.verbose == True:
            print(self.summary)

    # public methods:

    def RunAuto(self):
        '''Run the analysis with automatic test selection.'''
        self.run_test(test='auto')

    def RunManual(self, test):
        '''Run the test with the given id (see GetTestIDs()).'''
        self.run_test(test)

    def RunOnewayAnova(self):
        '''Run the test registered as 'anova_1w_ordinary'.'''
        self.run_test(test='anova_1w_ordinary')

    def RunOnewayAnovaRM(self):
        '''Run the test registered as 'anova_1w_rm'.'''
        self.run_test(test='anova_1w_rm')

    def RunFriedman(self):
        '''Run the test registered as 'friedman'.'''
        self.run_test(test='friedman')

    def RunKruskalWallis(self):
        '''Run the test registered as 'kruskal_wallis'.'''
        self.run_test(test='kruskal_wallis')

    def RunMannWhitney(self):
        '''Run the test registered as 'mann_whitney'.'''
        self.run_test(test='mann_whitney')

    def RunTtest(self):
        '''Run the test registered as 't_test_independent'.'''
        self.run_test(test='t_test_independent')

    def RunTtestPaired(self):
        '''Run the test registered as 't_test_paired'.'''
        self.run_test(test='t_test_paired')

    def RunTtestSingleSample(self):
        '''Run the test registered as 't_test_single_sample'.'''
        self.run_test(test='t_test_single_sample')

    def RunWilcoxonSingleSample(self):
        '''Run the test registered as 'wilcoxon_single_sample'.'''
        self.run_test(test='wilcoxon_single_sample')

    def RunWilcoxon(self):
        '''Run the test registered as 'wilcoxon'.'''
        self.run_test(test='wilcoxon')

    def GetResult(self):
        '''
        Return the results of the last run.

        Returns the results dictionary when available, an empty dictionary
        when the last run ended with an error, and the (empty) stored value
        when no test has been run yet.
        '''
        if self.results:
            return self.results
        if self.error:
            print('Error occured, no results to output')
            return {}
        print('No test chosen, no results to output')
        return self.results

    def GetSummary(self):
        '''Return the accumulated textual report.'''
        if not self.results and not self.error:
            print('No test chosen, no summary to output')
        return self.summary

    def GetTestIDs(self):
        '''Return the list of all supported test ids.'''
        return self.test_ids_all

    def PrintSummary(self):
        '''Print the accumulated textual report.'''
        print(self.summary)
251
+
252
+
253
if __name__ == '__main__':
    # This module is not a script. The hint uses the real, case-sensitive
    # package name so that the suggested import actually works.
    print('This package works as an imported module only.\nUse "import AutoStatLib" statement')
@@ -1,2 +1,2 @@
1
1
  # AutoStatLib package version:
2
- __version__ = "0.2.2"
2
+ __version__ = "0.2.6"
@@ -0,0 +1,80 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+
4
+
5
class Helpers():
    '''
    Helper mixin: input conversion, results dictionary, logging and warnings.

    The methods read and write attributes of the host object (``data``,
    ``p_value``, ``summary``, ``warnings`` and others), so the class is meant
    to be combined with the main analysis class rather than used alone.
    '''

    def matrix_to_dataframe(self, matrix):
        '''
        Flatten a list of groups into a long-format DataFrame.

        Each element of ``matrix`` is treated as one column (group); the
        position of a value inside the group is its row.

        Returns:
            pandas.DataFrame with the columns 'Row', 'Col' and 'Value'.
        '''
        data = []
        cols = []
        rows = []

        for col_index, group in enumerate(matrix):
            for row_index, value in enumerate(group):
                data.append(value)
                cols.append(col_index)
                rows.append(row_index)

        df = pd.DataFrame(
            {'Row': rows, 'Col': cols, 'Value': data})
        return df

    def floatify_recursive(self, data):
        '''
        Convert nested lists of values to nested lists of numpy float64.

        Items that cannot be converted are dropped and
        ``self.warning_flag_non_numeric_data`` is set to True.
        '''
        if isinstance(data, list):
            # Recursively process sublists and filter out None values
            processed_list = [self.floatify_recursive(item) for item in data]
            return [item for item in processed_list if item is not None]
        else:
            try:
                # Try to convert the item to float
                return np.float64(data)
            except (ValueError, TypeError):
                # If conversion fails, replace with None
                self.warning_flag_non_numeric_data = True
                return None

    @staticmethod
    def _to_python_float(value):
        '''Return ``value`` as a plain float, accepting numpy scalars too.'''
        if hasattr(value, 'item'):
            return value.item()
        return float(value)

    def create_results_dict(self) -> dict:
        '''
        Build the dictionary returned to the user after a test has run.

        Also stores ``self.stars_int`` and ``self.stars_str``.
        '''
        p_value = self._to_python_float(self.p_value)
        posthoc = self.posthoc_matrix if self.posthoc_matrix else []

        self.stars_int = self.make_stars(p_value)
        self.stars_str = self.make_stars_printed(self.stars_int)

        # NOTE(review): np.std defaults to ddof=0 (population SD); confirm
        # whether the sample SD (ddof=1) is wanted for Groups_SD / Groups_SE.
        groups_sd = [np.std(group).item() for group in self.data]

        return {
            'p-value': self.make_p_value_printed(p_value),
            'Significance(p<0.05)': p_value < 0.05,
            'Stars_Printed': self.stars_str,
            'Test_Name': self.test_name,
            'Groups_Compared': self.n_groups,
            'Population_Mean': self.popmean if self.n_groups == 1 else 'N/A',
            'Data_Normaly_Distributed': self.parametric,
            'Parametric_Test_Applied': self.test_id in self.test_ids_parametric,
            'Paired_Test_Applied': self.paired,
            'Tails': self.tails,
            'p-value_exact': p_value,
            'Stars': self.stars_int,
            # 'Stat_Value': self.test_stat.item(),
            'Warnings': self.warnings,
            'Groups_N': [len(group) for group in self.data],
            'Groups_Median': [np.median(group).item() for group in self.data],
            'Groups_Mean': [np.mean(group).item() for group in self.data],
            'Groups_SD': groups_sd,
            # standard error = SD / sqrt(size of that group)
            'Groups_SE': [sd / np.sqrt(len(group)).item()
                          for sd, group in zip(groups_sd, self.data)],
            # TODO: values are numpy float64, convert them to regular floats
            'Samples': self.data,
            'Posthoc_Tests_Name': self.posthoc_name if self.posthoc_name is not None else '',
            'Posthoc_Matrix': posthoc,
            # significance flag of every pairwise comparison
            'Posthoc_Matrix_bool': [[bool(element < 0.05) for element in row] for row in posthoc],
            'Posthoc_Matrix_printed': [[self.make_p_value_printed(element) for element in row] for row in posthoc],
            'Posthoc_Matrix_stars': [[self.make_stars_printed(self.make_stars(element)) for element in row] for row in posthoc],
        }

    def log(self, *args, **kwargs):
        '''Append the space-joined arguments as a new line to the summary.'''
        message = ' '.join(map(str, args))
        # print(message, **kwargs)
        self.summary += '\n' + message

    def AddWarning(self, warning_id):
        '''Log the warning text for ``warning_id`` and remember it.'''
        message = self.warning_ids_all[warning_id]
        self.log(message)
        self.warnings.append(message)
@@ -0,0 +1,83 @@
1
+ from statsmodels.stats.diagnostic import lilliefors
2
+ from scipy.stats import shapiro, normaltest, anderson
3
+
4
+
5
class NormalityTests():
    '''
    Normality tests mixin

    see the article about minimal sample size for tests:
    Power comparisons of Shapiro-Wilk, Kolmogorov-Smirnov,
    Lilliefors and Anderson-Darling tests, Nornadiah Mohd Razali1, Yap Bee Wah1
    '''

    @staticmethod
    def _passes(p_value):
        '''Return True when ``p_value`` is set and greater than 0.05.'''
        return bool(p_value and p_value > 0.05)

    def check_normality(self, data):
        '''
        Run four normality tests on one group.

        Returns:
            tuple (sw, lf, ad, ap) for Shapiro-Wilk, Lilliefors,
            Anderson-Darling and D'Agostino-Pearson. Each item is True
            (p > 0.05), False, or None when the test was skipped because the
            group has fewer than 20 values.
        '''
        ad = None
        ap = None
        n = len(data)

        # Shapiro-Wilk test
        sw_stat, sw_p_value = shapiro(data)
        sw = self._passes(sw_p_value)

        # Lilliefors test
        lf_stat, lf_p_value = lilliefors(
            data, dist='norm')
        lf = self._passes(lf_p_value)

        # Anderson-Darling test
        if n >= 20:
            ad_stat, ad_p_value = self.anderson_get_p(
                data, dist='norm')
            ad = self._passes(ad_p_value)

        # D'Agostino-Pearson test
        # test result is skewed if n<20
        if n >= 20:
            ap_stat, ap_p_value = normaltest(data)
            ap = self._passes(ap_p_value)

        return (sw, lf, ad, ap)

    def anderson_get_p(self, data, dist='norm'):
        '''
        calculating p-value for Anderson-Darling test using the method described here:
        Computation of Probability Associated with Anderson-Darling Statistic
        Lorentz Jantschi and Sorana D. Bolboaca, 2018 - Mathematics

        Returns:
            tuple (statistic, p) where ``statistic`` is the unadjusted value
            returned by scipy and ``p`` is the approximated p-value, or None
            when the adjusted statistic is NaN.
        '''
        e = 2.718281828459045
        n = len(data)

        ad, critical_values, significance_levels = anderson(
            data, dist=dist)

        # adjust ad_stat for small sample sizes:
        s = ad*(1 + 0.75/n + 2.25/(n**2))

        if s >= 0.6:
            # The exponent is a parabola in s with its minimum at
            # s = 5.709 / (2 * 0.0186). Beyond that point the approximation
            # would grow again (finally above 1), so s is clamped there to
            # keep p non-increasing and inside [0, 1].
            s_capped = min(s, 5.709/(2*0.0186))
            p = e**(1.2937 - 5.709*s_capped + 0.0186*s_capped**2)
        elif s > 0.34:
            p = e**(0.9177 - 4.279*s - 1.38*s**2)
        elif s > 0.2:
            p = 1 - e**(-8.318 + 42.796*s - 59.938*s**2)
        elif s <= 0.2:
            p = 1 - e**(-13.436 + 101.14*s - 223.73*s**2)
        else:
            # only reached when s is NaN (every comparison above is False)
            p = None

        return ad, p