AutoStatLib 0.2.2__tar.gz → 0.2.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of AutoStatLib might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: AutoStatLib
3
- Version: 0.2.2
3
+ Version: 0.2.5
4
4
  Summary: AutoStatLib - a simple statistical analysis tool
5
5
  Author: Stemonitis, SciWare LLC
6
6
  Author-email: konung-yaropolk <yaropolk1995@gmail.com>
@@ -531,6 +531,7 @@ License-File: LICENSE
531
531
  Requires-Dist: numpy
532
532
  Requires-Dist: scipy
533
533
  Requires-Dist: statsmodels
534
+ Requires-Dist: scikit-posthocs
534
535
  Requires-Dist: pandas
535
536
 
536
537
  # AutoStatLib - python library for automated statistical analysis
@@ -653,7 +654,7 @@ If errors occured, *GetResult()* returns an empty dictionary
653
654
 
654
655
  ### TODO:
655
656
 
656
- -- Kruskal-Wallis test - add Dunn's multiple comparisons
657
+ -- Anova: posthocs
657
658
  -- Anova: add 2-way anova and 3-way anova
658
659
  -- one-way Anova: add repeated measures (for normal dependent values) with and without Geisser-Greenhouse correction
659
660
  -- one-way Anova: add Brown-Forsythe and Welch (for normal independent values with unequal SDs between groups)
@@ -666,10 +667,11 @@ If errors occured, *GetResult()* returns an empty dictionary
666
667
  -- add QQ plot
667
668
  -- n-sample tests: add onetail option
668
669
 
669
- ✅ done -- detailed normality test results
670
+ ✅ done -- detailed normality test results
671
+ ✅ done -- added posthoc: Kruskal-Wallis Dunn's multiple comparisons
670
672
 
671
673
 
672
- checked tests:
674
+ tests check:
673
675
  1-sample:
674
676
  --Wilcoxon 2,1 tails - ok
675
677
  --t-tests 2,1 tails -ok
@@ -681,6 +683,7 @@ checked tests:
681
683
 
682
684
  n-sample:
683
685
  --Kruskal-Wallis 2 tail - ok
686
+ --Dunn's multiple comparisons - ??
684
687
  --Friedman 2 tail - ok
685
688
  --one-way ANOVA 2 tail - ok
686
689
 
@@ -118,7 +118,7 @@ If errors occured, *GetResult()* returns an empty dictionary
118
118
 
119
119
  ### TODO:
120
120
 
121
- -- Kruskal-Wallis test - add Dunn's multiple comparisons
121
+ -- Anova: posthocs
122
122
  -- Anova: add 2-way anova and 3-way anova
123
123
  -- one-way Anova: add repeated measures (for normal dependent values) with and without Geisser-Greenhouse correction
124
124
  -- one-way Anova: add Brown-Forsythe and Welch (for normal independent values with unequal SDs between groups)
@@ -131,10 +131,11 @@ If errors occured, *GetResult()* returns an empty dictionary
131
131
  -- add QQ plot
132
132
  -- n-sample tests: add onetail option
133
133
 
134
- ✅ done -- detailed normality test results
134
+ ✅ done -- detailed normality test results
135
+ ✅ done -- added posthoc: Kruskal-Wallis Dunn's multiple comparisons
135
136
 
136
137
 
137
- checked tests:
138
+ tests check:
138
139
  1-sample:
139
140
  --Wilcoxon 2,1 tails - ok
140
141
  --t-tests 2,1 tails -ok
@@ -146,6 +147,7 @@ checked tests:
146
147
 
147
148
  n-sample:
148
149
  --Kruskal-Wallis 2 tail - ok
150
+ --Dunn's multiple comparisons - ??
149
151
  --Friedman 2 tail - ok
150
152
  --one-way ANOVA 2 tail - ok
151
153
 
@@ -1,5 +1,5 @@
1
1
  [build-system]
2
- requires = ["setuptools>=62.6", "numpy", "scipy", "statsmodels"]
2
+ requires = ["setuptools>=62.6", "numpy", "scipy", "statsmodels", "scikit-posthocs"]
3
3
  build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
@@ -1,4 +1,5 @@
1
1
  numpy
2
2
  scipy
3
3
  statsmodels
4
- pandas
4
+ scikit-posthocs
5
+ pandas
@@ -0,0 +1,255 @@
1
+ from AutoStatLib.statistical_tests import StatisticalTests
2
+ from AutoStatLib.normality_tests import NormalityTests
3
+ from AutoStatLib.helpers import Helpers
4
+ from AutoStatLib.text_formatting import TextFormatting
5
+
6
+
7
class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Helpers):
    '''
    Main user-facing class of the package.

    Combines the statistical-test, normality-test, text-formatting and
    helper mixins. A test run cleans the input, validates it, checks each
    group for normality, runs either the requested test or an automatically
    selected one, and stores the outcome in ``self.results`` (a dictionary)
    and ``self.summary`` (the accumulated text log).

    Parameters:
        groups_list: list of groups, each group being a list of values
        paired:      whether the groups are dependent (paired)
        tails:       1 or 2
        popmean:     population mean used by the single-sample tests
        posthoc:     whether to run post-hoc comparisons where implemented
        verbose:     whether to print the summary after a successful run
    '''

    def __init__(self,
                 groups_list,
                 paired=False,
                 tails=2,
                 popmean=None,
                 posthoc=False,
                 verbose=True):
        self.results = None
        self.error = False
        self.groups_list = groups_list
        self.paired = paired
        self.tails = tails
        self.popmean = popmean
        self.posthoc = posthoc
        self.verbose = verbose
        # Provisional count; run_test() recomputes it once empty groups
        # have been removed from the input.
        self.n_groups = len(self.groups_list)
        self.warning_flag_non_numeric_data = False
        self.summary = ''

        # test IDs classification:
        self.test_ids_all = [  # in alphabetical order
            'anova_1w_ordinary',
            'anova_1w_rm',
            'friedman',
            'kruskal_wallis',
            'mann_whitney',
            't_test_independent',
            't_test_paired',
            't_test_single_sample',
            'wilcoxon',
            'wilcoxon_single_sample',
        ]
        # NOTE: every entry needs its trailing comma. Without it, adjacent
        # string literals are silently concatenated into one bogus id.
        self.test_ids_parametric = [
            'anova_1w_ordinary',
            'anova_1w_rm',
            't_test_independent',
            't_test_paired',
            't_test_single_sample',
        ]
        self.test_ids_dependent = [
            'anova_1w_rm',
            'friedman',
            't_test_paired',
            'wilcoxon',
        ]
        self.test_ids_3sample = [
            'anova_1w_ordinary',
            'anova_1w_rm',
            'friedman',
            'kruskal_wallis',
        ]
        self.test_ids_2sample = [
            'mann_whitney',
            't_test_independent',
            't_test_paired',
            'wilcoxon',
        ]
        self.test_ids_1sample = [
            't_test_single_sample',
            'wilcoxon_single_sample',
        ]
        self.warning_ids_all = {
            # 'not-numeric': '\nWarning: Non-numeric data was found in input and ignored.\n Make sure the input data is correct to get the correct results\n',
            'param_test_with_non-normal_data': '\nWarning: Parametric test was manualy chosen for Not-Normaly distributed data.\n The results might be skewed. \n Please, run non-parametric test or preform automatic test selection.\n',
            'non-param_test_with_normal_data': '\nWarning: Non-Parametric test was manualy chosen for Normaly distributed data.\n The results might be skewed. \n Please, run parametric test or preform automatic test selection.\n',
            'no_pop_mean_set': '\nWarning: No Population Mean was set up for single-sample test, used default 0 value.\n The results might be skewed. \n Please, set the Population Mean and run the test again.\n',
        }

    def _find_input_error(self, test):
        '''
        Return the message of the first failed input check, or None.

        Explicit checks are used instead of ``assert`` statements so that
        validation still happens when Python runs with ``-O``.
        '''
        if not self.data:
            return 'There is no input data'
        if self.tails not in [1, 2]:
            return 'Tails parameter can be 1 or 2 only'
        if test not in self.test_ids_all and test != 'auto':
            return 'Wrong test id choosen, ensure you called correct function'
        if not all(len(group) >= 4 for group in self.data):
            return 'Each group must contain at least four values'

        same_length = all(
            len(group) == len(self.data[0]) for group in self.data)
        if self.paired and not same_length:
            return 'Paired groups must have the same length'
        if test in self.test_ids_dependent and not same_length:
            return 'Groups must have the same length for dependent groups test'

        if test in self.test_ids_2sample and self.n_groups != 2:
            return f'Only two groups of data must be given for 2-groups tests, got {self.n_groups}'
        if test in self.test_ids_1sample and self.n_groups > 1:
            return f'Only one group of data must be given for single-group tests, got {self.n_groups}'
        if test in self.test_ids_3sample and self.n_groups < 3:
            return f'At least three groups of data must be given for multi-groups tests, got {self.n_groups}'
        return None

    def run_test(self, test='auto'):
        '''
        Run one statistical test on the stored groups.

        Parameters:
            test: one of the ids in ``self.test_ids_all``, or 'auto' to let
                  the test be selected from the data.

        On invalid input the error is logged, ``self.error`` is set, the
        summary is printed and the method returns without results.
        '''

        # reset values from previous tests
        self.results = None
        self.error = False
        self.warnings = []
        self.normals = []
        self.test_name = None
        self.test_id = None
        self.test_stat = None
        self.p_value = None
        self.posthoc_matrix_df = None
        self.posthoc_matrix = []

        self.log('\n' + '-'*67)
        self.log('Statistical analysis initiated for data in {} groups\n'.format(
            len(self.groups_list)))

        # adjusting input data type
        self.data = self.floatify_recursive(self.groups_list)
        if self.warning_flag_non_numeric_data:
            self.log(
                'Text or other non-numeric data in the input was ignored:')

        # delete the empty cols from input
        self.data = [col for col in self.data if any(
            x is not None for x in col)]

        # Count the groups that actually remain, so that validation, test
        # selection and the reported group count all agree with the data.
        self.n_groups = len(self.data)

        # User input validation block
        error = self._find_input_error(test)
        if error is not None:
            self.log('\nTest :', test)
            self.log('Error :', error)
            self.log('-'*67 + '\n')
            self.error = True
            print(self.summary)
            return

        # Print the data
        self.print_groups()

        # Normality tests
        self.log(
            '\n\nThe group is assumed to be normally distributed if at least one')
        self.log(
            'normality test result is positive. Normality checked by tests:')
        self.log('Shapiro-Wilk, Lilliefors, Anderson-Darling, D\'Agostino-Pearson')
        self.log(
            '[+] -positive, [-] -negative, [ ] -too small group for the test\n')
        self.log(' Test : SW LF AD AP ')
        for i, data in enumerate(self.data):
            poll = self.check_normality(data)
            isnormal = any(poll)
            poll_print = tuple(
                '+' if x is True else '-' if x is False else ' ' if x is None else 'e' for x in poll)
            self.normals.append(isnormal)
            self.log(
                f' Group {i+1}: {poll_print[0]} {poll_print[1]} {poll_print[2]} {poll_print[3]} so disrtibution seems {"normal" if isnormal else "not normal"}')
        # Parametric tests are considered appropriate only if every group
        # looks normal.
        self.parametric = all(self.normals)

        # print test choosen
        self.log('\n\nInput:\n')
        self.log('Data Normaly Distributed: ', self.parametric)
        self.log('Paired Groups: ', self.paired)
        self.log('Groups: ', self.n_groups)
        self.log('Test chosen by user: ', test)

        # Wrong test Warnings
        if test != 'auto':
            is_parametric_test = test in self.test_ids_parametric
            if is_parametric_test and not self.parametric:
                self.AddWarning('param_test_with_non-normal_data')
            if not is_parametric_test and self.parametric:
                self.AddWarning('non-param_test_with_normal_data')

        # run the test
        if test in self.test_ids_all:
            self.run_test_by_id(test)
        else:
            self.run_test_auto()

        # print the results
        self.results = self.create_results_dict()
        self.print_results()
        self.log(
            '\n\nResults above are accessible as a dictionary via GetResult() method')
        self.log('-'*67 + '\n')

        # print the results to console:
        if self.verbose:
            print(self.summary)

    # public methods:
    def RunAuto(self):
        '''Run the automatically selected test.'''
        self.run_test(test='auto')

    def RunManual(self, test):
        '''Run the test with the given id (see GetTestIDs()).'''
        self.run_test(test)

    def RunOnewayAnova(self):
        self.run_test(test='anova_1w_ordinary')

    def RunOnewayAnovaRM(self):
        self.run_test(test='anova_1w_rm')

    def RunFriedman(self):
        self.run_test(test='friedman')

    def RunKruskalWallis(self):
        self.run_test(test='kruskal_wallis')

    def RunMannWhitney(self):
        self.run_test(test='mann_whitney')

    def RunTtest(self):
        self.run_test(test='t_test_independent')

    def RunTtestPaired(self):
        self.run_test(test='t_test_paired')

    def RunTtestSingleSample(self):
        self.run_test(test='t_test_single_sample')

    def RunWilcoxonSingleSample(self):
        self.run_test(test='wilcoxon_single_sample')

    def RunWilcoxon(self):
        self.run_test(test='wilcoxon')

    def GetResult(self):
        '''
        Return the results dictionary of the last run.

        Returns an empty dictionary if the last run failed, and the stored
        (empty) results value if no test has been run yet.
        '''
        if not self.results and not self.error:
            print('No test chosen, no results to output')
            return self.results
        if not self.results and self.error:
            print('Error occured, no results to output')
            return {}
        return self.results

    def GetSummary(self):
        '''Return the accumulated text summary.'''
        if not self.results and not self.error:
            print('No test chosen, no summary to output')
        return self.summary

    def GetTestIDs(self):
        '''Return the list of all supported test ids.'''
        return self.test_ids_all

    def PrintSummary(self):
        '''Print the accumulated text summary.'''
        print(self.summary)
252
+
253
+
254
# Running this module directly does nothing useful; point the user at the
# importable package name (imports are case-sensitive).
if __name__ == '__main__':
    print('This package works as an imported module only.\nUse "import AutoStatLib" statement')
@@ -1,2 +1,2 @@
1
1
  # AutoStatLib package version:
2
- __version__ = "0.2.2"
2
+ __version__ = "0.2.5"
@@ -0,0 +1,77 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+
4
class Helpers():
    '''
    Helper mixin: data conversion, results assembly and logging.

    The methods rely on attributes and formatting methods supplied by the
    class this mixin is combined with (for example ``self.summary``,
    ``self.warnings``, ``self.make_stars``).
    '''

    def matrix_to_dataframe(self, matrix):
        '''
        Flatten a list of lists into a long-format DataFrame.

        Each value becomes one record with columns 'Row' (position inside
        its sublist), 'Col' (index of the sublist) and 'Value'.
        '''
        data = []
        cols = []
        rows = []

        for i, row in enumerate(matrix):
            for j, value in enumerate(row):
                data.append(value)
                cols.append(i)
                rows.append(j)

        return pd.DataFrame({'Row': rows, 'Col': cols, 'Value': data})

    def floatify_recursive(self, data):
        '''
        Convert nested lists of values to nested lists of numpy float64.

        Items that cannot be converted are dropped, and
        ``self.warning_flag_non_numeric_data`` is set when that happens.
        A non-list item that cannot be converted yields None.
        '''
        if isinstance(data, list):
            # Recursively process sublists and filter out None values
            processed_list = [self.floatify_recursive(item) for item in data]
            return [item for item in processed_list if item is not None]

        try:
            # Try to convert the item to float
            return np.float64(data)
        except (ValueError, TypeError):
            # If conversion fails, replace with None
            self.warning_flag_non_numeric_data = True
            return None

    def create_results_dict(self) -> dict:
        '''
        Assemble the results dictionary from the state left by a test run.

        Also stores the significance stars on ``self.stars_int`` and
        ``self.stars_str``.
        '''
        # float() accepts both numpy scalars and plain Python floats.
        p_value = float(self.p_value)

        self.stars_int = self.make_stars(p_value)
        self.stars_str = self.make_stars_printed(self.stars_int)

        has_posthoc = bool(self.posthoc_matrix)

        return {
            'p-value': self.make_p_value_printed(p_value),
            'Significance(p<0.05)': p_value < 0.05,
            'Stars_Printed': self.stars_str,
            'Test_Name': self.test_name,
            'Groups_Compared': self.n_groups,
            'Population_Mean': self.popmean if self.n_groups == 1 else 'N/A',
            'Data_Normaly_Distributed': self.parametric,
            'Parametric_Test_Applied': self.test_id in self.test_ids_parametric,
            'Paired_Test_Applied': self.paired,
            'Tails': self.tails,
            'p-value_exact': p_value,
            'Stars': self.stars_int,
            # 'Stat_Value': self.test_stat.item(),
            'Warnings': self.warnings,
            'Groups_N': [len(group) for group in self.data],
            'Groups_Median': [np.median(group).item() for group in self.data],
            'Groups_Mean': [np.mean(group).item() for group in self.data],
            # NOTE(review): np.std defaults to ddof=0 (population SD);
            # confirm whether the sample SD (ddof=1) is intended here.
            'Groups_SD': [np.std(group).item() for group in self.data],
            # Standard error uses the size of each group, not the number
            # of groups.
            'Groups_SE': [np.std(group).item() / np.sqrt(len(group)).item() for group in self.data],
            # actually returns list of lists of numpy dtypes of float64, next make it return regular floats:
            'Samples': self.data,
            'Posthoc_Matrix': self.posthoc_matrix if has_posthoc else 'N/A',
            'Posthoc_Matrix_printed': [[self.make_p_value_printed(element) for element in row] for row in self.posthoc_matrix] if has_posthoc else 'N/A',
            'Posthoc_Matrix_stars': [[self.make_stars_printed(self.make_stars(element)) for element in row] for row in self.posthoc_matrix] if has_posthoc else 'N/A',
        }

    def log(self, *args, **kwargs):
        '''
        Append a message to ``self.summary``.

        Positional arguments are converted to strings and joined with
        spaces; keyword arguments are accepted but ignored.
        '''
        message = ' '.join(map(str, args))
        # print(message, **kwargs)
        self.summary += '\n' + message

    def AddWarning(self, warning_id):
        '''Log the warning text for ``warning_id`` and store it in ``self.warnings``.'''
        message = self.warning_ids_all[warning_id]
        self.log(message)
        self.warnings.append(message)
@@ -0,0 +1,85 @@
1
+ from statsmodels.stats.diagnostic import lilliefors
2
+ from scipy.stats import shapiro, normaltest, anderson
3
+
4
+
5
class NormalityTests():
    '''
    Normality tests mixin

    For background on minimal sample sizes see:
    Power comparisons of Shapiro-Wilk, Kolmogorov-Smirnov,
    Lilliefors and Anderson-Darling tests, Nornadiah Mohd Razali1, Yap Bee Wah1
    '''

    def check_normality(self, data):
        '''
        Run four normality tests on one group.

        Returns a tuple (sw, lf, ad, ap) for Shapiro-Wilk, Lilliefors,
        Anderson-Darling and D'Agostino-Pearson. Each entry is True when
        the test's p-value is above 0.05, False otherwise, and None when
        the test was skipped because the group has fewer than 20 values.
        '''
        alpha = 0.05
        size = len(data)

        # Shapiro-Wilk test
        _, sw_p_value = shapiro(data)
        # Plain Python bools are returned on purpose (not numpy bools).
        sw = bool(sw_p_value > alpha)

        # Lilliefors test
        _, lf_p_value = lilliefors(data, dist='norm')
        lf = bool(lf_p_value > alpha)

        # Anderson-Darling test
        ad = None
        if size >= 20:
            _, ad_p_value = self.anderson_get_p(data, dist='norm')
            ad = bool(ad_p_value > alpha)

        # D'Agostino-Pearson test
        # test result is skewed if n<20
        ap = None
        if size >= 20:
            _, ap_p_value = normaltest(data)
            ap = bool(ap_p_value > alpha)

        return (sw, lf, ad, ap)

    def anderson_get_p(self, data, dist='norm'):
        '''
        Return (statistic, p-value) for the Anderson-Darling test.

        The p-value is computed from the size-adjusted statistic with a
        piecewise exponential approximation, following the method cited as:
        Computation of Probability Associated with Anderson-Darling Statistic
        Lorentz Jantschi and Sorana D. Bolboaca, 2018 - Mathematics
        '''
        euler = 2.718281828459045
        size = len(data)

        statistic, _critical_values, _significance_levels = anderson(
            data, dist=dist)

        # adjust the statistic for small sample sizes:
        adjusted = statistic*(1 + 0.75/size + 2.25/(size**2))

        if adjusted >= 0.6:
            return statistic, euler**(1.2937 - 5.709*adjusted + 0.0186*adjusted**2)
        if adjusted > 0.34:
            return statistic, euler**(0.9177 - 4.279*adjusted - 1.38*adjusted**2)
        if adjusted > 0.2:
            return statistic, 1 - euler**(-8.318 + 42.796*adjusted - 59.938*adjusted**2)
        if adjusted <= 0.2:
            return statistic, 1 - euler**(-13.436 + 101.14*adjusted - 223.73*adjusted**2)

        # Reached only when no comparison above holds (e.g. a NaN statistic).
        return statistic, None
@@ -0,0 +1,173 @@
1
+ import numpy as np
2
+ import scikit_posthocs as sp
3
+ from statsmodels.stats.anova import AnovaRM
4
+ from scipy.stats import ttest_rel, ttest_ind, ttest_1samp, wilcoxon, mannwhitneyu, f_oneway, kruskal, friedmanchisquare
5
+
6
+
7
+
8
class StatisticalTests():
    '''
    Statistical tests mixin

    Each test method reads the groups from ``self.data`` and returns a
    ``(statistic, p_value)`` pair. For the tests that support it, a
    one-tailed p-value is obtained by halving the two-sided one.
    NOTE(review): halving is only valid when the observed effect lies in
    the hypothesised direction - confirm this is the intended contract.
    '''

    def run_test_auto(self):
        '''
        Select and run a test from the number of groups, ``self.paired``
        and ``self.parametric``. Does nothing if there are no groups.
        '''
        if self.n_groups == 1:
            test_id = 't_test_single_sample' if self.parametric else 'wilcoxon_single_sample'
        elif self.n_groups == 2:
            if self.paired:
                test_id = 't_test_paired' if self.parametric else 'wilcoxon'
            else:
                test_id = 't_test_independent' if self.parametric else 'mann_whitney'
        elif self.n_groups >= 3:
            if self.paired:
                test_id = 'anova_1w_rm' if self.parametric else 'friedman'
            else:
                test_id = 'anova_1w_ordinary' if self.parametric else 'kruskal_wallis'
        else:
            return

        self.run_test_by_id(test_id)

    def run_test_by_id(self, test_id):
        '''
        Run the test identified by ``test_id`` and store its outcome.

        Sets ``self.test_name``, ``self.test_id``, ``self.test_stat`` and
        ``self.p_value``, and overwrites ``self.paired`` according to
        whether the test is listed in ``self.test_ids_dependent``.

        Raises:
            KeyError: if ``test_id`` is not a known test id.
        '''
        # id -> (printable name, bound method implementing the test)
        tests = {
            'anova_1w_ordinary': ('Ordinary One-Way ANOVA', self.anova_1w_ordinary),
            'anova_1w_rm': ('Repeated Measures One-Way ANOVA', self.anova_1w_rm),
            'friedman': ('Friedman test', self.friedman),
            'kruskal_wallis': ('Kruskal-Wallis test', self.kruskal_wallis),
            'mann_whitney': ('Mann-Whitney U test', self.mann_whitney),
            't_test_independent': ('t-test for independent samples', self.t_test_independent),
            't_test_paired': ('t-test for paired samples', self.t_test_paired),
            't_test_single_sample': ('Single-sample t-test', self.t_test_single_sample),
            'wilcoxon': ('Wilcoxon signed-rank test', self.wilcoxon),
            'wilcoxon_single_sample': ('Wilcoxon signed-rank test for single sample', self.wilcoxon_single_sample),
        }

        test_name, test_function = tests[test_id]
        stat, p_value = test_function()

        self.paired = test_id in self.test_ids_dependent

        self.test_name = test_name
        self.test_id = test_id
        self.test_stat = stat
        self.p_value = p_value

    def anova_1w_ordinary(self):
        '''Ordinary one-way ANOVA over all groups; always reported as two-tailed.'''
        stat, p_value = f_oneway(*self.data)
        self.tails = 2
        return stat, p_value

    def anova_1w_rm(self):
        """
        Perform repeated measures one-way ANOVA test.

        ``self.data`` is converted to long format, with the position inside
        a group used as the subject and the group index as the within
        factor. Always reported as two-tailed.
        """
        df = self.matrix_to_dataframe(self.data)
        res = AnovaRM(df, 'Value', 'Row', within=['Col']).fit()
        # The table is indexed by factor name, so take the first row by
        # position explicitly instead of relying on integer-key fallback.
        stat = res.anova_table['F Value'].iloc[0]
        p_value = res.anova_table['Pr > F'].iloc[0]

        self.tails = 2
        return stat, p_value

    def friedman(self):
        '''Friedman test over all groups; always reported as two-tailed.'''
        stat, p_value = friedmanchisquare(*self.data)
        self.tails = 2
        return stat, p_value

    def kruskal_wallis(self):
        '''
        Kruskal-Wallis test over all groups; always reported as two-tailed.

        When the result is significant (p < 0.05) and ``self.posthoc`` is
        set, Dunn's multiple comparisons with Bonferroni adjustment are
        stored in ``self.posthoc_matrix``.
        '''
        stat, p_value = kruskal(*self.data)
        # Same as the other multi-group tests: the reported tails must
        # match the two-sided p-value that is returned.
        self.tails = 2

        # Perform Dunn's multiple comparisons if Kruskal-Wallis is significant
        if p_value < 0.05 and self.posthoc:
            self.posthoc_matrix = sp.posthoc_dunn(
                self.data, p_adjust='bonferroni').values.tolist()
        return stat, p_value

    def mann_whitney(self):
        '''Mann-Whitney U test on the first two groups.'''
        stat, p_value = mannwhitneyu(
            self.data[0], self.data[1], alternative='two-sided')
        if self.tails == 1:
            p_value /= 2
        return stat, p_value

    def t_test_independent(self):
        '''t-test for two independent samples (first two groups).'''
        stat, p_value = ttest_ind(
            self.data[0], self.data[1])
        if self.tails == 1:
            p_value /= 2
        return stat, p_value

    def t_test_paired(self):
        '''t-test for two paired samples (first two groups).'''
        stat, p_value = ttest_rel(
            self.data[0], self.data[1])
        if self.tails == 1:
            p_value /= 2
        return stat, p_value

    def t_test_single_sample(self):
        '''
        Single-sample t-test of the first group against ``self.popmean``.

        If no population mean was set, 0 is used and a warning is recorded.
        '''
        if self.popmean is None:
            self.popmean = 0
            self.AddWarning('no_pop_mean_set')
        stat, p_value = ttest_1samp(self.data[0], self.popmean)
        if self.tails == 1:
            p_value /= 2
        return stat, p_value

    def wilcoxon(self):
        '''Wilcoxon signed-rank test on the first two groups.'''
        # Inside the method body the bare name resolves to the imported
        # scipy function, not to this method.
        stat, p_value = wilcoxon(self.data[0], self.data[1])
        if self.tails == 1:
            p_value /= 2
        return stat, p_value

    def wilcoxon_single_sample(self):
        '''
        Wilcoxon signed-rank test of the first group against ``self.popmean``.

        If no population mean was set, 0 is used and a warning is recorded.
        '''
        if self.popmean is None:
            self.popmean = 0
            self.AddWarning('no_pop_mean_set')
        data = [i - self.popmean for i in self.data[0]]
        stat, p_value = wilcoxon(data)
        if self.tails == 1:
            p_value /= 2
        return stat, p_value