AutoStatLib 0.2.0__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of AutoStatLib might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: AutoStatLib
3
- Version: 0.2.0
3
+ Version: 0.2.2
4
4
  Summary: AutoStatLib - a simple statistical analysis tool
5
5
  Author: Stemonitis, SciWare LLC
6
6
  Author-email: konung-yaropolk <yaropolk1995@gmail.com>
@@ -531,6 +531,7 @@ License-File: LICENSE
531
531
  Requires-Dist: numpy
532
532
  Requires-Dist: scipy
533
533
  Requires-Dist: statsmodels
534
+ Requires-Dist: pandas
534
535
 
535
536
  # AutoStatLib - python library for automated statistical analysis
536
537
 
@@ -569,7 +570,7 @@ data_uniform = [list(np.random.uniform(i+3, i+1, n)) for i in range(groups)]
569
570
 
570
571
 
571
572
  # set the parameters:
572
- paired = False # is groups dependend or not
573
+ paired = False # is groups dependent or not
573
574
  tails = 2 # two-tailed or one-tailed result
574
575
  popmean = 0 # population mean - only for single-sample tests needed
575
576
 
@@ -585,7 +586,7 @@ analysis.RunAuto()
585
586
 
586
587
  or you can choose specific tests:
587
588
  ```python
588
- # 2 groups independend:
589
+ # 2 groups independent:
589
590
  analysis.RunTtest()
590
591
  analysis.RunMannWhitney()
591
592
 
@@ -594,10 +595,11 @@ analysis.RunTtestPaired()
594
595
  analysis.RunWilcoxon()
595
596
 
596
597
  # 3 and more independent groups comparison:
597
- analysis.RunAnova()
598
+ analysis.RunOnewayAnova()
598
599
  analysis.RunKruskalWallis()
599
600
 
600
601
  # 3 and more dependent groups comparison:
602
+ analysis.RunOnewayAnovaRM()
601
603
  analysis.RunFriedman()
602
604
 
603
605
  # single group tests"
@@ -647,20 +649,40 @@ If errors occured, *GetResult()* returns an empty dictionary
647
649
 
648
650
 
649
651
  ---
650
- ## Pre-Alpha dev status.
651
-
652
- ### TODO:
653
-
654
- --Kruskal-Wallis test - add Dunn's multiple comparisons
655
- --Anova: add 2-way anova and 3-way(?)
656
-
657
- check:
658
- --Wilcoxon signed-rank test and Mann-whitney - check mechanism of one-tailed calc, looks like it works wrong
659
-
660
-
661
- checked tests:
662
- --Wilcoxon 2 tail - ok
663
- --Mann-whitney 2 tail - ok
652
+ ## Pre-Alpha dev status.
653
+
654
+ ### TODO:
655
+
656
+ -- Kruskal-Wallis test - add Dunn's multiple comparisons
657
+ -- Anova: add 2-way anova and 3-way anova
658
+ -- one-way Anova: add repeated measures (for normal dependent values) with and without Geisser-Greenhouse correction
659
+ -- one-way Anova: add Brown-Forsythe and Welch (for normal independent values with unequal SDs between groups)
660
+ -- paired T-test: add ratio-paired t-test (ratios of paired values are consistent)
661
+ -- add Welch test (for norm data unequal variances)
662
+ -- add Kolmogorov-smirnov test (unpaired nonparametric 2 sample, compare cumulative distributions)
663
+ -- add independent t-test with Welch correction (do not assume equal SDs in groups)
664
+ -- add correlation test, correlation diagram
665
+ -- add linear regression, regression diagram
666
+ -- add QQ plot
667
+ -- n-sample tests: add onetail option
668
+
669
+ ✅ done -- detailed normality test results
670
+
671
+
672
+ checked tests:
673
+ 1-sample:
674
+ --Wilcoxon 2,1 tails - ok
675
+ --t-tests 2,1 tails -ok
676
+
677
+ 2-sample:
678
+ --Wilcoxon 2,1 tails - ok
679
+ --Mann-whitney 2,1 tails - ok
680
+ --t-tests 2,1 tails -ok
681
+
682
+ n-sample:
683
+ --Kruskal-Wallis 2 tail - ok
684
+ --Friedman 2 tail - ok
685
+ --one-way ANOVA 2 tail - ok
664
686
 
665
687
 
666
688
 
@@ -35,7 +35,7 @@ data_uniform = [list(np.random.uniform(i+3, i+1, n)) for i in range(groups)]
35
35
 
36
36
 
37
37
  # set the parameters:
38
- paired = False # is groups dependend or not
38
+ paired = False # is groups dependent or not
39
39
  tails = 2 # two-tailed or one-tailed result
40
40
  popmean = 0 # population mean - only for single-sample tests needed
41
41
 
@@ -51,7 +51,7 @@ analysis.RunAuto()
51
51
 
52
52
  or you can choose specific tests:
53
53
  ```python
54
- # 2 groups independend:
54
+ # 2 groups independent:
55
55
  analysis.RunTtest()
56
56
  analysis.RunMannWhitney()
57
57
 
@@ -60,10 +60,11 @@ analysis.RunTtestPaired()
60
60
  analysis.RunWilcoxon()
61
61
 
62
62
  # 3 and more independent groups comparison:
63
- analysis.RunAnova()
63
+ analysis.RunOnewayAnova()
64
64
  analysis.RunKruskalWallis()
65
65
 
66
66
  # 3 and more dependent groups comparison:
67
+ analysis.RunOnewayAnovaRM()
67
68
  analysis.RunFriedman()
68
69
 
69
70
  # single group tests"
@@ -113,20 +114,40 @@ If errors occured, *GetResult()* returns an empty dictionary
113
114
 
114
115
 
115
116
  ---
116
- ## Pre-Alpha dev status.
117
-
118
- ### TODO:
119
-
120
- --Kruskal-Wallis test - add Dunn's multiple comparisons
121
- --Anova: add 2-way anova and 3-way(?)
122
-
123
- check:
124
- --Wilcoxon signed-rank test and Mann-whitney - check mechanism of one-tailed calc, looks like it works wrong
125
-
126
-
127
- checked tests:
128
- --Wilcoxon 2 tail - ok
129
- --Mann-whitney 2 tail - ok
117
+ ## Pre-Alpha dev status.
118
+
119
+ ### TODO:
120
+
121
+ -- Kruskal-Wallis test - add Dunn's multiple comparisons
122
+ -- Anova: add 2-way anova and 3-way anova
123
+ -- one-way Anova: add repeated measures (for normal dependent values) with and without Geisser-Greenhouse correction
124
+ -- one-way Anova: add Brown-Forsythe and Welch (for normal independent values with unequal SDs between groups)
125
+ -- paired T-test: add ratio-paired t-test (ratios of paired values are consistent)
126
+ -- add Welch test (for norm data unequal variances)
127
+ -- add Kolmogorov-smirnov test (unpaired nonparametric 2 sample, compare cumulative distributions)
128
+ -- add independent t-test with Welch correction (do not assume equal SDs in groups)
129
+ -- add correlation test, correlation diagram
130
+ -- add linear regression, regression diagram
131
+ -- add QQ plot
132
+ -- n-sample tests: add onetail option
133
+
134
+ ✅ done -- detailed normality test results
135
+
136
+
137
+ checked tests:
138
+ 1-sample:
139
+ --Wilcoxon 2,1 tails - ok
140
+ --t-tests 2,1 tails -ok
141
+
142
+ 2-sample:
143
+ --Wilcoxon 2,1 tails - ok
144
+ --Mann-whitney 2,1 tails - ok
145
+ --t-tests 2,1 tails -ok
146
+
147
+ n-sample:
148
+ --Kruskal-Wallis 2 tail - ok
149
+ --Friedman 2 tail - ok
150
+ --one-way ANOVA 2 tail - ok
130
151
 
131
152
 
132
153
 
@@ -1,6 +1,8 @@
1
1
  import numpy as np
2
+ import pandas as pd
2
3
  from statsmodels.stats.diagnostic import lilliefors
3
- from scipy.stats import ttest_rel, ttest_ind, ttest_1samp, wilcoxon, mannwhitneyu, f_oneway, kruskal, friedmanchisquare, shapiro, kstest, anderson, normaltest
4
+ from statsmodels.stats.anova import AnovaRM
5
+ from scipy.stats import ttest_rel, ttest_ind, ttest_1samp, wilcoxon, mannwhitneyu, f_oneway, kruskal, friedmanchisquare, shapiro, anderson, normaltest
4
6
 
5
7
 
6
8
  class __StatisticalTests():
@@ -8,37 +10,113 @@ class __StatisticalTests():
8
10
  Statistical tests mixin
9
11
  '''
10
12
 
11
- def anova(self):
13
+ def run_test_auto(self):
14
+
15
+ if self.n_groups == 1:
16
+ if self.parametric:
17
+ self.run_test_by_id('t_test_single_sample')
18
+ else:
19
+ self.run_test_by_id('wilcoxon_single_sample')
20
+
21
+ elif self.n_groups == 2:
22
+ if self.paired:
23
+ if self.parametric:
24
+ self.run_test_by_id('t_test_paired')
25
+ else:
26
+ self.run_test_by_id('wilcoxon')
27
+ else:
28
+ if self.parametric:
29
+ self.run_test_by_id('t_test_independent')
30
+ else:
31
+ self.run_test_by_id('mann_whitney')
32
+
33
+ elif self.n_groups >= 3:
34
+ if self.paired:
35
+ if self.parametric:
36
+ self.run_test_by_id('anova_1w_rm')
37
+ else:
38
+ self.run_test_by_id('friedman')
39
+ else:
40
+ if self.parametric:
41
+ self.run_test_by_id('anova_1w_ordinary')
42
+ else:
43
+ self.run_test_by_id('kruskal_wallis')
44
+
45
+ else:
46
+ pass
47
+
48
+ def run_test_by_id(self, test_id):
49
+
50
+ test_names_dict = {
51
+ 'anova_1w_ordinary': 'Ordinary One-Way ANOVA',
52
+ 'anova_1w_rm': 'Repeated Measures One-Way ANOVA',
53
+ 'friedman': 'Friedman test',
54
+ 'kruskal_wallis': 'Kruskal-Wallis test',
55
+ 'mann_whitney': 'Mann-Whitney U test',
56
+ 't_test_independent': 't-test for independent samples',
57
+ 't_test_paired': 't-test for paired samples',
58
+ 't_test_single_sample': 'Single-sample t-test',
59
+ 'wilcoxon': 'Wilcoxon signed-rank test',
60
+ 'wilcoxon_single_sample': 'Wilcoxon signed-rank test for single sample',
61
+ }
62
+
63
+ match test_id:
64
+ case 'anova_1w_ordinary': stat, p_value = self.anova_1w_ordinary()
65
+ case 'anova_1w_rm': stat, p_value = self.anova_1w_rm()
66
+ case 'friedman': stat, p_value = self.friedman()
67
+ case 'kruskal_wallis': stat, p_value = self.kruskal_wallis()
68
+ case 'mann_whitney': stat, p_value = self.mann_whitney()
69
+ case 't_test_independent': stat, p_value = self.t_test_independent()
70
+ case 't_test_paired': stat, p_value = self.t_test_paired()
71
+ case 't_test_single_sample': stat, p_value = self.t_test_single_sample()
72
+ case 'wilcoxon': stat, p_value = self.wilcoxon()
73
+ case 'wilcoxon_single_sample': stat, p_value = self.wilcoxon_single_sample()
74
+
75
+ if test_id in self.test_ids_dependent:
76
+ self.paired = True
77
+ else:
78
+ self.paired = False
79
+
80
+ self.test_name = test_names_dict[test_id]
81
+ self.test_id = test_id
82
+ self.test_stat = stat
83
+ self.p_value = p_value
84
+
85
+ def anova_1w_ordinary(self):
12
86
  stat, p_value = f_oneway(*self.data)
13
87
  self.tails = 2
14
88
  # if self.tails == 1 and p_value > 0.5:
15
89
  # p_value /= 2
16
90
  # if self.tails == 1:
17
91
  # p_value /= 2
18
- self.test_name = 'ANOVA'
19
- self.test_id = 'anova'
20
- self.paired = False
21
- self.test_stat = stat
22
- self.p_value = p_value
92
+ return stat, p_value
93
+
94
+ def anova_1w_rm(self):
95
+ """
96
+ Perform repeated measures one-way ANOVA test.
97
+
98
+ Parameters:
99
+ data: list of lists, where each sublist represents repeated measures for a subject
100
+ """
101
+
102
+ df = self.matrix_to_dataframe(self.data)
103
+ res = AnovaRM(df, 'Value', 'Row', within=['Col']).fit()
104
+ stat = res.anova_table['F Value'][0]
105
+ p_value = res.anova_table['Pr > F'][0]
106
+
107
+ self.tails = 2
108
+ return stat, p_value
23
109
 
24
- def friedman_test(self):
110
+ def friedman(self):
25
111
  stat, p_value = friedmanchisquare(*self.data)
26
112
  self.tails = 2
27
- self.test_name = 'Friedman test'
28
- self.test_id = 'friedman'
29
- self.paired = True
30
- self.test_stat = stat
31
- self.p_value = p_value
113
+ return stat, p_value
32
114
 
33
- def kruskal_wallis_test(self):
115
+ def kruskal_wallis(self):
34
116
  stat, p_value = kruskal(*self.data)
35
- self.test_name = 'Kruskal-Wallis test'
36
- self.test_id = 'kruskal_wallis'
37
- self.paired = False
38
- self.test_stat = stat
39
- self.p_value = p_value
117
+ return stat, p_value
40
118
 
41
- def mann_whitney_u_test(self):
119
+ def mann_whitney(self):
42
120
  stat, p_value = mannwhitneyu(
43
121
  self.data[0], self.data[1], alternative='two-sided')
44
122
  if self.tails == 1:
@@ -49,78 +127,53 @@ class __StatisticalTests():
49
127
  # self.data[0], self.data[1], alternative='two-sided' if self.tails == 2 else 'less')
50
128
  # if self.tails == 1 and p_value > 0.5:
51
129
  # p_value = 1-p_value
130
+ return stat, p_value
52
131
 
53
- self.test_name = 'Mann-Whitney U test'
54
- self.test_id = 'mann_whitney'
55
- self.paired = False
56
- self.test_stat = stat
57
- self.p_value = p_value
58
-
59
- def t_test_independend(self):
60
- t_stat, t_p_value = ttest_ind(
132
+ def t_test_independent(self):
133
+ stat, p_value = ttest_ind(
61
134
  self.data[0], self.data[1])
62
135
  if self.tails == 1:
63
- t_p_value /= 2
64
- self.test_name = 't-test for independend samples'
65
- self.test_id = 't_test_independend'
66
- self.paired = False
67
- self.test_stat = t_stat
68
- self.p_value = t_p_value
136
+ p_value /= 2
137
+ return stat, p_value
69
138
 
70
139
  def t_test_paired(self):
71
- t_stat, t_p_value = ttest_rel(
140
+ stat, p_value = ttest_rel(
72
141
  self.data[0], self.data[1])
73
142
  if self.tails == 1:
74
- t_p_value /= 2
75
- self.test_name = 't-test for paired samples'
76
- self.test_id = 't_test_paired'
77
- self.paired = True
78
- self.test_stat = t_stat
79
- self.p_value = t_p_value
143
+ p_value /= 2
144
+ return stat, p_value
80
145
 
81
146
  def t_test_single_sample(self):
82
147
  if self.popmean == None:
83
148
  self.popmean = 0
84
149
  self.AddWarning('no_pop_mean_set')
85
- t_stat, t_p_value = ttest_1samp(self.data[0], self.popmean)
150
+ stat, p_value = ttest_1samp(self.data[0], self.popmean)
86
151
  if self.tails == 1:
87
- t_p_value /= 2
88
- self.test_name = 'Single-sample t-test'
89
- self.test_id = 't_test_single_sample'
90
- self.paired = False
91
- self.test_stat = t_stat
92
- self.p_value = t_p_value
152
+ p_value /= 2
153
+ return stat, p_value
93
154
 
155
+ def wilcoxon(self):
156
+ stat, p_value = wilcoxon(self.data[0], self.data[1])
157
+ if self.tails == 1:
158
+ p_value /= 2
159
+ return stat, p_value
160
+
94
161
  def wilcoxon_single_sample(self):
95
162
  if self.popmean == None:
96
163
  self.popmean = 0
97
164
  self.AddWarning('no_pop_mean_set')
98
165
  data = [i - self.popmean for i in self.data[0]]
99
- w_stat, p_value = wilcoxon(data)
166
+ stat, p_value = wilcoxon(data)
100
167
  if self.tails == 1:
101
168
  p_value /= 2
102
- self.test_name = 'Wilcoxon signed-rank test for single sample'
103
- self.test_id = 'wilcoxon_single_sample'
104
- self.paired = False
105
- self.test_stat = w_stat
106
- self.p_value = p_value
107
-
108
- def wilcoxon(self):
109
- stat, p_value = wilcoxon(self.data[0], self.data[1])
110
- if self.tails == 1:
111
- p_value /= 2
112
- self.test_name = 'Wilcoxon signed-rank test'
113
- self.test_id = 'wilcoxon'
114
- self.paired = True
115
- self.test_stat = stat
116
- self.p_value = p_value
169
+ return stat, p_value
117
170
 
118
171
 
119
172
  class __NormalityTests():
120
173
  '''
121
174
  Normality tests mixin
122
175
 
123
- see the article about minimum sample size for tests:
176
+ see the article about minimal sample size for tests:
124
177
  Power comparisons of Shapiro-Wilk, Kolmogorov-Smirnov,
125
178
  Lilliefors and Anderson-Darling tests, Nornadiah Mohd Razali1, Yap Bee Wah1
126
179
  '''
@@ -171,7 +224,7 @@ class __NormalityTests():
171
224
 
172
225
  def anderson_get_p(self, data, dist='norm'):
173
226
  '''
174
- calculating p-value for Anderson-Darling test using the method described here:
227
+ calculating p-value for Anderson-Darling test using the method described here:
175
228
  Computation of Probability Associated with Anderson-Darling Statistic
176
229
  Lorentz Jantschi and Sorana D. Bolboaca, 2018 - Mathematics
177
230
 
@@ -199,6 +252,65 @@ class __NormalityTests():
199
252
  return ad, p
200
253
 
201
254
 
255
+ class __Helpers():
256
+
257
+ def matrix_to_dataframe(self, matrix):
258
+ data = []
259
+ cols = []
260
+ rows = []
261
+
262
+ order_number = 1
263
+ for i, row in enumerate(matrix):
264
+ for j, value in enumerate(row):
265
+ data.append(value)
266
+ cols.append(i)
267
+ rows.append(j)
268
+ order_number += 1
269
+
270
+ df = pd.DataFrame(
271
+ {'Row': rows, 'Col': cols, 'Value': data})
272
+ return df
273
+
274
+ def create_results_dict(self) -> dict:
275
+
276
+ self.stars_int = self.make_stars()
277
+ self.stars_str = '*' * self.stars_int if self.stars_int else 'ns'
278
+
279
+ return {
280
+ 'p-value': self.make_p_value_printed(),
281
+ 'Significance(p<0.05)': True if self.p_value.item() < 0.05 else False,
282
+ 'Stars_Printed': self.stars_str,
283
+ 'Test_Name': self.test_name,
284
+ 'Groups_Compared': self.n_groups,
285
+ 'Population_Mean': self.popmean if self.n_groups == 1 else 'N/A',
286
+ 'Data_Normaly_Distributed': self.parametric,
287
+ 'Parametric_Test_Applied': True if self.test_id in self.test_ids_parametric else False,
288
+ 'Paired_Test_Applied': self.paired,
289
+ 'Tails': self.tails,
290
+ 'p-value_exact': self.p_value.item(),
291
+ 'Stars': self.stars_int,
292
+ # 'Stat_Value': self.test_stat.item(),
293
+ 'Warnings': self.warnings,
294
+ 'Groups_N': [len(self.data[i]) for i in range(len(self.data))],
295
+ 'Groups_Median': [np.median(self.data[i]).item() for i in range(len(self.data))],
296
+ 'Groups_Mean': [np.mean(self.data[i]).item() for i in range(len(self.data))],
297
+ 'Groups_SD': [np.std(self.data[i]).item() for i in range(len(self.data))],
298
+ 'Groups_SE': [np.std(self.data[i]).item() / np.sqrt(len(self.data)).item() for i in range(len(self.data))],
299
+ # actually returns list of lists of numpy dtypes of float64, next make it return regular floats:
300
+ 'Samples': self.data,
301
+ }
302
+
303
+ def log(self, *args, **kwargs):
304
+ message = ' '.join(map(str, args))
305
+ # print(message, **kwargs)
306
+ self.summary += '\n' + message
307
+
308
+ def AddWarning(self, warning_id):
309
+ message = self.warning_ids_all[warning_id]
310
+ self.log(message)
311
+ self.warnings.append(message)
312
+
313
+
202
314
  class __TextFormatting():
203
315
  '''
204
316
  Text formatting mixin
@@ -293,45 +405,6 @@ class __TextFormatting():
293
405
  else:
294
406
  self.log(i, ':', ' ' * shift, self.results[i])
295
407
 
296
- def create_results_dict(self) -> dict:
297
-
298
- self.stars_int = self.make_stars()
299
- self.stars_str = '*' * self.stars_int if self.stars_int else 'ns'
300
-
301
- return {
302
- 'p-value': self.make_p_value_printed(),
303
- 'Significance(p<0.05)': True if self.p_value.item() < 0.05 else False,
304
- 'Stars_Printed': self.stars_str,
305
- 'Test_Name': self.test_name,
306
- 'Groups_Compared': self.n_groups,
307
- 'Population_Mean': self.popmean if self.n_groups == 1 else 'N/A',
308
- 'Data_Normaly_Distributed': self.parametric,
309
- 'Parametric_Test_Applied': True if self.test_id in self.test_ids_parametric else False,
310
- 'Paired_Test_Applied': self.paired,
311
- 'Tails': self.tails,
312
- 'p-value_exact': self.p_value.item(),
313
- 'Stars': self.stars_int,
314
- # 'Stat_Value': self.test_stat.item(),
315
- 'Warnings': self.warnings,
316
- 'Groups_N': [len(self.data[i]) for i in range(len(self.data))],
317
- 'Groups_Median': [np.median(self.data[i]).item() for i in range(len(self.data))],
318
- 'Groups_Mean': [np.mean(self.data[i]).item() for i in range(len(self.data))],
319
- 'Groups_SD': [np.std(self.data[i]).item() for i in range(len(self.data))],
320
- 'Groups_SE': [np.std(self.data[i]).item() / np.sqrt(len(self.data)).item() for i in range(len(self.data))],
321
- # actually returns list of lists of numpy dtypes of float64, next make it return regular floats:
322
- 'Samples': self.data,
323
- }
324
-
325
- def log(self, *args, **kwargs):
326
- message = ' '.join(map(str, args))
327
- # print(message, **kwargs)
328
- self.summary += '\n' + message
329
-
330
- def AddWarning(self, warning_id):
331
- message = self.warning_ids_all[warning_id]
332
- self.log(message)
333
- self.warnings.append(message)
334
-
335
408
 
336
409
  class __InputFormatting():
337
410
  def floatify_recursive(self, data):
@@ -349,7 +422,7 @@ class __InputFormatting():
349
422
  return None
350
423
 
351
424
 
352
- class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting, __InputFormatting):
425
+ class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting, __InputFormatting, __Helpers):
353
426
  '''
354
427
  The main class
355
428
  *documentation placeholder*
@@ -372,21 +445,49 @@ class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting
372
445
  self.n_groups = len(self.groups_list)
373
446
  self.warning_flag_non_numeric_data = False
374
447
  self.summary = ''
375
- self.test_ids_parametric = ['anova',
376
- 't_test_independend',
377
- 't_test_paired',
378
- 't_test_single_sample',]
448
+
449
+ # test IDs classification:
379
450
  self.test_ids_all = [ # in aplhabetical order
380
- 'anova',
451
+ 'anova_1w_ordinary',
452
+ 'anova_1w_rm',
381
453
  'friedman',
382
454
  'kruskal_wallis',
383
455
  'mann_whitney',
384
- 't_test_independend',
456
+ 't_test_independent',
385
457
  't_test_paired',
386
458
  't_test_single_sample',
387
459
  'wilcoxon',
388
460
  'wilcoxon_single_sample',
389
461
  ]
462
+ self.test_ids_parametric = [
463
+ 'anova_1w_ordinary',
464
+ 'anova_1w_rm'
465
+ 't_test_independent',
466
+ 't_test_paired',
467
+ 't_test_single_sample',
468
+ ]
469
+ self.test_ids_dependent = [
470
+ 'anova_1w_rm',
471
+ 'friedman',
472
+ 't_test_paired',
473
+ 'wilcoxon',
474
+ ]
475
+ self.test_ids_3sample = [
476
+ 'anova_1w_ordinary',
477
+ 'anova_1w_rm',
478
+ 'friedman',
479
+ 'kruskal_wallis',
480
+ ]
481
+ self.test_ids_2sample = [
482
+ 'mann_whitney',
483
+ 't_test_independent',
484
+ 't_test_paired',
485
+ 'wilcoxon',
486
+ ]
487
+ self.test_ids_1sample = [
488
+ 't_test_single_sample',
489
+ 'wilcoxon_single_sample',
490
+ ]
390
491
  self.warning_ids_all = {
391
492
  # 'not-numeric': '\nWarning: Non-numeric data was found in input and ignored.\n Make sure the input data is correct to get the correct results\n',
392
493
  'param_test_with_non-normal_data': '\nWarning: Parametric test was manualy chosen for Not-Normaly distributed data.\n The results might be skewed. \n Please, run non-parametric test or preform automatic test selection.\n',
@@ -425,28 +526,18 @@ class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting
425
526
  assert self.data, 'There is no input data'
426
527
  assert self.tails in [1, 2], 'Tails parameter can be 1 or 2 only'
427
528
  assert test in self.test_ids_all or test == 'auto', 'Wrong test id choosen, ensure you called correct function'
428
- assert not (self.n_groups > 1
429
- and (test == 't_test_single_sample'
430
- or test == 'wilcoxon_single_sample')), 'Only one group of data must be given for single-group tests'
431
529
  assert all(len(
432
530
  group) >= 4 for group in self.data), 'Each group must contain at least four values'
433
- assert not (self.paired == True and not all(len(lst) == len(
434
- self.data[0]) for lst in self.data)), 'Paired groups must be the same length'
435
- assert not (test == 'friedman' and not all(len(lst) == len(
436
- self.data[0]) for lst in self.data)), 'Paired groups must be the same length for Friedman Chi Square test'
437
- assert not (test == 't_test_paired' and not all(len(lst) == len(
438
- self.data[0]) for lst in self.data)), 'Paired groups must be the same length for Paired t-test'
439
- assert not (test == 'wilcoxon' and not all(len(lst) == len(
440
- self.data[0]) for lst in self.data)), 'Paired groups must be the same length for Wilcoxon signed-rank test'
441
- assert not (test == 'friedman' and self.n_groups <
442
- 3), 'At least three groups of data must be given for 3-groups tests'
443
- assert not ((test == 'anova'
444
- or test == 'kruskal_wallis') and self.n_groups < 2), 'At least two groups of data must be given for ANOVA or Kruskal Wallis tests'
445
- assert not ((test == 'wilcoxon'
446
- or test == 't_test_independend'
447
- or test == 't_test_paired'
448
- or test == 'mann_whitney')
449
- and self.n_groups != 2), 'Only two groups of data must be given for 2-groups tests'
531
+ assert not (self.paired == True
532
+ and not all(len(lst) == len(self.data[0]) for lst in self.data)), 'Paired groups must have the same length'
533
+ assert not (test in self.test_ids_dependent
534
+ and not all(len(lst) == len(self.data[0]) for lst in self.data)), 'Groups must have the same length for dependent groups test'
535
+ assert not (test in self.test_ids_2sample
536
+ and self.n_groups != 2), f'Only two groups of data must be given for 2-groups tests, got {self.n_groups}'
537
+ assert not (test in self.test_ids_1sample
538
+ and self.n_groups > 1), f'Only one group of data must be given for single-group tests, got {self.n_groups}'
539
+ assert not (test in self.test_ids_3sample
540
+ and self.n_groups < 3), f'At least three groups of data must be given for multi-groups tests, got {self.n_groups}'
450
541
  except AssertionError as error:
451
542
  self.log('\nTest :', test)
452
543
  self.log('Error :', error)
@@ -490,27 +581,13 @@ class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting
490
581
  if not test == 'auto' and self.parametric and not test in self.test_ids_parametric:
491
582
  self.AddWarning('non-param_test_with_normal_data')
492
583
 
493
- if test == 'anova':
494
- self.anova()
495
- elif test == 'friedman':
496
- self.friedman_test()
497
- elif test == 'kruskal_wallis':
498
- self.kruskal_wallis_test()
499
- elif test == 'mann_whitney':
500
- self.mann_whitney_u_test()
501
- elif test == 't_test_independend':
502
- self.t_test_independend()
503
- elif test == 't_test_paired':
504
- self.t_test_paired()
505
- elif test == 't_test_single_sample':
506
- self.t_test_single_sample()
507
- elif test == 'wilcoxon':
508
- self.wilcoxon()
509
- elif test == 'wilcoxon_single_sample':
510
- self.wilcoxon_single_sample()
584
+ # run the test
585
+
586
+ if test in self.test_ids_all:
587
+ self.run_test_by_id(test)
511
588
  else:
512
- self.log('Automatic test selection preformed.')
513
- self.__auto()
589
+ self.run_test_auto()
590
+
514
591
 
515
592
  # print the results
516
593
  self.results = self.create_results_dict()
@@ -523,32 +600,7 @@ class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting
523
600
  if self.verbose == True:
524
601
  print(self.summary)
525
602
 
526
- def __auto(self):
527
603
 
528
- if self.n_groups == 2:
529
- if self.paired:
530
- if self.parametric:
531
- return self.t_test_paired()
532
- else:
533
- return self.wilcoxon()
534
- else:
535
- if self.parametric:
536
- return self.t_test_independend()
537
- else:
538
- return self.mann_whitney_u_test()
539
- elif self.n_groups == 1:
540
- if self.parametric:
541
- return self.t_test_single_sample()
542
- else:
543
- return self.wilcoxon_single_sample()
544
- else:
545
- if self.paired:
546
- return self.friedman_test()
547
- else:
548
- if self.parametric:
549
- return self.anova()
550
- else:
551
- return self.kruskal_wallis_test()
552
604
 
553
605
  # public methods:
554
606
  def RunAuto(self):
@@ -557,8 +609,11 @@ class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting
557
609
  def RunManual(self, test):
558
610
  self.__run_test(test)
559
611
 
560
- def RunAnova(self):
561
- self.__run_test(test='anova')
612
+ def RunOnewayAnova(self):
613
+ self.__run_test(test='anova_1w_ordinary')
614
+
615
+ def RunOnewayAnovaRM(self):
616
+ self.__run_test(test='anova_1w_rm')
562
617
 
563
618
  def RunFriedman(self):
564
619
  self.__run_test(test='friedman')
@@ -570,7 +625,7 @@ class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting
570
625
  self.__run_test(test='mann_whitney')
571
626
 
572
627
  def RunTtest(self):
573
- self.__run_test(test='t_test_independend')
628
+ self.__run_test(test='t_test_independent')
574
629
 
575
630
  def RunTtestPaired(self):
576
631
  self.__run_test(test='t_test_paired')
@@ -603,6 +658,9 @@ class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting
603
658
  else:
604
659
  return self.summary
605
660
 
661
+ def GetTestIDs(self):
662
+ return self.test_ids_all
663
+
606
664
  def PrintSummary(self):
607
665
  print(self.summary)
608
666
 
@@ -1,2 +1,2 @@
1
1
  # AutoStatLib package version:
2
- __version__ = "0.2.0"
2
+ __version__ = "0.2.2"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: AutoStatLib
3
- Version: 0.2.0
3
+ Version: 0.2.2
4
4
  Summary: AutoStatLib - a simple statistical analysis tool
5
5
  Author: Stemonitis, SciWare LLC
6
6
  Author-email: konung-yaropolk <yaropolk1995@gmail.com>
@@ -531,6 +531,7 @@ License-File: LICENSE
531
531
  Requires-Dist: numpy
532
532
  Requires-Dist: scipy
533
533
  Requires-Dist: statsmodels
534
+ Requires-Dist: pandas
534
535
 
535
536
  # AutoStatLib - python library for automated statistical analysis
536
537
 
@@ -569,7 +570,7 @@ data_uniform = [list(np.random.uniform(i+3, i+1, n)) for i in range(groups)]
569
570
 
570
571
 
571
572
  # set the parameters:
572
- paired = False # is groups dependend or not
573
+ paired = False # is groups dependent or not
573
574
  tails = 2 # two-tailed or one-tailed result
574
575
  popmean = 0 # population mean - only for single-sample tests needed
575
576
 
@@ -585,7 +586,7 @@ analysis.RunAuto()
585
586
 
586
587
  or you can choose specific tests:
587
588
  ```python
588
- # 2 groups independend:
589
+ # 2 groups independent:
589
590
  analysis.RunTtest()
590
591
  analysis.RunMannWhitney()
591
592
 
@@ -594,10 +595,11 @@ analysis.RunTtestPaired()
594
595
  analysis.RunWilcoxon()
595
596
 
596
597
  # 3 and more independent groups comparison:
597
- analysis.RunAnova()
598
+ analysis.RunOnewayAnova()
598
599
  analysis.RunKruskalWallis()
599
600
 
600
601
  # 3 and more dependent groups comparison:
602
+ analysis.RunOnewayAnovaRM()
601
603
  analysis.RunFriedman()
602
604
 
603
605
  # single group tests"
@@ -647,20 +649,40 @@ If errors occured, *GetResult()* returns an empty dictionary
647
649
 
648
650
 
649
651
  ---
650
- ## Pre-Alpha dev status.
651
-
652
- ### TODO:
653
-
654
- --Kruskal-Wallis test - add Dunn's multiple comparisons
655
- --Anova: add 2-way anova and 3-way(?)
656
-
657
- check:
658
- --Wilcoxon signed-rank test and Mann-whitney - check mechanism of one-tailed calc, looks like it works wrong
659
-
660
-
661
- checked tests:
662
- --Wilcoxon 2 tail - ok
663
- --Mann-whitney 2 tail - ok
652
+ ## Pre-Alpha dev status.
653
+
654
+ ### TODO:
655
+
656
+ -- Kruskal-Wallis test - add Dunn's multiple comparisons
657
+ -- Anova: add 2-way anova and 3-way anova
658
+ -- one-way Anova: add repeated measures (for normal dependent values) with and without Geisser-Greenhouse correction
659
+ -- one-way Anova: add Brown-Forsythe and Welch (for normal independent values with unequal SDs between groups)
660
+ -- paired T-test: add ratio-paired t-test (ratios of paired values are consistent)
661
+ -- add Welch test (for norm data unequal variances)
662
+ -- add Kolmogorov-smirnov test (unpaired nonparametric 2 sample, compare cumulative distributions)
663
+ -- add independent t-test with Welch correction (do not assume equal SDs in groups)
664
+ -- add correlation test, correlation diagram
665
+ -- add linear regression, regression diagram
666
+ -- add QQ plot
667
+ -- n-sample tests: add onetail option
668
+
669
+ ✅ done -- detailed normality test results
670
+
671
+
672
+ checked tests:
673
+ 1-sample:
674
+ --Wilcoxon 2,1 tails - ok
675
+ --t-tests 2,1 tails -ok
676
+
677
+ 2-sample:
678
+ --Wilcoxon 2,1 tails - ok
679
+ --Mann-whitney 2,1 tails - ok
680
+ --t-tests 2,1 tails -ok
681
+
682
+ n-sample:
683
+ --Kruskal-Wallis 2 tail - ok
684
+ --Friedman 2 tail - ok
685
+ --one-way ANOVA 2 tail - ok
664
686
 
665
687
 
666
688
 
@@ -0,0 +1,4 @@
1
+ numpy
2
+ scipy
3
+ statsmodels
4
+ pandas
@@ -1,3 +0,0 @@
1
- numpy
2
- scipy
3
- statsmodels
File without changes
File without changes
File without changes
File without changes