AutoStatLib 0.2.0__tar.gz → 0.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of AutoStatLib might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: AutoStatLib
3
- Version: 0.2.0
3
+ Version: 0.2.1
4
4
  Summary: AutoStatLib - a simple statistical analysis tool
5
5
  Author: Stemonitis, SciWare LLC
6
6
  Author-email: konung-yaropolk <yaropolk1995@gmail.com>
@@ -531,6 +531,7 @@ License-File: LICENSE
531
531
  Requires-Dist: numpy
532
532
  Requires-Dist: scipy
533
533
  Requires-Dist: statsmodels
534
+ Requires-Dist: pandas
534
535
 
535
536
  # AutoStatLib - python library for automated statistical analysis
536
537
 
@@ -569,7 +570,7 @@ data_uniform = [list(np.random.uniform(i+3, i+1, n)) for i in range(groups)]
569
570
 
570
571
 
571
572
  # set the parameters:
572
- paired = False # is groups dependend or not
573
+ paired = False # are the groups dependent (paired) or not
573
574
  tails = 2 # two-tailed or one-tailed result
574
575
  popmean = 0 # population mean - only needed for single-sample tests
575
576
 
@@ -585,7 +586,7 @@ analysis.RunAuto()
585
586
 
586
587
  or you can choose specific tests:
587
588
  ```python
588
- # 2 groups independend:
589
+ # 2 groups independent:
589
590
  analysis.RunTtest()
590
591
  analysis.RunMannWhitney()
591
592
 
@@ -35,7 +35,7 @@ data_uniform = [list(np.random.uniform(i+3, i+1, n)) for i in range(groups)]
35
35
 
36
36
 
37
37
  # set the parameters:
38
- paired = False # is groups dependend or not
38
+ paired = False # are the groups dependent (paired) or not
39
39
  tails = 2 # two-tailed or one-tailed result
40
40
  popmean = 0 # population mean - only needed for single-sample tests
41
41
 
@@ -51,7 +51,7 @@ analysis.RunAuto()
51
51
 
52
52
  or you can choose specific tests:
53
53
  ```python
54
- # 2 groups independend:
54
+ # 2 groups independent:
55
55
  analysis.RunTtest()
56
56
  analysis.RunMannWhitney()
57
57
 
@@ -1,6 +1,8 @@
1
1
  import numpy as np
2
+ import pandas as pd
2
3
  from statsmodels.stats.diagnostic import lilliefors
3
- from scipy.stats import ttest_rel, ttest_ind, ttest_1samp, wilcoxon, mannwhitneyu, f_oneway, kruskal, friedmanchisquare, shapiro, kstest, anderson, normaltest
4
+ from statsmodels.stats.anova import AnovaRM
5
+ from scipy.stats import ttest_rel, ttest_ind, ttest_1samp, wilcoxon, mannwhitneyu, f_oneway, kruskal, friedmanchisquare, shapiro, anderson, normaltest
4
6
 
5
7
 
6
8
  class __StatisticalTests():
@@ -8,19 +10,39 @@ class __StatisticalTests():
8
10
  Statistical tests mixin
9
11
  '''
10
12
 
11
- def anova(self):
13
+ def anova_1w_ordinary(self):
12
14
  stat, p_value = f_oneway(*self.data)
13
15
  self.tails = 2
14
16
  # if self.tails == 1 and p_value > 0.5:
15
17
  # p_value /= 2
16
18
  # if self.tails == 1:
17
19
  # p_value /= 2
18
- self.test_name = 'ANOVA'
19
- self.test_id = 'anova'
20
+ self.test_name = 'Ordinary One-Way ANOVA'
21
+ self.test_id = 'anova_1w_ordinary'
20
22
  self.paired = False
21
23
  self.test_stat = stat
22
24
  self.p_value = p_value
23
25
 
26
+ def anova_1w_rm(self):
27
+ """
28
+ Perform repeated measures one-way ANOVA test.
29
+
30
+ Parameters:
31
+ data (self.data): list of lists, where each sublist holds one condition (group) and the value at position j of every sublist belongs to subject j
32
+ """
33
+
34
+ df = self.matrix_to_dataframe(self.data)
35
+ res = AnovaRM(df, 'Value', 'Row', within=['Col']).fit()
36
+ f_stat = res.anova_table['F Value'][0]
37
+ p_value = res.anova_table['Pr > F'][0]
38
+
39
+ self.tails = 2
40
+ self.test_name = 'Repeated Measures One-Way ANOVA'
41
+ self.test_id = 'anova_1w_rm'
42
+ self.paired = True
43
+ self.test_stat = f_stat
44
+ self.p_value = p_value
45
+
24
46
  def friedman_test(self):
25
47
  stat, p_value = friedmanchisquare(*self.data)
26
48
  self.tails = 2
@@ -56,13 +78,13 @@ class __StatisticalTests():
56
78
  self.test_stat = stat
57
79
  self.p_value = p_value
58
80
 
59
- def t_test_independend(self):
81
+ def t_test_independent(self):
60
82
  t_stat, t_p_value = ttest_ind(
61
83
  self.data[0], self.data[1])
62
84
  if self.tails == 1:
63
85
  t_p_value /= 2
64
- self.test_name = 't-test for independend samples'
65
- self.test_id = 't_test_independend'
86
+ self.test_name = 't-test for independent samples'
87
+ self.test_id = 't_test_independent'
66
88
  self.paired = False
67
89
  self.test_stat = t_stat
68
90
  self.p_value = t_p_value
@@ -120,7 +142,7 @@ class __NormalityTests():
120
142
  '''
121
143
  Normality tests mixin
122
144
 
123
- see the article about minimum sample size for tests:
145
+ see the article about minimal sample size for tests:
124
146
  Power comparisons of Shapiro-Wilk, Kolmogorov-Smirnov,
125
147
  Lilliefors and Anderson-Darling tests, Nornadiah Mohd Razali, Yap Bee Wah
126
148
  '''
@@ -171,7 +193,7 @@ class __NormalityTests():
171
193
 
172
194
  def anderson_get_p(self, data, dist='norm'):
173
195
  '''
174
- calculating p-value for Anderson-Darling test using the method described here:
196
+ calculating p-value for Anderson-Darling test using the method described here:
175
197
  Computation of Probability Associated with Anderson-Darling Statistic
176
198
  Lorentz Jantschi and Sorana D. Bolboaca, 2018 - Mathematics
177
199
 
@@ -199,6 +221,65 @@ class __NormalityTests():
199
221
  return ad, p
200
222
 
201
223
 
224
+ class __Helpers():
225
+
226
+ def matrix_to_dataframe(self, matrix):
227
+ data = []
228
+ cols = []
229
+ rows = []
230
+
231
+ order_number = 1
232
+ for i, row in enumerate(matrix):
233
+ for j, value in enumerate(row):
234
+ data.append(value)
235
+ cols.append(i)
236
+ rows.append(j)
237
+ order_number += 1
238
+
239
+ df = pd.DataFrame(
240
+ {'Row': rows, 'Col': cols, 'Value': data})
241
+ return df
242
+
243
+ def create_results_dict(self) -> dict:
244
+
245
+ self.stars_int = self.make_stars()
246
+ self.stars_str = '*' * self.stars_int if self.stars_int else 'ns'
247
+
248
+ return {
249
+ 'p-value': self.make_p_value_printed(),
250
+ 'Significance(p<0.05)': True if self.p_value.item() < 0.05 else False,
251
+ 'Stars_Printed': self.stars_str,
252
+ 'Test_Name': self.test_name,
253
+ 'Groups_Compared': self.n_groups,
254
+ 'Population_Mean': self.popmean if self.n_groups == 1 else 'N/A',
255
+ 'Data_Normaly_Distributed': self.parametric,
256
+ 'Parametric_Test_Applied': True if self.test_id in self.test_ids_parametric else False,
257
+ 'Paired_Test_Applied': self.paired,
258
+ 'Tails': self.tails,
259
+ 'p-value_exact': self.p_value.item(),
260
+ 'Stars': self.stars_int,
261
+ # 'Stat_Value': self.test_stat.item(),
262
+ 'Warnings': self.warnings,
263
+ 'Groups_N': [len(self.data[i]) for i in range(len(self.data))],
264
+ 'Groups_Median': [np.median(self.data[i]).item() for i in range(len(self.data))],
265
+ 'Groups_Mean': [np.mean(self.data[i]).item() for i in range(len(self.data))],
266
+ 'Groups_SD': [np.std(self.data[i]).item() for i in range(len(self.data))],
267
+ 'Groups_SE': [np.std(self.data[i]).item() / np.sqrt(len(self.data)).item() for i in range(len(self.data))],
268
+ # actually returns list of lists of numpy dtypes of float64, next make it return regular floats:
269
+ 'Samples': self.data,
270
+ }
271
+
272
+ def log(self, *args, **kwargs):
273
+ message = ' '.join(map(str, args))
274
+ # print(message, **kwargs)
275
+ self.summary += '\n' + message
276
+
277
+ def AddWarning(self, warning_id):
278
+ message = self.warning_ids_all[warning_id]
279
+ self.log(message)
280
+ self.warnings.append(message)
281
+
282
+
202
283
  class __TextFormatting():
203
284
  '''
204
285
  Text formatting mixin
@@ -293,45 +374,6 @@ class __TextFormatting():
293
374
  else:
294
375
  self.log(i, ':', ' ' * shift, self.results[i])
295
376
 
296
- def create_results_dict(self) -> dict:
297
-
298
- self.stars_int = self.make_stars()
299
- self.stars_str = '*' * self.stars_int if self.stars_int else 'ns'
300
-
301
- return {
302
- 'p-value': self.make_p_value_printed(),
303
- 'Significance(p<0.05)': True if self.p_value.item() < 0.05 else False,
304
- 'Stars_Printed': self.stars_str,
305
- 'Test_Name': self.test_name,
306
- 'Groups_Compared': self.n_groups,
307
- 'Population_Mean': self.popmean if self.n_groups == 1 else 'N/A',
308
- 'Data_Normaly_Distributed': self.parametric,
309
- 'Parametric_Test_Applied': True if self.test_id in self.test_ids_parametric else False,
310
- 'Paired_Test_Applied': self.paired,
311
- 'Tails': self.tails,
312
- 'p-value_exact': self.p_value.item(),
313
- 'Stars': self.stars_int,
314
- # 'Stat_Value': self.test_stat.item(),
315
- 'Warnings': self.warnings,
316
- 'Groups_N': [len(self.data[i]) for i in range(len(self.data))],
317
- 'Groups_Median': [np.median(self.data[i]).item() for i in range(len(self.data))],
318
- 'Groups_Mean': [np.mean(self.data[i]).item() for i in range(len(self.data))],
319
- 'Groups_SD': [np.std(self.data[i]).item() for i in range(len(self.data))],
320
- 'Groups_SE': [np.std(self.data[i]).item() / np.sqrt(len(self.data)).item() for i in range(len(self.data))],
321
- # actually returns list of lists of numpy dtypes of float64, next make it return regular floats:
322
- 'Samples': self.data,
323
- }
324
-
325
- def log(self, *args, **kwargs):
326
- message = ' '.join(map(str, args))
327
- # print(message, **kwargs)
328
- self.summary += '\n' + message
329
-
330
- def AddWarning(self, warning_id):
331
- message = self.warning_ids_all[warning_id]
332
- self.log(message)
333
- self.warnings.append(message)
334
-
335
377
 
336
378
  class __InputFormatting():
337
379
  def floatify_recursive(self, data):
@@ -349,7 +391,7 @@ class __InputFormatting():
349
391
  return None
350
392
 
351
393
 
352
- class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting, __InputFormatting):
394
+ class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting, __InputFormatting, __Helpers):
353
395
  '''
354
396
  The main class
355
397
  *documentation placeholder*
@@ -372,19 +414,47 @@ class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting
372
414
  self.n_groups = len(self.groups_list)
373
415
  self.warning_flag_non_numeric_data = False
374
416
  self.summary = ''
375
- self.test_ids_parametric = ['anova',
376
- 't_test_independend',
377
- 't_test_paired',
378
- 't_test_single_sample',]
417
+
418
+ # test IDs classification:
379
419
  self.test_ids_all = [ # in alphabetical order
380
- 'anova',
420
+ 'anova_1w_ordinary',
421
+ 'anova_1w_rm',
381
422
  'friedman',
382
423
  'kruskal_wallis',
383
424
  'mann_whitney',
384
- 't_test_independend',
425
+ 't_test_independent',
426
+ 't_test_paired',
427
+ 't_test_single_sample',
428
+ 'wilcoxon',
429
+ 'wilcoxon_single_sample',
430
+ ]
431
+ self.test_ids_parametric = [
432
+ 'anova_1w_ordinary',
433
+ 'anova_1w_rm'
434
+ 't_test_independent',
385
435
  't_test_paired',
386
436
  't_test_single_sample',
437
+ ]
438
+ self.test_ids_dependent = [
439
+ 'anova_1w_rm',
440
+ 'friedman',
441
+ 't_test_paired',
442
+ 'wilcoxon',
443
+ ]
444
+ self.test_ids_3sample = [
445
+ 'anova_1w_ordinary',
446
+ 'anova_1w_rm',
447
+ 'friedman',
448
+ 'kruskal_wallis',
449
+ ]
450
+ self.test_ids_2sample = [
451
+ 'mann_whitney',
452
+ 't_test_independent',
453
+ 't_test_paired',
387
454
  'wilcoxon',
455
+ ]
456
+ self.test_ids_1sample = [
457
+ 't_test_single_sample',
388
458
  'wilcoxon_single_sample',
389
459
  ]
390
460
  self.warning_ids_all = {
@@ -425,28 +495,18 @@ class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting
425
495
  assert self.data, 'There is no input data'
426
496
  assert self.tails in [1, 2], 'Tails parameter can be 1 or 2 only'
427
497
  assert test in self.test_ids_all or test == 'auto', 'Wrong test id choosen, ensure you called correct function'
428
- assert not (self.n_groups > 1
429
- and (test == 't_test_single_sample'
430
- or test == 'wilcoxon_single_sample')), 'Only one group of data must be given for single-group tests'
431
498
  assert all(len(
432
499
  group) >= 4 for group in self.data), 'Each group must contain at least four values'
433
- assert not (self.paired == True and not all(len(lst) == len(
434
- self.data[0]) for lst in self.data)), 'Paired groups must be the same length'
435
- assert not (test == 'friedman' and not all(len(lst) == len(
436
- self.data[0]) for lst in self.data)), 'Paired groups must be the same length for Friedman Chi Square test'
437
- assert not (test == 't_test_paired' and not all(len(lst) == len(
438
- self.data[0]) for lst in self.data)), 'Paired groups must be the same length for Paired t-test'
439
- assert not (test == 'wilcoxon' and not all(len(lst) == len(
440
- self.data[0]) for lst in self.data)), 'Paired groups must be the same length for Wilcoxon signed-rank test'
441
- assert not (test == 'friedman' and self.n_groups <
442
- 3), 'At least three groups of data must be given for 3-groups tests'
443
- assert not ((test == 'anova'
444
- or test == 'kruskal_wallis') and self.n_groups < 2), 'At least two groups of data must be given for ANOVA or Kruskal Wallis tests'
445
- assert not ((test == 'wilcoxon'
446
- or test == 't_test_independend'
447
- or test == 't_test_paired'
448
- or test == 'mann_whitney')
449
- and self.n_groups != 2), 'Only two groups of data must be given for 2-groups tests'
500
+ assert not (self.paired == True
501
+ and not all(len(lst) == len(self.data[0]) for lst in self.data)), 'Paired groups must have the same length'
502
+ assert not (test in self.test_ids_dependent
503
+ and not all(len(lst) == len(self.data[0]) for lst in self.data)), 'Groups must have the same length for dependent groups test'
504
+ assert not (test in self.test_ids_2sample
505
+ and self.n_groups != 2), f'Only two groups of data must be given for 2-groups tests, got {self.n_groups}'
506
+ assert not (test in self.test_ids_1sample
507
+ and self.n_groups > 1), f'Only one group of data must be given for single-group tests, got {self.n_groups}'
508
+ assert not (test in self.test_ids_3sample
509
+ and self.n_groups < 3), f'At least three groups of data must be given for multi-groups tests, got {self.n_groups}'
450
510
  except AssertionError as error:
451
511
  self.log('\nTest :', test)
452
512
  self.log('Error :', error)
@@ -490,16 +550,18 @@ class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting
490
550
  if not test == 'auto' and self.parametric and not test in self.test_ids_parametric:
491
551
  self.AddWarning('non-param_test_with_normal_data')
492
552
 
493
- if test == 'anova':
494
- self.anova()
553
+ if test == 'anova_1w_ordinary':
554
+ self.anova_1w_ordinary()
555
+ elif test == 'anova_1w_rm':
556
+ self.anova_1w_rm()
495
557
  elif test == 'friedman':
496
558
  self.friedman_test()
497
559
  elif test == 'kruskal_wallis':
498
560
  self.kruskal_wallis_test()
499
561
  elif test == 'mann_whitney':
500
562
  self.mann_whitney_u_test()
501
- elif test == 't_test_independend':
502
- self.t_test_independend()
563
+ elif test == 't_test_independent':
564
+ self.t_test_independent()
503
565
  elif test == 't_test_paired':
504
566
  self.t_test_paired()
505
567
  elif test == 't_test_single_sample':
@@ -525,7 +587,13 @@ class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting
525
587
 
526
588
  def __auto(self):
527
589
 
528
- if self.n_groups == 2:
590
+ if self.n_groups == 1:
591
+ if self.parametric:
592
+ return self.t_test_single_sample()
593
+ else:
594
+ return self.wilcoxon_single_sample()
595
+
596
+ elif self.n_groups == 2:
529
597
  if self.paired:
530
598
  if self.parametric:
531
599
  return self.t_test_paired()
@@ -533,23 +601,25 @@ class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting
533
601
  return self.wilcoxon()
534
602
  else:
535
603
  if self.parametric:
536
- return self.t_test_independend()
604
+ return self.t_test_independent()
537
605
  else:
538
606
  return self.mann_whitney_u_test()
539
- elif self.n_groups == 1:
540
- if self.parametric:
541
- return self.t_test_single_sample()
542
- else:
543
- return self.wilcoxon_single_sample()
544
- else:
607
+
608
+ elif self.n_groups >= 3:
545
609
  if self.paired:
546
- return self.friedman_test()
610
+ if self.parametric:
611
+ return self.anova_1w_rm()
612
+ else:
613
+ return self.friedman_test()
547
614
  else:
548
615
  if self.parametric:
549
- return self.anova()
616
+ return self.anova_1w_ordinary()
550
617
  else:
551
618
  return self.kruskal_wallis_test()
552
619
 
620
+ else:
621
+ pass
622
+
553
623
  # public methods:
554
624
  def RunAuto(self):
555
625
  self.__run_test(test='auto')
@@ -557,8 +627,11 @@ class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting
557
627
  def RunManual(self, test):
558
628
  self.__run_test(test)
559
629
 
560
- def RunAnova(self):
561
- self.__run_test(test='anova')
630
+ def RunOnewayAnova(self):
631
+ self.__run_test(test='anova_1w_ordinary')
632
+
633
+ def RunOnewayAnovaRM(self):
634
+ self.__run_test(test='anova_1w_rm')
562
635
 
563
636
  def RunFriedman(self):
564
637
  self.__run_test(test='friedman')
@@ -570,7 +643,7 @@ class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting
570
643
  self.__run_test(test='mann_whitney')
571
644
 
572
645
  def RunTtest(self):
573
- self.__run_test(test='t_test_independend')
646
+ self.__run_test(test='t_test_independent')
574
647
 
575
648
  def RunTtestPaired(self):
576
649
  self.__run_test(test='t_test_paired')
@@ -603,6 +676,9 @@ class StatisticalAnalysis(__StatisticalTests, __NormalityTests, __TextFormatting
603
676
  else:
604
677
  return self.summary
605
678
 
679
+ def GetTestIDs(self):
680
+ return self.test_ids_all
681
+
606
682
  def PrintSummary(self):
607
683
  print(self.summary)
608
684
 
@@ -1,2 +1,2 @@
1
1
  # AutoStatLib package version:
2
- __version__ = "0.2.0"
2
+ __version__ = "0.2.1"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: AutoStatLib
3
- Version: 0.2.0
3
+ Version: 0.2.1
4
4
  Summary: AutoStatLib - a simple statistical analysis tool
5
5
  Author: Stemonitis, SciWare LLC
6
6
  Author-email: konung-yaropolk <yaropolk1995@gmail.com>
@@ -531,6 +531,7 @@ License-File: LICENSE
531
531
  Requires-Dist: numpy
532
532
  Requires-Dist: scipy
533
533
  Requires-Dist: statsmodels
534
+ Requires-Dist: pandas
534
535
 
535
536
  # AutoStatLib - python library for automated statistical analysis
536
537
 
@@ -569,7 +570,7 @@ data_uniform = [list(np.random.uniform(i+3, i+1, n)) for i in range(groups)]
569
570
 
570
571
 
571
572
  # set the parameters:
572
- paired = False # is groups dependend or not
573
+ paired = False # are the groups dependent (paired) or not
573
574
  tails = 2 # two-tailed or one-tailed result
574
575
  popmean = 0 # population mean - only needed for single-sample tests
575
576
 
@@ -585,7 +586,7 @@ analysis.RunAuto()
585
586
 
586
587
  or you can choose specific tests:
587
588
  ```python
588
- # 2 groups independend:
589
+ # 2 groups independent:
589
590
  analysis.RunTtest()
590
591
  analysis.RunMannWhitney()
591
592
 
@@ -0,0 +1,4 @@
1
+ numpy
2
+ scipy
3
+ statsmodels
4
+ pandas
@@ -1,3 +0,0 @@
1
- numpy
2
- scipy
3
- statsmodels
File without changes
File without changes
File without changes
File without changes