AutoStatLib 0.2.22__tar.gz → 0.2.23__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (21)
  1. {autostatlib-0.2.22/src/AutoStatLib.egg-info → autostatlib-0.2.23}/PKG-INFO +1 -1
  2. {autostatlib-0.2.22 → autostatlib-0.2.23}/src/AutoStatLib/AutoStatLib.py +8 -2
  3. {autostatlib-0.2.22 → autostatlib-0.2.23}/src/AutoStatLib/StatPlots.py +250 -70
  4. {autostatlib-0.2.22 → autostatlib-0.2.23}/src/AutoStatLib/_version.py +1 -1
  5. {autostatlib-0.2.22 → autostatlib-0.2.23}/src/AutoStatLib/helpers.py +1 -0
  6. {autostatlib-0.2.22 → autostatlib-0.2.23/src/AutoStatLib.egg-info}/PKG-INFO +1 -1
  7. {autostatlib-0.2.22 → autostatlib-0.2.23}/LICENSE +0 -0
  8. {autostatlib-0.2.22 → autostatlib-0.2.23}/MANIFEST.in +0 -0
  9. {autostatlib-0.2.22 → autostatlib-0.2.23}/README.md +0 -0
  10. {autostatlib-0.2.22 → autostatlib-0.2.23}/pyproject.toml +0 -0
  11. {autostatlib-0.2.22 → autostatlib-0.2.23}/requirements.txt +0 -0
  12. {autostatlib-0.2.22 → autostatlib-0.2.23}/setup.cfg +0 -0
  13. {autostatlib-0.2.22 → autostatlib-0.2.23}/src/AutoStatLib/__init__.py +0 -0
  14. {autostatlib-0.2.22 → autostatlib-0.2.23}/src/AutoStatLib/__main__.py +0 -0
  15. {autostatlib-0.2.22 → autostatlib-0.2.23}/src/AutoStatLib/normality_tests.py +0 -0
  16. {autostatlib-0.2.22 → autostatlib-0.2.23}/src/AutoStatLib/statistical_tests.py +0 -0
  17. {autostatlib-0.2.22 → autostatlib-0.2.23}/src/AutoStatLib/text_formatting.py +0 -0
  18. {autostatlib-0.2.22 → autostatlib-0.2.23}/src/AutoStatLib.egg-info/SOURCES.txt +0 -0
  19. {autostatlib-0.2.22 → autostatlib-0.2.23}/src/AutoStatLib.egg-info/dependency_links.txt +0 -0
  20. {autostatlib-0.2.22 → autostatlib-0.2.23}/src/AutoStatLib.egg-info/requires.txt +0 -0
  21. {autostatlib-0.2.22 → autostatlib-0.2.23}/src/AutoStatLib.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: AutoStatLib
3
- Version: 0.2.22
3
+ Version: 0.2.23
4
4
  Summary: AutoStatLib - a simple statistical analysis tool
5
5
  Author: Stemonitis, SciWare LLC
6
6
  Author-email: konung-yaropolk <yaropolk1995@gmail.com>
@@ -19,7 +19,9 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
19
19
  popmean=None,
20
20
  posthoc=False,
21
21
  verbose=True,
22
- groups_name=[]):
22
+ raise_errors=False,
23
+ groups_name=[],
24
+ subgrouping=[]):
23
25
  self.results = None
24
26
  self.error = False
25
27
  self.groups_list = groups_list
@@ -28,10 +30,11 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
28
30
  self.popmean = popmean
29
31
  self.posthoc = posthoc
30
32
  self.verbose = verbose
33
+ self.raise_errors = raise_errors
31
34
  self.n_groups = len(self.groups_list)
32
35
  self.groups_name = [groups_name[i % len(groups_name)]
33
36
  for i in range(self.n_groups)] if groups_name and groups_name != [''] else [f'Group {i+1}' for i in range(self.n_groups)]
34
-
37
+ self.subgrouping = subgrouping if subgrouping else [0]
35
38
  self.warning_flag_non_numeric_data = False
36
39
  self.summary = 'AutoStatLib v{}'.format(__version__)
37
40
 
@@ -150,6 +153,9 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
150
153
  self.run_test_by_id('none')
151
154
  self.results = self.create_results_dict()
152
155
 
156
+ if self.raise_errors:
157
+ raise ValueError(error)
158
+
153
159
  # Print errmessage:
154
160
  if self.verbose:
155
161
  self.log('\nTest :', test)
@@ -1,7 +1,9 @@
1
+ from re import X
1
2
  import seaborn as sns
2
3
  import random
3
4
  # from math import comb
4
5
  import numpy as np
6
+ import pandas as pd
5
7
  import matplotlib.pyplot as plt
6
8
  import matplotlib.colors as mcolors
7
9
  import matplotlib.colors as color
@@ -70,6 +72,23 @@ class Helpers():
70
72
  def transpose(self, data):
71
73
  return list(map(list, zip(*data)))
72
74
 
75
def expand_counts(self, counts):
    '''
    Expand per-subgroup sizes into a flat list of subgroup indices.

    The input is a list of integers, one per subgroup, telling how
    many consecutive items belong to that subgroup. Falsy entries
    (0, None) are dropped before numbering, so they do not consume
    an index. The output repeats each subgroup index as many times
    as its count.

    Eg: input:  [3,2,1]
        output: [0,0,0,1,1,2]

    If the input is None or empty, or expands to nothing,
    the output is [0].
    '''
    # None is treated like "no subgrouping" instead of raising TypeError.
    if counts is None:
        counts = []

    # Drop falsy counts first so that the remaining subgroups are
    # numbered consecutively from 0.
    sizes = [c for c in counts if c]

    output = []
    for index, size in enumerate(sizes):
        output.extend([index] * size)

    return output if output else [0]
91
+
73
92
 
74
93
  class BaseStatPlot(Helpers):
75
94
 
@@ -83,6 +102,7 @@ class BaseStatPlot(Helpers):
83
102
  y_label='',
84
103
  print_x_labels=True,
85
104
  Groups_Name=None,
105
+ subgrouping=[],
86
106
  Posthoc_Matrix=[],
87
107
  Posthoc_Tests_Name='',
88
108
  colormap=None,
@@ -92,8 +112,8 @@ class BaseStatPlot(Helpers):
92
112
  figure_h=4,
93
113
  figure_w=0, # 0 means auto
94
114
  **kwargs):
95
- self.data_groups = [group if group else [0, 0, 0, 0]
96
- for group in data_groups]
115
+ self.data_groups = [group if group else None
116
+ for group in data_groups] if any(self.data_groups) else [[0],[0]]
97
117
  self.n_groups = len(self.data_groups)
98
118
  self.p = p_value_exact
99
119
  self.testname = Test_Name
@@ -110,6 +130,14 @@ class BaseStatPlot(Helpers):
110
130
  self.figure_scale_factor = figure_scale_factor
111
131
  self.figure_h = figure_h
112
132
  self.figure_w = figure_w
133
+ self.error = False
134
+
135
+ try:
136
+ assert any(self.data_groups), 'There is no input data'
137
+ except AssertionError as error:
138
+ self.error = True
139
+ print('AutoStatLib.StatPlots Error :', error)
140
+ return
113
141
 
114
142
  # sd sem mean and median calculation if they are not provided
115
143
  self.mean = [
@@ -128,6 +156,9 @@ class BaseStatPlot(Helpers):
128
156
  self.groups_name = Groups_Name if Groups_Name is not None else [
129
157
  '']
130
158
 
159
+ self.subgrouping = subgrouping if subgrouping else [0]
160
+ self.subgrouping_arrange = self.expand_counts(self.subgrouping)
161
+
131
162
  if colormap is not None and colormap != ['']:
132
163
  colormap = colormap
133
164
  self.colormap_default = False
@@ -362,6 +393,7 @@ class BaseStatPlot(Helpers):
362
393
 
363
394
  def add_swarm(self, ax,
364
395
  color='dimgrey',
396
+ default_color='dimgrey',
365
397
  alpha=1,
366
398
  marker='o',
367
399
  markersize=8,
@@ -370,6 +402,9 @@ class BaseStatPlot(Helpers):
370
402
  """
371
403
  Add a swarmplot (scatter-like plot with non-overlapping points)
372
404
  to the provided Axes. Automatically reduce point size if overcrowded.
405
+ Automatically assigns colors using sns.color_palette("tab10")
406
+ to all unique non-missing group labels.
407
+ Missing labels → default_color.
373
408
  """
374
409
 
375
410
  # Prepare flattened data
@@ -412,6 +447,116 @@ class BaseStatPlot(Helpers):
412
447
  linewidth=linewidth * self.figure_scale_factor,
413
448
  zorder=zorder - 1)
414
449
 
450
def add_swarm_with_alternate_colors(self, ax,
                                    color='dimgrey',
                                    default_color='dimgrey',
                                    palette_name="tab10",
                                    subgrouping=(0,),
                                    alpha=1,
                                    marker='o',
                                    markersize=8,
                                    linewidth=1.4,
                                    zorder=2):
    """
    Add a swarmplot (scatter-like plot with non-overlapping points)
    to the provided Axes, colouring points by subgroup label.
    Automatically reduce point size if overcrowded.

    Parameters:
        ax: Axes to draw on.
        color: currently unused; kept so the signature stays
            compatible with existing callers.
        default_color: colour for points whose label is missing
            (None, "", NaN or 0).
        palette_name: seaborn palette used for the remaining labels.
        subgrouping: one label per position inside a group. The same
            label sequence is applied to every group. If it is
            shorter than a group, the remaining points get the label
            'last'; if it is longer, the extra labels are ignored.
            None, an empty sequence, or a sequence containing only 0
            means "no subgrouping": every point gets default_color.
        alpha, marker, markersize, linewidth, zorder: forwarded to
            sns.swarmplot (size and linewidth are scaled).

    NOTE: connecting lines between paired points (self.dependent)
    are not drawn by this method.
    """

    # A missing group (None) is treated as an empty one.
    data_groups = [[] if group is None else group
                   for group in self.data_groups]

    # Prepare flattened data
    values = np.array([v for group in data_groups for v in group])
    groups = [i for i, group in enumerate(data_groups) for _ in group]

    # Estimate overcrowding for adaptive sizing
    group_counts = [len(g) for g in data_groups]
    max_points = max(group_counts) if group_counts else 1

    # Determine horizontal space per category
    num_groups = len(data_groups)
    xlim = ax.get_xlim()
    width_per_group = (xlim[1] - xlim[0]) / max(num_groups, 1)

    # Empirical density threshold: if points are too dense, shrink
    density = max_points / (width_per_group + 1e-6)

    # Tunable constants to approximate best function of size adjustment
    size_scale = max(0.1, min(1, 3.5 / (density ** 0.5)))

    def is_missing(lbl):
        # NaN never equals itself; this also catches NaN objects
        # other than the np.nan singleton.
        if lbl is None or (isinstance(lbl, float) and lbl != lbl):
            return True
        return lbl in ("", 0)

    # Normalize labels (missing -> "_")
    if subgrouping is None or len(subgrouping) == 0 \
            or set(subgrouping) == {0}:
        base_labels = []
        pad_label = "_"
    else:
        base_labels = ["_" if is_missing(lbl) else lbl
                       for lbl in subgrouping]
        pad_label = 'last'

    # Build the hue vector group by group so that it always has exactly
    # one label per plotted value, whatever the number of groups and
    # even when the groups differ in size.
    hue = []
    for size in group_counts:
        labels = base_labels[:size]
        labels.extend([pad_label] * (size - len(labels)))
        hue.extend(labels)

    # Unique labels in order of first appearance, so the colour given
    # to each subgroup is the same on every run.
    unique_subgroups = list(dict.fromkeys(hue))

    # Auto palette for them
    palette = {}
    if unique_subgroups:
        colors = sns.color_palette(palette_name, len(unique_subgroups))
        palette = dict(zip(unique_subgroups, colors))

    # Add default color
    palette["_"] = default_color

    sns.swarmplot(
        y=values,
        x=groups,
        hue=hue,
        ax=ax,
        palette=palette,
        dodge=False,
        legend=False,
        alpha=alpha,
        size=markersize * self.figure_scale_factor * size_scale,
        marker=marker,
        linewidth=linewidth * self.figure_scale_factor * size_scale,
        zorder=zorder,
    )
559
+
415
560
  def add_errorbar_sd(self, ax, x,
416
561
  capsize=4,
417
562
  ecolor='r',
@@ -616,42 +761,50 @@ class BaseStatPlot(Helpers):
616
761
  ha='right', va='bottom', fontsize=8*self.figure_scale_factor, fontweight='regular')
617
762
 
618
763
  def show(self):
619
- plt.show()
620
-
621
- def save(self, path):
622
- plt.savefig(path,
623
- pad_inches=0.1*self.figure_scale_factor,
624
- transparent=True,
625
- )
764
+ if not self.error:
765
+ plt.show()
766
+
767
def save(self, path, format='png', dpi=150, transparent=True):
    """
    Save the current pyplot figure to `path` using plt.savefig.

    Nothing is written when self.error is set.

    Parameters:
        path: destination handed to plt.savefig.
        format: output format handed to plt.savefig.
        dpi: resolution handed to plt.savefig.
        transparent: handed to plt.savefig as is.

    NOTE(review): `format` is always passed explicitly, so the
    default 'png' presumably takes precedence over the extension
    of `path` - confirm this is intended.
    """
    if self.error:
        return

    savefig_options = {
        'pad_inches': 0.1*self.figure_scale_factor,
        'format': format,
        'dpi': dpi,
        'transparent': transparent,
    }
    plt.savefig(path, **savefig_options)
626
775
 
627
776
  def close(self):
628
- plt.close()
777
+ if not self.error:
778
+ plt.close()
629
779
 
630
780
  def plot(self):
631
- # Abstract method—each subclass must implement its own plot method
632
- raise NotImplementedError(
633
- "Implement the plot() method in the subclass")
781
+ if not self.error:
782
+ # Abstract method—each subclass must implement its own plot method
783
+ raise NotImplementedError(
784
+ "Implement the plot() method in the subclass")
634
785
 
635
786
 
636
787
  class BarStatPlot(BaseStatPlot):
637
788
 
638
789
  def plot(self, linewidth=1.8):
639
- fig, ax = self.setup_figure()
640
-
641
- for x in range(len(self.data_groups)):
790
+ if not self.error:
642
791
 
643
- # Create a bar for given group.
644
- self.add_barplot(ax, x, linewidth=linewidth)
792
+ fig, ax = self.setup_figure()
645
793
 
646
- # Overlay errbars, and markers.
647
- self.add_median_marker(ax, x, linewidth=linewidth)
648
- self.add_mean_marker(ax, x, linewidth=linewidth)
649
- self.add_errorbar_sd(ax, x, linewidth=linewidth)
794
+ for x in range(len(self.data_groups)):
650
795
 
651
- self.add_swarm(ax)
652
- self.add_significance_bars(ax, linewidth)
653
- self.add_titles_and_labels(fig, ax)
654
- self.axes_formatting(ax, linewidth)
796
+ # Create a bar for given group.
797
+ self.add_barplot(ax, x, linewidth=linewidth)
798
+
799
+ # Overlay errbars, and markers.
800
+ self.add_median_marker(ax, x, linewidth=linewidth)
801
+ self.add_mean_marker(ax, x, linewidth=linewidth)
802
+ self.add_errorbar_sd(ax, x, linewidth=linewidth)
803
+
804
+ self.add_swarm(ax)
805
+ self.add_significance_bars(ax, linewidth)
806
+ self.add_titles_and_labels(fig, ax)
807
+ self.axes_formatting(ax, linewidth)
655
808
 
656
809
 
657
810
  class ViolinStatPlot(BaseStatPlot):
@@ -668,76 +821,103 @@ class ViolinStatPlot(BaseStatPlot):
668
821
  '''
669
822
 
670
823
  def plot(self, linewidth=1.8):
671
- fig, ax = self.setup_figure()
824
+ if not self.error:
825
+ fig, ax = self.setup_figure()
672
826
 
673
- for x in range(len(self.data_groups)):
827
+ for x in range(len(self.data_groups)):
674
828
 
675
- # Create a violin for given group.
676
- self.add_violinplot(ax, x)
829
+ # Create a violin for given group.
830
+ self.add_violinplot(ax, x)
677
831
 
678
- # Overlay errbars and markers.
679
- self.add_median_marker(ax, x, linewidth=linewidth)
680
- self.add_mean_marker(ax, x, linewidth=linewidth)
681
- self.add_errorbar_sd(ax, x, linewidth=linewidth)
832
+ # Overlay errbars and markers.
833
+ self.add_median_marker(ax, x, linewidth=linewidth)
834
+ self.add_mean_marker(ax, x, linewidth=linewidth)
835
+ self.add_errorbar_sd(ax, x, linewidth=linewidth)
682
836
 
683
- self.add_swarm(ax)
684
- self.add_significance_bars(ax, linewidth)
685
- self.add_titles_and_labels(fig, ax)
686
- self.axes_formatting(ax, linewidth)
837
+ self.add_swarm(ax)
838
+ self.add_significance_bars(ax, linewidth)
839
+ self.add_titles_and_labels(fig, ax)
840
+ self.axes_formatting(ax, linewidth)
687
841
 
688
- xmin, xmax = ax.get_xlim()
689
- ax.set_xlim(xmin - 0.3, xmax + 0.3)
842
+ xmin, xmax = ax.get_xlim()
843
+ ax.set_xlim(xmin - 0.3, xmax + 0.3)
690
844
 
691
845
 
692
846
  class BoxStatPlot(BaseStatPlot):
693
847
 
694
848
  def plot(self, linewidth=1.8):
695
- fig, ax = self.setup_figure()
849
+ if not self.error:
850
+ fig, ax = self.setup_figure()
696
851
 
697
- self.add_boxplot(ax)
698
- self.add_swarm(ax)
699
- self.add_significance_bars(ax, linewidth)
700
- self.add_titles_and_labels(fig, ax)
701
- self.axes_formatting(ax, linewidth)
852
+ self.add_boxplot(ax)
853
+ self.add_swarm(ax)
854
+ self.add_significance_bars(ax, linewidth)
855
+ self.add_titles_and_labels(fig, ax)
856
+ self.axes_formatting(ax, linewidth)
702
857
 
703
858
 
704
859
  class ScatterStatPlot(BaseStatPlot):
705
860
 
706
861
  def plot(self, linewidth=1.8):
707
- fig, ax = self.setup_figure()
862
+ if not self.error:
863
+ fig, ax = self.setup_figure()
708
864
 
709
- for x in range(len(self.data_groups)):
865
+ for x in range(len(self.data_groups)):
710
866
 
711
- # Overlay errbars, and markers.
712
- self.add_median_marker(ax, x, linewidth=linewidth)
713
- self.add_mean_marker(ax, x, linewidth=linewidth)
714
- self.add_errorbar_sd(ax, x, linewidth=linewidth)
867
+ # Overlay errbars, and markers.
868
+ self.add_median_marker(ax, x, linewidth=linewidth)
869
+ self.add_mean_marker(ax, x, linewidth=linewidth)
870
+ self.add_errorbar_sd(ax, x, linewidth=linewidth)
715
871
 
716
- self.add_scatter(ax)
717
- self.add_significance_bars(ax, linewidth)
718
- self.add_titles_and_labels(fig, ax)
719
- self.axes_formatting(ax, linewidth)
872
+ self.add_scatter(ax)
873
+ self.add_significance_bars(ax, linewidth)
874
+ self.add_titles_and_labels(fig, ax)
875
+ self.axes_formatting(ax, linewidth)
720
876
 
721
- xmin, xmax = ax.get_xlim()
722
- ax.set_xlim(xmin - 0.3, xmax + 0.3)
877
+ xmin, xmax = ax.get_xlim()
878
+ ax.set_xlim(xmin - 0.3, xmax + 0.3)
723
879
 
724
880
 
725
881
  class SwarmStatPlot(BaseStatPlot):
726
882
 
727
883
  def plot(self, linewidth=1.8):
728
- fig, ax = self.setup_figure()
884
+ if not self.error:
885
+ fig, ax = self.setup_figure()
886
+
887
+ for x in range(len(self.data_groups)):
888
+
889
+ # Overlay errbars, and markers.
890
+ self.add_median_marker(ax, x, linewidth=linewidth)
891
+ self.add_mean_marker(ax, x, linewidth=linewidth)
892
+ self.add_errorbar_sd(ax, x, linewidth=linewidth)
893
+
894
+ self.add_swarm(ax)
895
+ self.add_significance_bars(ax, linewidth)
896
+ self.add_titles_and_labels(fig, ax)
897
+ self.axes_formatting(ax, linewidth)
898
+
899
+ xmin, xmax = ax.get_xlim()
900
+ ax.set_xlim(xmin - 0.3, xmax + 0.3)
901
+
902
+
903
+ class SwarmStatPlot_subgrouping_betta(BaseStatPlot):
904
+
905
+ def plot(self, linewidth=1.8):
906
+ if not self.error:
907
+ fig, ax = self.setup_figure()
729
908
 
730
- for x in range(len(self.data_groups)):
909
+ for x in range(len(self.data_groups)):
731
910
 
732
- # Overlay errbars, and markers.
733
- self.add_median_marker(ax, x, linewidth=linewidth)
734
- self.add_mean_marker(ax, x, linewidth=linewidth)
735
- self.add_errorbar_sd(ax, x, linewidth=linewidth)
911
+ # Overlay errbars, and markers.
912
+ self.add_median_marker(ax, x, linewidth=linewidth)
913
+ self.add_mean_marker(ax, x, linewidth=linewidth)
914
+ self.add_errorbar_sd(ax, x, linewidth=linewidth)
736
915
 
737
- self.add_swarm(ax)
738
- self.add_significance_bars(ax, linewidth)
739
- self.add_titles_and_labels(fig, ax)
740
- self.axes_formatting(ax, linewidth)
916
+ self.add_swarm_with_alternate_colors(
917
+ ax, subgrouping=self.subgrouping_arrange)
918
+ self.add_significance_bars(ax, linewidth)
919
+ self.add_titles_and_labels(fig, ax)
920
+ self.axes_formatting(ax, linewidth)
741
921
 
742
- xmin, xmax = ax.get_xlim()
743
- ax.set_xlim(xmin - 0.3, xmax + 0.3)
922
+ xmin, xmax = ax.get_xlim()
923
+ ax.set_xlim(xmin - 0.3, xmax + 0.3)
@@ -1,2 +1,2 @@
1
1
  # AutoStatLib package version:
2
- __version__ = "0.2.22"
2
+ __version__ = "0.2.23"
@@ -73,6 +73,7 @@ class Helpers():
73
73
  'Groups_Mean': [np.mean(self.data[i]).item() for i in range(len(self.data))],
74
74
  'Groups_SD': [np.std(self.data[i]).item() for i in range(len(self.data))],
75
75
  'Groups_SE': [np.std(self.data[i]).item() / np.sqrt(len(self.data)).item() for i in range(len(self.data))],
76
+ 'subgrouping': self.subgrouping,
76
77
  # actually returns list of lists of numpy dtypes of float64, next make it return regular floats:
77
78
  'Samples': self.data,
78
79
  'Posthoc_Tests_Name': self.posthoc_name if self.posthoc_name is not None else '',
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: AutoStatLib
3
- Version: 0.2.22
3
+ Version: 0.2.23
4
4
  Summary: AutoStatLib - a simple statistical analysis tool
5
5
  Author: Stemonitis, SciWare LLC
6
6
  Author-email: konung-yaropolk <yaropolk1995@gmail.com>
File without changes
File without changes
File without changes
File without changes