PyPI - AutoStatLib - Versions diffs - 0.2.26__tar.gz → 0.3.0__tar.gz - Mend

AutoStatLib 0.2.26__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

{autostatlib-0.2.26/src/AutoStatLib.egg-info → autostatlib-0.3.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: AutoStatLib
-Version: 0.2.26
+Version: 0.3.0
 Summary: AutoStatLib - a simple statistical analysis tool
 Author: Stemonitis, SciWare LLC
 Author-email: konung-yaropolk <yaropolk1995@gmail.com>

{autostatlib-0.2.26 → autostatlib-0.3.0}/src/AutoStatLib/AutoStatLib.py RENAMED Viewed

@@ -20,10 +20,9 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
                  posthoc=False,
                  verbose=True,
                  raise_errors=False,
-                 groups_name=[],
-                 subgrouping=[]):
-        self.results = None
-        self.error = False
+                 groups_name=None,
+                 subgrouping=None):
         self.groups_list = groups_list
         self.paired = paired
         self.tails = tails
@@ -34,7 +33,7 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
         self.n_groups = len(self.groups_list)
         self.groups_name = [groups_name[i % len(groups_name)]
                             for i in range(self.n_groups)] if groups_name and groups_name != [''] else [f'Group {i+1}' for i in range(self.n_groups)]
-        self.subgrouping = subgrouping if subgrouping else [0]
+        self.subgrouping = subgrouping if subgrouping is not None else [0]
         self.warning_flag_non_numeric_data = False
         self.summary = 'AutoStatLib v{}'.format(__version__)
@@ -68,7 +67,7 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
         ]
         self.test_ids_parametric = [
             'anova_1w_ordinary',
-            'anova_1w_rm'
+            'anova_1w_rm',
             't_test_independent',
             't_test_paired',
             't_test_single_sample',
@@ -100,6 +99,8 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
             'param_test_with_non-normal_data': '\nWarning: Parametric test was manualy chosen for Not-Normaly distributed data.\n         The results might be skewed. \n         Please, run non-parametric test or preform automatic test selection.\n',
             'non-param_test_with_normal_data': '\nWarning: Non-Parametric test was manualy chosen for Normaly distributed data.\n         The results might be skewed. \n         Please, run parametric test or preform automatic test selection.\n',
             'no_pop_mean_set':                 '\nWarning: No Population Mean was set up for single-sample test, used default 0 value.\n         The results might be skewed. \n         Please, set the Population Mean and run the test again.\n',
+            'paired_test_with_independend_samples': '\nWarning: A paired test was manually selected, even though the samples were declared independent.\n         The results might be skewed. \n         Please, run test for independend samples or preform automatic test selection.\n',
+            'independend_test_with_paired_samples': '\nWarning: An independent test was manually selected, even though the samples were declared paired.\n         The results might be skewed. \n         Please, run test for paired samples or preform automatic test selection.\n',
         }
     def run_test(self, test='auto'):
@@ -131,6 +132,8 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
         # delete the empty cols from input
         self.data = [col for col in self.data if any(
             x is not None for x in col)]
+        # re-calculate the number of groups after removing empty cols
+        self.n_groups = len(self.data)
         # User input assertion block
         try:
@@ -139,7 +142,7 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
             assert test in self.test_ids_all or test == 'auto', 'Wrong test id choosen, ensure you called correct function'
             assert all(len(
                 group) >= 4 for group in self.data), 'Each group must contain at least four values'
-            assert not (self.paired is True
+            assert not (test in self.test_ids_dependent     # self.paired is True
                         and not all(len(lst) == len(self.data[0]) for lst in self.data)), 'Paired samples must have the same length'
             assert not (test in self.test_ids_dependent
                         and not all(len(lst) == len(self.data[0]) for lst in self.data)), 'Samples must have the same length for the dependend statistics test'
@@ -203,8 +206,14 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
         if test != 'auto' and self.parametric and test not in self.test_ids_parametric:
             self.AddWarning('non-param_test_with_normal_data')
-        # run the test
+        # Maybe unneeded checks for manually selected tests
+        # because user propably know what test they selected
+        if test != 'auto' and not self.paired and test in self.test_ids_dependent:
+            self.AddWarning('paired_test_with_independend_samples')
+        if test != 'auto' and self.paired and test not in self.test_ids_dependent:
+            self.AddWarning('independend_test_with_paired_samples')
+        # run the test
         if test in self.test_ids_all:
             self.run_test_by_id(test)
         else:
@@ -260,7 +269,7 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
         self.run_test(test='wilcoxon')
     def GetResult(self):
-        if not self.results and not self.error:
+        if self.results is None and not self.error:
             print('No test chosen, no results to output')
             # self.run_test(test='auto')
             return self.results
@@ -271,7 +280,7 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
             return self.results
     def GetSummary(self):
-        if not self.results and not self.error:
+        if self.results is None and not self.error:
             print('No test chosen, no summary to output')
             # self.run_test(test='auto')
             return self.summary

{autostatlib-0.2.26 → autostatlib-0.3.0}/src/AutoStatLib/StatPlots.py RENAMED Viewed

@@ -145,8 +145,8 @@ class BaseStatPlot(Helpers):
         self.median = [
             np.median(self.data_groups[i]).item() for i in range(self.n_groups)]
         self.sd = [
-            np.std(self.data_groups[i]).item() for i in range(self.n_groups)]
-        self.sem = [np.std(self.data_groups[i]).item() / np.sqrt(len(self.data_groups[i])).item()
+            np.std(self.data_groups[i], ddof=1).item() for i in range(self.n_groups)]
+        self.sem = [np.std(self.data_groups[i], ddof=1).item() / np.sqrt(len(self.data_groups[i])).item()
                     for i in range(self.n_groups)]
         self.n = [len(i) for i in self.data_groups]
@@ -274,65 +274,65 @@ class BaseStatPlot(Helpers):
                 fliersMarker = 'b+'
         # write a function to make a dictionary
         whiskersCapsStyles = dict()
-        if whiskersCapsColor != None:
+        if whiskersCapsColor is not None:
             whiskersCapsStyles["color"] = whiskersCapsColor
-        if whiskersCapsLineWidth != None:
+        if whiskersCapsLineWidth is not None:
             whiskersCapsStyles["linewidth"] = whiskersCapsLineWidth
-        if whiskersCapsLineStyle != None:
+        if whiskersCapsLineStyle is not None:
             whiskersCapsStyles['linestyle'] = whiskersCapsLineStyle
         boxProps = {"facecolor": (0, 0, 0, 0),
                     "edgecolor": "black", "linewidth": 1}
-        if boxFill != None:
+        if boxFill is not None:
             boxProps["facecolor"] = boxFill
-        if boxBorderColor != None:
+        if boxBorderColor is not None:
             boxProps["edgecolor"] = boxBorderColor
-        if boxBorderWidth != None:
+        if boxBorderWidth is not None:
             boxProps['linewidth'] = boxBorderWidth
-        # if boxBorderStyle != None:
+        # if boxBorderStyle is not None:
         #     boxProps['linestyle'] = boxBorderStyle  !!!this feature is not working with patch_artist that is needed for facecolor to work
         whiskersProps = {"color": 'black',
                          "linestyle": "solid", "linewidth": 1}
-        if whiskersColor != None:
+        if whiskersColor is not None:
             whiskersProps["color"] = whiskersColor
-        if whiskersLineStyle != None:
+        if whiskersLineStyle is not None:
             whiskersProps["linestyle"] = whiskersLineStyle
-        if whiskersLineWidth != None:
+        if whiskersLineWidth is not None:
             whiskersProps['linewidth'] = whiskersLineWidth
         flierProps = {"markerfacecolor": [
             0, 0, 0, 0], "markeredgecolor": "black", "linestyle": "solid", "markeredgewidth": 1}
-        if flierFillColor != None:
+        if flierFillColor is not None:
             flierProps["markerfacecolor"] = flierFillColor
-        if flierEdgeColor != None:
+        if flierEdgeColor is not None:
             flierProps["markeredgecolor"] = flierEdgeColor
-        if flierLineWidth != None:
+        if flierLineWidth is not None:
             flierProps['markeredgewidth'] = flierLineWidth
-        if flierLineStyle != None:
+        if flierLineStyle is not None:
             flierProps['linestyle'] = flierLineStyle
         medianProps = {"linestyle": 'solid', "linewidth": 1, "color": 'red'}
-        if medianColor != None:
+        if medianColor is not None:
             medianProps["color"] = medianColor
-        if medianLineStyle != None:
+        if medianLineStyle is not None:
             medianProps["linestyle"] = medianLineStyle
-        if medianLineWidth != None:
+        if medianLineWidth is not None:
             medianProps['linewidth'] = medianLineWidth
         meanProps = {"color": "black", "marker": 'o', "markerfacecolor": "black",
                      "markeredgecolor": "black", "linestyle": "solid", "linewidth": 1}
-        if meanMarker != None:
+        if meanMarker is not None:
             meanProps['marker'] = meanMarker
-        if meanFillColor != None:
+        if meanFillColor is not None:
             meanProps["markerfacecolor"] = meanFillColor
-        if meanEdgeColor != None:
+        if meanEdgeColor is not None:
             meanProps['markeredgecolor'] = meanEdgeColor
-        if meanLineColor != None:
+        if meanLineColor is not None:
             meanProps["color"] = meanLineColor
-        if meanLineStyle != None:
+        if meanLineStyle is not None:
             meanProps['linestyle'] = meanLineStyle
-        if meanLineWidth != None:
+        if meanLineWidth is not None:
             meanProps['linewidth'] = meanLineWidth
         bplot = ax.boxplot(self.data_groups,
@@ -901,6 +901,16 @@ class SwarmStatPlot(BaseStatPlot):
 class SwarmStatPlot_subgrouping_betta(BaseStatPlot):
+    '''
+    Swarm plot with subgrouping support. Subgrouping is defined by the user as a list of labels (one per data point)
+    that indicate which subgroup each data point belongs to.
+    The plot will automatically assign different colors to each unique subgroup label,
+    and add a legend to indicate which color corresponds to which subgroup.
+    Not tested well, use with caution.
+    For now, only supports one subgrouping across all groups,
+    so the subgrouping list should have the same length as the total number of data points across all groups.
+    '''
     def plot(self, linewidth=1.8):
         if not self.error:

{autostatlib-0.2.26 → autostatlib-0.3.0}/src/AutoStatLib/_version.py RENAMED Viewed

@@ -1,2 +1,2 @@
 # AutoStatLib package version:
-__version__ = "0.2.26"
+__version__ = "0.3.0"

{autostatlib-0.2.26 → autostatlib-0.3.0}/src/AutoStatLib/helpers.py RENAMED Viewed

@@ -48,7 +48,7 @@ class Helpers():
     def create_results_dict(self) -> dict:
         # evaluate successfullness
-        if self.p_value != None:
+        if self.p_value is not None:
             self.successfull = True
         else:
             self.successfull = False
@@ -68,7 +68,7 @@ class Helpers():
             'Population_Mean': self.popmean if self.n_groups == 1 else 'N/A',
             'Data_Normaly_Distributed': self.parametric if self.successfull else None,
             'Parametric_Test_Applied': True if self.test_id in self.test_ids_parametric else False,
-            'Paired_Test_Applied': self.paired if self.successfull else None,
+            'Paired_Test_Applied': self.paired_test_applied if self.successfull else None,
             'Tails': self.tails,
             'p_value_exact': self.p_value.item() if self.successfull else None,
             'Stars':  self.stars_int,
@@ -79,8 +79,8 @@ class Helpers():
             'Groups_N': [len(self.data[i]) for i in range(len(self.data))],
             'Groups_Median': [np.median(self.data[i]).item() for i in range(len(self.data))],
             'Groups_Mean': [np.mean(self.data[i]).item() for i in range(len(self.data))],
-            'Groups_SD': [np.std(self.data[i]).item() for i in range(len(self.data))],
-            'Groups_SE': [np.std(self.data[i]).item() / np.sqrt(len(self.data)).item() for i in range(len(self.data))],
+            'Groups_SD': [np.std(self.data[i], ddof=1).item() for i in range(len(self.data))],
+            'Groups_SE': [np.std(self.data[i], ddof=1).item() / np.sqrt(len(self.data[i])) for i in range(len(self.data))],
             'subgrouping': self.subgrouping,
             # actually returns list of lists of numpy dtypes of float64, next make it return regular floats:
             'Samples': self.data,

{autostatlib-0.2.26 → autostatlib-0.3.0}/src/AutoStatLib/statistical_tests.py RENAMED Viewed

@@ -6,6 +6,16 @@ from statsmodels.stats.multicomp import pairwise_tukeyhsd
 from statsmodels.stats.multitest import multipletests
 from scipy.stats import ttest_rel, ttest_ind, ttest_1samp, wilcoxon, mannwhitneyu, f_oneway, kruskal, friedmanchisquare
+# Known bugs:
+# One-tailed p-value: no directionality check
+# File: statistical_tests.py — t_test_independent, t_test_paired, mann_whitney, wilcoxon, etc.
+# if self.tails == 1:
+#     p_value /= 2
+# Dividing a two-tailed p-value by 2 is only valid when the test statistic falls in the hypothesized direction. If the effect is in the opposite direction, the one-tailed p should be 1 - p_two_tailed/2. Without a alternative parameter exposed to the user, results for one-tailed tests where the effect direction is "wrong" will be misleading.
+# Recommendation: Either expose an alternative='less'/'greater' parameter and pass it to scipy.stats directly (which handles it correctly), or document that one-tailed results are only valid when the observed effect is in the expected direction.
 class StatisticalTests():
     '''
@@ -76,11 +86,7 @@ class StatisticalTests():
             case 'wilcoxon_single_sample': stat, p_value = self.wilcoxon_single_sample()
             case 'none': stat, p_value = (None, None)
-        if test_id in self.test_ids_dependent:
-            self.paired = True
-        else:
-            self.paired = False
+        self.paired_test_applied = test_id in self.test_ids_dependent
         self.test_name = test_names_dict[test_id]
         self.test_id = test_id
         self.test_stat = stat
@@ -102,10 +108,10 @@ class StatisticalTests():
             # Tukey's multiple comparisons
             tukey_result = pairwise_tukeyhsd(data_flat, group_labels)
-            list = tukey_result.pvalues.tolist()
+            tukey_pvalues = tukey_result.pvalues.tolist()
             n = self.n_groups
             # prepare posthoc matrix
-            self.posthoc_matrix = self.list_to_matrix(list, n)
+            self.posthoc_matrix = self.list_to_matrix(tukey_pvalues, n)
         return stat, p_value
@@ -119,7 +125,7 @@ class StatisticalTests():
         df = self.matrix_to_dataframe(self.data)
         res = AnovaRM(df, 'Value', 'Row', within=['Col']).fit()
-        print(res)
         stat = res.anova_table.iloc[0][0]
         p_value = res.anova_table.iloc[0][3]
@@ -186,7 +192,7 @@ class StatisticalTests():
         return stat, p_value
     def t_test_single_sample(self):
-        if self.popmean == None:
+        if self.popmean is None:
             self.popmean = 0
             self.AddWarning('no_pop_mean_set')
         stat, p_value = ttest_1samp(self.data[0], self.popmean)
@@ -201,7 +207,7 @@ class StatisticalTests():
         return stat, p_value
     def wilcoxon_single_sample(self):
-        if self.popmean == None:
+        if self.popmean is None:
             self.popmean = 0
             self.AddWarning('no_pop_mean_set')
         data = [i - self.popmean for i in self.data[0]]

{autostatlib-0.2.26 → autostatlib-0.3.0/src/AutoStatLib.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: AutoStatLib
-Version: 0.2.26
+Version: 0.3.0
 Summary: AutoStatLib - a simple statistical analysis tool
 Author: Stemonitis, SciWare LLC
 Author-email: konung-yaropolk <yaropolk1995@gmail.com>

{autostatlib-0.2.26 → autostatlib-0.3.0}/src/AutoStatLib.egg-info/SOURCES.txt RENAMED Viewed

@@ -16,4 +16,5 @@ src/AutoStatLib.egg-info/PKG-INFO
 src/AutoStatLib.egg-info/SOURCES.txt
 src/AutoStatLib.egg-info/dependency_links.txt
 src/AutoStatLib.egg-info/requires.txt
-src/AutoStatLib.egg-info/top_level.txt
+src/AutoStatLib.egg-info/top_level.txt
+tests/test_autostatlib.py

autostatlib-0.3.0/tests/test_autostatlib.py ADDED Viewed

@@ -0,0 +1,148 @@
+# tests/test_autostatlib.py
+import pytest
+import numpy as np
+import AutoStatLib
+# --- Fixtures ---
+@pytest.fixture
+def normal_2groups():
+    np.random.seed(42)
+    return [list(np.random.normal(0, 1, 20)), list(np.random.normal(1, 1, 20))]
+@pytest.fixture
+def nonnormal_2groups():
+    np.random.seed(42)
+    return [list(np.random.exponential(1, 20)), list(np.random.exponential(2, 20))]
+# --- Basic functionality ---
+def test_run_auto_returns_result(normal_2groups):
+    a = AutoStatLib.StatisticalAnalysis(normal_2groups)
+    a.RunAuto()
+    r = a.GetResult()
+    assert isinstance(r, dict)
+    assert 'p_value_exact' in r
+    assert 0.0 <= r['p_value_exact'] <= 1.0
+def test_run_auto_selects_ttest_for_normal(normal_2groups):
+    a = AutoStatLib.StatisticalAnalysis(normal_2groups)
+    a.RunAuto()
+    assert a.test_id == 't_test_independent'
+def test_run_auto_selects_mann_whitney_for_nonnormal(nonnormal_2groups):
+    a = AutoStatLib.StatisticalAnalysis(nonnormal_2groups)
+    a.RunAuto()
+    assert a.test_id == 'mann_whitney'
+def test_verbose_false_no_print(normal_2groups, capsys):
+    a = AutoStatLib.StatisticalAnalysis(normal_2groups, verbose=False)
+    a.RunAuto()
+    captured = capsys.readouterr()
+    assert captured.out == ''
+# --- Result dict completeness ---
+def test_result_dict_keys(normal_2groups):
+    a = AutoStatLib.StatisticalAnalysis(normal_2groups)
+    a.RunAuto()
+    r = a.GetResult()
+    required_keys = [
+        'p_value', 'p_value_exact', 'Significance(p<0.05)', 'Stars',
+        'Stars_Printed', 'Test_Name', 'Groups_N', 'Groups_Mean',
+        'Groups_SD', 'Groups_SE', 'Groups_Median', 'Warnings',
+    ]
+    for key in required_keys:
+        assert key in r, f"Missing key: {key}"
+def test_se_calculation_correct(normal_2groups):
+    """SE = std / sqrt(n) per group, not std / sqrt(num_groups)."""
+    a = AutoStatLib.StatisticalAnalysis(normal_2groups)
+    a.RunAuto()
+    r = a.GetResult()
+    for i, group in enumerate(normal_2groups):
+        expected_se = np.std(group, ddof=1) / np.sqrt(len(group))
+        assert abs(r['Groups_SE'][i] - expected_se) < 0.01, \
+            f"SE for group {i} is wrong: {r['Groups_SE'][i]} vs {expected_se}"
+# --- Error handling ---
+def test_raises_on_too_few_samples():
+    a = AutoStatLib.StatisticalAnalysis([[1, 2, 3], [4, 5, 6]], raise_errors=True)
+    with pytest.raises(ValueError):
+        a.RunAuto()
+def test_empty_result_on_wrong_group_count():
+    """3-group test requested with 2 groups should fail gracefully."""
+    a = AutoStatLib.StatisticalAnalysis([[1,2,3,4,5],[6,7,8,9,10]])
+    a.RunOnewayAnova()
+    assert a.GetResult() == {} or a.error
+def test_non_numeric_data_filtered():
+    a = AutoStatLib.StatisticalAnalysis([['x', 'y', 1, 2, 3, 4, 5], [6, 7, 8, 9, 10]])
+    a.RunAuto()
+    r = a.GetResult()
+    assert isinstance(r, dict)
+# --- Single-sample tests ---
+def test_single_sample_ttest():
+    data = [list(np.random.normal(5, 1, 30))]
+    a = AutoStatLib.StatisticalAnalysis(data, popmean=0)
+    a.RunTtestSingleSample()
+    r = a.GetResult()
+    assert r['Significance(p<0.05)'] is True  # mean ~5 vs popmean=0 should be significant
+def test_no_popmean_triggers_warning():
+    data = [list(np.random.normal(1, 1, 20))]
+    a = AutoStatLib.StatisticalAnalysis(data)
+    a.RunTtestSingleSample()
+    r = a.GetResult()
+    assert len(r['Warnings']) > 0
+# --- Paired tests ---
+def test_paired_ttest_equal_length_required():
+    a = AutoStatLib.StatisticalAnalysis([[1,2,3,4,5],[6,7,8,9,10,11]], paired=True, raise_errors=True)
+    with pytest.raises(ValueError):
+        a.RunTtestPaired()
+# --- Posthoc ---
+def test_posthoc_kruskal():
+    np.random.seed(0)
+    data = [list(np.random.normal(i, 1, 20)) for i in range(3)]
+    a = AutoStatLib.StatisticalAnalysis(data, posthoc=True)
+    a.RunKruskalWallis()
+    r = a.GetResult()
+    assert len(r['Posthoc_Matrix']) == 3
+    assert len(r['Posthoc_Matrix'][0]) == 3
+# --- Stars ---
+@pytest.mark.parametrize("p,expected", [
+    (0.001, 3), (0.01, 2), (0.04, 1), (0.1, 0), (0.00001, 4)
+])
+def test_make_stars(p, expected):
+    a = AutoStatLib.StatisticalAnalysis([[1,2,3,4,5],[6,7,8,9,10]])
+    assert a.make_stars(p) == expected
+# --- Tails ---
+def test_one_tailed_p_less_than_two_tailed(normal_2groups):
+    a2 = AutoStatLib.StatisticalAnalysis(normal_2groups, tails=2)
+    a2.RunTtest()
+    p2 = a2.GetResult()['p_value_exact']
+    a1 = AutoStatLib.StatisticalAnalysis(normal_2groups, tails=1)
+    a1.RunTtest()
+    p1 = a1.GetResult()['p_value_exact']
+    assert abs(p1 - p2 / 2) < 1e-10
+# --- GetSummary ---
+def test_get_summary_contains_version(normal_2groups):
+    a = AutoStatLib.StatisticalAnalysis(normal_2groups)
+    a.RunAuto()
+    assert 'AutoStatLib' in a.GetSummary()