PyPI - AutoStatLib - Versions diffs - 0.4.0__tar.gz → 0.4.2__tar.gz - Mend

AutoStatLib 0.4.0 tar.gz → 0.4.2 tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24)

{autostatlib-0.4.0/src/AutoStatLib.egg-info → autostatlib-0.4.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: AutoStatLib
-Version: 0.4.0
+Version: 0.4.2
 Summary: AutoStatLib - a simple statistical analysis tool
 Author: Stemonitis, SciWare LLC
 Author-email: konung-yaropolk <yaropolk1995@gmail.com>

{autostatlib-0.4.0 → autostatlib-0.4.2}/src/AutoStatLib/AutoStatLib.py RENAMED Viewed

@@ -1,5 +1,6 @@
 from __future__ import annotations
+from itertools import cycle, islice
 from typing import Optional, Union
 import numpy as np
@@ -69,7 +70,7 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
         self.raise_errors: bool = raise_errors
         self.n_groups: int = len(self.groups_list)
         self.groups_name: list[str] = (
-            [groups_name[i % len(groups_name)] for i in range(self.n_groups)]
+            list(islice(cycle(groups_name), self.n_groups))
             if groups_name and groups_name != [""]
             else [f"Group {i + 1}" for i in range(self.n_groups)]
         )
@@ -216,7 +217,7 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
             ), "Wrong test id choosen, ensure you called correct function"
             assert all(
                 len(group) >= 4 for group in self.data
-            ), "Each group must contain at least four values"
+            ), "Each group must be at least n=4 for the valid statistics, so test was skipped"
             assert not (
                 test in self.test_ids_dependent
                 and not all(len(lst) == len(self.data[0]) for lst in self.data)

{autostatlib-0.4.0 → autostatlib-0.4.2}/src/AutoStatLib/StatPlots.py RENAMED Viewed

@@ -20,12 +20,8 @@ class Helpers:
         colors: list[str | tuple],
         alpha: float = 0.35,
     ) -> list[tuple[float, float, float, float]]:
-        rgba_colors: list[tuple[float, float, float, float]] = []
-        for col in colors:
-            rgba = list(mcolors.to_rgba(col))
-            rgba[3] = alpha
-            rgba_colors.append((rgba[0], rgba[1], rgba[2], rgba[3]))
-        return rgba_colors
+        # mcolors.to_rgba returns a 4-tuple; replace only the alpha channel.
+        return [(*mcolors.to_rgba(c)[:3], alpha) for c in colors]
     def get_colors(
         self,
@@ -166,26 +162,34 @@ class BaseStatPlot(Helpers):
             print("AutoStatLib.StatPlots Error :", error)
             return
-        #  sd sem mean and median calculation if they are not provided
-        self.mean: list[float] = [
-            np.mean(self.data_groups[i]).item() for i in range(self.n_groups)
-        ]
-        self.median: list[float] = [
-            np.median(self.data_groups[i]).item() for i in range(self.n_groups)
-        ]
-        self.sd: list[float] = [
-            np.std(self.data_groups[i], ddof=1).item() for i in range(self.n_groups)
-        ]
-        self.sem: list[float] = [
-            np.std(self.data_groups[i], ddof=1).item()
-            / np.sqrt(len(self.data_groups[i]))
-            for i in range(self.n_groups)
-        ]
-        self.n: list[int] = [len(i) for i in self.data_groups]
+        # sd sem mean and median calculation if they are not provided.
+        # Convert each group to a float array once; reuse for all four stats.
+        # This avoids calling np.std twice per group (old code recomputed it
+        # from scratch for sem after already computing it for sd).
+        _arrs       = [np.asarray(g, dtype=float) for g in self.data_groups]
+        self.n      = [len(a) for a in _arrs]
+        self.mean   = [float(a.mean())      for a in _arrs]
+        self.median = [float(np.median(a))  for a in _arrs]
+        self.sd     = [float(a.std(ddof=1)) for a in _arrs]
+        self.sem    = [sd / np.sqrt(n) for sd, n in zip(self.sd, self.n)]
         self.p_printed: str = self.make_p_value_printed(self.p)
         self.stars_printed: str = self.make_stars_printed(self.make_stars(self.p))
+        # Pre-compute posthoc matrix string representations once here so that
+        # add_significance_bars() doesn't rebuild them on every call.
+        if self.posthoc_matrix:
+            self._posthoc_printed: list[list[str]] = [
+                [self.make_p_value_printed(e) for e in row]
+                for row in self.posthoc_matrix
+            ]
+            self._posthoc_stars: list[list[str]] = [
+                [self.make_stars_printed(self.make_stars(e)) for e in row]
+                for row in self.posthoc_matrix
+            ]
+        else:
+            self._posthoc_printed = []
+            self._posthoc_stars   = []
         self.groups_name: list[str] = Groups_Name if Groups_Name is not None else [""]
         self.subgrouping: list = subgrouping if subgrouping else [0]
         self.subgrouping_arrange: list[int] = self.expand_counts(self.subgrouping)
@@ -430,10 +434,12 @@ class BaseStatPlot(Helpers):
         linewidth: float = 1.2,
         zorder: int = 2,
     ) -> None:
-        spread_pool: list[tuple] = []
-        for i, data in enumerate(self.data_groups):
-            spread = tuple(random.uniform(-0.10, 0.10) for _ in data)
-            spread_pool.append(tuple(i + s for s in spread))
+        # Generate all jitter offsets with NumPy at once instead of Python loops.
+        rng = np.random.default_rng()
+        spread_pool: list[np.ndarray] = [
+            i + rng.uniform(-0.10, 0.10, size=len(g))
+            for i, g in enumerate(self.data_groups)
+        ]
         for i, data in enumerate(self.transpose(self.data_groups)):
             ax.plot(
@@ -490,6 +496,7 @@ class BaseStatPlot(Helpers):
             marker=marker,
             linewidth=linewidth * self.figure_scale_factor * size_scale,
             zorder=zorder,
+            warn_thresh = 1, # threshold for warning about too many points; set to 0 to always warn, or 1 to never warn
         )
         if self.dependent:
@@ -690,22 +697,10 @@ class BaseStatPlot(Helpers):
         col: str = "k",
     ) -> None:
-        posthoc_matrix_printed: list[list[str]] = (
-            [
-                [self.make_p_value_printed(element) for element in row]
-                for row in self.posthoc_matrix
-            ]
-            if self.posthoc_matrix
-            else []
-        )
-        posthoc_matrix_stars: list[list[str]] = (
-            [
-                [self.make_stars_printed(self.make_stars(element)) for element in row]
-                for row in self.posthoc_matrix
-            ]
-            if self.posthoc_matrix
-            else []
-        )
+        # Use the pre-computed representations cached in __init__ rather than
+        # rebuilding them on every call to add_significance_bars().
+        posthoc_matrix_printed: list[list[str]] = self._posthoc_printed
+        posthoc_matrix_stars:   list[list[str]] = self._posthoc_stars
         def draw_bar(
             p: str,

{autostatlib-0.4.0 → autostatlib-0.4.2}/src/AutoStatLib/_version.py RENAMED Viewed

@@ -1,2 +1,2 @@
 # AutoStatLib package version:
-__version__ = "0.4.0"
+__version__ = "0.4.2"

{autostatlib-0.4.0 → autostatlib-0.4.2}/src/AutoStatLib/helpers.py RENAMED Viewed

@@ -11,17 +11,18 @@ import pandas as pd
 class Helpers(StatAnalysisProtocol):
     def matrix_to_dataframe(self, matrix: list[list[float]]) -> pd.DataFrame:
-        data: list[float] = []
-        cols: list[int] = []
-        rows: list[int] = []
-        for i, row in enumerate(matrix):
-            for j, value in enumerate(row):
-                data.append(value)
-                cols.append(i)
-                rows.append(j)
-        return pd.DataFrame({"Row": rows, "Col": cols, "Value": data})
+        # Convert once to a 2-D float array, then use NumPy meshgrid to build
+        # the row/col index arrays without any Python-level loop.
+        arr = np.array(matrix, dtype=float)          # (n_subjects, n_conditions)
+        n_rows, n_cols = arr.shape
+        row_idx, col_idx = np.meshgrid(
+            np.arange(n_rows), np.arange(n_cols), indexing="ij"
+        )
+        return pd.DataFrame({
+            "Row":   row_idx.ravel(),
+            "Col":   col_idx.ravel(),
+            "Value": arr.ravel(),
+        })
     def list_to_matrix(self, values: list[float], n: int) -> list[list[float]]:
         i = 0
@@ -65,6 +66,32 @@ class Helpers(StatAnalysisProtocol):
             self.make_stars_printed(self.stars_int) if self.successfull else ""
         )
+        # --- Compute per-group descriptive stats in a single pass ----------
+        # Convert each group once; reuse the array for mean, median, std, sem.
+        # This also avoids calling np.std twice (once for SD, once for SE).
+        groups_arr    = [np.asarray(g, dtype=float) for g in self.data]
+        groups_n      = [len(a)                         for a in groups_arr]
+        groups_mean   = [float(a.mean())                for a in groups_arr]
+        groups_median = [float(np.median(a))            for a in groups_arr]
+        groups_sd     = [float(a.std(ddof=1))           for a in groups_arr]
+        groups_se     = [sd / np.sqrt(n) for sd, n in zip(groups_sd, groups_n)]
+        # --- Posthoc matrix representations — one pass over the matrix -----
+        # Previously built as three separate nested list comprehensions;
+        # now all three are filled in a single traversal.
+        if self.posthoc_matrix:
+            pm_bool:    list[list] = []
+            pm_printed: list[list] = []
+            pm_stars:   list[list] = []
+            for row in self.posthoc_matrix:
+                pm_bool.append([bool(e) for e in row])
+                pm_printed.append([self.make_p_value_printed(e) for e in row])
+                pm_stars.append(
+                    [self.make_stars_printed(self.make_stars(e)) for e in row]
+                )
+        else:
+            pm_bool = pm_printed = pm_stars = []
         return {
             "p_value": (
                 self.make_p_value_printed(self.p_value.item())
@@ -90,52 +117,22 @@ class Helpers(StatAnalysisProtocol):
             "Stars": self.stars_int,
             "Warnings": self.warnings,
             "Successfull_Test": (self.successfull and not self.error),
-            "Groups_Name": self.groups_name,
-            "Groups_N": [len(self.data[i]) for i in range(len(self.data))],
-            "Groups_Median": [
-                np.median(self.data[i]).item() for i in range(len(self.data))
-            ],
-            "Groups_Mean": [
-                np.mean(self.data[i]).item() for i in range(len(self.data))
-            ],
-            "Groups_SD": [
-                np.std(self.data[i], ddof=1).item() for i in range(len(self.data))
-            ],
-            "Groups_SE": [
-                np.std(self.data[i], ddof=1).item() / np.sqrt(len(self.data[i]))
-                for i in range(len(self.data))
-            ],
+            "Groups_Name":   self.groups_name,
+            "Groups_N":      groups_n,
+            "Groups_Median": groups_median,
+            "Groups_Mean":   groups_mean,
+            "Groups_SD":     groups_sd,
+            "Groups_SE":     groups_se,
             "subgrouping": self.subgrouping,
             # actually returns list of lists of numpy dtypes of float64, next make it return regular floats:
             "Samples": self.data,
             "Posthoc_Tests_Name": (
                 self.posthoc_name if self.posthoc_name is not None else ""
             ),
-            "Posthoc_Matrix": self.posthoc_matrix if self.posthoc_matrix else [],
-            "Posthoc_Matrix_bool": (
-                [[bool(element) for element in row] for row in self.posthoc_matrix]
-                if self.posthoc_matrix
-                else []
-            ),
-            "Posthoc_Matrix_printed": (
-                [
-                    [self.make_p_value_printed(element) for element in row]
-                    for row in self.posthoc_matrix
-                ]
-                if self.posthoc_matrix
-                else []
-            ),
-            "Posthoc_Matrix_stars": (
-                [
-                    [
-                        self.make_stars_printed(self.make_stars(element))
-                        for element in row
-                    ]
-                    for row in self.posthoc_matrix
-                ]
-                if self.posthoc_matrix
-                else []
-            ),
+            "Posthoc_Matrix":         self.posthoc_matrix if self.posthoc_matrix else [],
+            "Posthoc_Matrix_bool":    pm_bool,
+            "Posthoc_Matrix_printed": pm_printed,
+            "Posthoc_Matrix_stars":   pm_stars,
         }
     def log(self, *args: object, **kwargs: object) -> None:
@@ -145,4 +142,4 @@ class Helpers(StatAnalysisProtocol):
     def AddWarning(self, warning_id: str) -> None:
         message: str = self.warning_ids_all[warning_id]
         self.log(message)
-        self.warnings.append(message)
+        self.warnings.append(message)

{autostatlib-0.4.0 → autostatlib-0.4.2}/src/AutoStatLib/statistical_tests.py RENAMED Viewed

@@ -241,8 +241,8 @@ class StatisticalTests(StatAnalysisProtocol):
         if self.popmean is None:
             self.popmean = 0
             self.AddWarning("no_pop_mean_set")
-        data: list[float] = [x - self.popmean for x in self.data[0]]
-        stat, p_value = wilcoxon(data)
+        arr = np.asarray(self.data[0], dtype=float) - self.popmean
+        stat, p_value = wilcoxon(arr)
         if self.tails == 1:
             p_value /= 2
         return stat, p_value

{autostatlib-0.4.0 → autostatlib-0.4.2}/src/AutoStatLib/text_formatting.py RENAMED Viewed

@@ -8,27 +8,29 @@ from typing import Optional
 class TextFormatting(StatAnalysisProtocol):
     """Text formatting mixin."""
-    def autospace(
-        self, elements_list: list[str], space: int, delimiter: str = " "
-    ) -> str:
-        output = ""
-        for i, element in enumerate(elements_list):
-            if i == len(elements_list):
-                output += element
-            else:
-                output += element + (space - len(element)) * delimiter
-        return output
+    def _fmt_row(self, elements: list[str], width: int, fill: str = " ") -> str:
+        """
+        Format a list of strings into a fixed-width columnar row.
+        Each element is left-justified to ``width`` characters using ``fill``
+        as the pad character.  The last element is appended without trailing
+        padding (matches terminal/log output intent).
+        Replaces the hand-rolled ``autospace()`` loop with Python's built-in
+        ``str.ljust`` and ``str.join``.
+        """
+        if not elements:
+            return ""
+        # All but the last element are padded to `width`; last is bare.
+        return "".join(e.ljust(width, fill) for e in elements[:-1]) + elements[-1]
     def print_groups(self, space: int = 24, max_length: int = 15) -> None:
         self.log("")
         data: list[list[float]] = self.data
-        num_groups: int = len(data)
         group_longest: int = max(len(row) for row in data)
-        header: list[str] = self.groups_name
-        line: list[str] = ["" * 7]
-        self.log(self.autospace(header, space))
-        self.log(self.autospace(line, space))
+        self.log(self._fmt_row(self.groups_name, space))
+        self.log(self._fmt_row(["" * 7], space))
         for i in range(group_longest):
             row_values: list[str] = []
@@ -51,7 +53,7 @@ class TextFormatting(StatAnalysisProtocol):
                         row_values.append("")
             if all_values_empty:
                 break
-            self.log(self.autospace(row_values, space))
+            self.log(self._fmt_row(row_values, space))
     def print_results(self) -> None:
         self.log("\n\nResults: \n")

{autostatlib-0.4.0 → autostatlib-0.4.2/src/AutoStatLib.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: AutoStatLib
-Version: 0.4.0
+Version: 0.4.2
 Summary: AutoStatLib - a simple statistical analysis tool
 Author: Stemonitis, SciWare LLC
 Author-email: konung-yaropolk <yaropolk1995@gmail.com>