AutoStatLib 0.4.1__tar.gz → 0.4.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {autostatlib-0.4.1/src/AutoStatLib.egg-info → autostatlib-0.4.2}/PKG-INFO +1 -1
- {autostatlib-0.4.1 → autostatlib-0.4.2}/src/AutoStatLib/AutoStatLib.py +2 -1
- {autostatlib-0.4.1 → autostatlib-0.4.2}/src/AutoStatLib/StatPlots.py +37 -43
- {autostatlib-0.4.1 → autostatlib-0.4.2}/src/AutoStatLib/_version.py +1 -1
- {autostatlib-0.4.1 → autostatlib-0.4.2}/src/AutoStatLib/helpers.py +49 -52
- {autostatlib-0.4.1 → autostatlib-0.4.2}/src/AutoStatLib/statistical_tests.py +2 -2
- {autostatlib-0.4.1 → autostatlib-0.4.2}/src/AutoStatLib/text_formatting.py +18 -16
- {autostatlib-0.4.1 → autostatlib-0.4.2/src/AutoStatLib.egg-info}/PKG-INFO +1 -1
- {autostatlib-0.4.1 → autostatlib-0.4.2}/LICENSE +0 -0
- {autostatlib-0.4.1 → autostatlib-0.4.2}/MANIFEST.in +0 -0
- {autostatlib-0.4.1 → autostatlib-0.4.2}/README.md +0 -0
- {autostatlib-0.4.1 → autostatlib-0.4.2}/pyproject.toml +0 -0
- {autostatlib-0.4.1 → autostatlib-0.4.2}/requirements.txt +0 -0
- {autostatlib-0.4.1 → autostatlib-0.4.2}/setup.cfg +0 -0
- {autostatlib-0.4.1 → autostatlib-0.4.2}/src/AutoStatLib/__init__.py +0 -0
- {autostatlib-0.4.1 → autostatlib-0.4.2}/src/AutoStatLib/__main__.py +0 -0
- {autostatlib-0.4.1 → autostatlib-0.4.2}/src/AutoStatLib/_protocol.py +0 -0
- {autostatlib-0.4.1 → autostatlib-0.4.2}/src/AutoStatLib/normality_tests.py +0 -0
- {autostatlib-0.4.1 → autostatlib-0.4.2}/src/AutoStatLib.egg-info/SOURCES.txt +0 -0
- {autostatlib-0.4.1 → autostatlib-0.4.2}/src/AutoStatLib.egg-info/dependency_links.txt +0 -0
- {autostatlib-0.4.1 → autostatlib-0.4.2}/src/AutoStatLib.egg-info/requires.txt +0 -0
- {autostatlib-0.4.1 → autostatlib-0.4.2}/src/AutoStatLib.egg-info/top_level.txt +0 -0
- {autostatlib-0.4.1 → autostatlib-0.4.2}/tests/test_autostatlib.py +0 -0
- {autostatlib-0.4.1 → autostatlib-0.4.2}/tests/test_statplots.py +0 -0
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
from itertools import cycle, islice
|
|
3
4
|
from typing import Optional, Union
|
|
4
5
|
|
|
5
6
|
import numpy as np
|
|
@@ -69,7 +70,7 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
|
|
|
69
70
|
self.raise_errors: bool = raise_errors
|
|
70
71
|
self.n_groups: int = len(self.groups_list)
|
|
71
72
|
self.groups_name: list[str] = (
|
|
72
|
-
|
|
73
|
+
list(islice(cycle(groups_name), self.n_groups))
|
|
73
74
|
if groups_name and groups_name != [""]
|
|
74
75
|
else [f"Group {i + 1}" for i in range(self.n_groups)]
|
|
75
76
|
)
|
|
@@ -20,12 +20,8 @@ class Helpers:
|
|
|
20
20
|
colors: list[str | tuple],
|
|
21
21
|
alpha: float = 0.35,
|
|
22
22
|
) -> list[tuple[float, float, float, float]]:
|
|
23
|
-
|
|
24
|
-
for
|
|
25
|
-
rgba = list(mcolors.to_rgba(col))
|
|
26
|
-
rgba[3] = alpha
|
|
27
|
-
rgba_colors.append((rgba[0], rgba[1], rgba[2], rgba[3]))
|
|
28
|
-
return rgba_colors
|
|
23
|
+
# mcolors.to_rgba returns a 4-tuple; replace only the alpha channel.
|
|
24
|
+
return [(*mcolors.to_rgba(c)[:3], alpha) for c in colors]
|
|
29
25
|
|
|
30
26
|
def get_colors(
|
|
31
27
|
self,
|
|
@@ -166,26 +162,34 @@ class BaseStatPlot(Helpers):
|
|
|
166
162
|
print("AutoStatLib.StatPlots Error :", error)
|
|
167
163
|
return
|
|
168
164
|
|
|
169
|
-
#
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
]
|
|
176
|
-
self.
|
|
177
|
-
|
|
178
|
-
]
|
|
179
|
-
self.sem: list[float] = [
|
|
180
|
-
np.std(self.data_groups[i], ddof=1).item()
|
|
181
|
-
/ np.sqrt(len(self.data_groups[i]))
|
|
182
|
-
for i in range(self.n_groups)
|
|
183
|
-
]
|
|
184
|
-
|
|
185
|
-
self.n: list[int] = [len(i) for i in self.data_groups]
|
|
165
|
+
# Calculate the SD, SEM, mean and median of each group if they are not provided.
|
|
166
|
+
# Convert each group to a float array once; reuse for all four stats.
|
|
167
|
+
# This avoids calling np.std twice per group (old code recomputed it
|
|
168
|
+
# from scratch for sem after already computing it for sd).
|
|
169
|
+
_arrs = [np.asarray(g, dtype=float) for g in self.data_groups]
|
|
170
|
+
self.n = [len(a) for a in _arrs]
|
|
171
|
+
self.mean = [float(a.mean()) for a in _arrs]
|
|
172
|
+
self.median = [float(np.median(a)) for a in _arrs]
|
|
173
|
+
self.sd = [float(a.std(ddof=1)) for a in _arrs]
|
|
174
|
+
self.sem = [sd / np.sqrt(n) for sd, n in zip(self.sd, self.n)]
|
|
186
175
|
self.p_printed: str = self.make_p_value_printed(self.p)
|
|
187
176
|
self.stars_printed: str = self.make_stars_printed(self.make_stars(self.p))
|
|
188
177
|
|
|
178
|
+
# Pre-compute posthoc matrix string representations once here so that
|
|
179
|
+
# add_significance_bars() doesn't rebuild them on every call.
|
|
180
|
+
if self.posthoc_matrix:
|
|
181
|
+
self._posthoc_printed: list[list[str]] = [
|
|
182
|
+
[self.make_p_value_printed(e) for e in row]
|
|
183
|
+
for row in self.posthoc_matrix
|
|
184
|
+
]
|
|
185
|
+
self._posthoc_stars: list[list[str]] = [
|
|
186
|
+
[self.make_stars_printed(self.make_stars(e)) for e in row]
|
|
187
|
+
for row in self.posthoc_matrix
|
|
188
|
+
]
|
|
189
|
+
else:
|
|
190
|
+
self._posthoc_printed = []
|
|
191
|
+
self._posthoc_stars = []
|
|
192
|
+
|
|
189
193
|
self.groups_name: list[str] = Groups_Name if Groups_Name is not None else [""]
|
|
190
194
|
self.subgrouping: list = subgrouping if subgrouping else [0]
|
|
191
195
|
self.subgrouping_arrange: list[int] = self.expand_counts(self.subgrouping)
|
|
@@ -430,10 +434,12 @@ class BaseStatPlot(Helpers):
|
|
|
430
434
|
linewidth: float = 1.2,
|
|
431
435
|
zorder: int = 2,
|
|
432
436
|
) -> None:
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
+
# Generate all jitter offsets with NumPy at once instead of Python loops.
|
|
438
|
+
rng = np.random.default_rng()
|
|
439
|
+
spread_pool: list[np.ndarray] = [
|
|
440
|
+
i + rng.uniform(-0.10, 0.10, size=len(g))
|
|
441
|
+
for i, g in enumerate(self.data_groups)
|
|
442
|
+
]
|
|
437
443
|
|
|
438
444
|
for i, data in enumerate(self.transpose(self.data_groups)):
|
|
439
445
|
ax.plot(
|
|
@@ -691,22 +697,10 @@ class BaseStatPlot(Helpers):
|
|
|
691
697
|
col: str = "k",
|
|
692
698
|
) -> None:
|
|
693
699
|
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
]
|
|
699
|
-
if self.posthoc_matrix
|
|
700
|
-
else []
|
|
701
|
-
)
|
|
702
|
-
posthoc_matrix_stars: list[list[str]] = (
|
|
703
|
-
[
|
|
704
|
-
[self.make_stars_printed(self.make_stars(element)) for element in row]
|
|
705
|
-
for row in self.posthoc_matrix
|
|
706
|
-
]
|
|
707
|
-
if self.posthoc_matrix
|
|
708
|
-
else []
|
|
709
|
-
)
|
|
700
|
+
# Use the pre-computed representations cached in __init__ rather than
|
|
701
|
+
# rebuilding them on every call to add_significance_bars().
|
|
702
|
+
posthoc_matrix_printed: list[list[str]] = self._posthoc_printed
|
|
703
|
+
posthoc_matrix_stars: list[list[str]] = self._posthoc_stars
|
|
710
704
|
|
|
711
705
|
def draw_bar(
|
|
712
706
|
p: str,
|
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
# AutoStatLib package version:
|
|
2
|
-
__version__ = "0.4.
|
|
2
|
+
__version__ = "0.4.2"
|
|
@@ -11,17 +11,18 @@ import pandas as pd
|
|
|
11
11
|
class Helpers(StatAnalysisProtocol):
|
|
12
12
|
|
|
13
13
|
def matrix_to_dataframe(self, matrix: list[list[float]]) -> pd.DataFrame:
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
14
|
+
# Convert once to a 2-D float array, then use NumPy meshgrid to build
|
|
15
|
+
# the row/col index arrays without any Python-level loop.
|
|
16
|
+
arr = np.array(matrix, dtype=float) # (n_subjects, n_conditions)
|
|
17
|
+
n_rows, n_cols = arr.shape
|
|
18
|
+
row_idx, col_idx = np.meshgrid(
|
|
19
|
+
np.arange(n_rows), np.arange(n_cols), indexing="ij"
|
|
20
|
+
)
|
|
21
|
+
return pd.DataFrame({
|
|
22
|
+
"Row": row_idx.ravel(),
|
|
23
|
+
"Col": col_idx.ravel(),
|
|
24
|
+
"Value": arr.ravel(),
|
|
25
|
+
})
|
|
25
26
|
|
|
26
27
|
def list_to_matrix(self, values: list[float], n: int) -> list[list[float]]:
|
|
27
28
|
i = 0
|
|
@@ -65,6 +66,32 @@ class Helpers(StatAnalysisProtocol):
|
|
|
65
66
|
self.make_stars_printed(self.stars_int) if self.successfull else ""
|
|
66
67
|
)
|
|
67
68
|
|
|
69
|
+
# --- Compute per-group descriptive stats in a single pass ----------
|
|
70
|
+
# Convert each group once; reuse the array for mean, median, std, sem.
|
|
71
|
+
# This also avoids calling np.std twice (once for SD, once for SE).
|
|
72
|
+
groups_arr = [np.asarray(g, dtype=float) for g in self.data]
|
|
73
|
+
groups_n = [len(a) for a in groups_arr]
|
|
74
|
+
groups_mean = [float(a.mean()) for a in groups_arr]
|
|
75
|
+
groups_median = [float(np.median(a)) for a in groups_arr]
|
|
76
|
+
groups_sd = [float(a.std(ddof=1)) for a in groups_arr]
|
|
77
|
+
groups_se = [sd / np.sqrt(n) for sd, n in zip(groups_sd, groups_n)]
|
|
78
|
+
|
|
79
|
+
# --- Posthoc matrix representations — one pass over the matrix -----
|
|
80
|
+
# Previously built as three separate nested list comprehensions;
|
|
81
|
+
# now all three are filled in a single traversal.
|
|
82
|
+
if self.posthoc_matrix:
|
|
83
|
+
pm_bool: list[list] = []
|
|
84
|
+
pm_printed: list[list] = []
|
|
85
|
+
pm_stars: list[list] = []
|
|
86
|
+
for row in self.posthoc_matrix:
|
|
87
|
+
pm_bool.append([bool(e) for e in row])
|
|
88
|
+
pm_printed.append([self.make_p_value_printed(e) for e in row])
|
|
89
|
+
pm_stars.append(
|
|
90
|
+
[self.make_stars_printed(self.make_stars(e)) for e in row]
|
|
91
|
+
)
|
|
92
|
+
else:
|
|
93
|
+
pm_bool = pm_printed = pm_stars = []
|
|
94
|
+
|
|
68
95
|
return {
|
|
69
96
|
"p_value": (
|
|
70
97
|
self.make_p_value_printed(self.p_value.item())
|
|
@@ -90,52 +117,22 @@ class Helpers(StatAnalysisProtocol):
|
|
|
90
117
|
"Stars": self.stars_int,
|
|
91
118
|
"Warnings": self.warnings,
|
|
92
119
|
"Successfull_Test": (self.successfull and not self.error),
|
|
93
|
-
"Groups_Name":
|
|
94
|
-
"Groups_N":
|
|
95
|
-
"Groups_Median":
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
"
|
|
99
|
-
np.mean(self.data[i]).item() for i in range(len(self.data))
|
|
100
|
-
],
|
|
101
|
-
"Groups_SD": [
|
|
102
|
-
np.std(self.data[i], ddof=1).item() for i in range(len(self.data))
|
|
103
|
-
],
|
|
104
|
-
"Groups_SE": [
|
|
105
|
-
np.std(self.data[i], ddof=1).item() / np.sqrt(len(self.data[i]))
|
|
106
|
-
for i in range(len(self.data))
|
|
107
|
-
],
|
|
120
|
+
"Groups_Name": self.groups_name,
|
|
121
|
+
"Groups_N": groups_n,
|
|
122
|
+
"Groups_Median": groups_median,
|
|
123
|
+
"Groups_Mean": groups_mean,
|
|
124
|
+
"Groups_SD": groups_sd,
|
|
125
|
+
"Groups_SE": groups_se,
|
|
108
126
|
"subgrouping": self.subgrouping,
|
|
109
127
|
# NOTE: this currently returns a list of lists of numpy float64 values; TODO: make it return regular floats:
|
|
110
128
|
"Samples": self.data,
|
|
111
129
|
"Posthoc_Tests_Name": (
|
|
112
130
|
self.posthoc_name if self.posthoc_name is not None else ""
|
|
113
131
|
),
|
|
114
|
-
"Posthoc_Matrix":
|
|
115
|
-
"Posthoc_Matrix_bool":
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
else []
|
|
119
|
-
),
|
|
120
|
-
"Posthoc_Matrix_printed": (
|
|
121
|
-
[
|
|
122
|
-
[self.make_p_value_printed(element) for element in row]
|
|
123
|
-
for row in self.posthoc_matrix
|
|
124
|
-
]
|
|
125
|
-
if self.posthoc_matrix
|
|
126
|
-
else []
|
|
127
|
-
),
|
|
128
|
-
"Posthoc_Matrix_stars": (
|
|
129
|
-
[
|
|
130
|
-
[
|
|
131
|
-
self.make_stars_printed(self.make_stars(element))
|
|
132
|
-
for element in row
|
|
133
|
-
]
|
|
134
|
-
for row in self.posthoc_matrix
|
|
135
|
-
]
|
|
136
|
-
if self.posthoc_matrix
|
|
137
|
-
else []
|
|
138
|
-
),
|
|
132
|
+
"Posthoc_Matrix": self.posthoc_matrix if self.posthoc_matrix else [],
|
|
133
|
+
"Posthoc_Matrix_bool": pm_bool,
|
|
134
|
+
"Posthoc_Matrix_printed": pm_printed,
|
|
135
|
+
"Posthoc_Matrix_stars": pm_stars,
|
|
139
136
|
}
|
|
140
137
|
|
|
141
138
|
def log(self, *args: object, **kwargs: object) -> None:
|
|
@@ -145,4 +142,4 @@ class Helpers(StatAnalysisProtocol):
|
|
|
145
142
|
def AddWarning(self, warning_id: str) -> None:
|
|
146
143
|
message: str = self.warning_ids_all[warning_id]
|
|
147
144
|
self.log(message)
|
|
148
|
-
self.warnings.append(message)
|
|
145
|
+
self.warnings.append(message)
|
|
@@ -241,8 +241,8 @@ class StatisticalTests(StatAnalysisProtocol):
|
|
|
241
241
|
if self.popmean is None:
|
|
242
242
|
self.popmean = 0
|
|
243
243
|
self.AddWarning("no_pop_mean_set")
|
|
244
|
-
data
|
|
245
|
-
stat, p_value = wilcoxon(
|
|
244
|
+
arr = np.asarray(self.data[0], dtype=float) - self.popmean
|
|
245
|
+
stat, p_value = wilcoxon(arr)
|
|
246
246
|
if self.tails == 1:
|
|
247
247
|
p_value /= 2
|
|
248
248
|
return stat, p_value
|
|
@@ -8,27 +8,29 @@ from typing import Optional
|
|
|
8
8
|
class TextFormatting(StatAnalysisProtocol):
|
|
9
9
|
"""Text formatting mixin."""
|
|
10
10
|
|
|
11
|
-
def
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
11
|
+
def _fmt_row(self, elements: list[str], width: int, fill: str = " ") -> str:
|
|
12
|
+
"""
|
|
13
|
+
Format a list of strings into a fixed-width columnar row.
|
|
14
|
+
|
|
15
|
+
Each element is left-justified to ``width`` characters using ``fill``
|
|
16
|
+
as the pad character. The last element is appended without trailing
|
|
17
|
+
padding (matches terminal/log output intent).
|
|
18
|
+
|
|
19
|
+
Replaces the hand-rolled ``autospace()`` loop with Python's built-in
|
|
20
|
+
``str.ljust`` and ``str.join``.
|
|
21
|
+
"""
|
|
22
|
+
if not elements:
|
|
23
|
+
return ""
|
|
24
|
+
# All but the last element are padded to `width`; last is bare.
|
|
25
|
+
return "".join(e.ljust(width, fill) for e in elements[:-1]) + elements[-1]
|
|
21
26
|
|
|
22
27
|
def print_groups(self, space: int = 24, max_length: int = 15) -> None:
|
|
23
28
|
self.log("")
|
|
24
29
|
data: list[list[float]] = self.data
|
|
25
|
-
num_groups: int = len(data)
|
|
26
30
|
group_longest: int = max(len(row) for row in data)
|
|
27
31
|
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
self.log(self.autospace(header, space))
|
|
31
|
-
self.log(self.autospace(line, space))
|
|
32
|
+
self.log(self._fmt_row(self.groups_name, space))
|
|
33
|
+
self.log(self._fmt_row(["" * 7], space))
|
|
32
34
|
|
|
33
35
|
for i in range(group_longest):
|
|
34
36
|
row_values: list[str] = []
|
|
@@ -51,7 +53,7 @@ class TextFormatting(StatAnalysisProtocol):
|
|
|
51
53
|
row_values.append("")
|
|
52
54
|
if all_values_empty:
|
|
53
55
|
break
|
|
54
|
-
self.log(self.
|
|
56
|
+
self.log(self._fmt_row(row_values, space))
|
|
55
57
|
|
|
56
58
|
def print_results(self) -> None:
|
|
57
59
|
self.log("\n\nResults: \n")
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|