AutoStatLib 0.4.0__tar.gz → 0.4.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {autostatlib-0.4.0/src/AutoStatLib.egg-info → autostatlib-0.4.2}/PKG-INFO +1 -1
- {autostatlib-0.4.0 → autostatlib-0.4.2}/src/AutoStatLib/AutoStatLib.py +3 -2
- {autostatlib-0.4.0 → autostatlib-0.4.2}/src/AutoStatLib/StatPlots.py +38 -43
- {autostatlib-0.4.0 → autostatlib-0.4.2}/src/AutoStatLib/_version.py +1 -1
- {autostatlib-0.4.0 → autostatlib-0.4.2}/src/AutoStatLib/helpers.py +49 -52
- {autostatlib-0.4.0 → autostatlib-0.4.2}/src/AutoStatLib/statistical_tests.py +2 -2
- {autostatlib-0.4.0 → autostatlib-0.4.2}/src/AutoStatLib/text_formatting.py +18 -16
- {autostatlib-0.4.0 → autostatlib-0.4.2/src/AutoStatLib.egg-info}/PKG-INFO +1 -1
- {autostatlib-0.4.0 → autostatlib-0.4.2}/LICENSE +0 -0
- {autostatlib-0.4.0 → autostatlib-0.4.2}/MANIFEST.in +0 -0
- {autostatlib-0.4.0 → autostatlib-0.4.2}/README.md +0 -0
- {autostatlib-0.4.0 → autostatlib-0.4.2}/pyproject.toml +0 -0
- {autostatlib-0.4.0 → autostatlib-0.4.2}/requirements.txt +0 -0
- {autostatlib-0.4.0 → autostatlib-0.4.2}/setup.cfg +0 -0
- {autostatlib-0.4.0 → autostatlib-0.4.2}/src/AutoStatLib/__init__.py +0 -0
- {autostatlib-0.4.0 → autostatlib-0.4.2}/src/AutoStatLib/__main__.py +0 -0
- {autostatlib-0.4.0 → autostatlib-0.4.2}/src/AutoStatLib/_protocol.py +0 -0
- {autostatlib-0.4.0 → autostatlib-0.4.2}/src/AutoStatLib/normality_tests.py +0 -0
- {autostatlib-0.4.0 → autostatlib-0.4.2}/src/AutoStatLib.egg-info/SOURCES.txt +0 -0
- {autostatlib-0.4.0 → autostatlib-0.4.2}/src/AutoStatLib.egg-info/dependency_links.txt +0 -0
- {autostatlib-0.4.0 → autostatlib-0.4.2}/src/AutoStatLib.egg-info/requires.txt +0 -0
- {autostatlib-0.4.0 → autostatlib-0.4.2}/src/AutoStatLib.egg-info/top_level.txt +0 -0
- {autostatlib-0.4.0 → autostatlib-0.4.2}/tests/test_autostatlib.py +0 -0
- {autostatlib-0.4.0 → autostatlib-0.4.2}/tests/test_statplots.py +0 -0
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
from itertools import cycle, islice
|
|
3
4
|
from typing import Optional, Union
|
|
4
5
|
|
|
5
6
|
import numpy as np
|
|
@@ -69,7 +70,7 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
|
|
|
69
70
|
self.raise_errors: bool = raise_errors
|
|
70
71
|
self.n_groups: int = len(self.groups_list)
|
|
71
72
|
self.groups_name: list[str] = (
|
|
72
|
-
|
|
73
|
+
list(islice(cycle(groups_name), self.n_groups))
|
|
73
74
|
if groups_name and groups_name != [""]
|
|
74
75
|
else [f"Group {i + 1}" for i in range(self.n_groups)]
|
|
75
76
|
)
|
|
@@ -216,7 +217,7 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
|
|
|
216
217
|
), "Wrong test id choosen, ensure you called correct function"
|
|
217
218
|
assert all(
|
|
218
219
|
len(group) >= 4 for group in self.data
|
|
219
|
-
), "Each group must
|
|
220
|
+
), "Each group must be at least n=4 for the valid statistics, so test was skipped"
|
|
220
221
|
assert not (
|
|
221
222
|
test in self.test_ids_dependent
|
|
222
223
|
and not all(len(lst) == len(self.data[0]) for lst in self.data)
|
|
@@ -20,12 +20,8 @@ class Helpers:
|
|
|
20
20
|
colors: list[str | tuple],
|
|
21
21
|
alpha: float = 0.35,
|
|
22
22
|
) -> list[tuple[float, float, float, float]]:
|
|
23
|
-
|
|
24
|
-
for
|
|
25
|
-
rgba = list(mcolors.to_rgba(col))
|
|
26
|
-
rgba[3] = alpha
|
|
27
|
-
rgba_colors.append((rgba[0], rgba[1], rgba[2], rgba[3]))
|
|
28
|
-
return rgba_colors
|
|
23
|
+
# mcolors.to_rgba returns a 4-tuple; replace only the alpha channel.
|
|
24
|
+
return [(*mcolors.to_rgba(c)[:3], alpha) for c in colors]
|
|
29
25
|
|
|
30
26
|
def get_colors(
|
|
31
27
|
self,
|
|
@@ -166,26 +162,34 @@ class BaseStatPlot(Helpers):
|
|
|
166
162
|
print("AutoStatLib.StatPlots Error :", error)
|
|
167
163
|
return
|
|
168
164
|
|
|
169
|
-
#
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
]
|
|
176
|
-
self.
|
|
177
|
-
|
|
178
|
-
]
|
|
179
|
-
self.sem: list[float] = [
|
|
180
|
-
np.std(self.data_groups[i], ddof=1).item()
|
|
181
|
-
/ np.sqrt(len(self.data_groups[i]))
|
|
182
|
-
for i in range(self.n_groups)
|
|
183
|
-
]
|
|
184
|
-
|
|
185
|
-
self.n: list[int] = [len(i) for i in self.data_groups]
|
|
165
|
+
# sd sem mean and median calculation if they are not provided.
|
|
166
|
+
# Convert each group to a float array once; reuse for all four stats.
|
|
167
|
+
# This avoids calling np.std twice per group (old code recomputed it
|
|
168
|
+
# from scratch for sem after already computing it for sd).
|
|
169
|
+
_arrs = [np.asarray(g, dtype=float) for g in self.data_groups]
|
|
170
|
+
self.n = [len(a) for a in _arrs]
|
|
171
|
+
self.mean = [float(a.mean()) for a in _arrs]
|
|
172
|
+
self.median = [float(np.median(a)) for a in _arrs]
|
|
173
|
+
self.sd = [float(a.std(ddof=1)) for a in _arrs]
|
|
174
|
+
self.sem = [sd / np.sqrt(n) for sd, n in zip(self.sd, self.n)]
|
|
186
175
|
self.p_printed: str = self.make_p_value_printed(self.p)
|
|
187
176
|
self.stars_printed: str = self.make_stars_printed(self.make_stars(self.p))
|
|
188
177
|
|
|
178
|
+
# Pre-compute posthoc matrix string representations once here so that
|
|
179
|
+
# add_significance_bars() doesn't rebuild them on every call.
|
|
180
|
+
if self.posthoc_matrix:
|
|
181
|
+
self._posthoc_printed: list[list[str]] = [
|
|
182
|
+
[self.make_p_value_printed(e) for e in row]
|
|
183
|
+
for row in self.posthoc_matrix
|
|
184
|
+
]
|
|
185
|
+
self._posthoc_stars: list[list[str]] = [
|
|
186
|
+
[self.make_stars_printed(self.make_stars(e)) for e in row]
|
|
187
|
+
for row in self.posthoc_matrix
|
|
188
|
+
]
|
|
189
|
+
else:
|
|
190
|
+
self._posthoc_printed = []
|
|
191
|
+
self._posthoc_stars = []
|
|
192
|
+
|
|
189
193
|
self.groups_name: list[str] = Groups_Name if Groups_Name is not None else [""]
|
|
190
194
|
self.subgrouping: list = subgrouping if subgrouping else [0]
|
|
191
195
|
self.subgrouping_arrange: list[int] = self.expand_counts(self.subgrouping)
|
|
@@ -430,10 +434,12 @@ class BaseStatPlot(Helpers):
|
|
|
430
434
|
linewidth: float = 1.2,
|
|
431
435
|
zorder: int = 2,
|
|
432
436
|
) -> None:
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
+
# Generate all jitter offsets with NumPy at once instead of Python loops.
|
|
438
|
+
rng = np.random.default_rng()
|
|
439
|
+
spread_pool: list[np.ndarray] = [
|
|
440
|
+
i + rng.uniform(-0.10, 0.10, size=len(g))
|
|
441
|
+
for i, g in enumerate(self.data_groups)
|
|
442
|
+
]
|
|
437
443
|
|
|
438
444
|
for i, data in enumerate(self.transpose(self.data_groups)):
|
|
439
445
|
ax.plot(
|
|
@@ -490,6 +496,7 @@ class BaseStatPlot(Helpers):
|
|
|
490
496
|
marker=marker,
|
|
491
497
|
linewidth=linewidth * self.figure_scale_factor * size_scale,
|
|
492
498
|
zorder=zorder,
|
|
499
|
+
warn_thresh = 1, # threshold for warning about too many points; set to 0 to always warn, or 1 to never warn
|
|
493
500
|
)
|
|
494
501
|
|
|
495
502
|
if self.dependent:
|
|
@@ -690,22 +697,10 @@ class BaseStatPlot(Helpers):
|
|
|
690
697
|
col: str = "k",
|
|
691
698
|
) -> None:
|
|
692
699
|
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
]
|
|
698
|
-
if self.posthoc_matrix
|
|
699
|
-
else []
|
|
700
|
-
)
|
|
701
|
-
posthoc_matrix_stars: list[list[str]] = (
|
|
702
|
-
[
|
|
703
|
-
[self.make_stars_printed(self.make_stars(element)) for element in row]
|
|
704
|
-
for row in self.posthoc_matrix
|
|
705
|
-
]
|
|
706
|
-
if self.posthoc_matrix
|
|
707
|
-
else []
|
|
708
|
-
)
|
|
700
|
+
# Use the pre-computed representations cached in __init__ rather than
|
|
701
|
+
# rebuilding them on every call to add_significance_bars().
|
|
702
|
+
posthoc_matrix_printed: list[list[str]] = self._posthoc_printed
|
|
703
|
+
posthoc_matrix_stars: list[list[str]] = self._posthoc_stars
|
|
709
704
|
|
|
710
705
|
def draw_bar(
|
|
711
706
|
p: str,
|
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
# AutoStatLib package version:
|
|
2
|
-
__version__ = "0.4.
|
|
2
|
+
__version__ = "0.4.2"
|
|
@@ -11,17 +11,18 @@ import pandas as pd
|
|
|
11
11
|
class Helpers(StatAnalysisProtocol):
|
|
12
12
|
|
|
13
13
|
def matrix_to_dataframe(self, matrix: list[list[float]]) -> pd.DataFrame:
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
14
|
+
# Convert once to a 2-D float array, then use NumPy meshgrid to build
|
|
15
|
+
# the row/col index arrays without any Python-level loop.
|
|
16
|
+
arr = np.array(matrix, dtype=float) # (n_subjects, n_conditions)
|
|
17
|
+
n_rows, n_cols = arr.shape
|
|
18
|
+
row_idx, col_idx = np.meshgrid(
|
|
19
|
+
np.arange(n_rows), np.arange(n_cols), indexing="ij"
|
|
20
|
+
)
|
|
21
|
+
return pd.DataFrame({
|
|
22
|
+
"Row": row_idx.ravel(),
|
|
23
|
+
"Col": col_idx.ravel(),
|
|
24
|
+
"Value": arr.ravel(),
|
|
25
|
+
})
|
|
25
26
|
|
|
26
27
|
def list_to_matrix(self, values: list[float], n: int) -> list[list[float]]:
|
|
27
28
|
i = 0
|
|
@@ -65,6 +66,32 @@ class Helpers(StatAnalysisProtocol):
|
|
|
65
66
|
self.make_stars_printed(self.stars_int) if self.successfull else ""
|
|
66
67
|
)
|
|
67
68
|
|
|
69
|
+
# --- Compute per-group descriptive stats in a single pass ----------
|
|
70
|
+
# Convert each group once; reuse the array for mean, median, std, sem.
|
|
71
|
+
# This also avoids calling np.std twice (once for SD, once for SE).
|
|
72
|
+
groups_arr = [np.asarray(g, dtype=float) for g in self.data]
|
|
73
|
+
groups_n = [len(a) for a in groups_arr]
|
|
74
|
+
groups_mean = [float(a.mean()) for a in groups_arr]
|
|
75
|
+
groups_median = [float(np.median(a)) for a in groups_arr]
|
|
76
|
+
groups_sd = [float(a.std(ddof=1)) for a in groups_arr]
|
|
77
|
+
groups_se = [sd / np.sqrt(n) for sd, n in zip(groups_sd, groups_n)]
|
|
78
|
+
|
|
79
|
+
# --- Posthoc matrix representations — one pass over the matrix -----
|
|
80
|
+
# Previously built as three separate nested list comprehensions;
|
|
81
|
+
# now all three are filled in a single traversal.
|
|
82
|
+
if self.posthoc_matrix:
|
|
83
|
+
pm_bool: list[list] = []
|
|
84
|
+
pm_printed: list[list] = []
|
|
85
|
+
pm_stars: list[list] = []
|
|
86
|
+
for row in self.posthoc_matrix:
|
|
87
|
+
pm_bool.append([bool(e) for e in row])
|
|
88
|
+
pm_printed.append([self.make_p_value_printed(e) for e in row])
|
|
89
|
+
pm_stars.append(
|
|
90
|
+
[self.make_stars_printed(self.make_stars(e)) for e in row]
|
|
91
|
+
)
|
|
92
|
+
else:
|
|
93
|
+
pm_bool = pm_printed = pm_stars = []
|
|
94
|
+
|
|
68
95
|
return {
|
|
69
96
|
"p_value": (
|
|
70
97
|
self.make_p_value_printed(self.p_value.item())
|
|
@@ -90,52 +117,22 @@ class Helpers(StatAnalysisProtocol):
|
|
|
90
117
|
"Stars": self.stars_int,
|
|
91
118
|
"Warnings": self.warnings,
|
|
92
119
|
"Successfull_Test": (self.successfull and not self.error),
|
|
93
|
-
"Groups_Name":
|
|
94
|
-
"Groups_N":
|
|
95
|
-
"Groups_Median":
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
"
|
|
99
|
-
np.mean(self.data[i]).item() for i in range(len(self.data))
|
|
100
|
-
],
|
|
101
|
-
"Groups_SD": [
|
|
102
|
-
np.std(self.data[i], ddof=1).item() for i in range(len(self.data))
|
|
103
|
-
],
|
|
104
|
-
"Groups_SE": [
|
|
105
|
-
np.std(self.data[i], ddof=1).item() / np.sqrt(len(self.data[i]))
|
|
106
|
-
for i in range(len(self.data))
|
|
107
|
-
],
|
|
120
|
+
"Groups_Name": self.groups_name,
|
|
121
|
+
"Groups_N": groups_n,
|
|
122
|
+
"Groups_Median": groups_median,
|
|
123
|
+
"Groups_Mean": groups_mean,
|
|
124
|
+
"Groups_SD": groups_sd,
|
|
125
|
+
"Groups_SE": groups_se,
|
|
108
126
|
"subgrouping": self.subgrouping,
|
|
109
127
|
# actually returns list of lists of numpy dtypes of float64, next make it return regular floats:
|
|
110
128
|
"Samples": self.data,
|
|
111
129
|
"Posthoc_Tests_Name": (
|
|
112
130
|
self.posthoc_name if self.posthoc_name is not None else ""
|
|
113
131
|
),
|
|
114
|
-
"Posthoc_Matrix":
|
|
115
|
-
"Posthoc_Matrix_bool":
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
else []
|
|
119
|
-
),
|
|
120
|
-
"Posthoc_Matrix_printed": (
|
|
121
|
-
[
|
|
122
|
-
[self.make_p_value_printed(element) for element in row]
|
|
123
|
-
for row in self.posthoc_matrix
|
|
124
|
-
]
|
|
125
|
-
if self.posthoc_matrix
|
|
126
|
-
else []
|
|
127
|
-
),
|
|
128
|
-
"Posthoc_Matrix_stars": (
|
|
129
|
-
[
|
|
130
|
-
[
|
|
131
|
-
self.make_stars_printed(self.make_stars(element))
|
|
132
|
-
for element in row
|
|
133
|
-
]
|
|
134
|
-
for row in self.posthoc_matrix
|
|
135
|
-
]
|
|
136
|
-
if self.posthoc_matrix
|
|
137
|
-
else []
|
|
138
|
-
),
|
|
132
|
+
"Posthoc_Matrix": self.posthoc_matrix if self.posthoc_matrix else [],
|
|
133
|
+
"Posthoc_Matrix_bool": pm_bool,
|
|
134
|
+
"Posthoc_Matrix_printed": pm_printed,
|
|
135
|
+
"Posthoc_Matrix_stars": pm_stars,
|
|
139
136
|
}
|
|
140
137
|
|
|
141
138
|
def log(self, *args: object, **kwargs: object) -> None:
|
|
@@ -145,4 +142,4 @@ class Helpers(StatAnalysisProtocol):
|
|
|
145
142
|
def AddWarning(self, warning_id: str) -> None:
|
|
146
143
|
message: str = self.warning_ids_all[warning_id]
|
|
147
144
|
self.log(message)
|
|
148
|
-
self.warnings.append(message)
|
|
145
|
+
self.warnings.append(message)
|
|
@@ -241,8 +241,8 @@ class StatisticalTests(StatAnalysisProtocol):
|
|
|
241
241
|
if self.popmean is None:
|
|
242
242
|
self.popmean = 0
|
|
243
243
|
self.AddWarning("no_pop_mean_set")
|
|
244
|
-
data
|
|
245
|
-
stat, p_value = wilcoxon(
|
|
244
|
+
arr = np.asarray(self.data[0], dtype=float) - self.popmean
|
|
245
|
+
stat, p_value = wilcoxon(arr)
|
|
246
246
|
if self.tails == 1:
|
|
247
247
|
p_value /= 2
|
|
248
248
|
return stat, p_value
|
|
@@ -8,27 +8,29 @@ from typing import Optional
|
|
|
8
8
|
class TextFormatting(StatAnalysisProtocol):
|
|
9
9
|
"""Text formatting mixin."""
|
|
10
10
|
|
|
11
|
-
def
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
11
|
+
def _fmt_row(self, elements: list[str], width: int, fill: str = " ") -> str:
|
|
12
|
+
"""
|
|
13
|
+
Format a list of strings into a fixed-width columnar row.
|
|
14
|
+
|
|
15
|
+
Each element is left-justified to ``width`` characters using ``fill``
|
|
16
|
+
as the pad character. The last element is appended without trailing
|
|
17
|
+
padding (matches terminal/log output intent).
|
|
18
|
+
|
|
19
|
+
Replaces the hand-rolled ``autospace()`` loop with Python's built-in
|
|
20
|
+
``str.ljust`` and ``str.join``.
|
|
21
|
+
"""
|
|
22
|
+
if not elements:
|
|
23
|
+
return ""
|
|
24
|
+
# All but the last element are padded to `width`; last is bare.
|
|
25
|
+
return "".join(e.ljust(width, fill) for e in elements[:-1]) + elements[-1]
|
|
21
26
|
|
|
22
27
|
def print_groups(self, space: int = 24, max_length: int = 15) -> None:
|
|
23
28
|
self.log("")
|
|
24
29
|
data: list[list[float]] = self.data
|
|
25
|
-
num_groups: int = len(data)
|
|
26
30
|
group_longest: int = max(len(row) for row in data)
|
|
27
31
|
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
self.log(self.autospace(header, space))
|
|
31
|
-
self.log(self.autospace(line, space))
|
|
32
|
+
self.log(self._fmt_row(self.groups_name, space))
|
|
33
|
+
self.log(self._fmt_row(["" * 7], space))
|
|
32
34
|
|
|
33
35
|
for i in range(group_longest):
|
|
34
36
|
row_values: list[str] = []
|
|
@@ -51,7 +53,7 @@ class TextFormatting(StatAnalysisProtocol):
|
|
|
51
53
|
row_values.append("")
|
|
52
54
|
if all_values_empty:
|
|
53
55
|
break
|
|
54
|
-
self.log(self.
|
|
56
|
+
self.log(self._fmt_row(row_values, space))
|
|
55
57
|
|
|
56
58
|
def print_results(self) -> None:
|
|
57
59
|
self.log("\n\nResults: \n")
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|