AutoStatLib 0.4.0__tar.gz → 0.4.2__tar.gz

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (24)
  1. {autostatlib-0.4.0/src/AutoStatLib.egg-info → autostatlib-0.4.2}/PKG-INFO +1 -1
  2. {autostatlib-0.4.0 → autostatlib-0.4.2}/src/AutoStatLib/AutoStatLib.py +3 -2
  3. {autostatlib-0.4.0 → autostatlib-0.4.2}/src/AutoStatLib/StatPlots.py +38 -43
  4. {autostatlib-0.4.0 → autostatlib-0.4.2}/src/AutoStatLib/_version.py +1 -1
  5. {autostatlib-0.4.0 → autostatlib-0.4.2}/src/AutoStatLib/helpers.py +49 -52
  6. {autostatlib-0.4.0 → autostatlib-0.4.2}/src/AutoStatLib/statistical_tests.py +2 -2
  7. {autostatlib-0.4.0 → autostatlib-0.4.2}/src/AutoStatLib/text_formatting.py +18 -16
  8. {autostatlib-0.4.0 → autostatlib-0.4.2/src/AutoStatLib.egg-info}/PKG-INFO +1 -1
  9. {autostatlib-0.4.0 → autostatlib-0.4.2}/LICENSE +0 -0
  10. {autostatlib-0.4.0 → autostatlib-0.4.2}/MANIFEST.in +0 -0
  11. {autostatlib-0.4.0 → autostatlib-0.4.2}/README.md +0 -0
  12. {autostatlib-0.4.0 → autostatlib-0.4.2}/pyproject.toml +0 -0
  13. {autostatlib-0.4.0 → autostatlib-0.4.2}/requirements.txt +0 -0
  14. {autostatlib-0.4.0 → autostatlib-0.4.2}/setup.cfg +0 -0
  15. {autostatlib-0.4.0 → autostatlib-0.4.2}/src/AutoStatLib/__init__.py +0 -0
  16. {autostatlib-0.4.0 → autostatlib-0.4.2}/src/AutoStatLib/__main__.py +0 -0
  17. {autostatlib-0.4.0 → autostatlib-0.4.2}/src/AutoStatLib/_protocol.py +0 -0
  18. {autostatlib-0.4.0 → autostatlib-0.4.2}/src/AutoStatLib/normality_tests.py +0 -0
  19. {autostatlib-0.4.0 → autostatlib-0.4.2}/src/AutoStatLib.egg-info/SOURCES.txt +0 -0
  20. {autostatlib-0.4.0 → autostatlib-0.4.2}/src/AutoStatLib.egg-info/dependency_links.txt +0 -0
  21. {autostatlib-0.4.0 → autostatlib-0.4.2}/src/AutoStatLib.egg-info/requires.txt +0 -0
  22. {autostatlib-0.4.0 → autostatlib-0.4.2}/src/AutoStatLib.egg-info/top_level.txt +0 -0
  23. {autostatlib-0.4.0 → autostatlib-0.4.2}/tests/test_autostatlib.py +0 -0
  24. {autostatlib-0.4.0 → autostatlib-0.4.2}/tests/test_statplots.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: AutoStatLib
3
- Version: 0.4.0
3
+ Version: 0.4.2
4
4
  Summary: AutoStatLib - a simple statistical analysis tool
5
5
  Author: Stemonitis, SciWare LLC
6
6
  Author-email: konung-yaropolk <yaropolk1995@gmail.com>
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
+ from itertools import cycle, islice
3
4
  from typing import Optional, Union
4
5
 
5
6
  import numpy as np
@@ -69,7 +70,7 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
69
70
  self.raise_errors: bool = raise_errors
70
71
  self.n_groups: int = len(self.groups_list)
71
72
  self.groups_name: list[str] = (
72
- [groups_name[i % len(groups_name)] for i in range(self.n_groups)]
73
+ list(islice(cycle(groups_name), self.n_groups))
73
74
  if groups_name and groups_name != [""]
74
75
  else [f"Group {i + 1}" for i in range(self.n_groups)]
75
76
  )
@@ -216,7 +217,7 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
216
217
  ), "Wrong test id choosen, ensure you called correct function"
217
218
  assert all(
218
219
  len(group) >= 4 for group in self.data
219
- ), "Each group must contain at least four values"
220
+ ), "Each group must be at least n=4 for the valid statistics, so test was skipped"
220
221
  assert not (
221
222
  test in self.test_ids_dependent
222
223
  and not all(len(lst) == len(self.data[0]) for lst in self.data)
@@ -20,12 +20,8 @@ class Helpers:
20
20
  colors: list[str | tuple],
21
21
  alpha: float = 0.35,
22
22
  ) -> list[tuple[float, float, float, float]]:
23
- rgba_colors: list[tuple[float, float, float, float]] = []
24
- for col in colors:
25
- rgba = list(mcolors.to_rgba(col))
26
- rgba[3] = alpha
27
- rgba_colors.append((rgba[0], rgba[1], rgba[2], rgba[3]))
28
- return rgba_colors
23
+ # mcolors.to_rgba returns a 4-tuple; replace only the alpha channel.
24
+ return [(*mcolors.to_rgba(c)[:3], alpha) for c in colors]
29
25
 
30
26
  def get_colors(
31
27
  self,
@@ -166,26 +162,34 @@ class BaseStatPlot(Helpers):
166
162
  print("AutoStatLib.StatPlots Error :", error)
167
163
  return
168
164
 
169
- # sd sem mean and median calculation if they are not provided
170
- self.mean: list[float] = [
171
- np.mean(self.data_groups[i]).item() for i in range(self.n_groups)
172
- ]
173
- self.median: list[float] = [
174
- np.median(self.data_groups[i]).item() for i in range(self.n_groups)
175
- ]
176
- self.sd: list[float] = [
177
- np.std(self.data_groups[i], ddof=1).item() for i in range(self.n_groups)
178
- ]
179
- self.sem: list[float] = [
180
- np.std(self.data_groups[i], ddof=1).item()
181
- / np.sqrt(len(self.data_groups[i]))
182
- for i in range(self.n_groups)
183
- ]
184
-
185
- self.n: list[int] = [len(i) for i in self.data_groups]
165
+ # sd sem mean and median calculation if they are not provided.
166
+ # Convert each group to a float array once; reuse for all four stats.
167
+ # This avoids calling np.std twice per group (old code recomputed it
168
+ # from scratch for sem after already computing it for sd).
169
+ _arrs = [np.asarray(g, dtype=float) for g in self.data_groups]
170
+ self.n = [len(a) for a in _arrs]
171
+ self.mean = [float(a.mean()) for a in _arrs]
172
+ self.median = [float(np.median(a)) for a in _arrs]
173
+ self.sd = [float(a.std(ddof=1)) for a in _arrs]
174
+ self.sem = [sd / np.sqrt(n) for sd, n in zip(self.sd, self.n)]
186
175
  self.p_printed: str = self.make_p_value_printed(self.p)
187
176
  self.stars_printed: str = self.make_stars_printed(self.make_stars(self.p))
188
177
 
178
+ # Pre-compute posthoc matrix string representations once here so that
179
+ # add_significance_bars() doesn't rebuild them on every call.
180
+ if self.posthoc_matrix:
181
+ self._posthoc_printed: list[list[str]] = [
182
+ [self.make_p_value_printed(e) for e in row]
183
+ for row in self.posthoc_matrix
184
+ ]
185
+ self._posthoc_stars: list[list[str]] = [
186
+ [self.make_stars_printed(self.make_stars(e)) for e in row]
187
+ for row in self.posthoc_matrix
188
+ ]
189
+ else:
190
+ self._posthoc_printed = []
191
+ self._posthoc_stars = []
192
+
189
193
  self.groups_name: list[str] = Groups_Name if Groups_Name is not None else [""]
190
194
  self.subgrouping: list = subgrouping if subgrouping else [0]
191
195
  self.subgrouping_arrange: list[int] = self.expand_counts(self.subgrouping)
@@ -430,10 +434,12 @@ class BaseStatPlot(Helpers):
430
434
  linewidth: float = 1.2,
431
435
  zorder: int = 2,
432
436
  ) -> None:
433
- spread_pool: list[tuple] = []
434
- for i, data in enumerate(self.data_groups):
435
- spread = tuple(random.uniform(-0.10, 0.10) for _ in data)
436
- spread_pool.append(tuple(i + s for s in spread))
437
+ # Generate all jitter offsets with NumPy at once instead of Python loops.
438
+ rng = np.random.default_rng()
439
+ spread_pool: list[np.ndarray] = [
440
+ i + rng.uniform(-0.10, 0.10, size=len(g))
441
+ for i, g in enumerate(self.data_groups)
442
+ ]
437
443
 
438
444
  for i, data in enumerate(self.transpose(self.data_groups)):
439
445
  ax.plot(
@@ -490,6 +496,7 @@ class BaseStatPlot(Helpers):
490
496
  marker=marker,
491
497
  linewidth=linewidth * self.figure_scale_factor * size_scale,
492
498
  zorder=zorder,
499
+ warn_thresh = 1, # threshold for warning about too many points; set to 0 to always warn, or 1 to never warn
493
500
  )
494
501
 
495
502
  if self.dependent:
@@ -690,22 +697,10 @@ class BaseStatPlot(Helpers):
690
697
  col: str = "k",
691
698
  ) -> None:
692
699
 
693
- posthoc_matrix_printed: list[list[str]] = (
694
- [
695
- [self.make_p_value_printed(element) for element in row]
696
- for row in self.posthoc_matrix
697
- ]
698
- if self.posthoc_matrix
699
- else []
700
- )
701
- posthoc_matrix_stars: list[list[str]] = (
702
- [
703
- [self.make_stars_printed(self.make_stars(element)) for element in row]
704
- for row in self.posthoc_matrix
705
- ]
706
- if self.posthoc_matrix
707
- else []
708
- )
700
+ # Use the pre-computed representations cached in __init__ rather than
701
+ # rebuilding them on every call to add_significance_bars().
702
+ posthoc_matrix_printed: list[list[str]] = self._posthoc_printed
703
+ posthoc_matrix_stars: list[list[str]] = self._posthoc_stars
709
704
 
710
705
  def draw_bar(
711
706
  p: str,
@@ -1,2 +1,2 @@
1
1
  # AutoStatLib package version:
2
- __version__ = "0.4.0"
2
+ __version__ = "0.4.2"
@@ -11,17 +11,18 @@ import pandas as pd
11
11
  class Helpers(StatAnalysisProtocol):
12
12
 
13
13
  def matrix_to_dataframe(self, matrix: list[list[float]]) -> pd.DataFrame:
14
- data: list[float] = []
15
- cols: list[int] = []
16
- rows: list[int] = []
17
-
18
- for i, row in enumerate(matrix):
19
- for j, value in enumerate(row):
20
- data.append(value)
21
- cols.append(i)
22
- rows.append(j)
23
-
24
- return pd.DataFrame({"Row": rows, "Col": cols, "Value": data})
14
+ # Convert once to a 2-D float array, then use NumPy meshgrid to build
15
+ # the row/col index arrays without any Python-level loop.
16
+ arr = np.array(matrix, dtype=float) # (n_subjects, n_conditions)
17
+ n_rows, n_cols = arr.shape
18
+ row_idx, col_idx = np.meshgrid(
19
+ np.arange(n_rows), np.arange(n_cols), indexing="ij"
20
+ )
21
+ return pd.DataFrame({
22
+ "Row": row_idx.ravel(),
23
+ "Col": col_idx.ravel(),
24
+ "Value": arr.ravel(),
25
+ })
25
26
 
26
27
  def list_to_matrix(self, values: list[float], n: int) -> list[list[float]]:
27
28
  i = 0
@@ -65,6 +66,32 @@ class Helpers(StatAnalysisProtocol):
65
66
  self.make_stars_printed(self.stars_int) if self.successfull else ""
66
67
  )
67
68
 
69
+ # --- Compute per-group descriptive stats in a single pass ----------
70
+ # Convert each group once; reuse the array for mean, median, std, sem.
71
+ # This also avoids calling np.std twice (once for SD, once for SE).
72
+ groups_arr = [np.asarray(g, dtype=float) for g in self.data]
73
+ groups_n = [len(a) for a in groups_arr]
74
+ groups_mean = [float(a.mean()) for a in groups_arr]
75
+ groups_median = [float(np.median(a)) for a in groups_arr]
76
+ groups_sd = [float(a.std(ddof=1)) for a in groups_arr]
77
+ groups_se = [sd / np.sqrt(n) for sd, n in zip(groups_sd, groups_n)]
78
+
79
+ # --- Posthoc matrix representations — one pass over the matrix -----
80
+ # Previously built as three separate nested list comprehensions;
81
+ # now all three are filled in a single traversal.
82
+ if self.posthoc_matrix:
83
+ pm_bool: list[list] = []
84
+ pm_printed: list[list] = []
85
+ pm_stars: list[list] = []
86
+ for row in self.posthoc_matrix:
87
+ pm_bool.append([bool(e) for e in row])
88
+ pm_printed.append([self.make_p_value_printed(e) for e in row])
89
+ pm_stars.append(
90
+ [self.make_stars_printed(self.make_stars(e)) for e in row]
91
+ )
92
+ else:
93
+ pm_bool = pm_printed = pm_stars = []
94
+
68
95
  return {
69
96
  "p_value": (
70
97
  self.make_p_value_printed(self.p_value.item())
@@ -90,52 +117,22 @@ class Helpers(StatAnalysisProtocol):
90
117
  "Stars": self.stars_int,
91
118
  "Warnings": self.warnings,
92
119
  "Successfull_Test": (self.successfull and not self.error),
93
- "Groups_Name": self.groups_name,
94
- "Groups_N": [len(self.data[i]) for i in range(len(self.data))],
95
- "Groups_Median": [
96
- np.median(self.data[i]).item() for i in range(len(self.data))
97
- ],
98
- "Groups_Mean": [
99
- np.mean(self.data[i]).item() for i in range(len(self.data))
100
- ],
101
- "Groups_SD": [
102
- np.std(self.data[i], ddof=1).item() for i in range(len(self.data))
103
- ],
104
- "Groups_SE": [
105
- np.std(self.data[i], ddof=1).item() / np.sqrt(len(self.data[i]))
106
- for i in range(len(self.data))
107
- ],
120
+ "Groups_Name": self.groups_name,
121
+ "Groups_N": groups_n,
122
+ "Groups_Median": groups_median,
123
+ "Groups_Mean": groups_mean,
124
+ "Groups_SD": groups_sd,
125
+ "Groups_SE": groups_se,
108
126
  "subgrouping": self.subgrouping,
109
127
  # actually returns list of lists of numpy dtypes of float64, next make it return regular floats:
110
128
  "Samples": self.data,
111
129
  "Posthoc_Tests_Name": (
112
130
  self.posthoc_name if self.posthoc_name is not None else ""
113
131
  ),
114
- "Posthoc_Matrix": self.posthoc_matrix if self.posthoc_matrix else [],
115
- "Posthoc_Matrix_bool": (
116
- [[bool(element) for element in row] for row in self.posthoc_matrix]
117
- if self.posthoc_matrix
118
- else []
119
- ),
120
- "Posthoc_Matrix_printed": (
121
- [
122
- [self.make_p_value_printed(element) for element in row]
123
- for row in self.posthoc_matrix
124
- ]
125
- if self.posthoc_matrix
126
- else []
127
- ),
128
- "Posthoc_Matrix_stars": (
129
- [
130
- [
131
- self.make_stars_printed(self.make_stars(element))
132
- for element in row
133
- ]
134
- for row in self.posthoc_matrix
135
- ]
136
- if self.posthoc_matrix
137
- else []
138
- ),
132
+ "Posthoc_Matrix": self.posthoc_matrix if self.posthoc_matrix else [],
133
+ "Posthoc_Matrix_bool": pm_bool,
134
+ "Posthoc_Matrix_printed": pm_printed,
135
+ "Posthoc_Matrix_stars": pm_stars,
139
136
  }
140
137
 
141
138
  def log(self, *args: object, **kwargs: object) -> None:
@@ -145,4 +142,4 @@ class Helpers(StatAnalysisProtocol):
145
142
  def AddWarning(self, warning_id: str) -> None:
146
143
  message: str = self.warning_ids_all[warning_id]
147
144
  self.log(message)
148
- self.warnings.append(message)
145
+ self.warnings.append(message)
@@ -241,8 +241,8 @@ class StatisticalTests(StatAnalysisProtocol):
241
241
  if self.popmean is None:
242
242
  self.popmean = 0
243
243
  self.AddWarning("no_pop_mean_set")
244
- data: list[float] = [x - self.popmean for x in self.data[0]]
245
- stat, p_value = wilcoxon(data)
244
+ arr = np.asarray(self.data[0], dtype=float) - self.popmean
245
+ stat, p_value = wilcoxon(arr)
246
246
  if self.tails == 1:
247
247
  p_value /= 2
248
248
  return stat, p_value
@@ -8,27 +8,29 @@ from typing import Optional
8
8
  class TextFormatting(StatAnalysisProtocol):
9
9
  """Text formatting mixin."""
10
10
 
11
- def autospace(
12
- self, elements_list: list[str], space: int, delimiter: str = " "
13
- ) -> str:
14
- output = ""
15
- for i, element in enumerate(elements_list):
16
- if i == len(elements_list):
17
- output += element
18
- else:
19
- output += element + (space - len(element)) * delimiter
20
- return output
11
+ def _fmt_row(self, elements: list[str], width: int, fill: str = " ") -> str:
12
+ """
13
+ Format a list of strings into a fixed-width columnar row.
14
+
15
+ Each element is left-justified to ``width`` characters using ``fill``
16
+ as the pad character. The last element is appended without trailing
17
+ padding (matches terminal/log output intent).
18
+
19
+ Replaces the hand-rolled ``autospace()`` loop with Python's built-in
20
+ ``str.ljust`` and ``str.join``.
21
+ """
22
+ if not elements:
23
+ return ""
24
+ # All but the last element are padded to `width`; last is bare.
25
+ return "".join(e.ljust(width, fill) for e in elements[:-1]) + elements[-1]
21
26
 
22
27
  def print_groups(self, space: int = 24, max_length: int = 15) -> None:
23
28
  self.log("")
24
29
  data: list[list[float]] = self.data
25
- num_groups: int = len(data)
26
30
  group_longest: int = max(len(row) for row in data)
27
31
 
28
- header: list[str] = self.groups_name
29
- line: list[str] = ["" * 7]
30
- self.log(self.autospace(header, space))
31
- self.log(self.autospace(line, space))
32
+ self.log(self._fmt_row(self.groups_name, space))
33
+ self.log(self._fmt_row(["" * 7], space))
32
34
 
33
35
  for i in range(group_longest):
34
36
  row_values: list[str] = []
@@ -51,7 +53,7 @@ class TextFormatting(StatAnalysisProtocol):
51
53
  row_values.append("")
52
54
  if all_values_empty:
53
55
  break
54
- self.log(self.autospace(row_values, space))
56
+ self.log(self._fmt_row(row_values, space))
55
57
 
56
58
  def print_results(self) -> None:
57
59
  self.log("\n\nResults: \n")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: AutoStatLib
3
- Version: 0.4.0
3
+ Version: 0.4.2
4
4
  Summary: AutoStatLib - a simple statistical analysis tool
5
5
  Author: Stemonitis, SciWare LLC
6
6
  Author-email: konung-yaropolk <yaropolk1995@gmail.com>
File without changes
File without changes
File without changes
File without changes
File without changes