AutoStatLib 0.3.1__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. {autostatlib-0.3.1/src/AutoStatLib.egg-info → autostatlib-0.4.0}/PKG-INFO +2 -1
  2. {autostatlib-0.3.1 → autostatlib-0.4.0}/README.md +1 -0
  3. autostatlib-0.4.0/src/AutoStatLib/AutoStatLib.py +408 -0
  4. autostatlib-0.4.0/src/AutoStatLib/StatPlots.py +1033 -0
  5. {autostatlib-0.3.1 → autostatlib-0.4.0}/src/AutoStatLib/__main__.py +4 -2
  6. autostatlib-0.4.0/src/AutoStatLib/_protocol.py +112 -0
  7. {autostatlib-0.3.1 → autostatlib-0.4.0}/src/AutoStatLib/_version.py +1 -1
  8. autostatlib-0.4.0/src/AutoStatLib/helpers.py +148 -0
  9. autostatlib-0.4.0/src/AutoStatLib/normality_tests.py +90 -0
  10. autostatlib-0.4.0/src/AutoStatLib/statistical_tests.py +248 -0
  11. autostatlib-0.4.0/src/AutoStatLib/text_formatting.py +120 -0
  12. {autostatlib-0.3.1 → autostatlib-0.4.0/src/AutoStatLib.egg-info}/PKG-INFO +2 -1
  13. {autostatlib-0.3.1 → autostatlib-0.4.0}/src/AutoStatLib.egg-info/SOURCES.txt +3 -1
  14. autostatlib-0.4.0/tests/test_statplots.py +670 -0
  15. autostatlib-0.3.1/src/AutoStatLib/AutoStatLib.py +0 -297
  16. autostatlib-0.3.1/src/AutoStatLib/StatPlots.py +0 -933
  17. autostatlib-0.3.1/src/AutoStatLib/helpers.py +0 -102
  18. autostatlib-0.3.1/src/AutoStatLib/normality_tests.py +0 -83
  19. autostatlib-0.3.1/src/AutoStatLib/statistical_tests.py +0 -224
  20. autostatlib-0.3.1/src/AutoStatLib/text_formatting.py +0 -106
  21. {autostatlib-0.3.1 → autostatlib-0.4.0}/LICENSE +0 -0
  22. {autostatlib-0.3.1 → autostatlib-0.4.0}/MANIFEST.in +0 -0
  23. {autostatlib-0.3.1 → autostatlib-0.4.0}/pyproject.toml +0 -0
  24. {autostatlib-0.3.1 → autostatlib-0.4.0}/requirements.txt +0 -0
  25. {autostatlib-0.3.1 → autostatlib-0.4.0}/setup.cfg +0 -0
  26. {autostatlib-0.3.1 → autostatlib-0.4.0}/src/AutoStatLib/__init__.py +0 -0
  27. {autostatlib-0.3.1 → autostatlib-0.4.0}/src/AutoStatLib.egg-info/dependency_links.txt +0 -0
  28. {autostatlib-0.3.1 → autostatlib-0.4.0}/src/AutoStatLib.egg-info/requires.txt +0 -0
  29. {autostatlib-0.3.1 → autostatlib-0.4.0}/src/AutoStatLib.egg-info/top_level.txt +0 -0
  30. {autostatlib-0.3.1 → autostatlib-0.4.0}/tests/test_autostatlib.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: AutoStatLib
3
- Version: 0.3.1
3
+ Version: 0.4.0
4
4
  Summary: AutoStatLib - a simple statistical analysis tool
5
5
  Author: Stemonitis, SciWare LLC
6
6
  Author-email: konung-yaropolk <yaropolk1995@gmail.com>
@@ -35,6 +35,7 @@ Dynamic: license-file
35
35
 
36
36
  # AutoStatLib - python library for automated statistical analysis
37
37
 
38
+ [![GitHub Actions Workflow Status](https://img.shields.io/github/actions/workflow/status/konung-yaropolk/AutoStatLib/tests.yml?label=Tests&color=green)](https://github.com/konung-yaropolk/AutoStatLib/actions/workflows/tests.yml)
38
39
  [![pypi_version](https://img.shields.io/pypi/v/AutoStatLib?label=PyPI&color=green)](https://pypi.org/project/AutoStatLib)
39
40
  [![GitHub Release](https://img.shields.io/github/v/release/konung-yaropolk/AutoStatLib?label=GitHub&color=green&link=https%3A%2F%2Fgithub.com%2Fkonung-yaropolk%2FAutoStatLib)](https://github.com/konung-yaropolk/AutoStatLib)
40
41
  [![PyPI - License](https://img.shields.io/pypi/l/AutoStatLib)](https://pypi.org/project/AutoStatLib)
@@ -1,5 +1,6 @@
1
1
  # AutoStatLib - python library for automated statistical analysis
2
2
 
3
+ [![GitHub Actions Workflow Status](https://img.shields.io/github/actions/workflow/status/konung-yaropolk/AutoStatLib/tests.yml?label=Tests&color=green)](https://github.com/konung-yaropolk/AutoStatLib/actions/workflows/tests.yml)
3
4
  [![pypi_version](https://img.shields.io/pypi/v/AutoStatLib?label=PyPI&color=green)](https://pypi.org/project/AutoStatLib)
4
5
  [![GitHub Release](https://img.shields.io/github/v/release/konung-yaropolk/AutoStatLib?label=GitHub&color=green&link=https%3A%2F%2Fgithub.com%2Fkonung-yaropolk%2FAutoStatLib)](https://github.com/konung-yaropolk/AutoStatLib)
5
6
  [![PyPI - License](https://img.shields.io/pypi/l/AutoStatLib)](https://pypi.org/project/AutoStatLib)
@@ -0,0 +1,408 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Optional, Union
4
+
5
+ import numpy as np
6
+
7
+ from AutoStatLib.statistical_tests import StatisticalTests
8
+ from AutoStatLib.normality_tests import NormalityTests
9
+ from AutoStatLib.helpers import Helpers
10
+ from AutoStatLib.text_formatting import TextFormatting
11
+ from AutoStatLib._version import __version__
12
+
13
+
14
class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Helpers):
    """
    Automatic statistical analysis tool

    Selects and runs the appropriate statistical test based on the
    properties of the input data (normality, number of groups, pairing).

    Parameters
    ----------
    groups_list:
        Input data as a list of groups, where each group is a list of
        numeric values. Non-numeric values are silently dropped.
    paired:
        Whether the groups are paired / repeated measures. Default False.
    tails:
        Number of tails for the hypothesis test (1 or 2). Default 2.
    popmean:
        Population mean for single-sample tests. Default None (uses 0
        with a warning if not provided).
    posthoc:
        Whether to run post-hoc pairwise comparisons after multi-group
        tests. Default False.
    verbose:
        Whether to print the full summary to stdout after each test.
        Default True.
    raise_errors:
        Whether to raise ValueError on invalid input instead of printing
        an error message. Default False.
    groups_name:
        Optional list of group labels. Cycled if shorter than the number
        of groups. Default None (auto-generates "Group 1", "Group 2", …).
    subgrouping:
        Optional subgrouping metadata passed through to the result dict.
        Default None (stored as [0]).
    """

    def __init__(
        self,
        groups_list: list[list],
        paired: bool = False,
        tails: int = 2,
        popmean: Optional[float] = None,
        posthoc: bool = False,
        verbose: bool = True,
        raise_errors: bool = False,
        groups_name: Optional[list[str]] = None,
        subgrouping: Optional[list] = None,
    ) -> None:

        self.groups_list: list[list] = groups_list
        self.paired: bool = paired
        self.tails: int = tails
        self.popmean: Optional[float] = popmean
        self.posthoc: bool = posthoc
        self.verbose: bool = verbose
        self.raise_errors: bool = raise_errors
        self.n_groups: int = len(self.groups_list)

        # Labels are cycled when fewer names than groups are supplied;
        # a missing, empty or [""] list falls back to "Group N".
        if groups_name and groups_name != [""]:
            self.groups_name: list[str] = [
                groups_name[i % len(groups_name)] for i in range(self.n_groups)
            ]
        else:
            self.groups_name = [f"Group {i + 1}" for i in range(self.n_groups)]

        self.subgrouping: list = subgrouping if subgrouping is not None else [0]
        self.warning_flag_non_numeric_data: bool = False
        self.summary: str = f"AutoStatLib v{__version__}"

        # Per-run state (also cleared at the start of every run_test call)
        self._reset_run_state()
        self.data: list[list[float]] = []

        # Test ID classification
        self.test_ids_all: list[str] = [
            "anova_1w_ordinary",
            "anova_1w_rm",
            "friedman",
            "kruskal_wallis",
            "mann_whitney",
            "t_test_independent",
            "t_test_paired",
            "t_test_single_sample",
            "wilcoxon",
            "wilcoxon_single_sample",
        ]
        self.test_ids_parametric: list[str] = [
            "anova_1w_ordinary",
            "anova_1w_rm",
            "t_test_independent",
            "t_test_paired",
            "t_test_single_sample",
        ]
        self.test_ids_dependent: list[str] = [
            "anova_1w_rm",
            "friedman",
            "t_test_paired",
            "wilcoxon",
        ]
        self.test_ids_3sample: list[str] = [
            "anova_1w_ordinary",
            "anova_1w_rm",
            "friedman",
            "kruskal_wallis",
        ]
        self.test_ids_2sample: list[str] = [
            "mann_whitney",
            "t_test_independent",
            "t_test_paired",
            "wilcoxon",
        ]
        self.test_ids_1sample: list[str] = [
            "t_test_single_sample",
            "wilcoxon_single_sample",
        ]

        # Warning texts keyed by the IDs passed to AddWarning()
        self.warning_ids_all: dict[str, str] = {
            "param_test_with_non-normal_data": (
                "\nWarning: Parametric test was manualy chosen for Not-Normaly distributed data.\n"
                " The results might be skewed. \n"
                " Please, run non-parametric test or preform automatic test selection.\n"
            ),
            "non-param_test_with_normal_data": (
                "\nWarning: Non-Parametric test was manualy chosen for Normaly distributed data.\n"
                " The results might be skewed. \n"
                " Please, run parametric test or preform automatic test selection.\n"
            ),
            "no_pop_mean_set": (
                "\nWarning: No Population Mean was set up for single-sample test, used default 0 value.\n"
                " The results might be skewed. \n"
                " Please, set the Population Mean and run the test again.\n"
            ),
            "paired_test_with_independend_samples": (
                "\nWarning: A paired test was manually selected, even though the samples were declared independent.\n"
                " The results might be skewed. \n"
                " Please, run test for independend samples or preform automatic test selection.\n"
            ),
            "independend_test_with_paired_samples": (
                "\nWarning: An independent test was manually selected, even though the samples were declared paired.\n"
                " The results might be skewed. \n"
                " Please, run test for paired samples or preform automatic test selection.\n"
            ),
        }

    # ------------------------------------------------------------------ #
    #  Internal orchestration                                            #
    # ------------------------------------------------------------------ #

    def _reset_run_state(self) -> None:
        """Clear every attribute that describes the outcome of a single run."""
        self.results: Optional[dict] = None
        self.error: bool = False
        self.warnings: list[str] = []
        self.normals: list[bool] = []
        self.test_name: str = ""
        self.test_id: Optional[str] = None
        self.test_stat: Optional[np.float64] = None
        self.p_value: Optional[np.float64] = None
        self.parametric: Optional[bool] = None
        self.posthoc_matrix_df: Optional[object] = None
        self.posthoc_matrix: list[list[float]] = []
        self.posthoc_name: str = ""

    def _validation_error(self, test: str) -> Optional[str]:
        """
        Check the prepared data and the requested test against each other.

        Explicit checks are used instead of ``assert`` statements so that
        validation still happens when Python runs with ``-O`` (which
        strips asserts).

        Returns
        -------
        str
            Message of the first failed check (checks run in a fixed order).
        None
            If every check passes.
        """
        if not self.data:
            return "There is no input data"
        if self.tails not in [1, 2]:
            return "Tails parameter can be 1 or 2 only"
        if test not in self.test_ids_all and test != "auto":
            return "Wrong test id choosen, ensure you called correct function"
        if not all(len(group) >= 4 for group in self.data):
            return "Each group must contain at least four values"
        if test in self.test_ids_dependent and not all(
            len(lst) == len(self.data[0]) for lst in self.data
        ):
            return "Paired samples must have the same length"
        if test in self.test_ids_2sample and self.n_groups != 2:
            return f"Only two groups of data must be given for 2-groups tests, got {self.n_groups}"
        if test in self.test_ids_1sample and self.n_groups > 1:
            return f"Only one group of data must be given for single-group tests, got {self.n_groups}"
        if test in self.test_ids_3sample and self.n_groups < 3:
            return f"At least three groups of data must be given for multi-groups tests, got {self.n_groups}"
        return None

    @staticmethod
    def _normality_symbol(outcome: object) -> str:
        """
        Map one normality-test outcome to its one-character table symbol.

        Identity checks are deliberate: only the exact objects ``True``,
        ``False`` and ``None`` get "+", "-" and " "; anything else is "e".
        """
        if outcome is True:
            return "+"
        if outcome is False:
            return "-"
        if outcome is None:
            return " "
        return "e"

    def run_test(self, test: str = "auto") -> None:
        """
        Core test runner. Validates input, checks normality, dispatches the
        chosen test, builds the result dict, and optionally prints a summary.

        Parameters
        ----------
        test:
            Test ID string (from ``test_ids_all``) or ``"auto"`` for
            automatic selection. Default ``"auto"``.

        Raises
        ------
        ValueError
            If input validation fails and ``raise_errors`` is True.
        """
        # Reset state from any previous run.
        # NOTE(review): self.summary and self.warning_flag_non_numeric_data
        # are not cleared here; presumably log() appends to self.summary, so
        # repeated runs accumulate text - confirm this is intended.
        self._reset_run_state()

        self.log("\n" + "-" * 67)
        self.log(
            f"Statistical analysis initiated for data in {len(self.groups_list)} groups\n"
        )

        # Coerce input to float, drop non-numeric values
        self.data = self.floatify_recursive(self.groups_list)  # type: ignore[assignment]
        if self.warning_flag_non_numeric_data:
            self.log("Text or other non-numeric data in the input was ignored:")

        # Drop completely empty columns
        self.data = [col for col in self.data if any(x is not None for x in col)]
        self.n_groups = len(self.data)

        # Input validation
        problem = self._validation_error(test)
        if problem is not None:
            # "none" is not one of test_ids_all; presumably run_test_by_id
            # treats it as a placeholder for "no test run" - TODO confirm.
            self.run_test_by_id("none")
            self.results = self.create_results_dict()

            # Flag the failure on every path (verbose or not, raising or
            # not) so that callers inspecting ``error`` see a consistent
            # state.
            self.error = True

            if self.raise_errors:
                raise ValueError(problem)

            if self.verbose:
                self.log("\nTest :", test)
                self.log("Error :", problem)
                self.log("-" * 67 + "\n")
                print(self.summary)
            else:
                print("AutoStatLib Error :", problem)

            return

        # Print data table
        self.print_groups()

        # Normality checks
        self.log("\n\nThe group is assumed to be normally distributed if at least one")
        self.log("normality test result is positive. Normality checked by tests:")
        self.log("Shapiro-Wilk, Lilliefors, Anderson-Darling, D'Agostino-Pearson")
        self.log("[+] -positive, [-] -negative, [ ] -too small group for the test\n")
        self.log(" SW LF AD AP ")
        for i, group_data in enumerate(self.data):
            poll = self.check_normality(group_data)
            # A group counts as normal when at least one test is positive
            isnormal: bool = any(poll)
            poll_print = tuple(self._normality_symbol(x) for x in poll)
            self.normals.append(isnormal)
            self.log(
                f" {self.groups_name[i].ljust(11, ' ')[:11]}: "
                f"{poll_print[0]} {poll_print[1]} {poll_print[2]} {poll_print[3]} "
                f"so disrtibution seems {'normal' if isnormal else 'not normal'}"
            )
        # Parametric tests are appropriate only if every group looks normal
        self.parametric = all(self.normals)

        # Log test selection context
        self.log("\n\nInput:\n")
        self.log("Data Normaly Distributed: ", self.parametric)
        self.log("Paired Groups: ", self.paired)
        self.log("Groups: ", self.n_groups)
        self.log("Test chosen by user: ", test)

        # Warn when the manually chosen test is inappropriate
        if test != "auto":
            is_parametric_test = test in self.test_ids_parametric
            is_dependent_test = test in self.test_ids_dependent
            if not self.parametric and is_parametric_test:
                self.AddWarning("param_test_with_non-normal_data")
            if self.parametric and not is_parametric_test:
                self.AddWarning("non-param_test_with_normal_data")
            if not self.paired and is_dependent_test:
                self.AddWarning("paired_test_with_independend_samples")
            if self.paired and not is_dependent_test:
                self.AddWarning("independend_test_with_paired_samples")

        # Dispatch
        if test in self.test_ids_all:
            self.run_test_by_id(test)
        else:
            self.run_test_auto()

        # Build and print results
        self.results = self.create_results_dict()
        self.print_results()
        self.log(
            "\n\nResults above are accessible as a dictionary via GetResult() method"
        )
        self.log("-" * 67 + "\n")

        if self.verbose:
            print(self.summary)

    # ------------------------------------------------------------------ #
    #  Public API                                                        #
    # ------------------------------------------------------------------ #

    def RunAuto(self) -> None:
        """Run automatic test selection."""
        self.run_test(test="auto")

    def RunManual(self, test: str) -> None:
        """Run a specific test by ID. See ``GetTestIDs()`` for valid values."""
        self.run_test(test)

    def RunOnewayAnova(self) -> None:
        """Run Ordinary One-Way ANOVA."""
        self.run_test(test="anova_1w_ordinary")

    def RunOnewayAnovaRM(self) -> None:
        """Run Repeated Measures One-Way ANOVA."""
        self.run_test(test="anova_1w_rm")

    def RunFriedman(self) -> None:
        """Run Friedman test."""
        self.run_test(test="friedman")

    def RunKruskalWallis(self) -> None:
        """Run Kruskal-Wallis test."""
        self.run_test(test="kruskal_wallis")

    def RunMannWhitney(self) -> None:
        """Run Mann-Whitney U test."""
        self.run_test(test="mann_whitney")

    def RunTtest(self) -> None:
        """Run t-test for independent samples."""
        self.run_test(test="t_test_independent")

    def RunTtestPaired(self) -> None:
        """Run t-test for paired samples."""
        self.run_test(test="t_test_paired")

    def RunTtestSingleSample(self) -> None:
        """Run single-sample t-test against ``popmean``."""
        self.run_test(test="t_test_single_sample")

    def RunWilcoxonSingleSample(self) -> None:
        """Run Wilcoxon signed-rank test for a single sample."""
        self.run_test(test="wilcoxon_single_sample")

    def RunWilcoxon(self) -> None:
        """Run Wilcoxon signed-rank test for two paired samples."""
        self.run_test(test="wilcoxon")

    def GetResult(self) -> Optional[dict]:
        """
        Return the result dictionary from the last test run.

        Returns
        -------
        dict
            The stored result dictionary when one is available.
        None
            If no test has been run yet (a notice is printed).
        {}
            Empty dict if the last run failed validation and the stored
            result is empty (a notice is printed).
        """
        if self.error:
            if not self.results:
                print("Error occured, no results to output")
                return {}
            return self.results

        if self.results is None:
            print("No test chosen, no results to output")
        return self.results

    def GetSummary(self) -> str:
        """
        Return the text summary stored in ``self.summary``.

        Prints a notice first when no test has been run yet; the summary
        string is returned in every case.
        """
        if self.results is None and not self.error:
            print("No test chosen, no summary to output")
        return self.summary

    def GetTestIDs(self) -> list[str]:
        """Return the list of all valid test ID strings."""
        return self.test_ids_all

    def PrintSummary(self) -> None:
        """Print the full text summary to stdout."""
        print(self.summary)
403
+
404
+
405
if __name__ == "__main__":
    # Running this module directly does nothing useful: tell the user to
    # import the package instead.
    print(
        "This package works as an imported module only.\n"
        'Use "import AutoStatLib" statement'
    )