AutoStatLib 0.2.27__tar.gz → 0.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {autostatlib-0.2.27/src/AutoStatLib.egg-info → autostatlib-0.3.1}/PKG-INFO +1 -1
- {autostatlib-0.2.27 → autostatlib-0.3.1}/src/AutoStatLib/AutoStatLib.py +15 -15
- {autostatlib-0.2.27 → autostatlib-0.3.1}/src/AutoStatLib/StatPlots.py +35 -25
- {autostatlib-0.2.27 → autostatlib-0.3.1}/src/AutoStatLib/_version.py +1 -1
- {autostatlib-0.2.27 → autostatlib-0.3.1}/src/AutoStatLib/helpers.py +4 -4
- {autostatlib-0.2.27 → autostatlib-0.3.1}/src/AutoStatLib/statistical_tests.py +23 -10
- {autostatlib-0.2.27 → autostatlib-0.3.1/src/AutoStatLib.egg-info}/PKG-INFO +1 -1
- {autostatlib-0.2.27 → autostatlib-0.3.1}/src/AutoStatLib.egg-info/SOURCES.txt +2 -1
- autostatlib-0.3.1/tests/test_autostatlib.py +603 -0
- {autostatlib-0.2.27 → autostatlib-0.3.1}/LICENSE +0 -0
- {autostatlib-0.2.27 → autostatlib-0.3.1}/MANIFEST.in +0 -0
- {autostatlib-0.2.27 → autostatlib-0.3.1}/README.md +0 -0
- {autostatlib-0.2.27 → autostatlib-0.3.1}/pyproject.toml +0 -0
- {autostatlib-0.2.27 → autostatlib-0.3.1}/requirements.txt +0 -0
- {autostatlib-0.2.27 → autostatlib-0.3.1}/setup.cfg +0 -0
- {autostatlib-0.2.27 → autostatlib-0.3.1}/src/AutoStatLib/__init__.py +0 -0
- {autostatlib-0.2.27 → autostatlib-0.3.1}/src/AutoStatLib/__main__.py +0 -0
- {autostatlib-0.2.27 → autostatlib-0.3.1}/src/AutoStatLib/normality_tests.py +0 -0
- {autostatlib-0.2.27 → autostatlib-0.3.1}/src/AutoStatLib/text_formatting.py +0 -0
- {autostatlib-0.2.27 → autostatlib-0.3.1}/src/AutoStatLib.egg-info/dependency_links.txt +0 -0
- {autostatlib-0.2.27 → autostatlib-0.3.1}/src/AutoStatLib.egg-info/requires.txt +0 -0
- {autostatlib-0.2.27 → autostatlib-0.3.1}/src/AutoStatLib.egg-info/top_level.txt +0 -0
|
@@ -20,10 +20,9 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
|
|
|
20
20
|
posthoc=False,
|
|
21
21
|
verbose=True,
|
|
22
22
|
raise_errors=False,
|
|
23
|
-
groups_name=
|
|
24
|
-
subgrouping=
|
|
25
|
-
|
|
26
|
-
self.error = False
|
|
23
|
+
groups_name=None,
|
|
24
|
+
subgrouping=None):
|
|
25
|
+
|
|
27
26
|
self.groups_list = groups_list
|
|
28
27
|
self.paired = paired
|
|
29
28
|
self.tails = tails
|
|
@@ -34,7 +33,7 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
|
|
|
34
33
|
self.n_groups = len(self.groups_list)
|
|
35
34
|
self.groups_name = [groups_name[i % len(groups_name)]
|
|
36
35
|
for i in range(self.n_groups)] if groups_name and groups_name != [''] else [f'Group {i+1}' for i in range(self.n_groups)]
|
|
37
|
-
self.subgrouping = subgrouping if subgrouping else [0]
|
|
36
|
+
self.subgrouping = subgrouping if subgrouping is not None else [0]
|
|
38
37
|
self.warning_flag_non_numeric_data = False
|
|
39
38
|
self.summary = 'AutoStatLib v{}'.format(__version__)
|
|
40
39
|
|
|
@@ -100,8 +99,8 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
|
|
|
100
99
|
'param_test_with_non-normal_data': '\nWarning: Parametric test was manualy chosen for Not-Normaly distributed data.\n The results might be skewed. \n Please, run non-parametric test or preform automatic test selection.\n',
|
|
101
100
|
'non-param_test_with_normal_data': '\nWarning: Non-Parametric test was manualy chosen for Normaly distributed data.\n The results might be skewed. \n Please, run parametric test or preform automatic test selection.\n',
|
|
102
101
|
'no_pop_mean_set': '\nWarning: No Population Mean was set up for single-sample test, used default 0 value.\n The results might be skewed. \n Please, set the Population Mean and run the test again.\n',
|
|
103
|
-
|
|
104
|
-
|
|
102
|
+
'paired_test_with_independend_samples': '\nWarning: A paired test was manually selected, even though the samples were declared independent.\n The results might be skewed. \n Please, run test for independend samples or preform automatic test selection.\n',
|
|
103
|
+
'independend_test_with_paired_samples': '\nWarning: An independent test was manually selected, even though the samples were declared paired.\n The results might be skewed. \n Please, run test for paired samples or preform automatic test selection.\n',
|
|
105
104
|
}
|
|
106
105
|
|
|
107
106
|
def run_test(self, test='auto'):
|
|
@@ -133,6 +132,8 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
|
|
|
133
132
|
# delete the empty cols from input
|
|
134
133
|
self.data = [col for col in self.data if any(
|
|
135
134
|
x is not None for x in col)]
|
|
135
|
+
# re-calculate the number of groups after removing empty cols
|
|
136
|
+
self.n_groups = len(self.data)
|
|
136
137
|
|
|
137
138
|
# User input assertion block
|
|
138
139
|
try:
|
|
@@ -188,8 +189,7 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
|
|
|
188
189
|
poll_print = tuple(
|
|
189
190
|
'+' if x is True else '-' if x is False else ' ' if x is None else 'e' for x in poll)
|
|
190
191
|
self.normals.append(isnormal)
|
|
191
|
-
self.log(
|
|
192
|
-
f' {self.groups_name[i].ljust(11, ' ')[:11]}: {poll_print[0]} {poll_print[1]} {poll_print[2]} {poll_print[3]} so disrtibution seems {"normal" if isnormal else "not normal"}')
|
|
192
|
+
self.log(f' {self.groups_name[i].ljust(11, ' ')[:11]}: {poll_print[0]} {poll_print[1]} {poll_print[2]} {poll_print[3]} so disrtibution seems {"normal" if isnormal else "not normal"}')
|
|
193
193
|
self.parametric = all(self.normals)
|
|
194
194
|
|
|
195
195
|
# print test chosen
|
|
@@ -207,10 +207,10 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
|
|
|
207
207
|
|
|
208
208
|
# Maybe unneeded checks for manually selected tests
|
|
209
209
|
# because the user probably knows what test they selected
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
210
|
+
if test != 'auto' and not self.paired and test in self.test_ids_dependent:
|
|
211
|
+
self.AddWarning('paired_test_with_independend_samples')
|
|
212
|
+
if test != 'auto' and self.paired and test not in self.test_ids_dependent:
|
|
213
|
+
self.AddWarning('independend_test_with_paired_samples')
|
|
214
214
|
|
|
215
215
|
# run the test
|
|
216
216
|
if test in self.test_ids_all:
|
|
@@ -268,7 +268,7 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
|
|
|
268
268
|
self.run_test(test='wilcoxon')
|
|
269
269
|
|
|
270
270
|
def GetResult(self):
|
|
271
|
-
if
|
|
271
|
+
if self.results is None and not self.error:
|
|
272
272
|
print('No test chosen, no results to output')
|
|
273
273
|
# self.run_test(test='auto')
|
|
274
274
|
return self.results
|
|
@@ -279,7 +279,7 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
|
|
|
279
279
|
return self.results
|
|
280
280
|
|
|
281
281
|
def GetSummary(self):
|
|
282
|
-
if
|
|
282
|
+
if self.results is None and not self.error:
|
|
283
283
|
print('No test chosen, no summary to output')
|
|
284
284
|
# self.run_test(test='auto')
|
|
285
285
|
return self.summary
|
|
@@ -145,8 +145,8 @@ class BaseStatPlot(Helpers):
|
|
|
145
145
|
self.median = [
|
|
146
146
|
np.median(self.data_groups[i]).item() for i in range(self.n_groups)]
|
|
147
147
|
self.sd = [
|
|
148
|
-
np.std(self.data_groups[i]).item() for i in range(self.n_groups)]
|
|
149
|
-
self.sem = [np.std(self.data_groups[i]).item() / np.sqrt(len(self.data_groups[i])).item()
|
|
148
|
+
np.std(self.data_groups[i], ddof=1).item() for i in range(self.n_groups)]
|
|
149
|
+
self.sem = [np.std(self.data_groups[i], ddof=1).item() / np.sqrt(len(self.data_groups[i])).item()
|
|
150
150
|
for i in range(self.n_groups)]
|
|
151
151
|
|
|
152
152
|
self.n = [len(i) for i in self.data_groups]
|
|
@@ -274,65 +274,65 @@ class BaseStatPlot(Helpers):
|
|
|
274
274
|
fliersMarker = 'b+'
|
|
275
275
|
# write a function to make a dictionary
|
|
276
276
|
whiskersCapsStyles = dict()
|
|
277
|
-
if whiskersCapsColor
|
|
277
|
+
if whiskersCapsColor is not None:
|
|
278
278
|
whiskersCapsStyles["color"] = whiskersCapsColor
|
|
279
|
-
if whiskersCapsLineWidth
|
|
279
|
+
if whiskersCapsLineWidth is not None:
|
|
280
280
|
whiskersCapsStyles["linewidth"] = whiskersCapsLineWidth
|
|
281
|
-
if whiskersCapsLineStyle
|
|
281
|
+
if whiskersCapsLineStyle is not None:
|
|
282
282
|
whiskersCapsStyles['linestyle'] = whiskersCapsLineStyle
|
|
283
283
|
|
|
284
284
|
boxProps = {"facecolor": (0, 0, 0, 0),
|
|
285
285
|
"edgecolor": "black", "linewidth": 1}
|
|
286
|
-
if boxFill
|
|
286
|
+
if boxFill is not None:
|
|
287
287
|
boxProps["facecolor"] = boxFill
|
|
288
|
-
if boxBorderColor
|
|
288
|
+
if boxBorderColor is not None:
|
|
289
289
|
boxProps["edgecolor"] = boxBorderColor
|
|
290
|
-
if boxBorderWidth
|
|
290
|
+
if boxBorderWidth is not None:
|
|
291
291
|
boxProps['linewidth'] = boxBorderWidth
|
|
292
|
-
# if boxBorderStyle
|
|
292
|
+
# if boxBorderStyle is not None:
|
|
293
293
|
# boxProps['linestyle'] = boxBorderStyle !!!this feature is not working with patch_artist that is needed for facecolor to work
|
|
294
294
|
|
|
295
295
|
whiskersProps = {"color": 'black',
|
|
296
296
|
"linestyle": "solid", "linewidth": 1}
|
|
297
|
-
if whiskersColor
|
|
297
|
+
if whiskersColor is not None:
|
|
298
298
|
whiskersProps["color"] = whiskersColor
|
|
299
|
-
if whiskersLineStyle
|
|
299
|
+
if whiskersLineStyle is not None:
|
|
300
300
|
whiskersProps["linestyle"] = whiskersLineStyle
|
|
301
|
-
if whiskersLineWidth
|
|
301
|
+
if whiskersLineWidth is not None:
|
|
302
302
|
whiskersProps['linewidth'] = whiskersLineWidth
|
|
303
303
|
|
|
304
304
|
flierProps = {"markerfacecolor": [
|
|
305
305
|
0, 0, 0, 0], "markeredgecolor": "black", "linestyle": "solid", "markeredgewidth": 1}
|
|
306
|
-
if flierFillColor
|
|
306
|
+
if flierFillColor is not None:
|
|
307
307
|
flierProps["markerfacecolor"] = flierFillColor
|
|
308
|
-
if flierEdgeColor
|
|
308
|
+
if flierEdgeColor is not None:
|
|
309
309
|
flierProps["markeredgecolor"] = flierEdgeColor
|
|
310
|
-
if flierLineWidth
|
|
310
|
+
if flierLineWidth is not None:
|
|
311
311
|
flierProps['markeredgewidth'] = flierLineWidth
|
|
312
|
-
if flierLineStyle
|
|
312
|
+
if flierLineStyle is not None:
|
|
313
313
|
flierProps['linestyle'] = flierLineStyle
|
|
314
314
|
medianProps = {"linestyle": 'solid', "linewidth": 1, "color": 'red'}
|
|
315
|
-
if medianColor
|
|
315
|
+
if medianColor is not None:
|
|
316
316
|
medianProps["color"] = medianColor
|
|
317
|
-
if medianLineStyle
|
|
317
|
+
if medianLineStyle is not None:
|
|
318
318
|
medianProps["linestyle"] = medianLineStyle
|
|
319
|
-
if medianLineWidth
|
|
319
|
+
if medianLineWidth is not None:
|
|
320
320
|
medianProps['linewidth'] = medianLineWidth
|
|
321
321
|
|
|
322
322
|
meanProps = {"color": "black", "marker": 'o', "markerfacecolor": "black",
|
|
323
323
|
"markeredgecolor": "black", "linestyle": "solid", "linewidth": 1}
|
|
324
324
|
|
|
325
|
-
if meanMarker
|
|
325
|
+
if meanMarker is not None:
|
|
326
326
|
meanProps['marker'] = meanMarker
|
|
327
|
-
if meanFillColor
|
|
327
|
+
if meanFillColor is not None:
|
|
328
328
|
meanProps["markerfacecolor"] = meanFillColor
|
|
329
|
-
if meanEdgeColor
|
|
329
|
+
if meanEdgeColor is not None:
|
|
330
330
|
meanProps['markeredgecolor'] = meanEdgeColor
|
|
331
|
-
if meanLineColor
|
|
331
|
+
if meanLineColor is not None:
|
|
332
332
|
meanProps["color"] = meanLineColor
|
|
333
|
-
if meanLineStyle
|
|
333
|
+
if meanLineStyle is not None:
|
|
334
334
|
meanProps['linestyle'] = meanLineStyle
|
|
335
|
-
if meanLineWidth
|
|
335
|
+
if meanLineWidth is not None:
|
|
336
336
|
meanProps['linewidth'] = meanLineWidth
|
|
337
337
|
|
|
338
338
|
bplot = ax.boxplot(self.data_groups,
|
|
@@ -901,6 +901,16 @@ class SwarmStatPlot(BaseStatPlot):
|
|
|
901
901
|
|
|
902
902
|
|
|
903
903
|
class SwarmStatPlot_subgrouping_betta(BaseStatPlot):
|
|
904
|
+
'''
|
|
905
|
+
Swarm plot with subgrouping support. Subgrouping is defined by the user as a list of labels (one per data point)
|
|
906
|
+
that indicate which subgroup each data point belongs to.
|
|
907
|
+
The plot will automatically assign different colors to each unique subgroup label,
|
|
908
|
+
and add a legend to indicate which color corresponds to which subgroup.
|
|
909
|
+
Not tested well, use with caution.
|
|
910
|
+
For now, only supports one subgrouping across all groups,
|
|
911
|
+
so the subgrouping list should have the same length as the total number of data points across all groups.
|
|
912
|
+
'''
|
|
913
|
+
|
|
904
914
|
|
|
905
915
|
def plot(self, linewidth=1.8):
|
|
906
916
|
if not self.error:
|
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
# AutoStatLib package version:
|
|
2
|
-
__version__ = "0.2.27"
|
|
2
|
+
__version__ = "0.3.1"
|
|
@@ -48,7 +48,7 @@ class Helpers():
|
|
|
48
48
|
def create_results_dict(self) -> dict:
|
|
49
49
|
|
|
50
50
|
# evaluate successfullness
|
|
51
|
-
if self.p_value
|
|
51
|
+
if self.p_value is not None:
|
|
52
52
|
self.successfull = True
|
|
53
53
|
else:
|
|
54
54
|
self.successfull = False
|
|
@@ -68,7 +68,7 @@ class Helpers():
|
|
|
68
68
|
'Population_Mean': self.popmean if self.n_groups == 1 else 'N/A',
|
|
69
69
|
'Data_Normaly_Distributed': self.parametric if self.successfull else None,
|
|
70
70
|
'Parametric_Test_Applied': True if self.test_id in self.test_ids_parametric else False,
|
|
71
|
-
'Paired_Test_Applied': self.
|
|
71
|
+
'Paired_Test_Applied': self.paired_test_applied if self.successfull else None,
|
|
72
72
|
'Tails': self.tails,
|
|
73
73
|
'p_value_exact': self.p_value.item() if self.successfull else None,
|
|
74
74
|
'Stars': self.stars_int,
|
|
@@ -79,8 +79,8 @@ class Helpers():
|
|
|
79
79
|
'Groups_N': [len(self.data[i]) for i in range(len(self.data))],
|
|
80
80
|
'Groups_Median': [np.median(self.data[i]).item() for i in range(len(self.data))],
|
|
81
81
|
'Groups_Mean': [np.mean(self.data[i]).item() for i in range(len(self.data))],
|
|
82
|
-
'Groups_SD': [np.std(self.data[i]).item() for i in range(len(self.data))],
|
|
83
|
-
'Groups_SE': [np.std(self.data[i]).item() / np.sqrt(len(self.data))
|
|
82
|
+
'Groups_SD': [np.std(self.data[i], ddof=1).item() for i in range(len(self.data))],
|
|
83
|
+
'Groups_SE': [np.std(self.data[i], ddof=1).item() / np.sqrt(len(self.data[i])) for i in range(len(self.data))],
|
|
84
84
|
'subgrouping': self.subgrouping,
|
|
85
85
|
# actually returns list of lists of numpy dtypes of float64, next make it return regular floats:
|
|
86
86
|
'Samples': self.data,
|
|
@@ -6,6 +6,19 @@ from statsmodels.stats.multicomp import pairwise_tukeyhsd
|
|
|
6
6
|
from statsmodels.stats.multitest import multipletests
|
|
7
7
|
from scipy.stats import ttest_rel, ttest_ind, ttest_1samp, wilcoxon, mannwhitneyu, f_oneway, kruskal, friedmanchisquare
|
|
8
8
|
|
|
9
|
+
# Known issue: One-tailed p-value calculation is currently implemented by simply dividing the two-tailed p-value by 2.
|
|
10
|
+
# This approach is only valid when the observed effect is in the hypothesized direction.
|
|
11
|
+
# If the effect is in the opposite direction, the one-tailed p-value should be calculated as 1 - (p_two_tailed / 2).
|
|
12
|
+
# Without an alternative parameter to specify the expected direction of the effect,
|
|
13
|
+
# users may receive misleading results for one-tailed tests when the effect is in the opposite direction.
|
|
14
|
+
|
|
15
|
+
# One-tailed p-value: no directionality check
|
|
16
|
+
# if self.tails == 1:
|
|
17
|
+
# p_value /= 2
|
|
18
|
+
# Dividing a two-tailed p-value by 2 is only valid when the test statistic falls in the hypothesized direction. If the effect is in the opposite direction, the one-tailed p should be 1 - p_two_tailed/2. Without an `alternative` parameter exposed to the user, results for one-tailed tests where the effect direction is "wrong" will be misleading.
|
|
19
|
+
# Recommendation: Either expose an alternative='less'/'greater' parameter and pass it to scipy.stats directly (which handles it correctly), or document that one-tailed results are only valid when the observed effect is in the expected direction.
|
|
20
|
+
|
|
21
|
+
|
|
9
22
|
|
|
10
23
|
class StatisticalTests():
|
|
11
24
|
'''
|
|
@@ -76,11 +89,7 @@ class StatisticalTests():
|
|
|
76
89
|
case 'wilcoxon_single_sample': stat, p_value = self.wilcoxon_single_sample()
|
|
77
90
|
case 'none': stat, p_value = (None, None)
|
|
78
91
|
|
|
79
|
-
|
|
80
|
-
self.paired = True
|
|
81
|
-
else:
|
|
82
|
-
self.paired = False
|
|
83
|
-
|
|
92
|
+
self.paired_test_applied = test_id in self.test_ids_dependent
|
|
84
93
|
self.test_name = test_names_dict[test_id]
|
|
85
94
|
self.test_id = test_id
|
|
86
95
|
self.test_stat = stat
|
|
@@ -88,7 +97,11 @@ class StatisticalTests():
|
|
|
88
97
|
|
|
89
98
|
def anova_1w_ordinary(self):
|
|
90
99
|
stat, p_value = f_oneway(*self.data)
|
|
100
|
+
|
|
101
|
+
# bad practice to silently rewrite the user's input,
|
|
102
|
+
# but this is a non-directional test so one-tailed doesn't make sense
|
|
91
103
|
self.tails = 2
|
|
104
|
+
|
|
92
105
|
# if self.tails == 1 and p_value > 0.5:
|
|
93
106
|
# p_value /= 2
|
|
94
107
|
# if self.tails == 1:
|
|
@@ -102,10 +115,10 @@ class StatisticalTests():
|
|
|
102
115
|
# Tukey's multiple comparisons
|
|
103
116
|
tukey_result = pairwise_tukeyhsd(data_flat, group_labels)
|
|
104
117
|
|
|
105
|
-
|
|
118
|
+
tukey_pvalues = tukey_result.pvalues.tolist()
|
|
106
119
|
n = self.n_groups
|
|
107
120
|
# prepare posthoc matrix
|
|
108
|
-
self.posthoc_matrix = self.list_to_matrix(
|
|
121
|
+
self.posthoc_matrix = self.list_to_matrix(tukey_pvalues, n)
|
|
109
122
|
|
|
110
123
|
return stat, p_value
|
|
111
124
|
|
|
@@ -119,7 +132,7 @@ class StatisticalTests():
|
|
|
119
132
|
|
|
120
133
|
df = self.matrix_to_dataframe(self.data)
|
|
121
134
|
res = AnovaRM(df, 'Value', 'Row', within=['Col']).fit()
|
|
122
|
-
|
|
135
|
+
|
|
123
136
|
stat = res.anova_table.iloc[0][0]
|
|
124
137
|
p_value = res.anova_table.iloc[0][3]
|
|
125
138
|
|
|
@@ -186,7 +199,7 @@ class StatisticalTests():
|
|
|
186
199
|
return stat, p_value
|
|
187
200
|
|
|
188
201
|
def t_test_single_sample(self):
|
|
189
|
-
if self.popmean
|
|
202
|
+
if self.popmean is None:
|
|
190
203
|
self.popmean = 0
|
|
191
204
|
self.AddWarning('no_pop_mean_set')
|
|
192
205
|
stat, p_value = ttest_1samp(self.data[0], self.popmean)
|
|
@@ -201,7 +214,7 @@ class StatisticalTests():
|
|
|
201
214
|
return stat, p_value
|
|
202
215
|
|
|
203
216
|
def wilcoxon_single_sample(self):
|
|
204
|
-
if self.popmean
|
|
217
|
+
if self.popmean is None:
|
|
205
218
|
self.popmean = 0
|
|
206
219
|
self.AddWarning('no_pop_mean_set')
|
|
207
220
|
data = [i - self.popmean for i in self.data[0]]
|
|
@@ -16,4 +16,5 @@ src/AutoStatLib.egg-info/PKG-INFO
|
|
|
16
16
|
src/AutoStatLib.egg-info/SOURCES.txt
|
|
17
17
|
src/AutoStatLib.egg-info/dependency_links.txt
|
|
18
18
|
src/AutoStatLib.egg-info/requires.txt
|
|
19
|
-
src/AutoStatLib.egg-info/top_level.txt
|
|
19
|
+
src/AutoStatLib.egg-info/top_level.txt
|
|
20
|
+
tests/test_autostatlib.py
|
|
@@ -0,0 +1,603 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
import numpy as np
|
|
3
|
+
import AutoStatLib
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
# ─────────────────────────────────────────────
|
|
7
|
+
# Fixtures
|
|
8
|
+
# ─────────────────────────────────────────────
|
|
9
|
+
|
|
10
|
+
@pytest.fixture
|
|
11
|
+
def normal_2groups():
|
|
12
|
+
np.random.seed(42)
|
|
13
|
+
return [list(np.random.normal(0, 1, 20)), list(np.random.normal(2, 1, 20))]
|
|
14
|
+
|
|
15
|
+
@pytest.fixture
|
|
16
|
+
def normal_2groups_paired():
|
|
17
|
+
np.random.seed(42)
|
|
18
|
+
before = list(np.random.normal(5, 1, 20))
|
|
19
|
+
after = [x + np.random.normal(1, 0.3) for x in before]
|
|
20
|
+
return [before, after]
|
|
21
|
+
|
|
22
|
+
@pytest.fixture
|
|
23
|
+
def nonnormal_2groups():
|
|
24
|
+
np.random.seed(7)
|
|
25
|
+
return [list(np.random.exponential(1, 30)), list(np.random.exponential(5, 30))]
|
|
26
|
+
|
|
27
|
+
@pytest.fixture
|
|
28
|
+
def normal_3groups():
|
|
29
|
+
np.random.seed(0)
|
|
30
|
+
return [list(np.random.normal(i * 3, 1, 20)) for i in range(3)]
|
|
31
|
+
|
|
32
|
+
@pytest.fixture
|
|
33
|
+
def normal_3groups_paired():
|
|
34
|
+
np.random.seed(1)
|
|
35
|
+
return [list(np.random.normal(i, 1, 15)) for i in range(3)]
|
|
36
|
+
|
|
37
|
+
@pytest.fixture
|
|
38
|
+
def single_group():
|
|
39
|
+
np.random.seed(5)
|
|
40
|
+
return [list(np.random.normal(5, 1, 25))]
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
# ─────────────────────────────────────────────
|
|
44
|
+
# 1. Basic RunAuto test selection
|
|
45
|
+
# ─────────────────────────────────────────────
|
|
46
|
+
|
|
47
|
+
def test_auto_normal_2groups_independent_selects_ttest(normal_2groups):
|
|
48
|
+
a = AutoStatLib.StatisticalAnalysis(normal_2groups)
|
|
49
|
+
a.RunAuto()
|
|
50
|
+
assert a.test_id == 't_test_independent'
|
|
51
|
+
|
|
52
|
+
def test_auto_nonnormal_2groups_selects_mann_whitney(nonnormal_2groups):
|
|
53
|
+
a = AutoStatLib.StatisticalAnalysis(nonnormal_2groups)
|
|
54
|
+
a.RunAuto()
|
|
55
|
+
assert a.test_id == 'mann_whitney'
|
|
56
|
+
|
|
57
|
+
def test_auto_normal_2groups_paired_selects_ttest_paired(normal_2groups_paired):
|
|
58
|
+
a = AutoStatLib.StatisticalAnalysis(normal_2groups_paired, paired=True)
|
|
59
|
+
a.RunAuto()
|
|
60
|
+
assert a.test_id == 't_test_paired'
|
|
61
|
+
|
|
62
|
+
def test_auto_normal_3groups_independent_selects_anova(normal_3groups):
|
|
63
|
+
a = AutoStatLib.StatisticalAnalysis(normal_3groups)
|
|
64
|
+
a.RunAuto()
|
|
65
|
+
assert a.test_id == 'anova_1w_ordinary'
|
|
66
|
+
|
|
67
|
+
def test_auto_nonnormal_3groups_selects_kruskal(normal_3groups):
|
|
68
|
+
np.random.seed(0)
|
|
69
|
+
data = [list(np.random.exponential(i + 1, 20)) for i in range(3)]
|
|
70
|
+
a = AutoStatLib.StatisticalAnalysis(data)
|
|
71
|
+
a.RunAuto()
|
|
72
|
+
assert a.test_id == 'kruskal_wallis'
|
|
73
|
+
|
|
74
|
+
@pytest.mark.xfail(reason="anova_1w_rm crashes with KeyError on res.anova_table.iloc[0][0] — pandas API mismatch (known bug)")
|
|
75
|
+
def test_auto_normal_3groups_paired_selects_anova_rm_known_bug(normal_3groups_paired):
|
|
76
|
+
a = AutoStatLib.StatisticalAnalysis(normal_3groups_paired, paired=True)
|
|
77
|
+
a.RunAuto()
|
|
78
|
+
assert a.test_id == 'anova_1w_rm'
|
|
79
|
+
|
|
80
|
+
def test_auto_single_group_normal_selects_ttest_single(single_group):
|
|
81
|
+
a = AutoStatLib.StatisticalAnalysis(single_group, popmean=0)
|
|
82
|
+
a.RunAuto()
|
|
83
|
+
assert a.test_id == 't_test_single_sample'
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
# ─────────────────────────────────────────────
|
|
87
|
+
# 2. All individual Run* methods
|
|
88
|
+
# ─────────────────────────────────────────────
|
|
89
|
+
|
|
90
|
+
def test_RunTtest(normal_2groups):
|
|
91
|
+
a = AutoStatLib.StatisticalAnalysis(normal_2groups)
|
|
92
|
+
a.RunTtest()
|
|
93
|
+
assert a.GetResult()['Test_Name'] == 't-test for independent samples'
|
|
94
|
+
|
|
95
|
+
def test_RunTtestPaired(normal_2groups_paired):
|
|
96
|
+
a = AutoStatLib.StatisticalAnalysis(normal_2groups_paired)
|
|
97
|
+
a.RunTtestPaired()
|
|
98
|
+
assert a.GetResult()['Test_Name'] == 't-test for paired samples'
|
|
99
|
+
|
|
100
|
+
def test_RunMannWhitney(nonnormal_2groups):
|
|
101
|
+
a = AutoStatLib.StatisticalAnalysis(nonnormal_2groups)
|
|
102
|
+
a.RunMannWhitney()
|
|
103
|
+
assert a.GetResult()['Test_Name'] == 'Mann-Whitney U test'
|
|
104
|
+
|
|
105
|
+
def test_RunWilcoxon(normal_2groups_paired):
|
|
106
|
+
a = AutoStatLib.StatisticalAnalysis(normal_2groups_paired)
|
|
107
|
+
a.RunWilcoxon()
|
|
108
|
+
assert a.GetResult()['Test_Name'] == 'Wilcoxon signed-rank test'
|
|
109
|
+
|
|
110
|
+
def test_RunOnewayAnova(normal_3groups):
|
|
111
|
+
a = AutoStatLib.StatisticalAnalysis(normal_3groups)
|
|
112
|
+
a.RunOnewayAnova()
|
|
113
|
+
assert a.GetResult()['Test_Name'] == 'Ordinary One-Way ANOVA'
|
|
114
|
+
|
|
115
|
+
def test_RunKruskalWallis(normal_3groups):
|
|
116
|
+
a = AutoStatLib.StatisticalAnalysis(normal_3groups)
|
|
117
|
+
a.RunKruskalWallis()
|
|
118
|
+
assert a.GetResult()['Test_Name'] == 'Kruskal-Wallis test'
|
|
119
|
+
|
|
120
|
+
def test_RunFriedman(normal_3groups_paired):
|
|
121
|
+
a = AutoStatLib.StatisticalAnalysis(normal_3groups_paired)
|
|
122
|
+
a.RunFriedman()
|
|
123
|
+
assert a.GetResult()['Test_Name'] == 'Friedman test'
|
|
124
|
+
|
|
125
|
+
def test_RunTtestSingleSample(single_group):
|
|
126
|
+
a = AutoStatLib.StatisticalAnalysis(single_group, popmean=0)
|
|
127
|
+
a.RunTtestSingleSample()
|
|
128
|
+
assert a.GetResult()['Test_Name'] == 'Single-sample t-test'
|
|
129
|
+
|
|
130
|
+
def test_RunWilcoxonSingleSample(single_group):
|
|
131
|
+
a = AutoStatLib.StatisticalAnalysis(single_group, popmean=0)
|
|
132
|
+
a.RunWilcoxonSingleSample()
|
|
133
|
+
assert a.GetResult()['Test_Name'] == 'Wilcoxon signed-rank test for single sample'
|
|
134
|
+
|
|
135
|
+
def test_RunManual_valid(normal_2groups):
|
|
136
|
+
a = AutoStatLib.StatisticalAnalysis(normal_2groups)
|
|
137
|
+
a.RunManual('mann_whitney')
|
|
138
|
+
assert a.GetResult()['Test_Name'] == 'Mann-Whitney U test'
|
|
139
|
+
|
|
140
|
+
def test_RunManual_invalid_raises(normal_2groups):
|
|
141
|
+
a = AutoStatLib.StatisticalAnalysis(normal_2groups, raise_errors=True)
|
|
142
|
+
with pytest.raises(ValueError):
|
|
143
|
+
a.RunManual('not_a_real_test')
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
# ─────────────────────────────────────────────
|
|
147
|
+
# 3. Result dict — values & types
|
|
148
|
+
# ─────────────────────────────────────────────
|
|
149
|
+
|
|
150
|
+
REQUIRED_KEYS = [
|
|
151
|
+
'p_value', 'p_value_exact', 'Significance(p<0.05)', 'Stars', 'Stars_Printed',
|
|
152
|
+
'Test_Name', 'Groups_Compared', 'Population_Mean', 'Data_Normaly_Distributed',
|
|
153
|
+
'Parametric_Test_Applied', 'Paired_Test_Applied', 'Tails',
|
|
154
|
+
'Groups_N', 'Groups_Mean', 'Groups_SD', 'Groups_SE', 'Groups_Median',
|
|
155
|
+
'Warnings', 'Successfull_Test', 'Samples',
|
|
156
|
+
'Posthoc_Matrix', 'Posthoc_Matrix_bool', 'Posthoc_Matrix_printed', 'Posthoc_Matrix_stars',
|
|
157
|
+
]
|
|
158
|
+
|
|
159
|
+
def test_result_dict_has_all_required_keys(normal_2groups):
|
|
160
|
+
a = AutoStatLib.StatisticalAnalysis(normal_2groups)
|
|
161
|
+
a.RunAuto()
|
|
162
|
+
r = a.GetResult()
|
|
163
|
+
for key in REQUIRED_KEYS:
|
|
164
|
+
assert key in r, f"Missing key in result dict: '{key}'"
|
|
165
|
+
|
|
166
|
+
def test_p_value_exact_is_float_in_range(normal_2groups):
|
|
167
|
+
a = AutoStatLib.StatisticalAnalysis(normal_2groups)
|
|
168
|
+
a.RunAuto()
|
|
169
|
+
p = a.GetResult()['p_value_exact']
|
|
170
|
+
assert isinstance(p, float)
|
|
171
|
+
assert 0.0 <= p <= 1.0
|
|
172
|
+
|
|
173
|
+
def test_significance_is_bool(normal_2groups):
|
|
174
|
+
a = AutoStatLib.StatisticalAnalysis(normal_2groups)
|
|
175
|
+
a.RunAuto()
|
|
176
|
+
sig = a.GetResult()['Significance(p<0.05)']
|
|
177
|
+
assert isinstance(sig, bool)
|
|
178
|
+
|
|
179
|
+
def test_significance_consistent_with_p_value(normal_2groups):
|
|
180
|
+
a = AutoStatLib.StatisticalAnalysis(normal_2groups)
|
|
181
|
+
a.RunAuto()
|
|
182
|
+
r = a.GetResult()
|
|
183
|
+
assert r['Significance(p<0.05)'] == (r['p_value_exact'] < 0.05)
|
|
184
|
+
|
|
185
|
+
def test_groups_n_matches_input_lengths(normal_2groups):
|
|
186
|
+
a = AutoStatLib.StatisticalAnalysis(normal_2groups)
|
|
187
|
+
a.RunAuto()
|
|
188
|
+
r = a.GetResult()
|
|
189
|
+
assert r['Groups_N'] == [len(g) for g in normal_2groups]
|
|
190
|
+
|
|
191
|
+
def test_groups_mean_correct(normal_2groups):
|
|
192
|
+
a = AutoStatLib.StatisticalAnalysis(normal_2groups)
|
|
193
|
+
a.RunAuto()
|
|
194
|
+
r = a.GetResult()
|
|
195
|
+
for i, group in enumerate(normal_2groups):
|
|
196
|
+
assert abs(r['Groups_Mean'][i] - np.mean(group)) < 1e-10
|
|
197
|
+
|
|
198
|
+
def test_groups_median_correct(normal_2groups):
|
|
199
|
+
a = AutoStatLib.StatisticalAnalysis(normal_2groups)
|
|
200
|
+
a.RunAuto()
|
|
201
|
+
r = a.GetResult()
|
|
202
|
+
for i, group in enumerate(normal_2groups):
|
|
203
|
+
assert abs(r['Groups_Median'][i] - np.median(group)) < 1e-10
|
|
204
|
+
|
|
205
|
+
def test_tails_reflected_in_result(normal_2groups):
|
|
206
|
+
for tails in [1, 2]:
|
|
207
|
+
a = AutoStatLib.StatisticalAnalysis(normal_2groups, tails=tails)
|
|
208
|
+
a.RunTtest()
|
|
209
|
+
assert a.GetResult()['Tails'] == tails
|
|
210
|
+
|
|
211
|
+
def test_groups_name_custom(normal_2groups):
|
|
212
|
+
a = AutoStatLib.StatisticalAnalysis(normal_2groups, groups_name=['Control', 'Treatment'])
|
|
213
|
+
a.RunAuto()
|
|
214
|
+
assert a.GetResult()['Groups_Name'] == ['Control', 'Treatment']
|
|
215
|
+
|
|
216
|
+
def test_groups_name_default(normal_2groups):
|
|
217
|
+
a = AutoStatLib.StatisticalAnalysis(normal_2groups)
|
|
218
|
+
a.RunAuto()
|
|
219
|
+
names = a.GetResult()['Groups_Name']
|
|
220
|
+
assert names == ['Group 1', 'Group 2']
|
|
221
|
+
|
|
222
|
+
def test_groups_name_cycles_when_short(normal_3groups):
|
|
223
|
+
a = AutoStatLib.StatisticalAnalysis(normal_3groups, groups_name=['A', 'B'])
|
|
224
|
+
a.RunOnewayAnova()
|
|
225
|
+
names = a.GetResult()['Groups_Name']
|
|
226
|
+
assert len(names) == 3
|
|
227
|
+
assert names[0] == 'A' and names[1] == 'B' and names[2] == 'A'
|
|
228
|
+
|
|
229
|
+
def test_parametric_test_applied_flag(normal_2groups, nonnormal_2groups):
|
|
230
|
+
a_param = AutoStatLib.StatisticalAnalysis(normal_2groups)
|
|
231
|
+
a_param.RunTtest()
|
|
232
|
+
assert a_param.GetResult()['Parametric_Test_Applied'] is True
|
|
233
|
+
|
|
234
|
+
a_nonparam = AutoStatLib.StatisticalAnalysis(nonnormal_2groups)
|
|
235
|
+
a_nonparam.RunMannWhitney()
|
|
236
|
+
assert a_nonparam.GetResult()['Parametric_Test_Applied'] is False
|
|
237
|
+
|
|
238
|
+
def test_successfull_test_flag_on_success(normal_2groups):
|
|
239
|
+
a = AutoStatLib.StatisticalAnalysis(normal_2groups)
|
|
240
|
+
a.RunAuto()
|
|
241
|
+
assert a.GetResult()['Successfull_Test'] is True
|
|
242
|
+
|
|
243
|
+
def test_samples_in_result_matches_input(normal_2groups):
|
|
244
|
+
a = AutoStatLib.StatisticalAnalysis(normal_2groups)
|
|
245
|
+
a.RunAuto()
|
|
246
|
+
samples = a.GetResult()['Samples']
|
|
247
|
+
assert len(samples) == 2
|
|
248
|
+
assert len(samples[0]) == len(normal_2groups[0])
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
# ─────────────────────────────────────────────
|
|
252
|
+
# 4. Normality detection
|
|
253
|
+
# ─────────────────────────────────────────────
|
|
254
|
+
|
|
255
|
+
def test_normal_data_detected_as_normal():
|
|
256
|
+
np.random.seed(42)
|
|
257
|
+
data = list(np.random.normal(0, 1, 100))
|
|
258
|
+
a = AutoStatLib.StatisticalAnalysis([data, data])
|
|
259
|
+
poll = a.check_normality(data)
|
|
260
|
+
assert any(v is True for v in poll), "Normal data should pass at least one normality test"
|
|
261
|
+
|
|
262
|
+
def test_uniform_data_detected_as_nonnormal():
|
|
263
|
+
np.random.seed(42)
|
|
264
|
+
# Uniform is quite non-normal — should fail majority of tests
|
|
265
|
+
data = list(np.random.uniform(0, 1, 100))
|
|
266
|
+
a = AutoStatLib.StatisticalAnalysis([data, data])
|
|
267
|
+
poll = a.check_normality(data)
|
|
268
|
+
passing = sum(1 for v in poll if v is True)
|
|
269
|
+
assert passing <= 2, "Uniform data should fail most normality tests"
|
|
270
|
+
|
|
271
|
+
def test_small_group_skips_ad_and_ap():
|
|
272
|
+
"""Groups < 20 should return None for Anderson-Darling and D'Agostino-Pearson."""
|
|
273
|
+
np.random.seed(0)
|
|
274
|
+
data = list(np.random.normal(0, 1, 10))
|
|
275
|
+
a = AutoStatLib.StatisticalAnalysis([data, data])
|
|
276
|
+
poll = a.check_normality(data) # (sw, lf, ad, ap)
|
|
277
|
+
assert poll[2] is None, "Anderson-Darling should be None for n<20"
|
|
278
|
+
assert poll[3] is None, "D'Agostino-Pearson should be None for n<20"
|
|
279
|
+
|
|
280
|
+
def test_large_group_runs_all_normality_tests():
    """No check_normality() verdict is None for a 50-value seeded normal group."""
    np.random.seed(0)
    draws = list(np.random.normal(0, 1, 50))
    analysis = AutoStatLib.StatisticalAnalysis([draws, draws])
    verdicts = analysis.check_normality(draws)
    assert all(verdict is not None for verdict in verdicts), "All 4 normality tests should run for n>=20"
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
# ─────────────────────────────────────────────
|
|
289
|
+
# 5. Posthoc matrices
|
|
290
|
+
# ─────────────────────────────────────────────
|
|
291
|
+
|
|
292
|
+
def test_posthoc_kruskal_matrix_shape(normal_3groups):
    """After RunKruskalWallis() with posthoc=True, 'Posthoc_Matrix' is n x n for n input groups."""
    analysis = AutoStatLib.StatisticalAnalysis(normal_3groups, posthoc=True)
    analysis.RunKruskalWallis()
    result = analysis.GetResult()
    group_count = len(normal_3groups)
    matrix = result['Posthoc_Matrix']
    assert len(matrix) == group_count
    assert all(len(row) == group_count for row in result['Posthoc_Matrix'])
|
|
299
|
+
|
|
300
|
+
def test_posthoc_anova_matrix_shape(normal_3groups):
    """After RunOnewayAnova() with posthoc=True, 'Posthoc_Matrix' is n x n for n input groups."""
    analysis = AutoStatLib.StatisticalAnalysis(normal_3groups, posthoc=True)
    analysis.RunOnewayAnova()
    result = analysis.GetResult()
    group_count = len(normal_3groups)
    matrix = result['Posthoc_Matrix']
    assert len(matrix) == group_count
    assert all(len(row) == group_count for row in result['Posthoc_Matrix'])
|
|
307
|
+
|
|
308
|
+
def test_posthoc_matrix_diagonal_is_one(normal_3groups):
    """Every diagonal entry of the Kruskal-Wallis posthoc matrix is within 1e-6 of 1.0.

    The diagonal compares a group with itself, so its p-value should be 1.0.
    """
    analysis = AutoStatLib.StatisticalAnalysis(normal_3groups, posthoc=True)
    analysis.RunKruskalWallis()
    result = analysis.GetResult()
    matrix = result['Posthoc_Matrix']
    for i, _ in enumerate(matrix):
        deviation = abs(matrix[i][i] - 1.0)
        assert deviation < 1e-6, f"Diagonal [{i}][{i}] should be 1.0"
|
|
315
|
+
|
|
316
|
+
def test_posthoc_matrix_is_symmetric(normal_3groups):
    """Entries [i][j] and [j][i] of the Kruskal-Wallis posthoc matrix differ by less than 1e-10."""
    analysis = AutoStatLib.StatisticalAnalysis(normal_3groups, posthoc=True)
    analysis.RunKruskalWallis()
    result = analysis.GetResult()
    matrix = result['Posthoc_Matrix']
    size = len(matrix)
    for i in range(size):
        for j in range(size):
            mismatch = abs(matrix[i][j] - matrix[j][i])
            assert mismatch < 1e-10, f"Posthoc matrix not symmetric at [{i}][{j}]"
|
|
325
|
+
|
|
326
|
+
def test_posthoc_matrix_bool_type(normal_3groups):
    """Every cell of 'Posthoc_Matrix_bool' is a bool after RunKruskalWallis() with posthoc=True."""
    analysis = AutoStatLib.StatisticalAnalysis(normal_3groups, posthoc=True)
    analysis.RunKruskalWallis()
    result = analysis.GetResult()
    # Walk the matrix row by row, checking each cell in turn.
    cells = (cell for row in result['Posthoc_Matrix_bool'] for cell in row)
    for cell in cells:
        assert isinstance(cell, bool)
|
|
333
|
+
|
|
334
|
+
def test_posthoc_matrix_stars_type(normal_3groups):
    """Every cell of 'Posthoc_Matrix_stars' is a str after RunKruskalWallis() with posthoc=True."""
    analysis = AutoStatLib.StatisticalAnalysis(normal_3groups, posthoc=True)
    analysis.RunKruskalWallis()
    result = analysis.GetResult()
    # Walk the matrix row by row, checking each cell in turn.
    cells = (cell for row in result['Posthoc_Matrix_stars'] for cell in row)
    for cell in cells:
        assert isinstance(cell, str)
|
|
341
|
+
|
|
342
|
+
def test_posthoc_empty_when_disabled(normal_3groups):
    """With posthoc=False, 'Posthoc_Matrix' equals an empty list after RunKruskalWallis()."""
    analysis = AutoStatLib.StatisticalAnalysis(normal_3groups, posthoc=False)
    analysis.RunKruskalWallis()
    result = analysis.GetResult()
    matrix = result['Posthoc_Matrix']
    assert matrix == []
|
|
347
|
+
|
|
348
|
+
def test_posthoc_name_kruskal(normal_3groups):
    """'Posthoc_Tests_Name' contains "Dunn" after RunKruskalWallis() with posthoc=True."""
    analysis = AutoStatLib.StatisticalAnalysis(normal_3groups, posthoc=True)
    analysis.RunKruskalWallis()
    posthoc_name = analysis.GetResult()['Posthoc_Tests_Name']
    assert "Dunn" in posthoc_name
|
|
352
|
+
|
|
353
|
+
def test_posthoc_name_anova(normal_3groups):
    """'Posthoc_Tests_Name' contains "Tukey" after RunOnewayAnova() with posthoc=True."""
    analysis = AutoStatLib.StatisticalAnalysis(normal_3groups, posthoc=True)
    analysis.RunOnewayAnova()
    posthoc_name = analysis.GetResult()['Posthoc_Tests_Name']
    assert "Tukey" in posthoc_name
|
|
357
|
+
|
|
358
|
+
|
|
359
|
+
# ─────────────────────────────────────────────
|
|
360
|
+
# 6. GetSummary / GetResult / PrintSummary API
|
|
361
|
+
# ─────────────────────────────────────────────
|
|
362
|
+
|
|
363
|
+
def test_get_result_before_test_returns_none(normal_2groups):
    """GetResult() is None when no test has been run on the analysis object."""
    analysis = AutoStatLib.StatisticalAnalysis(normal_2groups)
    result = analysis.GetResult()
    assert result is None
|
|
366
|
+
|
|
367
|
+
def test_get_summary_before_test_returns_string(normal_2groups):
    """GetSummary() returns a str even when no test has been run."""
    analysis = AutoStatLib.StatisticalAnalysis(normal_2groups)
    summary = analysis.GetSummary()
    assert isinstance(summary, str)
|
|
371
|
+
|
|
372
|
+
def test_get_summary_after_test_contains_version(normal_2groups):
    """After RunAuto(), the summary text mentions 'AutoStatLib'."""
    analysis = AutoStatLib.StatisticalAnalysis(normal_2groups)
    analysis.RunAuto()
    summary = analysis.GetSummary()
    assert 'AutoStatLib' in summary
|
|
376
|
+
|
|
377
|
+
def test_get_summary_after_test_contains_test_name(normal_2groups):
    """After RunAuto() on the normal two-group fixture, the summary text mentions 't-test'."""
    analysis = AutoStatLib.StatisticalAnalysis(normal_2groups)
    analysis.RunAuto()
    summary = analysis.GetSummary()
    assert 't-test' in summary
|
|
381
|
+
|
|
382
|
+
def test_print_summary_outputs_text(normal_2groups, capsys):
    """PrintSummary() writes text containing 'AutoStatLib' to stdout, even with verbose=False."""
    analysis = AutoStatLib.StatisticalAnalysis(normal_2groups, verbose=False)
    analysis.RunAuto()
    analysis.PrintSummary()
    # capsys collects everything written to stdout/stderr so far in this test.
    stdout_text = capsys.readouterr().out
    assert 'AutoStatLib' in stdout_text
|
|
388
|
+
|
|
389
|
+
def test_get_test_ids_returns_all_10():
    """GetTestIDs() yields 10 ids, among them 't_test_independent' and 'kruskal_wallis'."""
    analysis = AutoStatLib.StatisticalAnalysis([[1,2,3,4,5],[6,7,8,9,10]])
    test_ids = analysis.GetTestIDs()
    assert len(test_ids) == 10
    for expected_id in ('t_test_independent', 'kruskal_wallis'):
        assert expected_id in test_ids
|
|
395
|
+
|
|
396
|
+
def test_get_result_returns_empty_dict_on_error():
    """Running a one-way ANOVA on only two groups sets the ``error`` flag.

    NOTE(review): the test name is historical. According to the original
    inline comment, GetResult() returns a populated dict rather than {} on a
    group-count mismatch, so only the ``error`` attribute is asserted here.
    """
    analysis = AutoStatLib.StatisticalAnalysis([[1,2,3,4,5],[6,7,8,9,10]])
    analysis.RunOnewayAnova()  # wrong group count — should error
    # Still call GetResult() so an exception raised there fails the test;
    # its return value is deliberately not inspected.
    analysis.GetResult()
    assert analysis.error is True
|
|
401
|
+
|
|
402
|
+
|
|
403
|
+
# ─────────────────────────────────────────────
|
|
404
|
+
# 7. Edge cases & boundary inputs
|
|
405
|
+
# ─────────────────────────────────────────────
|
|
406
|
+
|
|
407
|
+
def test_minimum_group_size_4():
    """RunAuto() on two groups of exactly 4 values (the minimum) yields a non-None result."""
    analysis = AutoStatLib.StatisticalAnalysis([[1,2,3,4],[5,6,7,8]])
    analysis.RunAuto()
    result = analysis.GetResult()
    assert result is not None
|
|
412
|
+
|
|
413
|
+
def test_group_size_3_raises():
    """RunAuto() raises ValueError for groups of 3 values when raise_errors=True.

    Groups of 3 are below the minimum group size. The data is given inline,
    so no fixture is requested.
    """
    analysis = AutoStatLib.StatisticalAnalysis([[1,2,3],[4,5,6]], raise_errors=True)
    with pytest.raises(ValueError):
        analysis.RunAuto()
|
|
418
|
+
|
|
419
|
+
def test_non_numeric_data_ignored():
    """Non-numeric entries are dropped without a crash; the first group keeps its 5 numbers."""
    mixed_groups = [['a','b',1,2,3,4,5],[6,7,8,9,10]]
    analysis = AutoStatLib.StatisticalAnalysis(mixed_groups)
    analysis.RunAuto()
    result = analysis.GetResult()
    assert isinstance(result, dict)
    # 'a' and 'b' should be discarded, leaving only the 5 numeric values.
    first_group_size = result['Groups_N'][0]
    assert first_group_size == 5
|
|
426
|
+
|
|
427
|
+
def test_identical_groups_produces_result():
    """RunAuto() on two zero-variance groups still yields a dict result.

    The p-value itself is not checked because it may be NaN in this case.
    """
    constant_groups = [[5,5,5,5,5],[5,5,5,5,5]]
    analysis = AutoStatLib.StatisticalAnalysis(constant_groups)
    analysis.RunAuto()
    result = analysis.GetResult()
    assert isinstance(result, dict)
|
|
434
|
+
|
|
435
|
+
def test_large_groups_run_successfully():
    """RunAuto() on two seeded 500-value normal groups gives a p-value within [0, 1]."""
    np.random.seed(0)
    # Draw order matters for reproducibility: the mean-0 group first, then the mean-0.5 group.
    first_group = list(np.random.normal(0, 1, 500))
    second_group = list(np.random.normal(0.5, 1, 500))
    analysis = AutoStatLib.StatisticalAnalysis([first_group, second_group])
    analysis.RunAuto()
    result = analysis.GetResult()
    p_value = result['p_value_exact']
    assert 0.0 <= p_value <= 1.0
|
|
442
|
+
|
|
443
|
+
def test_three_groups_2group_test_errors():
    """RunTtest() raises ValueError when given three groups with raise_errors=True."""
    three_groups = [[1,2,3,4,5],[6,7,8,9,10],[11,12,13,14,15]]
    analysis = AutoStatLib.StatisticalAnalysis(three_groups, raise_errors=True)
    with pytest.raises(ValueError):
        analysis.RunTtest()
|
|
449
|
+
|
|
450
|
+
def test_one_group_2sample_test_errors():
    """RunTtest() raises ValueError when given a single group with raise_errors=True."""
    one_group = [[1,2,3,4,5]]
    analysis = AutoStatLib.StatisticalAnalysis(one_group, raise_errors=True)
    with pytest.raises(ValueError):
        analysis.RunTtest()
|
|
455
|
+
|
|
456
|
+
def test_paired_unequal_length_errors():
    """RunTtestPaired() raises ValueError for paired groups of lengths 5 and 6 (raise_errors=True)."""
    unequal_groups = [[1,2,3,4,5],[6,7,8,9,10,11]]
    analysis = AutoStatLib.StatisticalAnalysis(
        unequal_groups, paired=True, raise_errors=True)
    with pytest.raises(ValueError):
        analysis.RunTtestPaired()
|
|
461
|
+
|
|
462
|
+
def test_wrong_tails_value_raises():
    """RunAuto() raises ValueError when tails=3 and raise_errors=True."""
    groups = [[1,2,3,4,5],[6,7,8,9,10]]
    analysis = AutoStatLib.StatisticalAnalysis(groups, tails=3, raise_errors=True)
    with pytest.raises(ValueError):
        analysis.RunAuto()
|
|
466
|
+
|
|
467
|
+
def test_popmean_none_triggers_warning_single_sample(single_group):
    """RunTtestSingleSample() without a popmean leaves at least one entry in 'Warnings'."""
    # popmean is deliberately omitted here.
    analysis = AutoStatLib.StatisticalAnalysis(single_group)
    analysis.RunTtestSingleSample()
    warnings_list = analysis.GetResult()['Warnings']
    assert len(warnings_list) > 0
|
|
473
|
+
|
|
474
|
+
def test_popmean_set_no_warning(single_group):
    """RunTtestSingleSample() with popmean=0 leaves 'Warnings' empty."""
    analysis = AutoStatLib.StatisticalAnalysis(single_group, popmean=0)
    analysis.RunTtestSingleSample()
    warnings_list = analysis.GetResult()['Warnings']
    assert len(warnings_list) == 0
|
|
479
|
+
|
|
480
|
+
def test_manual_nonparam_on_normal_triggers_warning(normal_2groups):
    """Manually running Mann-Whitney on the normal fixture leaves at least one warning."""
    analysis = AutoStatLib.StatisticalAnalysis(normal_2groups)
    analysis.RunMannWhitney()
    # A non-parametric test on normal data is expected to be flagged.
    warnings_list = analysis.GetResult()['Warnings']
    assert len(warnings_list) > 0
|
|
485
|
+
|
|
486
|
+
def test_manual_param_on_nonnormal_triggers_warning(nonnormal_2groups):
    """Manually running the t-test on the non-normal fixture leaves at least one warning."""
    analysis = AutoStatLib.StatisticalAnalysis(nonnormal_2groups)
    analysis.RunTtest()
    warnings_list = analysis.GetResult()['Warnings']
    assert len(warnings_list) > 0
|
|
491
|
+
|
|
492
|
+
|
|
493
|
+
# ─────────────────────────────────────────────
|
|
494
|
+
# 8. Stars & p-value formatting
|
|
495
|
+
# ─────────────────────────────────────────────
|
|
496
|
+
|
|
497
|
+
@pytest.mark.parametrize(
    ("p", "expected_stars"),
    [
        (0.00001, 4),
        (0.0005, 3),
        (0.005, 2),
        (0.04, 1),
        (0.06, 0),
        (0.5, 0),
        (1.0, 0),
    ],
)
def test_make_stars_parametrized(p, expected_stars):
    """make_stars(p) returns the expected star count for each sample p-value."""
    analysis = AutoStatLib.StatisticalAnalysis([[1,2,3,4,5],[6,7,8,9,10]])
    star_count = analysis.make_stars(p)
    assert star_count == expected_stars
|
|
509
|
+
|
|
510
|
+
@pytest.mark.parametrize(
    ("stars", "expected_str"),
    [
        (0, 'ns'),
        (1, '*'),
        (2, '**'),
        (3, '***'),
        (4, '****'),
    ],
)
def test_make_stars_printed_parametrized(stars, expected_str):
    """make_stars_printed(stars) renders 0 as 'ns' and 1-4 as that many asterisks."""
    analysis = AutoStatLib.StatisticalAnalysis([[1,2,3,4,5],[6,7,8,9,10]])
    rendered = analysis.make_stars_printed(stars)
    assert rendered == expected_str
|
|
520
|
+
|
|
521
|
+
@pytest.mark.parametrize(
    ("p", "expected_prefix"),
    [
        (1.0, 'p>'),
        (0.5, 'p='),
        (0.01, 'p='),
        (0.001, 'p='),
        (0.00005, 'p<'),
        (None, 'N/A'),
    ],
)
def test_make_p_value_printed_format(p, expected_prefix):
    """make_p_value_printed(p) starts with the expected prefix for each sample input."""
    analysis = AutoStatLib.StatisticalAnalysis([[1,2,3,4,5],[6,7,8,9,10]])
    result = analysis.make_p_value_printed(p)
    has_prefix = result.startswith(expected_prefix)
    assert has_prefix, f"For p={p}: got '{result}', expected prefix '{expected_prefix}'"
|
|
533
|
+
|
|
534
|
+
def test_stars_consistent_with_p_value(normal_2groups):
    """'Stars' and 'Stars_Printed' in the result agree with make_stars()/make_stars_printed()."""
    analysis = AutoStatLib.StatisticalAnalysis(normal_2groups)
    analysis.RunAuto()
    result = analysis.GetResult()
    expected_stars = analysis.make_stars(result['p_value_exact'])
    assert result['Stars'] == expected_stars
    expected_printed = analysis.make_stars_printed(result['Stars'])
    assert result['Stars_Printed'] == expected_printed
|
|
540
|
+
|
|
541
|
+
|
|
542
|
+
# ─────────────────────────────────────────────
|
|
543
|
+
# 9. Tails behaviour
|
|
544
|
+
# ─────────────────────────────────────────────
|
|
545
|
+
|
|
546
|
+
def test_one_tailed_is_half_two_tailed_ttest(normal_2groups):
    """The t-test p-value with tails=1 equals half the tails=2 p-value (tolerance 1e-10)."""
    p_by_tails = {}
    # Two-tailed run first, then one-tailed, each on a fresh analysis object.
    for tails in (2, 1):
        analysis = AutoStatLib.StatisticalAnalysis(normal_2groups, tails=tails)
        analysis.RunTtest()
        p_by_tails[tails] = analysis.GetResult()['p_value_exact']

    assert abs(p_by_tails[1] - p_by_tails[2] / 2) < 1e-10
|
|
556
|
+
|
|
557
|
+
def test_one_tailed_is_half_two_tailed_mann_whitney(nonnormal_2groups):
    """The Mann-Whitney p-value with tails=1 equals half the tails=2 p-value (tolerance 1e-10)."""
    p_by_tails = {}
    # Two-tailed run first, then one-tailed, each on a fresh analysis object.
    for tails in (2, 1):
        analysis = AutoStatLib.StatisticalAnalysis(nonnormal_2groups, tails=tails)
        analysis.RunMannWhitney()
        p_by_tails[tails] = analysis.GetResult()['p_value_exact']

    assert abs(p_by_tails[1] - p_by_tails[2] / 2) < 1e-10
|
|
567
|
+
|
|
568
|
+
def test_one_tailed_is_half_two_tailed_wilcoxon_single(single_group):
    """The single-sample Wilcoxon p-value with tails=1 equals half the tails=2 p-value (tolerance 1e-10)."""
    p_by_tails = {}
    # Two-tailed run first, then one-tailed, each on a fresh analysis object with popmean=0.
    for tails in (2, 1):
        analysis = AutoStatLib.StatisticalAnalysis(single_group, tails=tails, popmean=0)
        analysis.RunWilcoxonSingleSample()
        p_by_tails[tails] = analysis.GetResult()['p_value_exact']

    assert abs(p_by_tails[1] - p_by_tails[2] / 2) < 1e-10
|
|
578
|
+
|
|
579
|
+
|
|
580
|
+
# ─────────────────────────────────────────────
|
|
581
|
+
# 10. verbose=False produces no stdout
|
|
582
|
+
# ─────────────────────────────────────────────
|
|
583
|
+
|
|
584
|
+
@pytest.mark.parametrize(
    "run_method",
    [
        'RunTtest',
        'RunMannWhitney',
        'RunTtestSingleSample',
        'RunWilcoxonSingleSample',
        'RunKruskalWallis',
        'RunFriedman',
    ],
)
def test_verbose_false_suppresses_output(run_method, capsys):
    """With verbose=False, calling the given Run* method writes nothing to stdout.

    The number of seeded normal groups depends on the method: two for the
    two-sample tests, one for the single-sample tests, three otherwise.
    """
    np.random.seed(0)
    two_sample_methods = ('RunTtest', 'RunMannWhitney')
    single_sample_methods = ('RunTtestSingleSample', 'RunWilcoxonSingleSample')
    # Pick the group means; every group has 10 values with standard deviation 1.
    if run_method in two_sample_methods:
        group_means = (0, 1)
    elif run_method in single_sample_methods:
        group_means = (5,)
    else:
        group_means = range(3)
    data = [list(np.random.normal(mean, 1, 10)) for mean in group_means]
    analysis = AutoStatLib.StatisticalAnalysis(data, verbose=False, popmean=0)
    runner = getattr(analysis, run_method)
    runner()
    assert capsys.readouterr().out == ''
|
|
599
|
+
|
|
600
|
+
|
|
601
|
+
|
|
602
|
+
if __name__ == '__main__':
    # Running this file directly executes its tests verbosely. pytest.main()
    # returns the exit code, which is propagated so that a failing run gives
    # a non-zero process status instead of always exiting with 0.
    raise SystemExit(pytest.main([__file__, '-v']))
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|