AutoStatLib 0.2.26__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {autostatlib-0.2.26/src/AutoStatLib.egg-info → autostatlib-0.3.0}/PKG-INFO +1 -1
- {autostatlib-0.2.26 → autostatlib-0.3.0}/src/AutoStatLib/AutoStatLib.py +19 -10
- {autostatlib-0.2.26 → autostatlib-0.3.0}/src/AutoStatLib/StatPlots.py +35 -25
- {autostatlib-0.2.26 → autostatlib-0.3.0}/src/AutoStatLib/_version.py +1 -1
- {autostatlib-0.2.26 → autostatlib-0.3.0}/src/AutoStatLib/helpers.py +4 -4
- {autostatlib-0.2.26 → autostatlib-0.3.0}/src/AutoStatLib/statistical_tests.py +16 -10
- {autostatlib-0.2.26 → autostatlib-0.3.0/src/AutoStatLib.egg-info}/PKG-INFO +1 -1
- {autostatlib-0.2.26 → autostatlib-0.3.0}/src/AutoStatLib.egg-info/SOURCES.txt +2 -1
- autostatlib-0.3.0/tests/test_autostatlib.py +148 -0
- {autostatlib-0.2.26 → autostatlib-0.3.0}/LICENSE +0 -0
- {autostatlib-0.2.26 → autostatlib-0.3.0}/MANIFEST.in +0 -0
- {autostatlib-0.2.26 → autostatlib-0.3.0}/README.md +0 -0
- {autostatlib-0.2.26 → autostatlib-0.3.0}/pyproject.toml +0 -0
- {autostatlib-0.2.26 → autostatlib-0.3.0}/requirements.txt +0 -0
- {autostatlib-0.2.26 → autostatlib-0.3.0}/setup.cfg +0 -0
- {autostatlib-0.2.26 → autostatlib-0.3.0}/src/AutoStatLib/__init__.py +0 -0
- {autostatlib-0.2.26 → autostatlib-0.3.0}/src/AutoStatLib/__main__.py +0 -0
- {autostatlib-0.2.26 → autostatlib-0.3.0}/src/AutoStatLib/normality_tests.py +0 -0
- {autostatlib-0.2.26 → autostatlib-0.3.0}/src/AutoStatLib/text_formatting.py +0 -0
- {autostatlib-0.2.26 → autostatlib-0.3.0}/src/AutoStatLib.egg-info/dependency_links.txt +0 -0
- {autostatlib-0.2.26 → autostatlib-0.3.0}/src/AutoStatLib.egg-info/requires.txt +0 -0
- {autostatlib-0.2.26 → autostatlib-0.3.0}/src/AutoStatLib.egg-info/top_level.txt +0 -0
|
@@ -20,10 +20,9 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
|
|
|
20
20
|
posthoc=False,
|
|
21
21
|
verbose=True,
|
|
22
22
|
raise_errors=False,
|
|
23
|
-
groups_name=
|
|
24
|
-
subgrouping=
|
|
25
|
-
|
|
26
|
-
self.error = False
|
|
23
|
+
groups_name=None,
|
|
24
|
+
subgrouping=None):
|
|
25
|
+
|
|
27
26
|
self.groups_list = groups_list
|
|
28
27
|
self.paired = paired
|
|
29
28
|
self.tails = tails
|
|
@@ -34,7 +33,7 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
|
|
|
34
33
|
self.n_groups = len(self.groups_list)
|
|
35
34
|
self.groups_name = [groups_name[i % len(groups_name)]
|
|
36
35
|
for i in range(self.n_groups)] if groups_name and groups_name != [''] else [f'Group {i+1}' for i in range(self.n_groups)]
|
|
37
|
-
self.subgrouping = subgrouping if subgrouping else [0]
|
|
36
|
+
self.subgrouping = subgrouping if subgrouping is not None else [0]
|
|
38
37
|
self.warning_flag_non_numeric_data = False
|
|
39
38
|
self.summary = 'AutoStatLib v{}'.format(__version__)
|
|
40
39
|
|
|
@@ -68,7 +67,7 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
|
|
|
68
67
|
]
|
|
69
68
|
self.test_ids_parametric = [
|
|
70
69
|
'anova_1w_ordinary',
|
|
71
|
-
'anova_1w_rm'
|
|
70
|
+
'anova_1w_rm',
|
|
72
71
|
't_test_independent',
|
|
73
72
|
't_test_paired',
|
|
74
73
|
't_test_single_sample',
|
|
@@ -100,6 +99,8 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
|
|
|
100
99
|
'param_test_with_non-normal_data': '\nWarning: Parametric test was manualy chosen for Not-Normaly distributed data.\n The results might be skewed. \n Please, run non-parametric test or preform automatic test selection.\n',
|
|
101
100
|
'non-param_test_with_normal_data': '\nWarning: Non-Parametric test was manualy chosen for Normaly distributed data.\n The results might be skewed. \n Please, run parametric test or preform automatic test selection.\n',
|
|
102
101
|
'no_pop_mean_set': '\nWarning: No Population Mean was set up for single-sample test, used default 0 value.\n The results might be skewed. \n Please, set the Population Mean and run the test again.\n',
|
|
102
|
+
'paired_test_with_independend_samples': '\nWarning: A paired test was manually selected, even though the samples were declared independent.\n The results might be skewed. \n Please, run test for independend samples or preform automatic test selection.\n',
|
|
103
|
+
'independend_test_with_paired_samples': '\nWarning: An independent test was manually selected, even though the samples were declared paired.\n The results might be skewed. \n Please, run test for paired samples or preform automatic test selection.\n',
|
|
103
104
|
}
|
|
104
105
|
|
|
105
106
|
def run_test(self, test='auto'):
|
|
@@ -131,6 +132,8 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
|
|
|
131
132
|
# delete the empty cols from input
|
|
132
133
|
self.data = [col for col in self.data if any(
|
|
133
134
|
x is not None for x in col)]
|
|
135
|
+
# re-calculate the number of groups after removing empty cols
|
|
136
|
+
self.n_groups = len(self.data)
|
|
134
137
|
|
|
135
138
|
# User input assertion block
|
|
136
139
|
try:
|
|
@@ -139,7 +142,7 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
|
|
|
139
142
|
assert test in self.test_ids_all or test == 'auto', 'Wrong test id choosen, ensure you called correct function'
|
|
140
143
|
assert all(len(
|
|
141
144
|
group) >= 4 for group in self.data), 'Each group must contain at least four values'
|
|
142
|
-
assert not (self.paired is True
|
|
145
|
+
assert not (test in self.test_ids_dependent # self.paired is True
|
|
143
146
|
and not all(len(lst) == len(self.data[0]) for lst in self.data)), 'Paired samples must have the same length'
|
|
144
147
|
assert not (test in self.test_ids_dependent
|
|
145
148
|
and not all(len(lst) == len(self.data[0]) for lst in self.data)), 'Samples must have the same length for the dependend statistics test'
|
|
@@ -203,8 +206,14 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
|
|
|
203
206
|
if test != 'auto' and self.parametric and test not in self.test_ids_parametric:
|
|
204
207
|
self.AddWarning('non-param_test_with_normal_data')
|
|
205
208
|
|
|
206
|
-
#
|
|
209
|
+
# Maybe unneeded checks for manually selected tests
|
|
210
|
+
# because the user probably knows which test they selected
|
|
211
|
+
if test != 'auto' and not self.paired and test in self.test_ids_dependent:
|
|
212
|
+
self.AddWarning('paired_test_with_independend_samples')
|
|
213
|
+
if test != 'auto' and self.paired and test not in self.test_ids_dependent:
|
|
214
|
+
self.AddWarning('independend_test_with_paired_samples')
|
|
207
215
|
|
|
216
|
+
# run the test
|
|
208
217
|
if test in self.test_ids_all:
|
|
209
218
|
self.run_test_by_id(test)
|
|
210
219
|
else:
|
|
@@ -260,7 +269,7 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
|
|
|
260
269
|
self.run_test(test='wilcoxon')
|
|
261
270
|
|
|
262
271
|
def GetResult(self):
|
|
263
|
-
if
|
|
272
|
+
if self.results is None and not self.error:
|
|
264
273
|
print('No test chosen, no results to output')
|
|
265
274
|
# self.run_test(test='auto')
|
|
266
275
|
return self.results
|
|
@@ -271,7 +280,7 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
|
|
|
271
280
|
return self.results
|
|
272
281
|
|
|
273
282
|
def GetSummary(self):
|
|
274
|
-
if
|
|
283
|
+
if self.results is None and not self.error:
|
|
275
284
|
print('No test chosen, no summary to output')
|
|
276
285
|
# self.run_test(test='auto')
|
|
277
286
|
return self.summary
|
|
@@ -145,8 +145,8 @@ class BaseStatPlot(Helpers):
|
|
|
145
145
|
self.median = [
|
|
146
146
|
np.median(self.data_groups[i]).item() for i in range(self.n_groups)]
|
|
147
147
|
self.sd = [
|
|
148
|
-
np.std(self.data_groups[i]).item() for i in range(self.n_groups)]
|
|
149
|
-
self.sem = [np.std(self.data_groups[i]).item() / np.sqrt(len(self.data_groups[i])).item()
|
|
148
|
+
np.std(self.data_groups[i], ddof=1).item() for i in range(self.n_groups)]
|
|
149
|
+
self.sem = [np.std(self.data_groups[i], ddof=1).item() / np.sqrt(len(self.data_groups[i])).item()
|
|
150
150
|
for i in range(self.n_groups)]
|
|
151
151
|
|
|
152
152
|
self.n = [len(i) for i in self.data_groups]
|
|
@@ -274,65 +274,65 @@ class BaseStatPlot(Helpers):
|
|
|
274
274
|
fliersMarker = 'b+'
|
|
275
275
|
# write a function to make a dictionary
|
|
276
276
|
whiskersCapsStyles = dict()
|
|
277
|
-
if whiskersCapsColor
|
|
277
|
+
if whiskersCapsColor is not None:
|
|
278
278
|
whiskersCapsStyles["color"] = whiskersCapsColor
|
|
279
|
-
if whiskersCapsLineWidth
|
|
279
|
+
if whiskersCapsLineWidth is not None:
|
|
280
280
|
whiskersCapsStyles["linewidth"] = whiskersCapsLineWidth
|
|
281
|
-
if whiskersCapsLineStyle
|
|
281
|
+
if whiskersCapsLineStyle is not None:
|
|
282
282
|
whiskersCapsStyles['linestyle'] = whiskersCapsLineStyle
|
|
283
283
|
|
|
284
284
|
boxProps = {"facecolor": (0, 0, 0, 0),
|
|
285
285
|
"edgecolor": "black", "linewidth": 1}
|
|
286
|
-
if boxFill
|
|
286
|
+
if boxFill is not None:
|
|
287
287
|
boxProps["facecolor"] = boxFill
|
|
288
|
-
if boxBorderColor
|
|
288
|
+
if boxBorderColor is not None:
|
|
289
289
|
boxProps["edgecolor"] = boxBorderColor
|
|
290
|
-
if boxBorderWidth
|
|
290
|
+
if boxBorderWidth is not None:
|
|
291
291
|
boxProps['linewidth'] = boxBorderWidth
|
|
292
|
-
# if boxBorderStyle
|
|
292
|
+
# if boxBorderStyle is not None:
|
|
293
293
|
# boxProps['linestyle'] = boxBorderStyle !!!this feature is not working with patch_artist that is needed for facecolor to work
|
|
294
294
|
|
|
295
295
|
whiskersProps = {"color": 'black',
|
|
296
296
|
"linestyle": "solid", "linewidth": 1}
|
|
297
|
-
if whiskersColor
|
|
297
|
+
if whiskersColor is not None:
|
|
298
298
|
whiskersProps["color"] = whiskersColor
|
|
299
|
-
if whiskersLineStyle
|
|
299
|
+
if whiskersLineStyle is not None:
|
|
300
300
|
whiskersProps["linestyle"] = whiskersLineStyle
|
|
301
|
-
if whiskersLineWidth
|
|
301
|
+
if whiskersLineWidth is not None:
|
|
302
302
|
whiskersProps['linewidth'] = whiskersLineWidth
|
|
303
303
|
|
|
304
304
|
flierProps = {"markerfacecolor": [
|
|
305
305
|
0, 0, 0, 0], "markeredgecolor": "black", "linestyle": "solid", "markeredgewidth": 1}
|
|
306
|
-
if flierFillColor
|
|
306
|
+
if flierFillColor is not None:
|
|
307
307
|
flierProps["markerfacecolor"] = flierFillColor
|
|
308
|
-
if flierEdgeColor
|
|
308
|
+
if flierEdgeColor is not None:
|
|
309
309
|
flierProps["markeredgecolor"] = flierEdgeColor
|
|
310
|
-
if flierLineWidth
|
|
310
|
+
if flierLineWidth is not None:
|
|
311
311
|
flierProps['markeredgewidth'] = flierLineWidth
|
|
312
|
-
if flierLineStyle
|
|
312
|
+
if flierLineStyle is not None:
|
|
313
313
|
flierProps['linestyle'] = flierLineStyle
|
|
314
314
|
medianProps = {"linestyle": 'solid', "linewidth": 1, "color": 'red'}
|
|
315
|
-
if medianColor
|
|
315
|
+
if medianColor is not None:
|
|
316
316
|
medianProps["color"] = medianColor
|
|
317
|
-
if medianLineStyle
|
|
317
|
+
if medianLineStyle is not None:
|
|
318
318
|
medianProps["linestyle"] = medianLineStyle
|
|
319
|
-
if medianLineWidth
|
|
319
|
+
if medianLineWidth is not None:
|
|
320
320
|
medianProps['linewidth'] = medianLineWidth
|
|
321
321
|
|
|
322
322
|
meanProps = {"color": "black", "marker": 'o', "markerfacecolor": "black",
|
|
323
323
|
"markeredgecolor": "black", "linestyle": "solid", "linewidth": 1}
|
|
324
324
|
|
|
325
|
-
if meanMarker
|
|
325
|
+
if meanMarker is not None:
|
|
326
326
|
meanProps['marker'] = meanMarker
|
|
327
|
-
if meanFillColor
|
|
327
|
+
if meanFillColor is not None:
|
|
328
328
|
meanProps["markerfacecolor"] = meanFillColor
|
|
329
|
-
if meanEdgeColor
|
|
329
|
+
if meanEdgeColor is not None:
|
|
330
330
|
meanProps['markeredgecolor'] = meanEdgeColor
|
|
331
|
-
if meanLineColor
|
|
331
|
+
if meanLineColor is not None:
|
|
332
332
|
meanProps["color"] = meanLineColor
|
|
333
|
-
if meanLineStyle
|
|
333
|
+
if meanLineStyle is not None:
|
|
334
334
|
meanProps['linestyle'] = meanLineStyle
|
|
335
|
-
if meanLineWidth
|
|
335
|
+
if meanLineWidth is not None:
|
|
336
336
|
meanProps['linewidth'] = meanLineWidth
|
|
337
337
|
|
|
338
338
|
bplot = ax.boxplot(self.data_groups,
|
|
@@ -901,6 +901,16 @@ class SwarmStatPlot(BaseStatPlot):
|
|
|
901
901
|
|
|
902
902
|
|
|
903
903
|
class SwarmStatPlot_subgrouping_betta(BaseStatPlot):
|
|
904
|
+
'''
|
|
905
|
+
Swarm plot with subgrouping support. Subgrouping is defined by the user as a list of labels (one per data point)
|
|
906
|
+
that indicate which subgroup each data point belongs to.
|
|
907
|
+
The plot will automatically assign different colors to each unique subgroup label,
|
|
908
|
+
and add a legend to indicate which color corresponds to which subgroup.
|
|
909
|
+
Not tested well, use with caution.
|
|
910
|
+
For now, only supports one subgrouping across all groups,
|
|
911
|
+
so the subgrouping list should have the same length as the total number of data points across all groups.
|
|
912
|
+
'''
|
|
913
|
+
|
|
904
914
|
|
|
905
915
|
def plot(self, linewidth=1.8):
|
|
906
916
|
if not self.error:
|
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
# AutoStatLib package version:
|
|
2
|
-
__version__ = "0.
|
|
2
|
+
__version__ = "0.3.0"
|
|
@@ -48,7 +48,7 @@ class Helpers():
|
|
|
48
48
|
def create_results_dict(self) -> dict:
|
|
49
49
|
|
|
50
50
|
# evaluate whether the test succeeded
|
|
51
|
-
if self.p_value
|
|
51
|
+
if self.p_value is not None:
|
|
52
52
|
self.successfull = True
|
|
53
53
|
else:
|
|
54
54
|
self.successfull = False
|
|
@@ -68,7 +68,7 @@ class Helpers():
|
|
|
68
68
|
'Population_Mean': self.popmean if self.n_groups == 1 else 'N/A',
|
|
69
69
|
'Data_Normaly_Distributed': self.parametric if self.successfull else None,
|
|
70
70
|
'Parametric_Test_Applied': True if self.test_id in self.test_ids_parametric else False,
|
|
71
|
-
'Paired_Test_Applied': self.
|
|
71
|
+
'Paired_Test_Applied': self.paired_test_applied if self.successfull else None,
|
|
72
72
|
'Tails': self.tails,
|
|
73
73
|
'p_value_exact': self.p_value.item() if self.successfull else None,
|
|
74
74
|
'Stars': self.stars_int,
|
|
@@ -79,8 +79,8 @@ class Helpers():
|
|
|
79
79
|
'Groups_N': [len(self.data[i]) for i in range(len(self.data))],
|
|
80
80
|
'Groups_Median': [np.median(self.data[i]).item() for i in range(len(self.data))],
|
|
81
81
|
'Groups_Mean': [np.mean(self.data[i]).item() for i in range(len(self.data))],
|
|
82
|
-
'Groups_SD': [np.std(self.data[i]).item() for i in range(len(self.data))],
|
|
83
|
-
'Groups_SE': [np.std(self.data[i]).item() / np.sqrt(len(self.data))
|
|
82
|
+
'Groups_SD': [np.std(self.data[i], ddof=1).item() for i in range(len(self.data))],
|
|
83
|
+
'Groups_SE': [np.std(self.data[i], ddof=1).item() / np.sqrt(len(self.data[i])) for i in range(len(self.data))],
|
|
84
84
|
'subgrouping': self.subgrouping,
|
|
85
85
|
# actually returns list of lists of numpy dtypes of float64, next make it return regular floats:
|
|
86
86
|
'Samples': self.data,
|
|
@@ -6,6 +6,16 @@ from statsmodels.stats.multicomp import pairwise_tukeyhsd
|
|
|
6
6
|
from statsmodels.stats.multitest import multipletests
|
|
7
7
|
from scipy.stats import ttest_rel, ttest_ind, ttest_1samp, wilcoxon, mannwhitneyu, f_oneway, kruskal, friedmanchisquare
|
|
8
8
|
|
|
9
|
+
# Known bugs:
|
|
10
|
+
|
|
11
|
+
# One-tailed p-value: no directionality check
|
|
12
|
+
# File: statistical_tests.py — t_test_independent, t_test_paired, mann_whitney, wilcoxon, etc.
|
|
13
|
+
# if self.tails == 1:
|
|
14
|
+
# p_value /= 2
|
|
15
|
+
# Dividing a two-tailed p-value by 2 is only valid when the test statistic falls in the hypothesized direction. If the effect is in the opposite direction, the one-tailed p should be 1 - p_two_tailed/2. Without an `alternative` parameter exposed to the user, results for one-tailed tests where the effect direction is "wrong" will be misleading.
|
|
16
|
+
# Recommendation: Either expose an alternative='less'/'greater' parameter and pass it to scipy.stats directly (which handles it correctly), or document that one-tailed results are only valid when the observed effect is in the expected direction.
|
|
17
|
+
|
|
18
|
+
|
|
9
19
|
|
|
10
20
|
class StatisticalTests():
|
|
11
21
|
'''
|
|
@@ -76,11 +86,7 @@ class StatisticalTests():
|
|
|
76
86
|
case 'wilcoxon_single_sample': stat, p_value = self.wilcoxon_single_sample()
|
|
77
87
|
case 'none': stat, p_value = (None, None)
|
|
78
88
|
|
|
79
|
-
|
|
80
|
-
self.paired = True
|
|
81
|
-
else:
|
|
82
|
-
self.paired = False
|
|
83
|
-
|
|
89
|
+
self.paired_test_applied = test_id in self.test_ids_dependent
|
|
84
90
|
self.test_name = test_names_dict[test_id]
|
|
85
91
|
self.test_id = test_id
|
|
86
92
|
self.test_stat = stat
|
|
@@ -102,10 +108,10 @@ class StatisticalTests():
|
|
|
102
108
|
# Tukey's multiple comparisons
|
|
103
109
|
tukey_result = pairwise_tukeyhsd(data_flat, group_labels)
|
|
104
110
|
|
|
105
|
-
|
|
111
|
+
tukey_pvalues = tukey_result.pvalues.tolist()
|
|
106
112
|
n = self.n_groups
|
|
107
113
|
# prepare posthoc matrix
|
|
108
|
-
self.posthoc_matrix = self.list_to_matrix(
|
|
114
|
+
self.posthoc_matrix = self.list_to_matrix(tukey_pvalues, n)
|
|
109
115
|
|
|
110
116
|
return stat, p_value
|
|
111
117
|
|
|
@@ -119,7 +125,7 @@ class StatisticalTests():
|
|
|
119
125
|
|
|
120
126
|
df = self.matrix_to_dataframe(self.data)
|
|
121
127
|
res = AnovaRM(df, 'Value', 'Row', within=['Col']).fit()
|
|
122
|
-
|
|
128
|
+
|
|
123
129
|
stat = res.anova_table.iloc[0][0]
|
|
124
130
|
p_value = res.anova_table.iloc[0][3]
|
|
125
131
|
|
|
@@ -186,7 +192,7 @@ class StatisticalTests():
|
|
|
186
192
|
return stat, p_value
|
|
187
193
|
|
|
188
194
|
def t_test_single_sample(self):
|
|
189
|
-
if self.popmean
|
|
195
|
+
if self.popmean is None:
|
|
190
196
|
self.popmean = 0
|
|
191
197
|
self.AddWarning('no_pop_mean_set')
|
|
192
198
|
stat, p_value = ttest_1samp(self.data[0], self.popmean)
|
|
@@ -201,7 +207,7 @@ class StatisticalTests():
|
|
|
201
207
|
return stat, p_value
|
|
202
208
|
|
|
203
209
|
def wilcoxon_single_sample(self):
|
|
204
|
-
if self.popmean
|
|
210
|
+
if self.popmean is None:
|
|
205
211
|
self.popmean = 0
|
|
206
212
|
self.AddWarning('no_pop_mean_set')
|
|
207
213
|
data = [i - self.popmean for i in self.data[0]]
|
|
@@ -16,4 +16,5 @@ src/AutoStatLib.egg-info/PKG-INFO
|
|
|
16
16
|
src/AutoStatLib.egg-info/SOURCES.txt
|
|
17
17
|
src/AutoStatLib.egg-info/dependency_links.txt
|
|
18
18
|
src/AutoStatLib.egg-info/requires.txt
|
|
19
|
-
src/AutoStatLib.egg-info/top_level.txt
|
|
19
|
+
src/AutoStatLib.egg-info/top_level.txt
|
|
20
|
+
tests/test_autostatlib.py
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
# tests/test_autostatlib.py
|
|
2
|
+
import pytest
|
|
3
|
+
import numpy as np
|
|
4
|
+
import AutoStatLib
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
# --- Fixtures ---
|
|
8
|
+
@pytest.fixture
|
|
9
|
+
def normal_2groups():
|
|
10
|
+
np.random.seed(42)
|
|
11
|
+
return [list(np.random.normal(0, 1, 20)), list(np.random.normal(1, 1, 20))]
|
|
12
|
+
|
|
13
|
+
@pytest.fixture
|
|
14
|
+
def nonnormal_2groups():
|
|
15
|
+
np.random.seed(42)
|
|
16
|
+
return [list(np.random.exponential(1, 20)), list(np.random.exponential(2, 20))]
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# --- Basic functionality ---
|
|
20
|
+
def test_run_auto_returns_result(normal_2groups):
|
|
21
|
+
a = AutoStatLib.StatisticalAnalysis(normal_2groups)
|
|
22
|
+
a.RunAuto()
|
|
23
|
+
r = a.GetResult()
|
|
24
|
+
assert isinstance(r, dict)
|
|
25
|
+
assert 'p_value_exact' in r
|
|
26
|
+
assert 0.0 <= r['p_value_exact'] <= 1.0
|
|
27
|
+
|
|
28
|
+
def test_run_auto_selects_ttest_for_normal(normal_2groups):
|
|
29
|
+
a = AutoStatLib.StatisticalAnalysis(normal_2groups)
|
|
30
|
+
a.RunAuto()
|
|
31
|
+
assert a.test_id == 't_test_independent'
|
|
32
|
+
|
|
33
|
+
def test_run_auto_selects_mann_whitney_for_nonnormal(nonnormal_2groups):
|
|
34
|
+
a = AutoStatLib.StatisticalAnalysis(nonnormal_2groups)
|
|
35
|
+
a.RunAuto()
|
|
36
|
+
assert a.test_id == 'mann_whitney'
|
|
37
|
+
|
|
38
|
+
def test_verbose_false_no_print(normal_2groups, capsys):
|
|
39
|
+
a = AutoStatLib.StatisticalAnalysis(normal_2groups, verbose=False)
|
|
40
|
+
a.RunAuto()
|
|
41
|
+
captured = capsys.readouterr()
|
|
42
|
+
assert captured.out == ''
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
# --- Result dict completeness ---
|
|
46
|
+
def test_result_dict_keys(normal_2groups):
|
|
47
|
+
a = AutoStatLib.StatisticalAnalysis(normal_2groups)
|
|
48
|
+
a.RunAuto()
|
|
49
|
+
r = a.GetResult()
|
|
50
|
+
required_keys = [
|
|
51
|
+
'p_value', 'p_value_exact', 'Significance(p<0.05)', 'Stars',
|
|
52
|
+
'Stars_Printed', 'Test_Name', 'Groups_N', 'Groups_Mean',
|
|
53
|
+
'Groups_SD', 'Groups_SE', 'Groups_Median', 'Warnings',
|
|
54
|
+
]
|
|
55
|
+
for key in required_keys:
|
|
56
|
+
assert key in r, f"Missing key: {key}"
|
|
57
|
+
|
|
58
|
+
def test_se_calculation_correct(normal_2groups):
|
|
59
|
+
"""SE = std / sqrt(n) per group, not std / sqrt(num_groups)."""
|
|
60
|
+
a = AutoStatLib.StatisticalAnalysis(normal_2groups)
|
|
61
|
+
a.RunAuto()
|
|
62
|
+
r = a.GetResult()
|
|
63
|
+
for i, group in enumerate(normal_2groups):
|
|
64
|
+
expected_se = np.std(group, ddof=1) / np.sqrt(len(group))
|
|
65
|
+
assert abs(r['Groups_SE'][i] - expected_se) < 0.01, \
|
|
66
|
+
f"SE for group {i} is wrong: {r['Groups_SE'][i]} vs {expected_se}"
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
# --- Error handling ---
|
|
70
|
+
def test_raises_on_too_few_samples():
|
|
71
|
+
a = AutoStatLib.StatisticalAnalysis([[1, 2, 3], [4, 5, 6]], raise_errors=True)
|
|
72
|
+
with pytest.raises(ValueError):
|
|
73
|
+
a.RunAuto()
|
|
74
|
+
|
|
75
|
+
def test_empty_result_on_wrong_group_count():
|
|
76
|
+
"""3-group test requested with 2 groups should fail gracefully."""
|
|
77
|
+
a = AutoStatLib.StatisticalAnalysis([[1,2,3,4,5],[6,7,8,9,10]])
|
|
78
|
+
a.RunOnewayAnova()
|
|
79
|
+
assert a.GetResult() == {} or a.error
|
|
80
|
+
|
|
81
|
+
def test_non_numeric_data_filtered():
|
|
82
|
+
a = AutoStatLib.StatisticalAnalysis([['x', 'y', 1, 2, 3, 4, 5], [6, 7, 8, 9, 10]])
|
|
83
|
+
a.RunAuto()
|
|
84
|
+
r = a.GetResult()
|
|
85
|
+
assert isinstance(r, dict)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
# --- Single-sample tests ---
|
|
89
|
+
def test_single_sample_ttest():
|
|
90
|
+
data = [list(np.random.normal(5, 1, 30))]
|
|
91
|
+
a = AutoStatLib.StatisticalAnalysis(data, popmean=0)
|
|
92
|
+
a.RunTtestSingleSample()
|
|
93
|
+
r = a.GetResult()
|
|
94
|
+
assert r['Significance(p<0.05)'] is True # mean ~5 vs popmean=0 should be significant
|
|
95
|
+
|
|
96
|
+
def test_no_popmean_triggers_warning():
|
|
97
|
+
data = [list(np.random.normal(1, 1, 20))]
|
|
98
|
+
a = AutoStatLib.StatisticalAnalysis(data)
|
|
99
|
+
a.RunTtestSingleSample()
|
|
100
|
+
r = a.GetResult()
|
|
101
|
+
assert len(r['Warnings']) > 0
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
# --- Paired tests ---
|
|
105
|
+
def test_paired_ttest_equal_length_required():
|
|
106
|
+
a = AutoStatLib.StatisticalAnalysis([[1,2,3,4,5],[6,7,8,9,10,11]], paired=True, raise_errors=True)
|
|
107
|
+
with pytest.raises(ValueError):
|
|
108
|
+
a.RunTtestPaired()
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
# --- Posthoc ---
|
|
112
|
+
def test_posthoc_kruskal():
|
|
113
|
+
np.random.seed(0)
|
|
114
|
+
data = [list(np.random.normal(i, 1, 20)) for i in range(3)]
|
|
115
|
+
a = AutoStatLib.StatisticalAnalysis(data, posthoc=True)
|
|
116
|
+
a.RunKruskalWallis()
|
|
117
|
+
r = a.GetResult()
|
|
118
|
+
assert len(r['Posthoc_Matrix']) == 3
|
|
119
|
+
assert len(r['Posthoc_Matrix'][0]) == 3
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
# --- Stars ---
|
|
123
|
+
@pytest.mark.parametrize("p,expected", [
|
|
124
|
+
(0.001, 3), (0.01, 2), (0.04, 1), (0.1, 0), (0.00001, 4)
|
|
125
|
+
])
|
|
126
|
+
def test_make_stars(p, expected):
|
|
127
|
+
a = AutoStatLib.StatisticalAnalysis([[1,2,3,4,5],[6,7,8,9,10]])
|
|
128
|
+
assert a.make_stars(p) == expected
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
# --- Tails ---
|
|
132
|
+
def test_one_tailed_p_less_than_two_tailed(normal_2groups):
|
|
133
|
+
a2 = AutoStatLib.StatisticalAnalysis(normal_2groups, tails=2)
|
|
134
|
+
a2.RunTtest()
|
|
135
|
+
p2 = a2.GetResult()['p_value_exact']
|
|
136
|
+
|
|
137
|
+
a1 = AutoStatLib.StatisticalAnalysis(normal_2groups, tails=1)
|
|
138
|
+
a1.RunTtest()
|
|
139
|
+
p1 = a1.GetResult()['p_value_exact']
|
|
140
|
+
|
|
141
|
+
assert abs(p1 - p2 / 2) < 1e-10
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
# --- GetSummary ---
|
|
145
|
+
def test_get_summary_contains_version(normal_2groups):
|
|
146
|
+
a = AutoStatLib.StatisticalAnalysis(normal_2groups)
|
|
147
|
+
a.RunAuto()
|
|
148
|
+
assert 'AutoStatLib' in a.GetSummary()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|