AutoStatLib 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of AutoStatLib might be problematic. Click here for more details.

@@ -16,7 +16,7 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
16
16
  paired=False,
17
17
  tails=2,
18
18
  popmean=None,
19
- posthoc=False,
19
+ posthoc=True,
20
20
  verbose=True):
21
21
  self.results = None
22
22
  self.error = False
@@ -92,6 +92,7 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
92
92
  self.p_value = None
93
93
  self.posthoc_matrix_df = None
94
94
  self.posthoc_matrix = []
95
+ self.posthoc_name = None
95
96
 
96
97
  self.log('\n' + '-'*67)
97
98
  self.log('Statistical analysis __init__iated for data in {} groups\n'.format(
@@ -174,7 +175,6 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
174
175
  else:
175
176
  self.run_test_auto()
176
177
 
177
-
178
178
  # print the results
179
179
  self.results = self.create_results_dict()
180
180
  self.print_results()
@@ -186,9 +186,8 @@ class StatisticalAnalysis(StatisticalTests, NormalityTests, TextFormatting, Help
186
186
  if self.verbose == True:
187
187
  print(self.summary)
188
188
 
189
-
190
-
191
189
  # public methods:
190
+
192
191
  def RunAuto(self):
193
192
  self.run_test(test='auto')
194
193
 
@@ -0,0 +1,626 @@
1
+ import random
2
+ # from math import comb
3
+ import numpy as np
4
+ import matplotlib.pyplot as plt
5
+ import matplotlib.colors as mcolors
6
+ import matplotlib.colors as color
7
+
8
+
9
class Helpers():
    """Colour and p-value formatting utilities shared by the plot classes."""

    def colors_to_rgba(self, colors, alpha=0.35):
        """Return every colour in `colors` as an RGBA tuple with alpha set to `alpha`."""
        return [(*mcolors.to_rgba(spec)[:3], alpha) for spec in colors]

    def get_colors(self, colormap):
        """Return `(edge_colors, fill_colors)` for the groups.

        A non-empty `colormap` is used as given, with every entry that is
        not a valid colour replaced by black. Otherwise a default palette
        is built: black followed by 9 samples of the 'Set1' colormap
        (9 is a fixed choice here, not derived from the number of groups,
        but it can be changed later).
        Fill colours are the edge colours made translucent.
        """
        if colormap:
            edge = ['k' if not color.is_color_like(entry) else entry
                    for entry in colormap]
        else:
            palette_size = 9  # len(self.data_groups)
            palette = plt.get_cmap('Set1')
            edge = ['k'] + [palette(step / palette_size)
                            for step in range(palette_size)]
        return edge, self.colors_to_rgba(edge)

    def make_p_value_printed(self, p) -> str:
        """Format a p-value for display; `None` and unorderable values give 'N/A'."""
        if p is None:
            return 'N/A'
        if p > 0.99:
            return 'p>0.99'
        if p >= 0.001:
            return f'p={p:.2g}'
        if p >= 0.0001:
            return f'p={p:.1g}'
        if p < 0.0001:
            return 'p<0.0001'
        # Reached only when every comparison above is False (e.g. NaN).
        return 'N/A'

    def make_stars(self, p) -> int:
        """Map a p-value to a significance level from 0 (p >= 0.05 or None) to 4."""
        if p is None:
            return 0
        for stars, upper_bound in ((4, 0.0001), (3, 0.001), (2, 0.01), (1, 0.05)):
            if p < upper_bound:
                return stars
        return 0

    def make_stars_printed(self, n) -> str:
        """Render a star count as asterisks, or 'ns' when the count is zero."""
        if n:
            return '*' * n
        return 'ns'

    def transpose(self, data):
        """Swap rows and columns of a nested sequence, truncating to the shortest row."""
        return [list(column) for column in zip(*data)]
71
+
72
+
73
class BaseStatPlot(Helpers):
    """Shared state and drawing primitives for the statistical plot classes.

    The constructor stores the data groups, computes per-group summary
    statistics and resolves the colours. The `add_*` methods each draw one
    layer onto a matplotlib axes; subclasses combine them in `plot()`.
    All layers place group `i` at x position `i`.
    """

    # Significance-bar layout per posthoc matrix size:
    # (first group, second group, vertical order) for every drawn bar.
    _POSTHOC_BAR_LAYOUTS = {
        3: ((0, 1, 0), (1, 2, 1), (0, 2, 3)),
        4: ((0, 1, 0), (2, 3, 0), (1, 2, 1),
            (0, 2, 3), (1, 3, 5),
            (0, 3, 7)),
        5: ((0, 1, 0), (2, 3, 0), (1, 2, 1), (3, 4, 1),
            (0, 2, 4), (2, 4, 5), (1, 3, 8),
            (0, 3, 11), (1, 4, 14),
            (0, 4, 17)),
    }

    def __init__(self,
                 data_groups,
                 p=None,
                 testname='',
                 dependent=False,
                 plot_title='',
                 x_label='',
                 y_label='',
                 print_x_labels=True,
                 x_manual_tick_labels=None,
                 posthoc_matrix=None,
                 colormap=None,
                 **kwargs):
        """Store plot inputs and precompute per-group statistics.

        Args:
            data_groups: sequence of numeric sequences, one per group.
            p: overall p-value, or None when there is none to show.
            testname: test name printed in the figure corner.
            dependent: when True the groups are treated as paired and
                matching points are connected in the scatter layer.
            plot_title, x_label, y_label: optional texts.
            print_x_labels: whether x tick labels are shown.
            x_manual_tick_labels: tick labels (cycled if shorter than the
                number of groups); None or [''] selects 'Group N' labels.
            posthoc_matrix: square matrix of pairwise p-values; None or an
                empty list means no posthoc results.
            colormap: list of colours; None or [''] selects the default.
            **kwargs: accepted and ignored.
        """
        self.data_groups = data_groups
        self.n_groups = len(self.data_groups)
        self.p = p
        self.testname = testname
        # Fresh list per instance: a `[]` default in the signature would be
        # one object shared by every plot created without a matrix.
        self.posthoc_matrix = [] if posthoc_matrix is None else posthoc_matrix
        self.n_significance_bars = 1
        self.dependent = dependent
        self.plot_title = plot_title
        self.x_label = x_label
        self.y_label = y_label
        self.print_x_labels = print_x_labels

        # Summary statistics as plain Python floats (np.std with its defaults).
        self.mean = [np.mean(group).item() for group in self.data_groups]
        self.median = [np.median(group).item() for group in self.data_groups]
        self.sd = [np.std(group).item() for group in self.data_groups]
        self.sem = [np.std(group).item() / np.sqrt(len(group)).item()
                    for group in self.data_groups]

        self.n = [len(group) for group in self.data_groups]
        self.p_printed = self.make_p_value_printed(self.p)
        self.stars_printed = self.make_stars_printed(self.make_stars(self.p))

        self.x_manual_tick_labels = (
            x_manual_tick_labels if x_manual_tick_labels is not None else [''])

        if colormap is not None and colormap != ['']:
            self.colormap_default = False
        else:
            colormap = []
            self.colormap_default = True
        self.colors_edge, self.colors_fill = self.get_colors(colormap)

        # Highest data point; significance bars are stacked relative to it.
        self.y_max = max([max(data) for data in self.data_groups])

    @staticmethod
    def _merge_props(defaults, **overrides):
        """Return a copy of `defaults` updated with the overrides that are not None."""
        props = dict(defaults)
        props.update({key: value for key, value in overrides.items()
                      if value is not None})
        return props

    def setup_figure(self):
        """Create and return `(fig, ax)`, with the figure width scaled by the group count."""
        fig, ax = plt.subplots(figsize=(0.5 + 0.9 * self.n_groups, 4))
        return fig, ax

    def add_scatter(self, ax,
                    color='k',
                    alpha=0.5,
                    marker='o',
                    linewidth=1,
                    zorder=1):
        """Plot the individual data points with a small random x jitter.

        Paired (`dependent`) data is drawn one observation at a time so the
        matching points of all groups are joined by a line. Unpaired data is
        drawn one group at a time, so groups of different sizes keep all of
        their points (transposing them would truncate every group to the
        length of the shortest one).
        """
        # x positions of all points: group index plus jitter in [-0.1, 0.1].
        jittered_x = [[i + random.uniform(-.10, .10) for _ in data]
                      for i, data in enumerate(self.data_groups)]
        style = dict(color=color, alpha=alpha, marker=marker,
                     linewidth=linewidth, zorder=zorder)

        if self.dependent:
            # One line per observation, connecting it across the groups.
            rows = zip(self.transpose(jittered_x),
                       self.transpose(self.data_groups))
            for xs, ys in rows:
                ax.plot(xs, ys, linestyle='-', **style)
        else:
            for xs, ys in zip(jittered_x, self.data_groups):
                ax.plot(xs, ys, linestyle='', **style)

    def add_barplot(self, ax, x,
                    fill=True,
                    linewidth=2,
                    zorder=1):
        """Draw a bar of height mean[x] at position x; colours cycle through the palette."""
        ax.bar(x, self.mean[x],
               width=0.75,
               facecolor=self.colors_fill[x % len(self.colors_fill)],
               edgecolor=self.colors_edge[x % len(self.colors_edge)],
               fill=fill,
               linewidth=linewidth,
               zorder=zorder)

    def add_violinplot(self, ax, x,
                       linewidth=2,
                       widths=0.85,
                       vert=True,
                       showmeans=True,
                       showmedians=True,
                       showextrema=True,
                       points=200,
                       bw_method=0.5):
        """Draw a violin for group x at position x; colours cycle through the palette."""
        vp = ax.violinplot(self.data_groups[x], positions=[x], widths=widths, vert=vert,
                           showmeans=showmeans, showmedians=showmedians,
                           showextrema=showextrema,
                           points=points, bw_method=bw_method)

        for body in vp['bodies']:
            body.set_facecolor(self.colors_fill[x % len(self.colors_fill)])
            body.set_edgecolor(self.colors_edge[x % len(self.colors_edge)])
            body.set_linewidth(linewidth)

    def add_boxplot(self, ax,
                    positions=None,
                    widths=0.6,
                    tickLabels=None,
                    notch=False,
                    confidences=None,
                    fliers=False,
                    fliersMarker='',
                    flierFillColor=None,
                    flierEdgeColor=None,
                    flierLineWidth=2,
                    flierLineStyle=None,
                    vertical=True,
                    whiskers=1.5,
                    bootstrap=None,
                    whiskersColor=None,
                    whiskersLineWidth=2,
                    whiskersLineStyle=None,
                    showWhiskersCaps=True,
                    whiskersCapsWidths=None,
                    whiskersCapsColor=None,
                    whiskersCapsLineWidth=2,
                    whiskersCapsLineStyle=None,
                    boxFill=None,
                    boxBorderColor=None,
                    boxBorderWidth=2,
                    userMedians=None,
                    medianColor=None,
                    medianLineStyle=None,
                    medianLineWidth=2,
                    showMeans=False,
                    meanMarker=None,
                    meanFillColor=None,
                    meanEdgeColor=None,
                    meanLine=False,
                    meanLineColor=None,
                    meanLineStyle=None,
                    meanLineWidth=2,
                    autorange=False
                    ):
        """Draw one box per group with `ax.boxplot`.

        Every styling argument left as None keeps the default defined in
        this method. `whiskers` is passed as matplotlib's `whis`: a float is
        Tukey's factor (whiskers reach the furthest datum within
        Q1 - whis*IQR and Q3 + whis*IQR, 1.5 being Tukey's original
        definition), a pair gives the whisker percentiles.

        NOTE: `positions` and `tickLabels` are accepted but not used. Boxes
        are always placed at 0..n_groups-1, the x positions used by the
        other layers of this class.
        """
        positions = list(range(self.n_groups))

        if not fliers:
            fliersMarker = ""
        elif fliersMarker == "":
            fliersMarker = 'b+'

        whiskersCapsStyles = self._merge_props(
            {},
            color=whiskersCapsColor,
            linewidth=whiskersCapsLineWidth,
            linestyle=whiskersCapsLineStyle)

        # A box border line style is not offered: it does not work together
        # with patch_artist, which is required for the face colour.
        boxProps = self._merge_props(
            {"facecolor": (0, 0, 0, 0), "edgecolor": "black", "linewidth": 1},
            facecolor=boxFill,
            edgecolor=boxBorderColor,
            linewidth=boxBorderWidth)

        whiskersProps = self._merge_props(
            {"color": 'black', "linestyle": "solid", "linewidth": 1},
            color=whiskersColor,
            linestyle=whiskersLineStyle,
            linewidth=whiskersLineWidth)

        flierProps = self._merge_props(
            {"markerfacecolor": [0, 0, 0, 0], "markeredgecolor": "black",
             "linestyle": "solid", "markeredgewidth": 1},
            markerfacecolor=flierFillColor,
            markeredgecolor=flierEdgeColor,
            markeredgewidth=flierLineWidth,
            linestyle=flierLineStyle)

        medianProps = self._merge_props(
            {"linestyle": 'solid', "linewidth": 1, "color": 'red'},
            color=medianColor,
            linestyle=medianLineStyle,
            linewidth=medianLineWidth)

        meanProps = self._merge_props(
            {"color": "black", "marker": 'o', "markerfacecolor": "black",
             "markeredgecolor": "black", "linestyle": "solid", "linewidth": 1},
            marker=meanMarker,
            markerfacecolor=meanFillColor,
            markeredgecolor=meanEdgeColor,
            color=meanLineColor,
            linestyle=meanLineStyle,
            linewidth=meanLineWidth)

        bplot = ax.boxplot(self.data_groups,
                           positions=positions,
                           widths=widths,
                           # tick_labels=tickLabels,
                           notch=notch,
                           conf_intervals=confidences,
                           sym=fliersMarker,
                           flierprops=flierProps,
                           vert=vertical,
                           whis=whiskers,
                           whiskerprops=whiskersProps,
                           showcaps=showWhiskersCaps,
                           capwidths=whiskersCapsWidths,
                           capprops=whiskersCapsStyles,
                           boxprops=boxProps,
                           usermedians=userMedians,
                           medianprops=medianProps,
                           bootstrap=bootstrap,
                           showmeans=showMeans,
                           meanline=meanLine,
                           meanprops=meanProps,
                           autorange=autorange,
                           patch_artist=True)

        # Fill the boxes only when the user supplied a colormap; otherwise
        # the face colour from boxProps is kept.
        if not self.colormap_default:
            for x, patch in enumerate(bplot['boxes']):
                patch.set_facecolor(
                    self.colors_fill[x % len(self.colors_fill)])

    def _add_errorbar(self, ax, x, yerr, capsize, ecolor, linewidth, zorder):
        """Draw a marker-less error bar of half-height `yerr` around mean[x]."""
        ax.errorbar(x, self.mean[x],
                    yerr=yerr,
                    fmt='none',
                    capsize=capsize,
                    ecolor=ecolor,
                    linewidth=linewidth,
                    zorder=zorder)

    def add_errorbar_sd(self, ax, x,
                        capsize=8,
                        ecolor='r',
                        linewidth=2,
                        zorder=3):
        """Draw mean +/- standard deviation for group x."""
        self._add_errorbar(ax, x, self.sd[x], capsize, ecolor, linewidth, zorder)

    def add_errorbar_sem(self, ax, x,
                         capsize=8,
                         ecolor='r',
                         linewidth=2,
                         zorder=3):
        """Draw mean +/- standard error of the mean for group x."""
        self._add_errorbar(ax, x, self.sem[x], capsize, ecolor, linewidth, zorder)

    def add_mean_marker(self, ax, x,
                        marker='_',
                        markerfacecolor='#00000000',
                        markeredgecolor='r',
                        markersize=16,
                        markeredgewidth=1):
        """Overlay a marker at the mean of group x."""
        ax.plot(x, self.mean[x],
                marker=marker,
                markerfacecolor=markerfacecolor,
                markeredgecolor=markeredgecolor,
                markersize=markersize,
                markeredgewidth=markeredgewidth)

    def add_median_marker(self, ax, x,
                          marker='x',
                          markerfacecolor='#00000000',
                          markeredgecolor='r',
                          markersize=10,
                          markeredgewidth=1):
        """Overlay a marker at the median of group x."""
        ax.plot(x, self.median[x],
                marker=marker,
                markerfacecolor=markerfacecolor,
                markeredgecolor=markeredgecolor,
                markersize=markersize,
                markeredgewidth=markeredgewidth)

    def add_significance_bars(self, ax,
                              linewidth=2,
                              capsize=0.01,
                              col='k',
                              label=''):
        """Draw significance brackets above the data.

        `label` selects the bracket text: 'p' for the printed p-value, 's'
        for the stars, anything else for both on two lines.

        With a posthoc matrix of 3, 4 or 5 groups one bracket is drawn per
        pair, using the fixed layouts in `_POSTHOC_BAR_LAYOUTS`. In every
        other case a single bracket spanning all groups shows the overall
        p-value. Nothing is drawn when there is neither a p-value nor a
        posthoc matrix.
        """
        posthoc_matrix_printed = [[self.make_p_value_printed(element) for element in row]
                                  for row in self.posthoc_matrix] if self.posthoc_matrix else []
        posthoc_matrix_stars = [[self.make_stars_printed(self.make_stars(element)) for element in row]
                                for row in self.posthoc_matrix] if self.posthoc_matrix else []

        def draw_bar(p_text, stars_text, order=0, x1=0, x2=self.n_groups - 1):
            # Two-line labels need twice the vertical spacing between bars.
            if label == 'p':
                vspace = capsize + 0.03
                text = '{}'.format(p_text)
            elif label == 's':
                vspace = capsize + 0.03
                text = '{}'.format(stars_text)
            else:
                vspace = capsize + 0.06
                text = '{}\n{}'.format(p_text, stars_text)

            # Bracket base height and cap height, both relative to y_max.
            y = (1.05 + (order * vspace)) * self.y_max
            h = capsize * self.y_max
            ax.plot([x1, x1, x2, x2], [y, y + h, y + h, y],
                    lw=linewidth, c=col)
            ax.text((x1 + x2) * 0.5, y + h, text,
                    ha='center', va='bottom', color=col,
                    fontweight='bold', fontsize=8)

        if (self.p is not None) or (self.posthoc_matrix != []):
            layout = (self._POSTHOC_BAR_LAYOUTS.get(len(self.posthoc_matrix))
                      if self.posthoc_matrix else None)
            if layout is None:
                draw_bar(self.p_printed, self.stars_printed)
            else:
                for x1, x2, order in layout:
                    draw_bar(posthoc_matrix_printed[x1][x2],
                             posthoc_matrix_stars[x1][x2],
                             order=order, x1=x1, x2=x2)

    def axes_formatting(self, ax,
                        linewidth=2):
        """Apply the common axes style: left spine only, tick labels, tick sizes."""
        # Remove all spines except left
        for spine in ax.spines.values():
            spine.set_visible(False)
        ax.spines['left'].set_visible(True)
        ax.xaxis.set_visible(bool(self.x_label or self.print_x_labels))
        plt.tight_layout()

        # Set x ticks and labels
        if self.print_x_labels:
            plt.subplots_adjust(bottom=0.11)
            ax.set_xticks(range(self.n_groups))
            labels = self.x_manual_tick_labels
            # An empty label list is treated like "no labels given";
            # cycling through it would divide by zero.
            if labels and labels != ['']:
                ax.set_xticklabels([labels[i % len(labels)]
                                    for i in range(self.n_groups)])
            else:
                ax.set_xticklabels(['Group {}'.format(i + 1)
                                    for i in range(self.n_groups)],
                                   fontweight='regular', fontsize=8)
        else:
            plt.subplots_adjust(bottom=0.08)
            ax.tick_params(axis='x', which='both',
                           labeltop=False, labelbottom=False)

        # Additional formatting
        for ytick in ax.get_yticklabels():
            ytick.set_fontweight('bold')
        ax.tick_params(width=linewidth)
        ax.xaxis.set_tick_params(labelsize=10)
        ax.yaxis.set_tick_params(labelsize=12)
        ax.spines['left'].set_linewidth(linewidth)
        ax.tick_params(axis='y', which='both',
                       length=linewidth * 2, width=linewidth)
        ax.tick_params(axis='x', which='both', length=0)

    def add_titles_and_labels(self, fig, ax):
        """Set title and axis labels when given, and print the test name and group sizes."""
        if self.plot_title:
            ax.set_title(self.plot_title, fontsize=12, fontweight='bold')
        if self.x_label:
            ax.set_xlabel(self.x_label, fontsize=10, fontweight='bold')
        if self.y_label:
            ax.set_ylabel(self.y_label, fontsize=10, fontweight='bold')
        # Paired data shows only the first group's size; otherwise all sizes.
        sizes = str(self.n[0]) if self.dependent else str(self.n)[1:-1]
        fig.text(0.95, 0.0,
                 '{}\nn={}'.format(self.testname, sizes),
                 ha='right', va='bottom', fontsize=8, fontweight='regular')

    def show(self):
        """Display the current pyplot figure(s)."""
        plt.show()

    def save(self, path):
        """Save the current pyplot figure to `path`."""
        plt.savefig(path)

    def plot(self):
        """Build the figure; must be implemented by each subclass."""
        raise NotImplementedError(
            "Implement the plot() method in the subclass")
540
+
541
+
542
class BarStatPlot(BaseStatPlot):
    """Bar plot of the group means with markers, SD error bars and data points."""

    def plot(self):
        """Assemble the bar plot on a new figure."""
        line_width = 2
        figure, axes = self.setup_figure()

        for group_index, _ in enumerate(self.data_groups):
            # Bar first, then the median/mean markers and the SD error bar.
            self.add_barplot(axes, group_index)
            self.add_median_marker(axes, group_index)
            self.add_mean_marker(axes, group_index)
            self.add_errorbar_sd(axes, group_index)

        # Layers drawn once for the whole figure.
        self.add_scatter(axes)
        self.add_significance_bars(axes, line_width)
        self.add_titles_and_labels(figure, axes)
        self.axes_formatting(axes, line_width)
562
+
563
+
564
class ViolinStatPlot(BaseStatPlot):
    '''
    Violin plot with mean/median markers and data points.

    For adjusting the violins see
    https://matplotlib.org/stable/gallery/statistics/customized_violin.html#sphx-glr-gallery-statistics-customized-violin-py
    https://medium.com/@mohammadaryayi/anything-about-violin-plots-in-matplotlib-ffd58a62bbb5

    Kernel Density Estimation (violin shape prediction approach)
    https://scikit-learn.org/stable/modules/density.html

    SeaBorn violins:
    https://seaborn.pydata.org/archive/0.11/generated/seaborn.violinplot.html
    '''

    def plot(self):
        '''Assemble the violin plot on a new figure.'''
        line_width = 2
        figure, axes = self.setup_figure()

        for group_index, _ in enumerate(self.data_groups):
            # Violin first, then the median and mean markers on top.
            self.add_violinplot(axes, group_index)
            self.add_median_marker(axes, group_index)
            self.add_mean_marker(axes, group_index)
            # SD error bars are not drawn for violins:
            # self.add_errorbar_sd(axes, group_index)

        # Layers drawn once for the whole figure.
        self.add_scatter(axes)
        self.add_significance_bars(axes, line_width)
        self.add_titles_and_labels(figure, axes)
        self.axes_formatting(axes, line_width)
595
+
596
+
597
class BoxStatPlot(BaseStatPlot):
    """Box plot of all groups with the data points drawn over the boxes."""

    def plot(self):
        """Assemble the box plot on a new figure."""
        line_width = 2
        figure, axes = self.setup_figure()

        # Each layer is drawn once for all groups.
        self.add_boxplot(axes)
        self.add_scatter(axes)
        self.add_significance_bars(axes, line_width)
        self.add_titles_and_labels(figure, axes)
        self.axes_formatting(axes, line_width)
608
+
609
+
610
class ScatterStatPlot(BaseStatPlot):
    """Scatter plot of the data points with mean/median markers and SD error bars."""

    def plot(self):
        """Assemble the scatter plot on a new figure."""
        line_width = 2
        figure, axes = self.setup_figure()

        for group_index, _ in enumerate(self.data_groups):
            # Per-group overlays: median marker, mean marker, SD error bar.
            self.add_median_marker(axes, group_index)
            self.add_mean_marker(axes, group_index)
            self.add_errorbar_sd(axes, group_index)

        # Layers drawn once for the whole figure.
        self.add_scatter(axes)
        self.add_significance_bars(axes, line_width)
        self.add_titles_and_labels(figure, axes)
        self.axes_formatting(axes, line_width)
AutoStatLib/__init__.py CHANGED
@@ -1,2 +1,3 @@
1
1
  from AutoStatLib.AutoStatLib import StatisticalAnalysis
2
+ from AutoStatLib.StatPlots import *
2
3
  from AutoStatLib._version import __version__
AutoStatLib/__main__.py CHANGED
@@ -1,5 +1,6 @@
1
1
  #!/usr/bin/env python
2
2
  from AutoStatLib.AutoStatLib import StatisticalAnalysis
3
+ from AutoStatLib.StatPlots import *
3
4
  from AutoStatLib._version import __version__
4
5
 
5
6
  if __name__ == '__main__':
AutoStatLib/_version.py CHANGED
@@ -1,2 +1,2 @@
1
1
  # AutoStatLib package version:
2
- __version__ = "0.2.5"
2
+ __version__ = "0.2.7"
AutoStatLib/helpers.py CHANGED
@@ -1,6 +1,7 @@
1
1
  import numpy as np
2
2
  import pandas as pd
3
3
 
4
+
4
5
  class Helpers():
5
6
 
6
7
  def matrix_to_dataframe(self, matrix):
@@ -61,9 +62,11 @@ class Helpers():
61
62
  'Groups_SE': [np.std(self.data[i]).item() / np.sqrt(len(self.data)).item() for i in range(len(self.data))],
62
63
  # actually returns list of lists of numpy dtypes of float64, next make it return regular floats:
63
64
  'Samples': self.data,
64
- 'Posthoc_Matrix': self.posthoc_matrix if self.posthoc_matrix else 'N/A',
65
- 'Posthoc_Matrix_printed': [[self.make_p_value_printed(element) for element in row] for row in self.posthoc_matrix] if self.posthoc_matrix else 'N/A',
66
- 'Posthoc_Matrix_stars': [[self.make_stars_printed(self.make_stars(element)) for element in row] for row in self.posthoc_matrix] if self.posthoc_matrix else 'N/A',
65
+ 'Posthoc_Tests_Name': self.posthoc_name if self.posthoc_name is not None else '',
66
+ 'Posthoc_Matrix': self.posthoc_matrix if self.posthoc_matrix else [],
67
+ 'Posthoc_Matrix_bool': [[bool(element) for element in row] for row in self.posthoc_matrix] if self.posthoc_matrix else [],
68
+ 'Posthoc_Matrix_printed': [[self.make_p_value_printed(element) for element in row] for row in self.posthoc_matrix] if self.posthoc_matrix else [],
69
+ 'Posthoc_Matrix_stars': [[self.make_stars_printed(self.make_stars(element)) for element in row] for row in self.posthoc_matrix] if self.posthoc_matrix else [],
67
70
  }
68
71
 
69
72
  def log(self, *args, **kwargs):
@@ -74,4 +77,4 @@ class Helpers():
74
77
  def AddWarning(self, warning_id):
75
78
  message = self.warning_ids_all[warning_id]
76
79
  self.log(message)
77
- self.warnings.append(message)
80
+ self.warnings.append(message)
@@ -20,7 +20,7 @@ class NormalityTests():
20
20
 
21
21
  # Shapiro-Wilk test
22
22
  sw_stat, sw_p_value = shapiro(data)
23
- if sw_p_value > 0.05:
23
+ if sw_p_value and sw_p_value > 0.05:
24
24
  sw = True
25
25
  else:
26
26
  sw = False
@@ -28,7 +28,7 @@ class NormalityTests():
28
28
  # Lilliefors test
29
29
  lf_stat, lf_p_value = lilliefors(
30
30
  data, dist='norm')
31
- if lf_p_value > 0.05:
31
+ if lf_p_value and lf_p_value > 0.05:
32
32
  lf = True
33
33
  else:
34
34
  lf = False
@@ -37,7 +37,7 @@ class NormalityTests():
37
37
  if n >= 20:
38
38
  ad_stat, ad_p_value = self.anderson_get_p(
39
39
  data, dist='norm')
40
- if ad_p_value > 0.05:
40
+ if ad_p_value and ad_p_value > 0.05:
41
41
  ad = True
42
42
  else:
43
43
  ad = False
@@ -46,13 +46,11 @@ class NormalityTests():
46
46
  # test result is skewed if n<20
47
47
  if n >= 20:
48
48
  ap_stat, ap_p_value = normaltest(data)
49
- if ap_p_value > 0.05:
49
+ if ap_p_value and ap_p_value > 0.05:
50
50
  ap = True
51
51
  else:
52
52
  ap = False
53
53
 
54
- # print(ap_p_value, ad_p_value, sw_p_value, lf_p_value)
55
-
56
54
  return (sw, lf, ad, ap)
57
55
 
58
56
  def anderson_get_p(self, data, dist='norm'):
@@ -82,4 +80,4 @@ class NormalityTests():
82
80
  else:
83
81
  p = None
84
82
 
85
- return ad, p
83
+ return ad, p
@@ -1,10 +1,10 @@
1
1
  import numpy as np
2
2
  import scikit_posthocs as sp
3
3
  from statsmodels.stats.anova import AnovaRM
4
+ from statsmodels.stats.multicomp import pairwise_tukeyhsd
4
5
  from scipy.stats import ttest_rel, ttest_ind, ttest_1samp, wilcoxon, mannwhitneyu, f_oneway, kruskal, friedmanchisquare
5
6
 
6
7
 
7
-
8
8
  class StatisticalTests():
9
9
  '''
10
10
  Statistical tests mixin
@@ -50,7 +50,7 @@ class StatisticalTests():
50
50
  test_names_dict = {
51
51
  'anova_1w_ordinary': 'Ordinary One-Way ANOVA',
52
52
  'anova_1w_rm': 'Repeated Measures One-Way ANOVA',
53
- 'friedman': 'Friedman test',
53
+ 'friedman': 'Friedman test',
54
54
  'kruskal_wallis': 'Kruskal-Wallis test',
55
55
  'mann_whitney': 'Mann-Whitney U test',
56
56
  't_test_independent': 't-test for independent samples',
@@ -89,6 +89,15 @@ class StatisticalTests():
89
89
  # p_value /= 2
90
90
  # if self.tails == 1:
91
91
  # p_value /= 2
92
+
93
+ # if p_value < 0.05 and self.posthoc:
94
+ # data_flat = np.concatenate(self.data)
95
+ # self.posthoc_name = 'Tukey`s multiple comparisons'
96
+ # group_labels = np.concatenate(
97
+ # [[f"Group_{i+1}"] * len(group) for i, group in enumerate(self.data)])
98
+ # # Tukey's multiple comparisons
99
+ # tukey_result = pairwise_tukeyhsd(data_flat, group_labels)
100
+ # print(tukey_result)
92
101
  return stat, p_value
93
102
 
94
103
  def anova_1w_rm(self):
@@ -117,7 +126,9 @@ class StatisticalTests():
117
126
 
118
127
  # Perform Dunn's multiple comparisons if Kruskal-Wallis is significant
119
128
  if p_value < 0.05 and self.posthoc:
120
- self.posthoc_matrix = sp.posthoc_dunn(self.data, p_adjust='bonferroni').values.tolist()
129
+ self.posthoc_matrix = sp.posthoc_dunn(
130
+ self.data, p_adjust='bonferroni').values.tolist()
131
+ self.posthoc_name = 'Dunn`s multiple comparisons'
121
132
  return stat, p_value
122
133
 
123
134
  def mann_whitney(self):
@@ -161,7 +172,7 @@ class StatisticalTests():
161
172
  if self.tails == 1:
162
173
  p_value /= 2
163
174
  return stat, p_value
164
-
175
+
165
176
  def wilcoxon_single_sample(self):
166
177
  if self.popmean == None:
167
178
  self.popmean = 0
@@ -170,4 +181,4 @@ class StatisticalTests():
170
181
  stat, p_value = wilcoxon(data)
171
182
  if self.tails == 1:
172
183
  p_value /= 2
173
- return stat, p_value
184
+ return stat, p_value
@@ -51,22 +51,26 @@ class TextFormatting():
51
51
  break
52
52
  self.log(self.autospace(row_values, space))
53
53
 
54
- def make_stars(self, p) -> int:
55
- if p is not None:
56
- if p < 0.0001:
57
- return 4
58
- if p < 0.001:
59
- return 3
60
- elif p < 0.01:
61
- return 2
62
- elif p < 0.05:
63
- return 1
54
+ def print_results(self):
55
+ self.log('\n\nResults: \n')
56
+ for i in self.results:
57
+ shift = 27 - len(i)
58
+ if i == 'Warnings':
59
+ self.log(i, ':', ' ' * shift, len(self.results[i]))
60
+ elif i == 'Posthoc_Tests_Name':
61
+ self.log(i, ':', ' ' * shift,
62
+ self.results[i]) if self.results[i] != '' else 'N/A'
63
+ elif i == 'Posthoc_Matrix':
64
+ self.log(i, ':', ' ' * shift, '{0}x{0} matrix'.format(
65
+ len(self.results[i])) if self.results[i] else 'N/A')
66
+ elif (i == 'Samples'
67
+ or i == 'Posthoc_Matrix_bool'
68
+ or i == 'Posthoc_Matrix_printed'
69
+ or i == 'Posthoc_Matrix_stars'
70
+ ):
71
+ pass
64
72
  else:
65
- return 0
66
- return 0
67
-
68
- def make_stars_printed(self, n) -> str:
69
- return '*' * n if n else 'ns'
73
+ self.log(i, ':', ' ' * shift, self.results[i])
70
74
 
71
75
  def make_p_value_printed(self, p) -> str:
72
76
  if p is not None:
@@ -84,15 +88,19 @@ class TextFormatting():
84
88
  return 'N/A'
85
89
  return 'N/A'
86
90
 
87
- def print_results(self):
88
- self.log('\n\nResults: \n')
89
- for i in self.results:
90
- shift = 27 - len(i)
91
- if i == 'Warnings':
92
- self.log(i, ':', ' ' * shift, len(self.results[i]))
93
- if i == 'Posthoc_Matrix':
94
- self.log(i, ':', ' ' * shift, '{0}x{0} matrix'.format(len(self.results[i])))
95
- elif i == 'Samples' or i == 'Posthoc_Matrix_printed' or i == 'Posthoc_Matrix_stars':
96
- pass
91
+ def make_stars(self, p) -> int:
92
+ if p is not None:
93
+ if p < 0.0001:
94
+ return 4
95
+ if p < 0.001:
96
+ return 3
97
+ elif p < 0.01:
98
+ return 2
99
+ elif p < 0.05:
100
+ return 1
97
101
  else:
98
- self.log(i, ':', ' ' * shift, self.results[i])
102
+ return 0
103
+ return 0
104
+
105
+ def make_stars_printed(self, n) -> str:
106
+ return '*' * n if n else 'ns'
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: AutoStatLib
3
- Version: 0.2.5
3
+ Version: 0.2.7
4
4
  Summary: AutoStatLib - a simple statistical analysis tool
5
5
  Author: Stemonitis, SciWare LLC
6
6
  Author-email: konung-yaropolk <yaropolk1995@gmail.com>
@@ -509,15 +509,16 @@ License: GNU LESSER GENERAL PUBLIC LICENSE
509
509
 
510
510
  That's all there is to it!
511
511
 
512
- Project-URL: Homepage, https://github.com/konung-yaropolk/NPL
513
- Project-URL: Issues, https://github.com/konung-yaropolk/NPL/issues
512
+ Project-URL: Homepage, https://github.com/konung-yaropolk/AutoStatLib
513
+ Project-URL: Repository, https://github.com/konung-yaropolk/AutoStatLib.git
514
+ Project-URL: Issues, https://github.com/konung-yaropolk/AutoStatLib/issues
514
515
  Keywords: Science,Statistics
515
516
  Classifier: Programming Language :: Python
516
517
  Classifier: Programming Language :: Python :: 3
517
518
  Classifier: Programming Language :: Python :: 3.12
518
519
  Classifier: License :: OSI Approved :: GNU Lesser General Public License v2 or later (LGPLv2+)
519
520
  Classifier: Operating System :: OS Independent
520
- Classifier: Development Status :: 2 - Pre-Alpha
521
+ Classifier: Development Status :: 4 - Beta
521
522
  Classifier: Intended Audience :: Developers
522
523
  Classifier: Intended Audience :: Science/Research
523
524
  Classifier: Natural Language :: English
@@ -533,6 +534,7 @@ Requires-Dist: scipy
533
534
  Requires-Dist: statsmodels
534
535
  Requires-Dist: scikit-posthocs
535
536
  Requires-Dist: pandas
537
+ Dynamic: license-file
536
538
 
537
539
  # AutoStatLib - python library for automated statistical analysis
538
540
 
@@ -620,26 +622,30 @@ results = analysis.GetResult()
620
622
  The results dictionary keys with representing value types:
621
623
  ```
622
624
  {
623
- 'p-value': String
624
- 'Significance(p<0.05)': Boolean
625
- 'Stars_Printed': String
626
- 'Test_Name': String
627
- 'Groups_Compared': Integer
628
- 'Population_Mean': Float (taken from the input)
629
- 'Data_Normaly_Distributed': Boolean
630
- 'Parametric_Test_Applied': Boolean
631
- 'Paired_Test_Applied': Boolean
632
- 'Tails': Integer (taken from the input)
633
- 'p-value_exact': Float
634
- 'Stars': Integer
635
- 'Warnings': String
636
- 'Groups_N': List of integers
637
- 'Groups_Median': List of floats
638
- 'Groups_Mean': List of floats
639
- 'Groups_SD': List of floats
640
- 'Groups_SE': List of floats
641
- 'Samples': List of input values by groups
625
+ 'p-value' : String
626
+ 'Significance(p<0.05)' : Boolean
627
+ 'Stars_Printed' : String
628
+ 'Test_Name' : String
629
+ 'Groups_Compared' : Integer
630
+ 'Population_Mean' : Float (taken from the input)
631
+ 'Data_Normaly_Distributed' : Boolean
632
+ 'Parametric_Test_Applied' : Boolean
633
+ 'Paired_Test_Applied' : Boolean
634
+ 'Tails' : Integer (taken from the input)
635
+ 'p-value_exact' : Float
636
+ 'Stars' : Integer
637
+ 'Warnings' : String
638
+ 'Groups_N' : List of integers
639
+ 'Groups_Median' : List of floats
640
+ 'Groups_Mean' : List of floats
641
+ 'Groups_SD' : List of floats
642
+ 'Groups_SE' : List of floats
643
+ 'Samples' : List of input values by groups
642
644
  (taken from the input)
645
+ 'Posthoc_Matrix' : 2D List of floats
646
+ 'Posthoc_Matrix_bool' : 2D List of Boolean
647
+ 'Posthoc_Matrix_printed': 2D List of String
648
+ 'Posthoc_Matrix_stars': 2D List of String
643
649
  }
644
650
  ```
645
651
  If errors occurred, *GetResult()* returns an empty dictionary
@@ -0,0 +1,14 @@
1
+ AutoStatLib/AutoStatLib.py,sha256=KJM2x-fChnxVinnCFsAKpoacKeoIJcJw_r8FYqPCljk,9677
2
+ AutoStatLib/StatPlots.py,sha256=KJA-xnuBef0pa6L985XxdWtwEVmb-dcu5DHKLv6Y5Ew,23810
3
+ AutoStatLib/__init__.py,sha256=r7VdcL7F4UCRxEFh8WFBd9y61KavX_qt7fFbKjtjfjo,137
4
+ AutoStatLib/__main__.py,sha256=0OIv5sqFNI-diyHFtYL6HPcYrOWdLiqYYOO_nxrHuTk,283
5
+ AutoStatLib/_version.py,sha256=82BMDplnoVlfpnbkt4ImTQlV7kA099OEgHW37crfaE4,53
6
+ AutoStatLib/helpers.py,sha256=d8P6_q706rjuc6N4WBbdOqNQFuAIjCHfmrhgJABFxqE,3646
7
+ AutoStatLib/normality_tests.py,sha256=TYeKpfpJRzOHvDZucObuZhPktjiZpSZwh381eJ8ENC4,2381
8
+ AutoStatLib/statistical_tests.py,sha256=xfHdTtN5Es_qoVMUwX8VFsl-FLpF3zd56S9ya7dPXVo,6566
9
+ AutoStatLib/text_formatting.py,sha256=rWDsrlZdquook7lUg8t2mb3az8nR12BDprxfy_NwE2o,3576
10
+ autostatlib-0.2.7.dist-info/licenses/LICENSE,sha256=IMF9i4xIpgCADf0U-V1cuf9HBmqWQd3qtI3FSuyW4zE,26526
11
+ autostatlib-0.2.7.dist-info/METADATA,sha256=Xb1tAK4NcJwv8r2D4Y2SUM5sPeiTPFgSBfLfQiuGMbo,36894
12
+ autostatlib-0.2.7.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
13
+ autostatlib-0.2.7.dist-info/top_level.txt,sha256=BuHzVyE2andc7RwD_UPmDjLl9CUAyBH6WHZGjaIReUI,12
14
+ autostatlib-0.2.7.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.8.0)
2
+ Generator: setuptools (80.3.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,13 +0,0 @@
1
- AutoStatLib/AutoStatLib.py,sha256=yPNnwCvHSSlEKQvtnoaLFDq6znPlXCz-CrzGInG-1Ys,9647
2
- AutoStatLib/__init__.py,sha256=0wHYnglzKRPqSHtZlfbMEA2Bj5rDR4LLaXbOrJi-sqM,101
3
- AutoStatLib/__main__.py,sha256=ROKWensrxDh3Gl-yhexJ-BYFohDSh9y-CuMkaLpmnnQ,247
4
- AutoStatLib/_version.py,sha256=-QrGYOb9bx4vC_twSInOBJoijtj78lvUzV19y4-tH38,53
5
- AutoStatLib/helpers.py,sha256=9Fj9pHlXSM3tGHF5L0-i6DilA9VZk6Re93ob_IRxsYg,3424
6
- AutoStatLib/normality_tests.py,sha256=wvOmo6F7drnhhikoGltyQJC4OBk3PLCszY6ItJk1e0M,2385
7
- AutoStatLib/statistical_tests.py,sha256=LDcBRkq56hepR23RZtbBnZOs9k9frVjmiB2EKiEkCYs,5990
8
- AutoStatLib/text_formatting.py,sha256=ShE4BRO69lsC1VT3SsYrmPkuvW7QnyfHVPZEbjNQ_hI,3250
9
- AutoStatLib-0.2.5.dist-info/LICENSE,sha256=IMF9i4xIpgCADf0U-V1cuf9HBmqWQd3qtI3FSuyW4zE,26526
10
- AutoStatLib-0.2.5.dist-info/METADATA,sha256=qJxSrqHlL0wsqaH-ah6MAJa15ikH4NCco1dyVxuNlWs,36572
11
- AutoStatLib-0.2.5.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
12
- AutoStatLib-0.2.5.dist-info/top_level.txt,sha256=BuHzVyE2andc7RwD_UPmDjLl9CUAyBH6WHZGjaIReUI,12
13
- AutoStatLib-0.2.5.dist-info/RECORD,,