py2ls 0.2.4.9.4__py3-none-any.whl → 0.2.4.9.5__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
py2ls/stats.py
CHANGED
@@ -37,7 +37,7 @@ def FuncStars(
|
|
37
37
|
report=None,
|
38
38
|
report_scale=-0.1,
|
39
39
|
report_loc=None,
|
40
|
-
**kwargs
|
40
|
+
**kwargs,
|
41
41
|
):
|
42
42
|
if ax is None:
|
43
43
|
ax = plt.gca()
|
@@ -53,7 +53,7 @@ def FuncStars(
|
|
53
53
|
if y_loc is None:
|
54
54
|
y_loc = np.min(ylim) + yscale * (np.max(ylim) - np.min(ylim))
|
55
55
|
else:
|
56
|
-
y_loc=y_loc+(1-yscale) * np.abs(np.diff(ylim))+0.1 *y_loc
|
56
|
+
y_loc = y_loc + (1 - yscale) * np.abs(np.diff(ylim)) + 0.1 * y_loc
|
57
57
|
xcenter = np.mean([x1, x2])
|
58
58
|
if pval is not None:
|
59
59
|
# ns / *
|
@@ -79,7 +79,7 @@ def FuncStars(
|
|
79
79
|
y_loc,
|
80
80
|
symbol,
|
81
81
|
ha="center",
|
82
|
-
va="top"
|
82
|
+
va="top", # "center_baseline",
|
83
83
|
fontsize=fontsize,
|
84
84
|
fontname=fontname,
|
85
85
|
color=symbolcolor,
|
@@ -90,7 +90,7 @@ def FuncStars(
|
|
90
90
|
y_loc,
|
91
91
|
symbol * 2,
|
92
92
|
ha="center",
|
93
|
-
va="top"
|
93
|
+
va="top", # "center_baseline",
|
94
94
|
fontsize=fontsize,
|
95
95
|
fontname=fontname,
|
96
96
|
color=symbolcolor,
|
@@ -101,7 +101,7 @@ def FuncStars(
|
|
101
101
|
y_loc,
|
102
102
|
symbol * 3,
|
103
103
|
ha="center",
|
104
|
-
va="top"
|
104
|
+
va="top", # "center_baseline",
|
105
105
|
fontsize=fontsize,
|
106
106
|
fontname=fontname,
|
107
107
|
color=symbolcolor,
|
@@ -111,7 +111,10 @@ def FuncStars(
|
|
111
111
|
# horizontal line
|
112
112
|
if yscale <= 0.99:
|
113
113
|
ax.plot(
|
114
|
-
[
|
114
|
+
[
|
115
|
+
x1 + np.abs(np.diff(xlim)) * 0.01,
|
116
|
+
x2 - np.abs(np.diff(xlim)) * 0.01,
|
117
|
+
],
|
115
118
|
[
|
116
119
|
y_loc - np.abs(np.diff(ylim)) * 0.03,
|
117
120
|
y_loc - np.abs(np.diff(ylim)) * 0.03,
|
@@ -122,7 +125,10 @@ def FuncStars(
|
|
122
125
|
)
|
123
126
|
# vertical line
|
124
127
|
ax.plot(
|
125
|
-
[
|
128
|
+
[
|
129
|
+
x1 + np.abs(np.diff(xlim)) * 0.01,
|
130
|
+
x1 + np.abs(np.diff(xlim)) * 0.01,
|
131
|
+
],
|
126
132
|
[
|
127
133
|
y_loc - np.abs(np.diff(ylim)) * tailindicator[0],
|
128
134
|
y_loc - np.abs(np.diff(ylim)) * 0.03,
|
@@ -132,7 +138,10 @@ def FuncStars(
|
|
132
138
|
linewidth=linewidth,
|
133
139
|
)
|
134
140
|
ax.plot(
|
135
|
-
[
|
141
|
+
[
|
142
|
+
x2 - np.abs(np.diff(xlim)) * 0.01,
|
143
|
+
x2 - np.abs(np.diff(xlim)) * 0.01,
|
144
|
+
],
|
136
145
|
[
|
137
146
|
y_loc - np.abs(np.diff(ylim)) * tailindicator[1],
|
138
147
|
y_loc - np.abs(np.diff(ylim)) * 0.03,
|
@@ -143,7 +152,10 @@ def FuncStars(
|
|
143
152
|
)
|
144
153
|
else:
|
145
154
|
ax.plot(
|
146
|
-
[
|
155
|
+
[
|
156
|
+
x1 + np.abs(np.diff(xlim)) * 0.01,
|
157
|
+
x2 - np.abs(np.diff(xlim)) * 0.01,
|
158
|
+
],
|
147
159
|
[
|
148
160
|
np.min(ylim)
|
149
161
|
+ 0.95 * (np.max(ylim) - np.min(ylim))
|
@@ -158,7 +170,10 @@ def FuncStars(
|
|
158
170
|
)
|
159
171
|
# vertical line
|
160
172
|
ax.plot(
|
161
|
-
[
|
173
|
+
[
|
174
|
+
x1 + np.abs(np.diff(xlim)) * 0.01,
|
175
|
+
x1 + np.abs(np.diff(xlim)) * 0.01,
|
176
|
+
],
|
162
177
|
[
|
163
178
|
np.min(ylim)
|
164
179
|
+ 0.95 * (np.max(ylim) - np.min(ylim))
|
@@ -172,7 +187,10 @@ def FuncStars(
|
|
172
187
|
linewidth=linewidth,
|
173
188
|
)
|
174
189
|
ax.plot(
|
175
|
-
[
|
190
|
+
[
|
191
|
+
x2 - np.abs(np.diff(xlim)) * 0.01,
|
192
|
+
x2 - np.abs(np.diff(xlim)) * 0.01,
|
193
|
+
],
|
176
194
|
[
|
177
195
|
np.min(ylim)
|
178
196
|
+ 0.95 * (np.max(ylim) - np.min(ylim))
|
@@ -312,9 +330,7 @@ def FuncCmpt(x1, x2, pmc="auto", pair="unpaired", verbose=True):
|
|
312
330
|
)
|
313
331
|
notes_stat = "paired t test"
|
314
332
|
# note: APA FORMAT
|
315
|
-
notes_APA = (
|
316
|
-
f"t({sum([nX1-1])})={round(stat_value,3)},p={round(pval,3)}"
|
317
|
-
)
|
333
|
+
notes_APA = f"t({sum([nX1-1])})={round(stat_value,3)},p={round(pval,3)}"
|
318
334
|
elif cfg_pmc == "non-parametric":
|
319
335
|
if "np" in pair: # Perform Mann-Whitney
|
320
336
|
stat_value, pval = stats.mannwhitneyu(
|
@@ -324,7 +340,9 @@ def FuncCmpt(x1, x2, pmc="auto", pair="unpaired", verbose=True):
|
|
324
340
|
if nX1 == nX2:
|
325
341
|
notes_APA = f"U(n={nX1})={round(stat_value,3)},p={round(pval,3)}"
|
326
342
|
else:
|
327
|
-
notes_APA =
|
343
|
+
notes_APA = (
|
344
|
+
f"U(n1={nX1},n2={nX2})={round(stat_value,3)},p={round(pval,3)}"
|
345
|
+
)
|
328
346
|
elif "pa" in pair and "np" not in pair: # Wilcoxon signed-rank test
|
329
347
|
stat_value, pval = stats.wilcoxon(
|
330
348
|
x1, x2, method="exact", nan_policy="omit"
|
@@ -333,7 +351,9 @@ def FuncCmpt(x1, x2, pmc="auto", pair="unpaired", verbose=True):
|
|
333
351
|
if nX1 == nX2:
|
334
352
|
notes_APA = f"Z(n={nX1})={round(stat_value,3)},p={round(pval,3)}"
|
335
353
|
else:
|
336
|
-
notes_APA =
|
354
|
+
notes_APA = (
|
355
|
+
f"Z(n1={nX1},n2={nX2})={round(stat_value,3)},p={round(pval,3)}"
|
356
|
+
)
|
337
357
|
|
338
358
|
# filling output
|
339
359
|
output["stat"] = stat_value
|
@@ -408,7 +428,7 @@ def FuncMultiCmpt(
|
|
408
428
|
subject=None,
|
409
429
|
group=None,
|
410
430
|
verbose=True,
|
411
|
-
post_hoc=False
|
431
|
+
post_hoc=False,
|
412
432
|
):
|
413
433
|
if group is None:
|
414
434
|
group = factor
|
@@ -520,12 +540,16 @@ def FuncMultiCmpt(
|
|
520
540
|
if "np" in cfg_pair: # 'unpaired'
|
521
541
|
res_tab = run_kruskal(data, dv, factor)
|
522
542
|
notes_stat = f"Non-parametric Kruskal: {data[factor].nunique()} Way ANOVA"
|
523
|
-
notes_APA = [
|
543
|
+
notes_APA = [
|
544
|
+
f'H({res_tab.ddof1[0]},N={data.shape[0]})={round(res_tab.H[0],3)},p={round(res_tab["p-unc"][0],3)}'
|
545
|
+
]
|
524
546
|
|
525
547
|
elif "pa" in cfg_pair and "np" not in cfg_pair: # 'paired'
|
526
548
|
res_tab = run_friedman(data, dv, factor, subject, method="chisq")
|
527
549
|
notes_stat = f"Non-parametric {data[factor].nunique()} Way Friedman repeated measures ANOVA"
|
528
|
-
notes_APA = [
|
550
|
+
notes_APA = [
|
551
|
+
f'X^2({res_tab.ddof1[0]})={round(res_tab.Q[0],3)},p={round(res_tab["p-unc"][0],3)}'
|
552
|
+
]
|
529
553
|
|
530
554
|
# =============================================================================
|
531
555
|
# # Post-hoc
|
@@ -542,7 +566,7 @@ def FuncMultiCmpt(
|
|
542
566
|
go_mix_within = factor if ("pa" in cfg_pair) or ("np" not in cfg_pair) else None
|
543
567
|
|
544
568
|
if res_tab["p-unc"][0] <= 0.05:
|
545
|
-
post_hoc=True
|
569
|
+
post_hoc = True
|
546
570
|
if post_hoc:
|
547
571
|
# Pairwise Comparisons
|
548
572
|
method_post_hoc = [
|
@@ -610,9 +634,9 @@ def FuncMultiCmpt(
|
|
610
634
|
# # filling output
|
611
635
|
# =============================================================================
|
612
636
|
|
613
|
-
pd.set_option(
|
614
|
-
pd.set_option(
|
615
|
-
pd.set_option(
|
637
|
+
pd.set_option("display.max_columns", None) # Show all columns
|
638
|
+
pd.set_option("display.max_colwidth", None) # No limit on column width
|
639
|
+
pd.set_option("display.expand_frame_repr", False) # Prevent line-wrapping
|
616
640
|
|
617
641
|
output["stat"] = notes_stat
|
618
642
|
# print(output['APA'])
|
@@ -627,7 +651,7 @@ def FuncMultiCmpt(
|
|
627
651
|
def display_output(output: dict):
|
628
652
|
if isinstance(output, pd.DataFrame):
|
629
653
|
output = output.to_dict(orient="list")
|
630
|
-
# ['res_posthoc', 'stat', 'APA', 'pval', 'res_tab']
|
654
|
+
# ['res_posthoc', 'stat', 'APA', 'pval', 'res_tab']
|
631
655
|
|
632
656
|
# ? show APA
|
633
657
|
# print(f"\n\ndisplay stat_output")
|
@@ -641,7 +665,7 @@ def display_output(output: dict):
|
|
641
665
|
except:
|
642
666
|
pass
|
643
667
|
try:
|
644
|
-
print(f"APA ⤵\n{output[
|
668
|
+
print(f"APA ⤵\n{output['APA'][0]} ⤵\npost-hoc analysis ⤵")
|
645
669
|
display(output["res_posthoc"])
|
646
670
|
except:
|
647
671
|
pass
|
@@ -659,26 +683,27 @@ def corr_pair(pair):
|
|
659
683
|
|
660
684
|
|
661
685
|
def check_normality(data, verbose=True):
|
662
|
-
if len(data)<=5000:
|
663
|
-
# Shapiro-Wilk test is designed to test the normality of a small sample, typically less than 5000 observations.
|
686
|
+
if len(data) <= 5000:
|
687
|
+
# Shapiro-Wilk test is designed to test the normality of a small sample, typically less than 5000 observations.
|
664
688
|
stat_shapiro, pval4norm = stats.shapiro(data)
|
665
|
-
method=
|
689
|
+
method = "Shapiro-Wilk test"
|
666
690
|
else:
|
667
691
|
from scipy.stats import kstest, zscore
|
668
|
-
|
669
|
-
|
670
|
-
|
692
|
+
|
693
|
+
data_scaled = zscore(data) # a standard normal distribution(mean=0,sd=1)
|
694
|
+
stat_kstest, pval4norm = kstest(data_scaled, "norm")
|
695
|
+
method = "Kolmogorov–Smirnov test"
|
671
696
|
if pval4norm >= 0.05:
|
672
697
|
Normality = True
|
673
698
|
else:
|
674
699
|
Normality = False
|
675
700
|
if verbose:
|
676
|
-
|
677
|
-
|
678
|
-
|
679
|
-
|
680
|
-
|
681
|
-
|
701
|
+
print(f"'{method}' was used to test for normality")
|
702
|
+
(
|
703
|
+
print("\nnormally distributed")
|
704
|
+
if Normality
|
705
|
+
else print(f"\n NOT normally distributed\n")
|
706
|
+
)
|
682
707
|
return Normality
|
683
708
|
|
684
709
|
|
@@ -714,7 +739,7 @@ def extract_apa(res_tab):
|
|
714
739
|
for irow in range(res_tab.shape[0]):
|
715
740
|
note_tmp = f'{res_tab.Source[irow]}:F{round(res_tab.ddof1[irow]),round(res_tab.ddof2[irow])}={round(res_tab.F[irow],3)},p={round(res_tab["p-unc"][irow],3)}'
|
716
741
|
notes_APA.append(note_tmp)
|
717
|
-
elif "DF" in res_tab:
|
742
|
+
elif "DF" in res_tab:
|
718
743
|
for irow in range(res_tab.shape[0] - 1):
|
719
744
|
note_tmp = f'{res_tab.Source[irow]}:F{round(res_tab.DF[irow]),round(res_tab.DF[res_tab.shape[0]-1])}={round(res_tab.F[irow],3)},p={round(res_tab["p-unc"][irow],3)}'
|
720
745
|
notes_APA.append(note_tmp)
|
@@ -876,7 +901,7 @@ def df_wide_long(df):
|
|
876
901
|
elif rows > columns:
|
877
902
|
return "Long"
|
878
903
|
|
879
|
-
|
904
|
+
|
880
905
|
def sort_rows_move_nan(arr, sort=False):
|
881
906
|
# Handle edge cases where all values are NaN
|
882
907
|
if np.all(np.isnan(arr)):
|
@@ -917,7 +942,7 @@ def df2array(data: pd.DataFrame, x=None, y=None, hue=None, sort=False):
|
|
917
942
|
if hue is None:
|
918
943
|
a = []
|
919
944
|
if sort:
|
920
|
-
cat_x=np.sort(data[x].unique().tolist()).tolist()
|
945
|
+
cat_x = np.sort(data[x].unique().tolist()).tolist()
|
921
946
|
else:
|
922
947
|
cat_x = data[x].unique().tolist()
|
923
948
|
for i, x_ in enumerate(cat_x):
|
@@ -938,6 +963,7 @@ def df2array(data: pd.DataFrame, x=None, y=None, hue=None, sort=False):
|
|
938
963
|
a = padcat(a, new_, axis=0)
|
939
964
|
return sort_rows_move_nan(a).T
|
940
965
|
|
966
|
+
|
941
967
|
def array2df(data: np.ndarray):
|
942
968
|
df = pd.DataFrame()
|
943
969
|
df["group"] = (
|
@@ -949,6 +975,8 @@ def array2df(data: np.ndarray):
|
|
949
975
|
)
|
950
976
|
df["value"] = data.reshape(-1, 1, order="F")
|
951
977
|
return df
|
978
|
+
|
979
|
+
|
952
980
|
def padcat(*args, fill_value=np.nan, axis=1, order="row"):
|
953
981
|
"""
|
954
982
|
Concatenate vectors with padding.
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: py2ls
|
3
|
-
Version: 0.2.4.9.4
|
3
|
+
Version: 0.2.4.9.5
|
4
4
|
Summary: py(thon)2(too)ls
|
5
5
|
Author: Jianfeng
|
6
6
|
Author-email: Jianfeng.Liu0413@gmail.com
|
@@ -63,6 +63,7 @@ Requires-Dist: defusedxml (>=0.7.1)
|
|
63
63
|
Requires-Dist: distlib (>=0.3.8)
|
64
64
|
Requires-Dist: docopt (>=0.6.2)
|
65
65
|
Requires-Dist: docx (>=0.2.4)
|
66
|
+
Requires-Dist: duckduckgo-search (>=0.5.1,<0.6.0)
|
66
67
|
Requires-Dist: dulwich (>=0.21.7)
|
67
68
|
Requires-Dist: executing (>=2.0.1)
|
68
69
|
Requires-Dist: faiss-cpu (>=1.8.0.post1)
|
@@ -222,9 +222,9 @@ py2ls/ocr.py,sha256=5lhUbJufIKRSOL6wAWVLEo8TqMYSjoI_Q-IO-_4u3DE,31419
|
|
222
222
|
py2ls/plot.py,sha256=LeQpTLvRHMDrQtU8yaeXEOgDdVm7KWLcAuRia6wWMYQ,167604
|
223
223
|
py2ls/setuptools-70.1.0-py3-none-any.whl,sha256=2bi3cUVal8ip86s0SOvgspteEF8SKLukECi-EWmFomc,882588
|
224
224
|
py2ls/sleep_events_detectors.py,sha256=bQA3HJqv5qnYKJJEIhCyhlDtkXQfIzqksnD0YRXso68,52145
|
225
|
-
py2ls/stats.py,sha256=
|
225
|
+
py2ls/stats.py,sha256=qBn2rJmNa_QLLUqjwYqXUlGzqmW94sgA1bxJU2FC3r0,39175
|
226
226
|
py2ls/translator.py,sha256=zBeq4pYZeroqw3DT-5g7uHfVqKd-EQptT6LJ-Adi8JY,34244
|
227
227
|
py2ls/wb_detector.py,sha256=7y6TmBUj9exCZeIgBAJ_9hwuhkDh1x_-yg4dvNY1_GQ,6284
|
228
|
-
py2ls-0.2.4.9.
|
229
|
-
py2ls-0.2.4.9.
|
230
|
-
py2ls-0.2.4.9.4.dist-info/RECORD,,
|
228
|
+
py2ls-0.2.4.9.5.dist-info/METADATA,sha256=9r2YCzit9C4tbEwsmPAC5uukE3vJ4w-jxnCw8b2flHo,20090
|
229
|
+
py2ls-0.2.4.9.5.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
|
230
|
+
py2ls-0.2.4.9.5.dist-info/RECORD,,
|
File without changes
|