py2ls 0.1.4.8__py3-none-any.whl → 0.1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- py2ls/.git/COMMIT_EDITMSG +1 -1
- py2ls/.git/FETCH_HEAD +1 -1
- py2ls/.git/index +0 -0
- py2ls/.git/logs/HEAD +3 -0
- py2ls/.git/logs/refs/heads/main +3 -0
- py2ls/.git/logs/refs/remotes/origin/HEAD +5 -0
- py2ls/.git/logs/refs/remotes/origin/main +3 -0
- py2ls/.git/objects/01/d5bd8065e6860c0bd23ff9fa57161806a099e1 +0 -0
- py2ls/.git/objects/09/08da26de58c114225ad81f484b80bf5d351b34 +0 -0
- py2ls/.git/objects/1c/3f92adda34344bcbbbf9d409c79855ae2aaea8 +2 -0
- py2ls/.git/objects/32/fd627b62fad7cf3b2f9e34ab9777126a0987ad +0 -0
- py2ls/.git/objects/39/7ead045fbbcfb17c62019eb18fe21ed05dbee5 +0 -0
- py2ls/.git/objects/4f/7afb40dff2153d857fc85748c2eecb85125042 +0 -0
- py2ls/.git/objects/62/4488173ed2c8936fa5cea3cf5dd3f26a30b86e +0 -0
- py2ls/.git/objects/6d/ee29dbdcc84edeeacede105110446f3ccac963 +0 -0
- py2ls/.git/objects/b7/2c9e75ab7d0afe594664650aa8f6c772f5ac64 +0 -0
- py2ls/.git/objects/bb/81ccc0513f18fc160b54a82861e9a80d23f4f6 +0 -0
- py2ls/.git/objects/cd/822b3574a88ebdd1ed82fd6983f37e626d52b4 +0 -0
- py2ls/.git/objects/d8/4688b54c0040a30976b3a6540bc47adf7ce680 +0 -0
- py2ls/.git/objects/f1/e50757fddc28b445545dc7e2759b54cdd0f42e +0 -0
- py2ls/.git/refs/heads/main +1 -1
- py2ls/.git/refs/remotes/origin/main +1 -1
- py2ls/__init__.py +2 -12
- py2ls/data/.DS_Store +0 -0
- py2ls/data/db2ls_sql_chtsht.json +39 -0
- py2ls/data/lang_code_iso639.json +97 -0
- py2ls/db2ls.py +356 -0
- py2ls/ips.py +160 -1190
- py2ls/stats.py +810 -0
- py2ls/stdshade.py +173 -0
- py2ls/translator.py +6 -99
- {py2ls-0.1.4.8.dist-info → py2ls-0.1.5.0.dist-info}/METADATA +1 -1
- {py2ls-0.1.4.8.dist-info → py2ls-0.1.5.0.dist-info}/RECORD +34 -16
- py2ls/dbhandler.py +0 -97
- {py2ls-0.1.4.8.dist-info → py2ls-0.1.5.0.dist-info}/WHEEL +0 -0
py2ls/ips.py
CHANGED
@@ -1,51 +1,47 @@
|
|
1
|
-
from scipy.ndimage import convolve1d
|
2
1
|
import numpy as np
|
3
2
|
import pandas as pd
|
3
|
+
|
4
4
|
import json
|
5
|
-
import matplotlib.pyplot as plt
|
6
|
-
import seaborn as sns
|
7
|
-
# import scienceplots
|
8
5
|
import matplotlib
|
9
|
-
import
|
10
|
-
import os
|
11
|
-
from scipy.signal import savgol_filter
|
12
|
-
import pingouin as pg
|
13
|
-
from scipy import stats
|
6
|
+
import matplotlib.pyplot as plt
|
14
7
|
import matplotlib.ticker as tck
|
8
|
+
from mpl_toolkits.mplot3d import Axes3D
|
9
|
+
# import seaborn as sns
|
10
|
+
|
11
|
+
import sys, os,shutil,re, yaml,json
|
15
12
|
from cycler import cycler
|
16
|
-
import
|
13
|
+
import time
|
14
|
+
from dateutil import parser
|
15
|
+
from datetime import datetime
|
16
|
+
|
17
17
|
from PIL import Image,ImageEnhance, ImageOps,ImageFilter
|
18
18
|
from rembg import remove,new_session
|
19
|
-
|
19
|
+
|
20
20
|
import docx
|
21
|
-
import pandas as pd
|
22
21
|
from fpdf import FPDF
|
23
|
-
import yaml
|
24
22
|
from lxml import etree
|
25
23
|
from docx import Document
|
26
24
|
from PyPDF2 import PdfReader
|
27
25
|
from pdf2image import convert_from_path, pdfinfo_from_path
|
28
|
-
from nltk.tokenize import sent_tokenize,word_tokenize
|
26
|
+
from nltk.tokenize import sent_tokenize, word_tokenize
|
29
27
|
import nltk # nltk.download("punkt")
|
30
28
|
from docx2pdf import convert
|
31
29
|
import img2pdf as image2pdf
|
32
|
-
import
|
30
|
+
import nbformat
|
31
|
+
from nbconvert import MarkdownExporter
|
32
|
+
|
33
33
|
from itertools import pairwise
|
34
|
-
import time
|
35
34
|
from box import Box, BoxList
|
36
35
|
from numerizer import numerize
|
37
36
|
from tqdm import tqdm
|
38
37
|
import mimetypes
|
39
38
|
from pprint import pp
|
40
|
-
from dateutil import parser
|
41
|
-
from datetime import datetime
|
42
39
|
from collections import Counter
|
43
40
|
from fuzzywuzzy import fuzz,process
|
44
|
-
from py2ls import netfinder
|
45
41
|
from langdetect import detect
|
46
|
-
import shutil
|
47
42
|
from duckduckgo_search import DDGS
|
48
43
|
|
44
|
+
from py2ls import netfinder
|
49
45
|
|
50
46
|
dir_save='/Users/macjianfeng/Dropbox/Downloads/'
|
51
47
|
|
@@ -58,6 +54,45 @@ def rm_folder(folder_path, verbose=True):
|
|
58
54
|
if verbose:
|
59
55
|
print(f'Failed to delete {folder_path}. Reason: {e}')
|
60
56
|
|
57
|
+
def fremove(path, verbose=True):
|
58
|
+
"""
|
59
|
+
Remove a folder and all its contents or a single file.
|
60
|
+
Parameters:
|
61
|
+
path (str): The path to the folder or file to remove.
|
62
|
+
verbose (bool): If True, print success or failure messages. Default is True.
|
63
|
+
"""
|
64
|
+
try:
|
65
|
+
if os.path.isdir(path):
|
66
|
+
shutil.rmtree(path)
|
67
|
+
if verbose:
|
68
|
+
print(f'Successfully deleted folder {path}')
|
69
|
+
elif os.path.isfile(path):
|
70
|
+
os.remove(path)
|
71
|
+
if verbose:
|
72
|
+
print(f'Successfully deleted file {path}')
|
73
|
+
else:
|
74
|
+
if verbose:
|
75
|
+
print(f'Path {path} does not exist')
|
76
|
+
except Exception as e:
|
77
|
+
if verbose:
|
78
|
+
print(f'Failed to delete {path}. Reason: {e}')
|
79
|
+
|
80
|
+
|
81
|
+
def get_cwd(verbose:bool = True):
|
82
|
+
"""
|
83
|
+
get_cwd: to get the current working directory
|
84
|
+
Args:
|
85
|
+
verbose (bool, optional): to show which function is use. Defaults to True.
|
86
|
+
"""
|
87
|
+
try:
|
88
|
+
script_dir = os.path.dirname(os.path.abspath(__file__))
|
89
|
+
if verbose:
|
90
|
+
print("os.path.dirname(os.path.abspath(__file__)):", script_dir)
|
91
|
+
except NameError:
|
92
|
+
# This works in an interactive environment (like a Jupyter notebook)
|
93
|
+
script_dir = os.getcwd()
|
94
|
+
if verbose:
|
95
|
+
print("os.getcwd():", script_dir)
|
61
96
|
|
62
97
|
def search(query, limit=5, kind='text', output='df',verbose=False,download=True, dir_save=dir_save):
|
63
98
|
from duckduckgo_search import DDGS
|
@@ -157,102 +192,10 @@ def ai(*args, **kwargs):
|
|
157
192
|
return echo(**kwargs)
|
158
193
|
|
159
194
|
def detect_lang(text, output='lang',verbose=True):
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
'Albanian': 'sq',
|
165
|
-
'Amharic': 'am',
|
166
|
-
'Arabic': 'ar',
|
167
|
-
'Armenian': 'hy',
|
168
|
-
'Assamese': 'as',
|
169
|
-
# 'Avaric': 'av',
|
170
|
-
'Aymara': 'ay',
|
171
|
-
'Azerbaijani': 'az',
|
172
|
-
'Bashkir': 'ba',
|
173
|
-
'Basque': 'eu',
|
174
|
-
'Belarusian': 'be',
|
175
|
-
'Bislama': 'bi',
|
176
|
-
'Breton': 'br',
|
177
|
-
'Burmese': 'my',
|
178
|
-
'Catalan, Valencian': 'ca',
|
179
|
-
'Chamorro': 'ch',
|
180
|
-
'Chichewa, Chewa, Nyanja': 'ny',
|
181
|
-
'Chinese': 'zh',
|
182
|
-
'Corsican': 'co',
|
183
|
-
'Cree': 'cr',
|
184
|
-
'Croatian': 'hr',
|
185
|
-
'Danish': 'da',
|
186
|
-
'Dutch, Flemish': 'nl',
|
187
|
-
'Dzongkha': 'dz',
|
188
|
-
'English': 'en',
|
189
|
-
'Finnish': 'fi',
|
190
|
-
'French': 'fr',
|
191
|
-
'Galician': 'gl',
|
192
|
-
'Georgian': 'ka',
|
193
|
-
'German': 'de',
|
194
|
-
'Greek, Modern (1453–)': 'el',
|
195
|
-
'Gujarati': 'gu',
|
196
|
-
'Hausa': 'ha',
|
197
|
-
'Hebrew': 'he',
|
198
|
-
'Hindi': 'hi',
|
199
|
-
'Hungarian': 'hu',
|
200
|
-
'Icelandic': 'is',
|
201
|
-
'Italian': 'it',
|
202
|
-
'Kikuyu, Gikuyu': 'ki',
|
203
|
-
'Korean': 'ko',
|
204
|
-
'Kurdish': 'ku',
|
205
|
-
'Latin': 'la',
|
206
|
-
'Limburgan, Limburger, Limburgish': 'li',
|
207
|
-
'Luba-Katanga': 'lu',
|
208
|
-
'Macedonian': 'mk',
|
209
|
-
'Malay': 'ms',
|
210
|
-
'Nauru': 'na',
|
211
|
-
'North Ndebele': 'nd',
|
212
|
-
'Nepali': 'ne',
|
213
|
-
'Norwegian': 'no',
|
214
|
-
'Norwegian Nynorsk': 'nn',
|
215
|
-
'Sichuan Yi, Nuosu': 'ii',
|
216
|
-
'Occitan': 'oc',
|
217
|
-
'Ojibwa': 'oj',
|
218
|
-
'Oriya': 'or',
|
219
|
-
'Ossetian, Ossetic': 'os',
|
220
|
-
'Persian': 'fa',
|
221
|
-
'Punjabi, Panjabi': 'pa',
|
222
|
-
'Quechua': 'qu',
|
223
|
-
'Romanian, Moldavian, Moldovan': 'ro',
|
224
|
-
'Russian': 'ru',
|
225
|
-
'Samoan': 'sm',
|
226
|
-
'Sanskrit': 'sa',
|
227
|
-
'Serbian': 'sr',
|
228
|
-
'Shona': 'sn',
|
229
|
-
'Sinhala, Sinhalese': 'si',
|
230
|
-
'Slovenian': 'sl',
|
231
|
-
'Somali': 'so',
|
232
|
-
'Sundanese': 'su',
|
233
|
-
'Swahili': 'sw',
|
234
|
-
'Swati': 'ss',
|
235
|
-
'Tajik': 'tg',
|
236
|
-
'Tamil': 'ta',
|
237
|
-
'Telugu': 'te',
|
238
|
-
'Thai': 'th',
|
239
|
-
'Tibetan': 'bo',
|
240
|
-
'Tigrinya': 'ti',
|
241
|
-
'Tonga (Tonga Islands)': 'to',
|
242
|
-
'Tsonga': 'ts',
|
243
|
-
'Twi': 'tw',
|
244
|
-
'Ukrainian': 'uk',
|
245
|
-
'Urdu': 'ur',
|
246
|
-
'Uzbek': 'uz',
|
247
|
-
'Venda': 've',
|
248
|
-
'Vietnamese': 'vi',
|
249
|
-
'Volapük': 'vo',
|
250
|
-
'Welsh': 'cy',
|
251
|
-
'Wolof': 'wo',
|
252
|
-
'Xhosa': 'xh',
|
253
|
-
'Yiddish': 'yi',
|
254
|
-
'Yoruba': 'yo',
|
255
|
-
'Zulu': 'zu'}
|
195
|
+
dir_curr_script=os.path.dirname(os.path.abspath(__file__))
|
196
|
+
dir_lang_code=dir_curr_script+"/data/lang_code_iso639.json"
|
197
|
+
print(dir_curr_script,os.getcwd(),dir_lang_code)
|
198
|
+
lang_code_iso639=fload(dir_lang_code)
|
256
199
|
l_lang,l_code = [],[]
|
257
200
|
[[l_lang.append(v),l_code.append(k)] for v,k in lang_code_iso639.items()]
|
258
201
|
try:
|
@@ -340,21 +283,7 @@ def counter(list_, verbose=True):
|
|
340
283
|
# print(f"Return a list of the n most common elements:\n{c.most_common()}")
|
341
284
|
# print(f"Compute the sum of the counts:\n{c.total()}")
|
342
285
|
|
343
|
-
|
344
|
-
"""
|
345
|
-
Check if a string can be converted to a number (int or float).
|
346
|
-
Parameters:
|
347
|
-
- s (str): The string to check.
|
348
|
-
Returns:
|
349
|
-
- bool: True if the string can be converted to a number, False otherwise.
|
350
|
-
"""
|
351
|
-
try:
|
352
|
-
float(s) # Try converting the string to a float
|
353
|
-
return True
|
354
|
-
except ValueError:
|
355
|
-
return False
|
356
|
-
def isnum(s):
|
357
|
-
return is_num(s)
|
286
|
+
|
358
287
|
|
359
288
|
def str2time(time_str, fmt='24'):
|
360
289
|
"""
|
@@ -600,6 +529,7 @@ def paper_size(paper_type_str='a4'):
|
|
600
529
|
if not paper_type:
|
601
530
|
paper_type='a4' # default
|
602
531
|
return df[paper_type].tolist()
|
532
|
+
|
603
533
|
def docx2pdf(dir_docx, dir_pdf=None):
|
604
534
|
if dir_pdf:
|
605
535
|
convert(dir_docx,dir_pdf)
|
@@ -815,7 +745,7 @@ def pdf2img(dir_pdf, dir_save=None, page=None, kind="png",verbose=True, **kws):
|
|
815
745
|
df_dir_img_single_page = pd.DataFrame()
|
816
746
|
dir_single_page = []
|
817
747
|
if verbose:
|
818
|
-
|
748
|
+
pp(pdfinfo_from_path(dir_pdf))
|
819
749
|
if isinstance(page, tuple) and page:
|
820
750
|
page = list(page)
|
821
751
|
if isinstance(page,int):
|
@@ -888,7 +818,14 @@ def fload(fpath, kind=None, **kwargs):
|
|
888
818
|
def load_xlsx(fpath, **kwargs):
|
889
819
|
df = pd.read_excel(fpath, **kwargs)
|
890
820
|
return df
|
891
|
-
|
821
|
+
def load_ipynb(fpath,**kwargs):
|
822
|
+
as_version=kwargs.get("as_version",4)
|
823
|
+
with open(fpath, "r") as file:
|
824
|
+
nb = nbformat.read(file, as_version=as_version)
|
825
|
+
md_exporter = MarkdownExporter()
|
826
|
+
md_body, _ = md_exporter.from_notebook_node(nb)
|
827
|
+
return md_body
|
828
|
+
|
892
829
|
def load_pdf(fpath, page='all', verbose=False, **kwargs):
|
893
830
|
"""
|
894
831
|
Parameters:
|
@@ -950,7 +887,7 @@ def fload(fpath, kind=None, **kwargs):
|
|
950
887
|
|
951
888
|
kind = kind.lstrip('.').lower()
|
952
889
|
img_types=[ 'bmp','eps', 'gif', 'icns', 'ico', 'im', 'jpg','jpeg', 'jpeg2000','msp', 'pcx', 'png', 'ppm', 'sgi', 'spider', 'tga','tiff','webp',"json"]
|
953
|
-
doc_types = ["docx", "txt", "md", "html", "json", "yaml", "xml", "csv", "xlsx", "pdf"]
|
890
|
+
doc_types = ["docx", "txt", "md", "html", "json", "yaml", "xml", "csv", "xlsx", "pdf","ipynb"]
|
954
891
|
supported_types = [*doc_types, *img_types]
|
955
892
|
if kind not in supported_types:
|
956
893
|
raise ValueError(f"Error:\n{kind} is not in the supported list {supported_types}")
|
@@ -970,6 +907,8 @@ def fload(fpath, kind=None, **kwargs):
|
|
970
907
|
return load_csv(fpath, **kwargs)
|
971
908
|
elif kind == "xlsx":
|
972
909
|
return load_xlsx(fpath, **kwargs)
|
910
|
+
elif kind == "ipynb":
|
911
|
+
return load_ipynb(fpath, **kwargs)
|
973
912
|
elif kind == "pdf":
|
974
913
|
print('usage:load_pdf(fpath, page="all", verbose=False)')
|
975
914
|
return load_pdf(fpath, **kwargs)
|
@@ -1093,7 +1032,25 @@ def fsave(
|
|
1093
1032
|
df = pd.DataFrame(data)
|
1094
1033
|
df.to_excel(fpath, **kwargs)
|
1095
1034
|
|
1035
|
+
def save_ipynb(fpath,data,**kwargs):
|
1036
|
+
# Split the content by code fences to distinguish between code and markdown
|
1037
|
+
parts = data.split('```')
|
1038
|
+
cells = []
|
1096
1039
|
|
1040
|
+
for i, part in enumerate(parts):
|
1041
|
+
if i % 2 == 0:
|
1042
|
+
# Even index: markdown content
|
1043
|
+
cells.append(nbf.v4.new_markdown_cell(part.strip()))
|
1044
|
+
else:
|
1045
|
+
# Odd index: code content
|
1046
|
+
cells.append(nbf.v4.new_code_cell(part.strip()))
|
1047
|
+
# Create a new notebook
|
1048
|
+
nb = nbformat.v4.new_notebook()
|
1049
|
+
nb['cells'] = cells
|
1050
|
+
# Write the notebook to a file
|
1051
|
+
with open(fpath, 'w', encoding='utf-8') as ipynb_file:
|
1052
|
+
nbf.write(fpath, ipynb_file)
|
1053
|
+
|
1097
1054
|
# def save_json(fpath, data, **kwargs):
|
1098
1055
|
# with open(fpath, "w") as file:
|
1099
1056
|
# json.dump(data, file, **kwargs)
|
@@ -1152,6 +1109,7 @@ def fsave(
|
|
1152
1109
|
"json",
|
1153
1110
|
"xml",
|
1154
1111
|
"yaml",
|
1112
|
+
"ipynb"
|
1155
1113
|
]:
|
1156
1114
|
print(
|
1157
1115
|
f"Warning:\n{kind} is not in the supported list ['docx', 'txt', 'md', 'html', 'pdf', 'csv', 'xlsx', 'json', 'xml', 'yaml']"
|
@@ -1168,19 +1126,17 @@ def fsave(
|
|
1168
1126
|
elif kind == "pdf":
|
1169
1127
|
save_pdf(fpath, content, font_name, font_size)
|
1170
1128
|
elif kind == "csv":
|
1171
|
-
save_csv(
|
1172
|
-
fpath, content, **kwargs
|
1173
|
-
) # Assuming content is in tabular form (list of dicts or DataFrame)
|
1129
|
+
save_csv(fpath, content, **kwargs)
|
1174
1130
|
elif kind == "xlsx":
|
1175
|
-
save_xlsx(
|
1176
|
-
fpath, content, **kwargs
|
1177
|
-
) # Assuming content is in tabular form (list of dicts or DataFrame)
|
1131
|
+
save_xlsx(fpath, content, **kwargs)
|
1178
1132
|
elif kind == "json":
|
1179
|
-
save_json(fpath, content)
|
1133
|
+
save_json(fpath, content)
|
1180
1134
|
elif kind == "xml":
|
1181
|
-
save_xml(fpath, content)
|
1135
|
+
save_xml(fpath, content)
|
1182
1136
|
elif kind == "yaml":
|
1183
|
-
save_yaml(fpath, content, **kwargs)
|
1137
|
+
save_yaml(fpath, content, **kwargs)
|
1138
|
+
elif kind == "ipynb":
|
1139
|
+
save_ipynb(fpath, content, **kwargs)
|
1184
1140
|
else:
|
1185
1141
|
try:
|
1186
1142
|
netfinder.downloader(url=content, dir_save=dirname(fpath), kind=kind)
|
@@ -1285,42 +1241,19 @@ def isa(*args,**kwargs):
|
|
1285
1241
|
elif 'zip' in contains.lower():
|
1286
1242
|
return is_zip(fpath)
|
1287
1243
|
elif 'dir' in contains.lower() or ('f' in contains.lower() and 'd' in contains.lower()):
|
1288
|
-
return
|
1244
|
+
return os.path.isdir(fpath)
|
1289
1245
|
elif 'fi' in contains.lower():#file
|
1290
1246
|
return os.path.isfile(fpath)
|
1247
|
+
elif 'num' in contains.lower():#file
|
1248
|
+
return os.path.isfile(fpath)
|
1249
|
+
elif 'text' in contains.lower() or 'txt' in contains.lower():#file
|
1250
|
+
return is_text(fpath)
|
1251
|
+
elif 'color' in contains.lower():#file
|
1252
|
+
return is_str_color(fpath)
|
1291
1253
|
else:
|
1292
1254
|
print(f"{contains} was not set up correctly")
|
1293
1255
|
return False
|
1294
1256
|
|
1295
|
-
def is_image(fpath):
|
1296
|
-
mime_type, _ = mimetypes.guess_type(fpath)
|
1297
|
-
if mime_type and mime_type.startswith('image'):
|
1298
|
-
return True
|
1299
|
-
else:
|
1300
|
-
return False
|
1301
|
-
|
1302
|
-
def is_document(fpath):
|
1303
|
-
mime_type, _ = mimetypes.guess_type(fpath)
|
1304
|
-
if mime_type and (
|
1305
|
-
mime_type.startswith('text/') or
|
1306
|
-
mime_type == 'application/pdf' or
|
1307
|
-
mime_type == 'application/msword' or
|
1308
|
-
mime_type == 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' or
|
1309
|
-
mime_type == 'application/vnd.ms-excel' or
|
1310
|
-
mime_type == 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' or
|
1311
|
-
mime_type == 'application/vnd.ms-powerpoint' or
|
1312
|
-
mime_type == 'application/vnd.openxmlformats-officedocument.presentationml.presentation'
|
1313
|
-
):
|
1314
|
-
return True
|
1315
|
-
else:
|
1316
|
-
return False
|
1317
|
-
|
1318
|
-
def is_zip(fpath):
|
1319
|
-
mime_type, _ = mimetypes.guess_type(fpath)
|
1320
|
-
if mime_type == 'application/zip':
|
1321
|
-
return True
|
1322
|
-
else:
|
1323
|
-
return False
|
1324
1257
|
def listdir(
|
1325
1258
|
rootdir,
|
1326
1259
|
kind="folder",
|
@@ -1428,21 +1361,15 @@ def list_func(lib_name, opt="call"):
|
|
1428
1361
|
funcs = dir(lib_name)
|
1429
1362
|
return funcs
|
1430
1363
|
def func_list(lib_name, opt="call"):
|
1431
|
-
|
1432
|
-
funcs = [func for func in dir(lib_name) if callable(getattr(lib_name, func))]
|
1433
|
-
else:
|
1434
|
-
funcs = dir(lib_name)
|
1435
|
-
return funcs
|
1364
|
+
return list_func(lib_name, opt=opt)
|
1436
1365
|
|
1437
1366
|
def newfolder(*args, **kwargs):
|
1438
1367
|
"""
|
1439
1368
|
newfolder(pardir, chdir)
|
1440
|
-
|
1441
1369
|
Args:
|
1442
1370
|
pardir (dir): parent dir
|
1443
1371
|
chdir (str): children dir
|
1444
1372
|
overwrite (bool): overwrite?
|
1445
|
-
|
1446
1373
|
Returns:
|
1447
1374
|
mkdir, giving a option if exists_ok or not
|
1448
1375
|
"""
|
@@ -1501,33 +1428,29 @@ def newfolder(*args, **kwargs):
|
|
1501
1428
|
return rootdir
|
1502
1429
|
|
1503
1430
|
def figsave(*args,dpi=300):
|
1504
|
-
|
1431
|
+
dir_save = None
|
1505
1432
|
fname = None
|
1506
|
-
|
1507
1433
|
for arg in args:
|
1508
1434
|
if isinstance(arg, str):
|
1509
1435
|
if '/' in arg or '\\' in arg:
|
1510
|
-
|
1436
|
+
dir_save = arg
|
1511
1437
|
elif '/' not in arg and '\\' not in arg:
|
1512
1438
|
fname = arg
|
1513
|
-
|
1514
1439
|
# Backup original values
|
1515
|
-
if '/' in
|
1516
|
-
if
|
1517
|
-
|
1518
|
-
elif '\\' in
|
1519
|
-
if
|
1520
|
-
|
1440
|
+
if '/' in dir_save:
|
1441
|
+
if dir_save[-1] != '/':
|
1442
|
+
dir_save = dir_save + '/'
|
1443
|
+
elif '\\' in dir_save:
|
1444
|
+
if dir_save[-1] != '\\':
|
1445
|
+
dir_save = dir_save + '\\'
|
1521
1446
|
else:
|
1522
|
-
raise ValueError('Check the Path of
|
1523
|
-
|
1447
|
+
raise ValueError('Check the Path of dir_save Directory')
|
1524
1448
|
ftype = fname.split('.')[-1]
|
1525
1449
|
if len(fname.split('.')) == 1:
|
1526
1450
|
ftype = 'nofmt'
|
1527
|
-
fname =
|
1451
|
+
fname = dir_save + fname + '.' + ftype
|
1528
1452
|
else:
|
1529
|
-
fname =
|
1530
|
-
|
1453
|
+
fname = dir_save + fname
|
1531
1454
|
# Save figure based on file type
|
1532
1455
|
if ftype.lower() == 'eps':
|
1533
1456
|
plt.savefig(fname, format='eps', bbox_inches='tight')
|
@@ -1552,295 +1475,55 @@ def figsave(*args,dpi=300):
|
|
1552
1475
|
plt.savefig(fname, format='emf', dpi=dpi, bbox_inches='tight')
|
1553
1476
|
elif ftype.lower() == 'fig':
|
1554
1477
|
plt.savefig(fname, format='pdf', bbox_inches='tight',dpi=dpi)
|
1555
|
-
|
1556
1478
|
print(f'\nSaved @: dpi={dpi}\n{fname}')
|
1557
1479
|
|
1558
1480
|
|
1559
|
-
# ==============FuncStars(ax,x1=1,x2=2, yscale=0.9, pval=0.01)====================================================
|
1560
|
-
# Usage:
|
1561
|
-
# FuncStars(ax, x1=2, x2=3, yscale=0.99, pval=0.02)
|
1562
|
-
# =============================================================================
|
1563
|
-
|
1564
|
-
# FuncStars --v 0.1.1
|
1565
|
-
def FuncStars(ax,
|
1566
|
-
pval=None,
|
1567
|
-
Ylim=None,
|
1568
|
-
Xlim=None,
|
1569
|
-
symbol='*',
|
1570
|
-
yscale=0.95,
|
1571
|
-
x1=0,
|
1572
|
-
x2=1,
|
1573
|
-
alpha=0.05,
|
1574
|
-
fontsize=14,
|
1575
|
-
fontsize_note=6,
|
1576
|
-
rotation=0,
|
1577
|
-
fontname='Arial',
|
1578
|
-
values_below=None,
|
1579
|
-
linego=True,
|
1580
|
-
linestyle='-',
|
1581
|
-
linecolor='k',
|
1582
|
-
linewidth=.8,
|
1583
|
-
nsshow='off',
|
1584
|
-
symbolcolor='k',
|
1585
|
-
tailindicator=[0.06, 0.06],
|
1586
|
-
report=None,
|
1587
|
-
report_scale=-0.1,
|
1588
|
-
report_loc=None):
|
1589
|
-
|
1590
|
-
|
1591
|
-
if ax is None:
|
1592
|
-
ax = plt.gca()
|
1593
|
-
if Ylim is None:
|
1594
|
-
Ylim = plt.gca().get_ylim()
|
1595
|
-
if Xlim is None:
|
1596
|
-
Xlim = ax.get_xlim()
|
1597
|
-
if report_loc is None and report is not None:
|
1598
|
-
report_loc = np.min(Ylim) + report_scale*np.abs(np.diff(Ylim))
|
1599
|
-
if report_scale > 0:
|
1600
|
-
report_scale = -np.abs(report_scale)
|
1601
|
-
|
1602
|
-
yscale = np.float64(yscale)
|
1603
|
-
y_loc = np.min(Ylim) + yscale*(np.max(Ylim)-np.min(Ylim))
|
1604
|
-
xcenter = np.mean([x1, x2])
|
1605
|
-
|
1606
|
-
# ns / *
|
1607
|
-
if alpha < pval:
|
1608
|
-
if nsshow == 'on':
|
1609
|
-
ns_str = f'p={round(pval, 3)}' if pval < 0.9 else 'ns'
|
1610
|
-
color = 'm' if pval < 0.1 else 'k'
|
1611
|
-
plt.text(xcenter, y_loc, ns_str,
|
1612
|
-
ha='center', va='bottom', # 'center_baseline',
|
1613
|
-
fontsize=fontsize-6 if fontsize > 6 else fontsize,
|
1614
|
-
fontname=fontname, color=color, rotation=rotation
|
1615
|
-
# bbox=dict(facecolor=None, edgecolor=None, color=None, linewidth=None)
|
1616
|
-
)
|
1617
|
-
elif 0.01 < pval <= alpha:
|
1618
|
-
plt.text(xcenter, y_loc, symbol,
|
1619
|
-
ha='center', va='center_baseline',
|
1620
|
-
fontsize=fontsize, fontname=fontname, color=symbolcolor)
|
1621
|
-
elif 0.001 < pval <= 0.01:
|
1622
|
-
plt.text(xcenter, y_loc, symbol * 2,
|
1623
|
-
ha='center', va='center_baseline',
|
1624
|
-
fontsize=fontsize, fontname=fontname, color=symbolcolor)
|
1625
|
-
elif 0 < pval <= 0.001:
|
1626
|
-
plt.text(xcenter, y_loc, symbol * 3,
|
1627
|
-
ha='center', va='center_baseline',
|
1628
|
-
fontsize=fontsize, fontname=fontname, color=symbolcolor)
|
1629
|
-
|
1630
|
-
# lines indicators
|
1631
|
-
if linego: # and 0 < pval <= 0.05:
|
1632
|
-
print(pval)
|
1633
|
-
print(linego)
|
1634
|
-
# horizontal line
|
1635
|
-
if yscale < 0.99:
|
1636
|
-
plt.plot([x1 + np.abs(np.diff(Xlim)) * 0.01,
|
1637
|
-
x2 - np.abs(np.diff(Xlim)) * 0.01],
|
1638
|
-
[y_loc - np.abs(np.diff(Ylim)) * .03,
|
1639
|
-
y_loc - np.abs(np.diff(Ylim)) * .03],
|
1640
|
-
linestyle=linestyle, color=linecolor, linewidth=linewidth)
|
1641
|
-
# vertical line
|
1642
|
-
plt.plot([x1 + np.abs(np.diff(Xlim)) * 0.01,
|
1643
|
-
x1 + np.abs(np.diff(Xlim)) * 0.01],
|
1644
|
-
[y_loc - np.abs(np.diff(Ylim)) * tailindicator[0],
|
1645
|
-
y_loc - np.abs(np.diff(Ylim)) * .03],
|
1646
|
-
linestyle=linestyle, color=linecolor, linewidth=linewidth)
|
1647
|
-
plt.plot([x2 - np.abs(np.diff(Xlim)) * 0.01,
|
1648
|
-
x2 - np.abs(np.diff(Xlim)) * 0.01],
|
1649
|
-
[y_loc - np.abs(np.diff(Ylim)) * tailindicator[1],
|
1650
|
-
y_loc - np.abs(np.diff(Ylim)) * .03],
|
1651
|
-
linestyle=linestyle, color=linecolor, linewidth=linewidth)
|
1652
|
-
else:
|
1653
|
-
plt.plot([x1 + np.abs(np.diff(Xlim)) * 0.01,
|
1654
|
-
x2 - np.abs(np.diff(Xlim)) * 0.01],
|
1655
|
-
[np.min(Ylim) + 0.95*(np.max(Ylim)-np.min(Ylim)) - np.abs(np.diff(Ylim)) * 0.002,
|
1656
|
-
np.min(Ylim) + 0.95*(np.max(Ylim)-np.min(Ylim)) - np.abs(np.diff(Ylim)) * 0.002],
|
1657
|
-
linestyle=linestyle, color=linecolor, linewidth=linewidth)
|
1658
|
-
# vertical line
|
1659
|
-
plt.plot([x1 + np.abs(np.diff(Xlim)) * 0.01,
|
1660
|
-
x1 + np.abs(np.diff(Xlim)) * 0.01],
|
1661
|
-
[np.min(Ylim) + 0.95*(np.max(Ylim)-np.min(Ylim)) - np.abs(np.diff(Ylim)) * tailindicator[0],
|
1662
|
-
np.min(Ylim) + 0.95*(np.max(Ylim)-np.min(Ylim)) - np.abs(np.diff(Ylim)) * 0.002],
|
1663
|
-
linestyle=linestyle, color=linecolor, linewidth=linewidth)
|
1664
|
-
plt.plot([x2 - np.abs(np.diff(Xlim)) * 0.01,
|
1665
|
-
x2 - np.abs(np.diff(Xlim)) * 0.01],
|
1666
|
-
[np.min(Ylim) + 0.95*(np.max(Ylim)-np.min(Ylim)) - np.abs(np.diff(Ylim)) * tailindicator[1],
|
1667
|
-
np.min(Ylim) + 0.95*(np.max(Ylim)-np.min(Ylim)) - np.abs(np.diff(Ylim)) * 0.002],
|
1668
|
-
linestyle=linestyle, color=linecolor, linewidth=linewidth)
|
1669
|
-
|
1670
|
-
if values_below is not None:
|
1671
|
-
plt.text(xcenter, y_loc * (-0.1), values_below,
|
1672
|
-
ha='center', va='bottom', # 'center_baseline', rotation=rotation,
|
1673
|
-
fontsize=fontsize_note, fontname=fontname, color='k')
|
1674
|
-
|
1675
|
-
# report / comments
|
1676
|
-
if report is not None:
|
1677
|
-
plt.text(xcenter, report_loc, report,
|
1678
|
-
ha='left', va='bottom', # 'center_baseline', rotation=rotation,
|
1679
|
-
fontsize=fontsize_note, fontname=fontname, color='.7')
|
1680
1481
|
def is_str_color(s):
|
1681
1482
|
# Regular expression pattern for hexadecimal color codes
|
1682
1483
|
color_code_pattern = r"^#([A-Fa-f0-9]{6}|[A-Fa-f0-9]{8})$"
|
1683
1484
|
return re.match(color_code_pattern, s) is not None
|
1684
|
-
|
1685
|
-
|
1686
|
-
if (
|
1687
|
-
|
1688
|
-
|
1689
|
-
|
1690
|
-
|
1691
|
-
|
1692
|
-
|
1693
|
-
|
1694
|
-
|
1695
|
-
|
1696
|
-
|
1697
|
-
|
1698
|
-
|
1699
|
-
|
1700
|
-
|
1701
|
-
|
1702
|
-
|
1703
|
-
l_style2 = ["--", "-."]
|
1704
|
-
l_style1 = ["-", ":"]
|
1705
|
-
l_mark = ["o", "+", "*", ".", "x", "_", "|", "s", "d", "^", "v", ">", "<", "p", "h"]
|
1706
|
-
|
1707
|
-
# Check each argument
|
1708
|
-
for iarg in range(len(args)):
|
1709
|
-
if (
|
1710
|
-
isinstance(args[iarg], np.ndarray)
|
1711
|
-
and args[iarg].ndim == 2
|
1712
|
-
and min(args[iarg].shape) > 1
|
1713
|
-
and max(args[iarg].shape) > 1
|
1714
|
-
):
|
1715
|
-
y = args[iarg]
|
1716
|
-
# Except y, continuous data is 'F'
|
1717
|
-
if (isinstance(args[iarg], np.ndarray) and args[iarg].ndim == 1) or isinstance(
|
1718
|
-
args[iarg], range
|
1719
|
-
):
|
1720
|
-
x = args[iarg]
|
1721
|
-
if isinstance(x, range):
|
1722
|
-
x = np.arange(start=x.start, stop=x.stop, step=x.step)
|
1723
|
-
# Only one number( 0~1), 'alpha' / color
|
1724
|
-
if isinstance(args[iarg], (int, float)):
|
1725
|
-
if np.size(args[iarg]) == 1 and 0 <= args[iarg] <= 1:
|
1726
|
-
alpha = args[iarg]
|
1727
|
-
if isinstance(args[iarg], (list, tuple)) and np.size(args[iarg]) == 3:
|
1728
|
-
acolor = args[iarg]
|
1729
|
-
acolor = tuple(acolor) if isinstance(acolor, list) else acolor
|
1730
|
-
# Color / plotStyle /
|
1731
|
-
if (
|
1732
|
-
isinstance(args[iarg], str)
|
1733
|
-
and len(args[iarg]) == 1
|
1734
|
-
and args[iarg] in l_c_one
|
1735
|
-
):
|
1736
|
-
acolor = args[iarg]
|
1737
|
-
else:
|
1738
|
-
if isinstance(args[iarg], str):
|
1739
|
-
if args[iarg] in ["sem", "std"]:
|
1740
|
-
paraStdSem = args[iarg]
|
1741
|
-
if args[iarg].startswith("#"):
|
1742
|
-
acolor=hue2rgb(args[iarg])
|
1743
|
-
if str2list(args[iarg])[0] in l_c_one:
|
1744
|
-
if len(args[iarg]) == 3:
|
1745
|
-
k = [i for i in str2list(args[iarg]) if i in l_c_one]
|
1746
|
-
if k != []:
|
1747
|
-
acolor = k[0]
|
1748
|
-
st = [i for i in l_style2 if i in args[iarg]]
|
1749
|
-
if st != []:
|
1750
|
-
plotStyle = st[0]
|
1751
|
-
elif len(args[iarg]) == 2:
|
1752
|
-
k = [i for i in str2list(args[iarg]) if i in l_c_one]
|
1753
|
-
if k != []:
|
1754
|
-
acolor = k[0]
|
1755
|
-
mk = [i for i in str2list(args[iarg]) if i in l_mark]
|
1756
|
-
if mk != []:
|
1757
|
-
plotMarker = mk[0]
|
1758
|
-
st = [i for i in l_style1 if i in args[iarg]]
|
1759
|
-
if st != []:
|
1760
|
-
plotStyle = st[0]
|
1761
|
-
if len(args[iarg]) == 1:
|
1762
|
-
k = [i for i in str2list(args[iarg]) if i in l_c_one]
|
1763
|
-
if k != []:
|
1764
|
-
acolor = k[0]
|
1765
|
-
mk = [i for i in str2list(args[iarg]) if i in l_mark]
|
1766
|
-
if mk != []:
|
1767
|
-
plotMarker = mk[0]
|
1768
|
-
st = [i for i in l_style1 if i in args[iarg]]
|
1769
|
-
if st != []:
|
1770
|
-
plotStyle = st[0]
|
1771
|
-
if len(args[iarg]) == 2:
|
1772
|
-
st = [i for i in l_style2 if i in args[iarg]]
|
1773
|
-
if st != []:
|
1774
|
-
plotStyle = st[0]
|
1775
|
-
# smth
|
1776
|
-
if (
|
1777
|
-
isinstance(args[iarg], (int, float))
|
1778
|
-
and np.size(args[iarg]) == 1
|
1779
|
-
and args[iarg] >= 1
|
1780
|
-
):
|
1781
|
-
smth = args[iarg]
|
1782
|
-
|
1783
|
-
if "x" not in locals() or x is None:
|
1784
|
-
x = np.arange(1, y.shape[1] + 1)
|
1785
|
-
elif len(x) < y.shape[1]:
|
1786
|
-
y = y[:, x]
|
1787
|
-
nRow = y.shape[0]
|
1788
|
-
nCol = y.shape[1]
|
1789
|
-
print(f"y was corrected, please confirm that {nRow} row, {nCol} col")
|
1485
|
+
def is_num(s):
|
1486
|
+
"""
|
1487
|
+
Check if a string can be converted to a number (int or float).
|
1488
|
+
Parameters:
|
1489
|
+
- s (str): The string to check.
|
1490
|
+
Returns:
|
1491
|
+
- bool: True if the string can be converted to a number, False otherwise.
|
1492
|
+
"""
|
1493
|
+
try:
|
1494
|
+
float(s) # Try converting the string to a float
|
1495
|
+
return True
|
1496
|
+
except ValueError:
|
1497
|
+
return False
|
1498
|
+
def isnum(s):
|
1499
|
+
return is_num(s)
|
1500
|
+
def is_image(fpath):
|
1501
|
+
mime_type, _ = mimetypes.guess_type(fpath)
|
1502
|
+
if mime_type and mime_type.startswith('image'):
|
1503
|
+
return True
|
1790
1504
|
else:
|
1791
|
-
|
1792
|
-
|
1793
|
-
|
1794
|
-
|
1795
|
-
|
1796
|
-
|
1797
|
-
|
1505
|
+
return False
|
1506
|
+
def is_document(fpath):
|
1507
|
+
mime_type, _ = mimetypes.guess_type(fpath)
|
1508
|
+
if mime_type and (
|
1509
|
+
mime_type.startswith('text/') or
|
1510
|
+
mime_type == 'application/pdf' or
|
1511
|
+
mime_type == 'application/msword' or
|
1512
|
+
mime_type == 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' or
|
1513
|
+
mime_type == 'application/vnd.ms-excel' or
|
1514
|
+
mime_type == 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' or
|
1515
|
+
mime_type == 'application/vnd.ms-powerpoint' or
|
1516
|
+
mime_type == 'application/vnd.openxmlformats-officedocument.presentationml.presentation'
|
1517
|
+
):
|
1518
|
+
return True
|
1798
1519
|
else:
|
1799
|
-
|
1800
|
-
|
1801
|
-
|
1802
|
-
|
1803
|
-
|
1804
|
-
wings = np.nanstd(y, axis=0) / np.sqrt(y.shape[0])
|
1805
|
-
elif paraStdSem == "std":
|
1806
|
-
if smth > 1:
|
1807
|
-
wings = savgol_filter(np.nanstd(y, axis=0), smth, 1)
|
1808
|
-
else:
|
1809
|
-
wings = np.nanstd(y, axis=0)
|
1810
|
-
|
1811
|
-
fill_kws = kwargs.get('fill_kws', {})
|
1812
|
-
line_kws = kwargs.get('line_kws', {})
|
1813
|
-
fill = ax.fill_between(x, yMean + wings, yMean - wings, color=acolor, alpha=alpha, lw=0,**fill_kws)
|
1814
|
-
if line_kws != {} and not any(key.lower() in ['lw', 'linewidth'] for key in line_kws.keys()):
|
1815
|
-
line = ax.plot(x, yMean, color=acolor, lw=1.5, ls=plotStyle, marker=plotMarker, **line_kws)
|
1520
|
+
return False
|
1521
|
+
def is_zip(fpath):
|
1522
|
+
mime_type, _ = mimetypes.guess_type(fpath)
|
1523
|
+
if mime_type == 'application/zip':
|
1524
|
+
return True
|
1816
1525
|
else:
|
1817
|
-
|
1818
|
-
return line[0], fill
|
1819
|
-
|
1820
|
-
|
1821
|
-
# =============================================================================
|
1822
|
-
# # for plot figures {Qiu et al.2023}
|
1823
|
-
# =============================================================================
|
1824
|
-
# =============================================================================
|
1825
|
-
# plt.rcParams.update({'figure.max_open_warning': 0})
|
1826
|
-
# # Output matplotlib figure to SVG with text as text, not curves
|
1827
|
-
# plt.rcParams['svg.fonttype'] = 'none'
|
1828
|
-
# plt.rcParams['pdf.fonttype'] = 42
|
1829
|
-
#
|
1830
|
-
# plt.rc('text', usetex=False)
|
1831
|
-
# # plt.style.use('ggplot')
|
1832
|
-
# plt.style.use('science')
|
1833
|
-
# plt.rc('font', family='serif')
|
1834
|
-
# plt.rcParams.update({
|
1835
|
-
# "font.family": "serif", # specify font family here
|
1836
|
-
# "font.serif": ["Arial"], # specify font here
|
1837
|
-
# "font.size": 11})
|
1838
|
-
# # plt.tight_layout()
|
1839
|
-
# =============================================================================
|
1840
|
-
# =============================================================================
|
1841
|
-
# # axis spine
|
1842
|
-
# # use it like: adjust_spines(ax, ['left', 'bottom'])
|
1843
|
-
# =============================================================================
|
1526
|
+
return False
|
1844
1527
|
|
1845
1528
|
|
1846
1529
|
def adjust_spines(ax=None, spines=['left', 'bottom'],distance=2):
|
@@ -1883,692 +1566,6 @@ def add_colorbar(im, width=None, pad=None, **kwargs):
|
|
1883
1566
|
# =============================================================================
|
1884
1567
|
|
1885
1568
|
|
1886
|
-
def FuncCmpt(X1, X2, pmc='auto', pair='unpaired'):
|
1887
|
-
# output = {}
|
1888
|
-
|
1889
|
-
# pmc correction: 'parametric'/'non-parametric'/'auto'
|
1890
|
-
# meawhile get the opposite setting (to compare the results)
|
1891
|
-
def corr_pmc(pmc):
|
1892
|
-
cfg_pmc = None
|
1893
|
-
if pmc.lower() in {'pmc', 'parametric'} and pmc.lower() not in {'npmc', 'nonparametric', 'non-parametric'}:
|
1894
|
-
cfg_pmc = 'parametric'
|
1895
|
-
elif pmc.lower() in {'npmc', 'nonparametric', 'non-parametric'} and pmc.lower() not in {'pmc', 'parametric'}:
|
1896
|
-
cfg_pmc = 'non-parametric'
|
1897
|
-
else:
|
1898
|
-
cfg_pmc = 'auto'
|
1899
|
-
return cfg_pmc
|
1900
|
-
|
1901
|
-
def corr_pair(pair):
|
1902
|
-
cfg_pair = None
|
1903
|
-
if 'pa' in pair.lower() and 'np' not in pair.lower():
|
1904
|
-
cfg_pair = 'paired'
|
1905
|
-
elif 'np' in pair.lower():
|
1906
|
-
cfg_pair = 'unpaired'
|
1907
|
-
return cfg_pair
|
1908
|
-
|
1909
|
-
def check_normality(data):
|
1910
|
-
stat_shapiro, pval_shapiro = stats.shapiro(data)
|
1911
|
-
if pval_shapiro > 0.05:
|
1912
|
-
Normality = True
|
1913
|
-
else:
|
1914
|
-
Normality = False
|
1915
|
-
print(f'\n normally distributed\n') if Normality else print(
|
1916
|
-
f'\n NOT normally distributed\n')
|
1917
|
-
return Normality
|
1918
|
-
|
1919
|
-
def sub_cmpt_2group(X1, X2, cfg_pmc='pmc', pair='unpaired'):
|
1920
|
-
output = {}
|
1921
|
-
nX1 = np.sum(~np.isnan(X1))
|
1922
|
-
nX2 = np.sum(~np.isnan(X2))
|
1923
|
-
if cfg_pmc == 'parametric' or cfg_pmc == 'auto':
|
1924
|
-
# VarType correction by checking variance Type via "levene"
|
1925
|
-
stat_lev, pval_lev = stats.levene(
|
1926
|
-
X1, X2, center='median', proportiontocut=0.05)
|
1927
|
-
VarType = True if pval_lev > 0.05 and nX1 == nX2 else False
|
1928
|
-
|
1929
|
-
if 'np' in pair: # 'unpaired'
|
1930
|
-
if VarType and Normality:
|
1931
|
-
# The independent t-test requires that the dependent variable is approximately normally
|
1932
|
-
# distributed within each group
|
1933
|
-
# Note: Technically, it is the residuals that need to be normally distributed, but for
|
1934
|
-
# an independent t-test, both will give you the same result.
|
1935
|
-
stat_value, pval= stats.ttest_ind(
|
1936
|
-
X1, X2, axis=0, equal_var=True, nan_policy='omit', alternative='two-sided')
|
1937
|
-
notes_stat = 'unpaired t test'
|
1938
|
-
notes_APA = f't({nX1+nX2-2})={round(stat_value, 5)},p={round(pval, 5)}'
|
1939
|
-
else:
|
1940
|
-
# If the Levene's Test for Equality of Variances is statistically significant,
|
1941
|
-
# which indicates that the group variances are unequal in the population, you
|
1942
|
-
# can correct for this violation by not using the pooled estimate for the error
|
1943
|
-
# term for the t-statistic, but instead using an adjustment to the degrees of
|
1944
|
-
# freedom using the Welch-Satterthwaite method
|
1945
|
-
stat_value, pval= stats.ttest_ind(
|
1946
|
-
X1, X2, axis=0, equal_var=False, nan_policy='omit', alternative='two-sided')
|
1947
|
-
notes_stat = 'Welchs t-test'
|
1948
|
-
# note: APA FORMAT
|
1949
|
-
notes_APA = f't({nX1+nX2-2})={round(stat_value, 5)},p={round(pval, 5)}'
|
1950
|
-
elif 'pa' in pair and 'np' not in pair: # 'paired'
|
1951
|
-
# the paired-samples t-test is considered “robust” in handling violations of normality
|
1952
|
-
# to some extent. It can still yield valid results even if the data is not normally
|
1953
|
-
# distributed. Therefore, this test typically requires only approximately normal data
|
1954
|
-
stat_value, pval= stats.ttest_rel(
|
1955
|
-
X1, X2, axis=0, nan_policy='omit', alternative='two-sided')
|
1956
|
-
notes_stat = 'paired t test'
|
1957
|
-
# note: APA FORMAT
|
1958
|
-
notes_APA = f't({sum([nX1-1])})={round(stat_value, 5)},p={round(pval, 5)}'
|
1959
|
-
elif cfg_pmc == 'non-parametric':
|
1960
|
-
if 'np' in pair: # Perform Mann-Whitney
|
1961
|
-
stat_value, pval = stats.mannwhitneyu(
|
1962
|
-
X1, X2, method='exact', nan_policy='omit')
|
1963
|
-
notes_stat = 'Mann-Whitney U'
|
1964
|
-
if nX1 == nX2:
|
1965
|
-
notes_APA = f'U(n={nX1})={round(stat_value, 5)},p={round(pval, 5)}'
|
1966
|
-
else:
|
1967
|
-
notes_APA = f'U(n1={nX1},n2={nX2})={round(stat_value, 5)},p={round(pval, 5)}'
|
1968
|
-
elif 'pa' in pair and 'np' not in pair: # Wilcoxon signed-rank test
|
1969
|
-
stat_value, pval = stats.wilcoxon(
|
1970
|
-
X1, X2, method='exact', nan_policy='omit')
|
1971
|
-
notes_stat = 'Wilcoxon signed-rank'
|
1972
|
-
if nX1 == nX2:
|
1973
|
-
notes_APA = f'Z(n={nX1})={round(stat_value, 5)},p={round(pval, 5)}'
|
1974
|
-
else:
|
1975
|
-
notes_APA = f'Z(n1={nX1},n2={nX2})={round(stat_value, 5)},p={round(pval, 5)}'
|
1976
|
-
|
1977
|
-
# filling output
|
1978
|
-
output['stat'] = stat_value
|
1979
|
-
output['pval'] = pval
|
1980
|
-
output['method'] = notes_stat
|
1981
|
-
output['APA'] = notes_APA
|
1982
|
-
|
1983
|
-
print(f"{output['method']}\n {notes_APA}\n\n")
|
1984
|
-
|
1985
|
-
return output, pval
|
1986
|
-
|
1987
|
-
Normality1 = check_normality(X1)
|
1988
|
-
Normality2 = check_normality(X2)
|
1989
|
-
Normality = True if all([Normality1, Normality2]) else False
|
1990
|
-
|
1991
|
-
nX1 = np.sum(~np.isnan(X1))
|
1992
|
-
nX2 = np.sum(~np.isnan(X2))
|
1993
|
-
|
1994
|
-
cfg_pmc = corr_pmc(pmc)
|
1995
|
-
cfg_pair = corr_pair(pair)
|
1996
|
-
|
1997
|
-
output, p = sub_cmpt_2group(
|
1998
|
-
X1, X2, cfg_pmc=cfg_pmc, pair=cfg_pair)
|
1999
|
-
return p, output
|
2000
|
-
|
2001
|
-
|
2002
|
-
# ======compare 2 group test===================================================
|
2003
|
-
# # Example
|
2004
|
-
# X1 = [19, 22, 16, 29, 24]
|
2005
|
-
# X2 = [20, 11, 17, 12, 22]
|
2006
|
-
|
2007
|
-
# p, res= FuncCmpt(X1, X2, pmc='pmc', pair='unparrr')
|
2008
|
-
|
2009
|
-
# =============================================================================
|
2010
|
-
|
2011
|
-
# =============================================================================
|
2012
|
-
# # method = ['anova', # 'One-way and N-way ANOVA',
|
2013
|
-
# # 'rm_anova', # 'One-way and two-way repeated measures ANOVA',
|
2014
|
-
# # 'mixed_anova', # 'Two way mixed ANOVA',
|
2015
|
-
# # 'welch_anova', # 'One-way Welch ANOVA',
|
2016
|
-
# # 'kruskal', # 'Non-parametric one-way ANOVA'
|
2017
|
-
# # 'friedman', # Non-parametric one-way repeated measures ANOVA
|
2018
|
-
# # ]
|
2019
|
-
# =============================================================================
|
2020
|
-
|
2021
|
-
|
2022
|
-
# =============================================================================
|
2023
|
-
# # method = ['anova', # 'One-way and N-way ANOVA',
|
2024
|
-
# # 'rm_anova', # 'One-way and two-way repeated measures ANOVA',
|
2025
|
-
# # 'mixed_anova', # 'Two way mixed ANOVA',
|
2026
|
-
# # 'welch_anova', # 'One-way Welch ANOVA',
|
2027
|
-
# # 'kruskal', # 'Non-parametric one-way ANOVA'
|
2028
|
-
# # 'friedman', # Non-parametric one-way repeated measures ANOVA
|
2029
|
-
# # ]
|
2030
|
-
# =============================================================================
|
2031
|
-
def df_wide_long(df):
|
2032
|
-
rows, columns = df.shape
|
2033
|
-
if columns > rows:
|
2034
|
-
return "Wide"
|
2035
|
-
elif rows > columns:
|
2036
|
-
return "Long"
|
2037
|
-
|
2038
|
-
def FuncMultiCmpt(pmc='pmc', pair='unpair', data=None, dv=None, factor=None,
|
2039
|
-
ss_type=2, detailed=True, effsize='np2',
|
2040
|
-
correction='auto', between=None, within=None,
|
2041
|
-
subject=None, group=None
|
2042
|
-
):
|
2043
|
-
|
2044
|
-
def corr_pair(pair):
|
2045
|
-
cfg_pair = None
|
2046
|
-
if 'pa' in pair.lower() and 'np' not in pair.lower():
|
2047
|
-
cfg_pair = 'paired'
|
2048
|
-
elif 'np' in pair.lower():
|
2049
|
-
cfg_pair = 'unpaired'
|
2050
|
-
elif 'mix' in pair.lower():
|
2051
|
-
cfg_pair = 'mix'
|
2052
|
-
return cfg_pair
|
2053
|
-
|
2054
|
-
def check_normality(data):
|
2055
|
-
stat_shapiro, pval_shapiro = stats.shapiro(data)
|
2056
|
-
if pval_shapiro > 0.05:
|
2057
|
-
Normality = True
|
2058
|
-
else:
|
2059
|
-
Normality = False
|
2060
|
-
print(f'\n normally distributed\n') if Normality else print(
|
2061
|
-
f'\n NOT normally distributed\n')
|
2062
|
-
return Normality
|
2063
|
-
|
2064
|
-
def corr_pmc(pmc):
|
2065
|
-
cfg_pmc = None
|
2066
|
-
if pmc.lower() in {'pmc', 'parametric'} and pmc.lower() not in {'upmc', 'npmc', 'nonparametric', 'non-parametric'}:
|
2067
|
-
cfg_pmc = 'parametric'
|
2068
|
-
elif pmc.lower() in {'upmc', 'npmc', 'nonparametric', 'non-parametric'} and pmc.lower() not in {'pmc', 'parametric'}:
|
2069
|
-
cfg_pmc = 'non-parametric'
|
2070
|
-
else:
|
2071
|
-
cfg_pmc = 'auto'
|
2072
|
-
return cfg_pmc
|
2073
|
-
|
2074
|
-
def extract_apa(res_tab):
|
2075
|
-
notes_APA = []
|
2076
|
-
if "ddof1" in res_tab:
|
2077
|
-
for irow in range(res_tab.shape[0]):
|
2078
|
-
note_tmp = f'{res_tab.Source[irow]}:F{round(res_tab.ddof1[irow]),round(res_tab.ddof2[irow])}={round(res_tab.F[irow], 5)},p={round(res_tab["p-unc"][irow], 5)}'
|
2079
|
-
notes_APA.append([note_tmp])
|
2080
|
-
elif "DF" in res_tab:
|
2081
|
-
print(res_tab.shape[0])
|
2082
|
-
for irow in range(res_tab.shape[0]-1):
|
2083
|
-
note_tmp = f'{res_tab.Source[irow]}:F{round(res_tab.DF[irow]),round(res_tab.DF[res_tab.shape[0]-1])}={round(res_tab.F[irow], 5)},p={round(res_tab["p-unc"][irow], 5)}'
|
2084
|
-
notes_APA.append([note_tmp])
|
2085
|
-
notes_APA.append(['NaN'])
|
2086
|
-
elif "DF1" in res_tab: # in 'mix' case
|
2087
|
-
for irow in range(res_tab.shape[0]):
|
2088
|
-
note_tmp = f'{res_tab.Source[irow]}:F{round(res_tab.DF1[irow]),round(res_tab.DF2[irow])}={round(res_tab.F[irow], 5)},p={round(res_tab["p-unc"][irow], 5)}'
|
2089
|
-
notes_APA.append([note_tmp])
|
2090
|
-
return notes_APA
|
2091
|
-
|
2092
|
-
def anovatable(res_tab):
|
2093
|
-
if 'df' in res_tab: # statsmodels
|
2094
|
-
res_tab['mean_sq'] = res_tab[:]['sum_sq']/res_tab[:]['df']
|
2095
|
-
res_tab['est_sq'] = res_tab[:-1]['sum_sq'] / \
|
2096
|
-
sum(res_tab['sum_sq'])
|
2097
|
-
res_tab['omega_sq'] = (res_tab[:-1]['sum_sq']-(res_tab[:-1]['df'] *
|
2098
|
-
res_tab['mean_sq'][-1]))/(sum(res_tab['sum_sq'])+res_tab['mean_sq'][-1])
|
2099
|
-
elif 'DF' in res_tab:
|
2100
|
-
res_tab['MS'] = res_tab[:]['SS']/res_tab[:]['DF']
|
2101
|
-
res_tab['est_sq'] = res_tab[:-1]['SS']/sum(res_tab['SS'])
|
2102
|
-
res_tab['omega_sq'] = (res_tab[:-1]['SS']-(res_tab[:-1]['DF'] *
|
2103
|
-
res_tab['MS'][1]))/(sum(res_tab['SS'])+res_tab['MS'][1])
|
2104
|
-
if 'p-unc' in res_tab:
|
2105
|
-
if 'np2' in res_tab:
|
2106
|
-
res_tab['est_sq'] = res_tab['np2']
|
2107
|
-
if 'p-unc' in res_tab:
|
2108
|
-
res_tab['PR(>F)'] = res_tab['p-unc']
|
2109
|
-
return res_tab
|
2110
|
-
|
2111
|
-
def run_anova(data, dv, factor, ss_type=2, detailed=True, effsize='np2'):
|
2112
|
-
# perform ANOVA
|
2113
|
-
# =============================================================================
|
2114
|
-
# # # ANOVA (input: formula, dataset)
|
2115
|
-
# =============================================================================
|
2116
|
-
# # note: if the data is balanced (equal sample size for each group), Type 1, 2, and 3 sums of squares
|
2117
|
-
# # (typ parameter) will produce similar results.
|
2118
|
-
# lm = ols("values ~ C(group)", data=df).fit()
|
2119
|
-
# res_tab = anova_lm(lm, typ=ss_type)
|
2120
|
-
|
2121
|
-
# # however, it does not provide any effect size measures to tell if the
|
2122
|
-
# # statistical significance is meaningful. The function below calculates
|
2123
|
-
# # eta-squared () and omega-squared (). A quick note, is the exact same
|
2124
|
-
# # thing as except when coming from the ANOVA framework people call it ;
|
2125
|
-
# # is considered a better measure of effect size since it is unbiased in
|
2126
|
-
# # it's calculation by accounting for the degrees of freedom in the model.
|
2127
|
-
# # note: No effect sizes are calculated when using statsmodels.
|
2128
|
-
# # to calculate eta squared, use the sum of squares from the table
|
2129
|
-
# res_tab = anovatable(res_tab)
|
2130
|
-
|
2131
|
-
# =============================================================================
|
2132
|
-
# # alternativ for ANOVA
|
2133
|
-
# =============================================================================
|
2134
|
-
res_tab = pg.anova(dv=dv, between=factor, data=data,
|
2135
|
-
detailed=detailed, ss_type=ss_type, effsize=effsize)
|
2136
|
-
res_tab = anovatable(res_tab)
|
2137
|
-
return res_tab
|
2138
|
-
|
2139
|
-
def run_rmanova(data, dv, factor, subject, correction='auto', detailed=True, effsize='ng2'):
|
2140
|
-
# One-way repeated-measures ANOVA using a long-format dataset.
|
2141
|
-
res_tab = pg.rm_anova(data=data, dv=dv, within=factor,
|
2142
|
-
subject=subject, detailed=detailed, effsize=effsize)
|
2143
|
-
return res_tab
|
2144
|
-
|
2145
|
-
def run_welchanova(data, dv, factor):
|
2146
|
-
# When the groups are balanced and have equal variances, the optimal
|
2147
|
-
# post-hoc test is the Tukey-HSD test (pingouin.pairwise_tukey()). If the
|
2148
|
-
# groups have unequal variances, the Games-Howell test is more adequate
|
2149
|
-
# (pingouin.pairwise_gameshowell()). Results have been tested against R.
|
2150
|
-
res_tab = pg.welch_anova(data=data, dv=dv, between=factor)
|
2151
|
-
res_tab = anovatable(res_tab)
|
2152
|
-
return res_tab
|
2153
|
-
|
2154
|
-
def run_mixedanova(data, dv, between, within, subject, correction='auto', effsize='np2'):
|
2155
|
-
# Notes
|
2156
|
-
# Data are expected to be in long-format (even the repeated measures).
|
2157
|
-
# If your data is in wide-format, you can use the pandas.melt() function
|
2158
|
-
# to convert from wide to long format.
|
2159
|
-
|
2160
|
-
# Warning
|
2161
|
-
# If the between-subject groups are unbalanced(=unequal sample sizes), a
|
2162
|
-
# type II ANOVA will be computed. Note however that SPSS, JAMOVI and JASP
|
2163
|
-
# by default return a type III ANOVA, which may lead to slightly different
|
2164
|
-
# results.
|
2165
|
-
res_tab = pg.mixed_anova(data=data, dv=dv, within=within, subject=subject,
|
2166
|
-
between=between, correction=correction, effsize=effsize)
|
2167
|
-
res_tab = anovatable(res_tab)
|
2168
|
-
return res_tab
|
2169
|
-
|
2170
|
-
def run_friedman(data, dv, factor, subject, method='chisq'):
|
2171
|
-
# Friedman test for repeated measurements
|
2172
|
-
# The Friedman test is used for non-parametric (rank-based) one-way
|
2173
|
-
# repeated measures ANOVA
|
2174
|
-
|
2175
|
-
# check df form ('long' or 'wide')
|
2176
|
-
# df_long = data.melt(ignore_index=False).reset_index()
|
2177
|
-
# if data.describe().shape[1] >= df_long.describe().shape[1]:
|
2178
|
-
# res_tab = pg.friedman(data, method=method)
|
2179
|
-
# else:
|
2180
|
-
# res_tab = pg.friedman(data=df_long, dv='value',
|
2181
|
-
# within="variable", subject="index", method=method)
|
2182
|
-
if "Wide" in df_wide_long(data):
|
2183
|
-
df_long = data.melt(ignore_index=False).reset_index()
|
2184
|
-
res_tab = pg.friedman(data=df_long, dv='value',
|
2185
|
-
within="variable", subject="index", method=method)
|
2186
|
-
else:
|
2187
|
-
res_tab = pg.friedman(data, dv=dv, within=factor, subject=subject,method=method)
|
2188
|
-
res_tab = anovatable(res_tab)
|
2189
|
-
return res_tab
|
2190
|
-
|
2191
|
-
def run_kruskal(data, dv, factor):
|
2192
|
-
# Kruskal-Wallis H-test for independent samples
|
2193
|
-
res_tab = pg.kruskal(data=data, dv=dv, between=factor)
|
2194
|
-
res_tab = anovatable(res_tab)
|
2195
|
-
return res_tab
|
2196
|
-
|
2197
|
-
# Normality Check:
|
2198
|
-
# Conduct normality tests (Shapiro-Wilk) for each group.
|
2199
|
-
# If the data is approximately normally distributed, ANOVA is robust to
|
2200
|
-
# moderate departures from normality, especially with larger sample sizes.
|
2201
|
-
|
2202
|
-
# print(data[factor])
|
2203
|
-
# print(type(data[factor]))
|
2204
|
-
# print(len(data[factor].columns))
|
2205
|
-
# print(data[factor].nunique())
|
2206
|
-
# print(data[factor[0]])
|
2207
|
-
# print(data[factor[0]].unique())
|
2208
|
-
if group is None:
|
2209
|
-
group = factor
|
2210
|
-
|
2211
|
-
# print(f'\ngroup is :\n{data[group]},\ndv is :\n{dv}\n')
|
2212
|
-
norm_array = []
|
2213
|
-
for sub_group in data[group].unique():
|
2214
|
-
norm_curr = check_normality(
|
2215
|
-
data.loc[data[group] == sub_group, dv])
|
2216
|
-
norm_array.append(norm_curr)
|
2217
|
-
norm_all = True if all(norm_array) else False
|
2218
|
-
|
2219
|
-
# Homogeneity of Variances:
|
2220
|
-
# Check for homogeneity of variances (homoscedasticity) among groups.
|
2221
|
-
# Levene's test or Bartlett's test can be used for this purpose.
|
2222
|
-
# If variances are significantly different, consider transformations or use a
|
2223
|
-
# robust ANOVA method.
|
2224
|
-
|
2225
|
-
# # =============================================================================
|
2226
|
-
# # # method1: stats.levene
|
2227
|
-
# # =============================================================================
|
2228
|
-
# # data_array = []
|
2229
|
-
# # for sub_group in df["group"].unique():
|
2230
|
-
# # data_array.append(df.loc[df['group'] == sub_group, 'values'].values)
|
2231
|
-
# # print(data_array)
|
2232
|
-
# # variance_all = stats.levene(data_array[0],data_array[1],data_array[2])
|
2233
|
-
|
2234
|
-
# =============================================================================
|
2235
|
-
# # method2: pingouin.homoscedasticity
|
2236
|
-
# =============================================================================
|
2237
|
-
res_levene = None
|
2238
|
-
variance_all = pg.homoscedasticity(
|
2239
|
-
data, dv=dv, group=group, method='levene', alpha=0.05)
|
2240
|
-
res_levene = True if variance_all.iloc[0,1] > 0.05 else False
|
2241
|
-
# =============================================================================
|
2242
|
-
# # ANOVA Assumptions:
|
2243
|
-
# # Ensure that the assumptions of independence, homogeneity of variances, and
|
2244
|
-
# # normality are reasonably met before proceeding.
|
2245
|
-
# =============================================================================
|
2246
|
-
notes_norm = 'normally' if norm_all else 'NOT-normally'
|
2247
|
-
notes_variance = 'equal' if res_levene else 'unequal'
|
2248
|
-
print(f'Data is {notes_norm} distributed, shows {notes_variance} variance')
|
2249
|
-
|
2250
|
-
cfg_pmc = corr_pmc(pmc)
|
2251
|
-
cfg_pair = corr_pair(pair)
|
2252
|
-
output = {}
|
2253
|
-
if (cfg_pmc == 'parametric') or (cfg_pmc == 'auto'):
|
2254
|
-
if 'np' in cfg_pair: # 'unpaired'
|
2255
|
-
if cfg_pmc == 'auto':
|
2256
|
-
if norm_all:
|
2257
|
-
if res_levene:
|
2258
|
-
res_tab = run_anova(data, dv, factor, ss_type=ss_type,
|
2259
|
-
detailed=True, effsize='np2')
|
2260
|
-
notes_stat = f'{data[factor].nunique()} Way ANOVA'
|
2261
|
-
notes_APA = extract_apa(res_tab)
|
2262
|
-
|
2263
|
-
else:
|
2264
|
-
res_tab = run_welchanova(data, dv, factor)
|
2265
|
-
notes_stat = f'{data[factor].nunique()} Way Welch ANOVA'
|
2266
|
-
notes_APA = extract_apa(res_tab)
|
2267
|
-
|
2268
|
-
else:
|
2269
|
-
|
2270
|
-
res_tab = run_kruskal(data, dv, factor)
|
2271
|
-
notes_stat = f'Non-parametric Kruskal: {data[factor].nunique()} Way ANOVA'
|
2272
|
-
notes_APA = extract_apa(res_tab)
|
2273
|
-
|
2274
|
-
elif cfg_pmc == 'parametric':
|
2275
|
-
res_tab = run_anova(data, dv, factor, ss_type=ss_type,
|
2276
|
-
detailed=True, effsize='np2')
|
2277
|
-
notes_stat = f'{data[factor].nunique()} Way ANOVA'
|
2278
|
-
notes_APA = extract_apa(res_tab)
|
2279
|
-
|
2280
|
-
elif 'pa' in cfg_pair and 'np' not in cfg_pair: # 'paired'
|
2281
|
-
res_tab = run_rmanova(data, dv, factor, subject, correction='auto',
|
2282
|
-
detailed=True, effsize='ng2')
|
2283
|
-
notes_stat = f'{data[factor].nunique()} Way Repeated measures ANOVA'
|
2284
|
-
notes_APA = extract_apa(res_tab)
|
2285
|
-
|
2286
|
-
elif 'mix' in cfg_pair or 'both' in cfg_pair:
|
2287
|
-
res_tab = run_mixedanova(data, dv, between, within, subject)
|
2288
|
-
# notes_stat = f'{len(sum(len(between)+sum(len(within))))} Way Mixed ANOVA'
|
2289
|
-
notes_stat = ""
|
2290
|
-
# n_inter = res_tab.loc(res_tab["Source"] == "Interaction")
|
2291
|
-
# print(n_inter)
|
2292
|
-
notes_APA = extract_apa(res_tab)
|
2293
|
-
|
2294
|
-
elif cfg_pmc == 'non-parametric':
|
2295
|
-
if 'np' in cfg_pair: # 'unpaired'
|
2296
|
-
res_tab = run_kruskal(data, dv, factor)
|
2297
|
-
notes_stat = f'Non-parametric Kruskal: {data[factor].nunique()} Way ANOVA'
|
2298
|
-
notes_APA = f'H({res_tab.ddof1[0]},n={data.shape[0]})={round(res_tab.H[0], 5)},p={round(res_tab["p-unc"][0], 5)}'
|
2299
|
-
|
2300
|
-
elif 'pa' in cfg_pair and 'np' not in cfg_pair: # 'paired'
|
2301
|
-
res_tab = run_friedman(data, dv, factor, subject, method='chisq')
|
2302
|
-
notes_stat = f'Non-parametric {data[factor].nunique()} Way Friedman repeated measures ANOVA'
|
2303
|
-
notes_APA = f'X^2({res_tab.ddof1[0]})={round(res_tab.Q[0], 5)},p={round(res_tab["p-unc"][0], 5)}'
|
2304
|
-
|
2305
|
-
# =============================================================================
|
2306
|
-
# # Post-hoc
|
2307
|
-
# Post-Hoc Tests (if significant):
|
2308
|
-
# If ANOVA indicates significant differences, perform post-hoc tests (e.g.,
|
2309
|
-
# Tukey's HSD, Bonferroni, or Scheffé) to identify which groups differ from each other.
|
2310
|
-
# # https://pingouin-stats.org/build/html/generated/pingouin.pairwise_tests.html
|
2311
|
-
# =============================================================================
|
2312
|
-
go_pmc = True if cfg_pmc == 'parametric' else False
|
2313
|
-
go_subject = subject if ('pa' in cfg_pair) and (
|
2314
|
-
'np' not in cfg_pair) else None
|
2315
|
-
go_mix_between = between if ('mix' in cfg_pair) or (
|
2316
|
-
'both' in cfg_pair) else None
|
2317
|
-
go_mix_between = None if ('pa' in cfg_pair) or (
|
2318
|
-
'np' not in cfg_pair) else factor
|
2319
|
-
go_mix_within = within if ('mix' in cfg_pair) or (
|
2320
|
-
'both' in cfg_pair) else None
|
2321
|
-
go_mix_within = factor if ('pa' in cfg_pair) or (
|
2322
|
-
'np' not in cfg_pair) else None
|
2323
|
-
|
2324
|
-
if res_tab['p-unc'][0] <= .05:
|
2325
|
-
# Pairwise Comparisons
|
2326
|
-
method_post_hoc = [
|
2327
|
-
"bonf", # 'bonferroni', # : one-step correction
|
2328
|
-
"sidak", # one-step correction
|
2329
|
-
"holm", # step-down method using Bonferroni adjustments
|
2330
|
-
"fdr_bh", # Benjamini/Hochberg (non-negative)
|
2331
|
-
"fdr_by", # Benjamini/Yekutieli (negative)
|
2332
|
-
]
|
2333
|
-
res_posthoc = pd.DataFrame()
|
2334
|
-
for met in method_post_hoc:
|
2335
|
-
post_curr = pg.pairwise_tests(data=data, dv=dv, between=go_mix_between, within=go_mix_within, subject=go_subject, parametric=go_pmc, marginal=True, alpha=0.05, alternative='two-sided',
|
2336
|
-
padjust=met)
|
2337
|
-
|
2338
|
-
res_posthoc = pd.concat([res_posthoc, post_curr],
|
2339
|
-
ignore_index=True)
|
2340
|
-
else:
|
2341
|
-
res_posthoc = None
|
2342
|
-
output['res_posthoc'] = res_posthoc
|
2343
|
-
# =============================================================================
|
2344
|
-
# # filling output
|
2345
|
-
# =============================================================================
|
2346
|
-
|
2347
|
-
pd.set_option('display.max_columns', None)
|
2348
|
-
output['stat'] = notes_stat
|
2349
|
-
# print(output['APA'])
|
2350
|
-
output['APA'] = notes_APA
|
2351
|
-
output['pval'] = res_tab['p-unc']
|
2352
|
-
output['res_tab'] = res_tab
|
2353
|
-
if res_tab.shape[0] == len(notes_APA):
|
2354
|
-
output['res_tab']['APA'] = output['APA'] # note APA in the table
|
2355
|
-
# print(output['stat'])
|
2356
|
-
# print(output['res_tab'])
|
2357
|
-
|
2358
|
-
return output
|
2359
|
-
|
2360
|
-
|
2361
|
-
# =============================================================================
|
2362
|
-
# # One-way ANOVA
|
2363
|
-
# =============================================================================
|
2364
|
-
# url = "http://stats191.stanford.edu/data/rehab.csv"
|
2365
|
-
# rehab_table = pd.read_table(url, delimiter=",")
|
2366
|
-
# rehab_table.to_csv("rehab.table")
|
2367
|
-
# fig, ax = plt.subplots(figsize=(8, 6))
|
2368
|
-
# fig = rehab_table.boxplot("Time", "Fitness", ax=ax, grid=False)
|
2369
|
-
# # fig, ax = plt.subplots(figsize=(8, 6))
|
2370
|
-
# # set_pub()
|
2371
|
-
# # sns.boxenplot(x="Time",y="Fitness",data = rehab_table)
|
2372
|
-
|
2373
|
-
# out2 = FuncMultiCmpt(pmc='pmc', pair='unpair',
|
2374
|
-
# data=rehab_table, dv='Time', factor='Fitness')
|
2375
|
-
# # print(out2['res_tab'])
|
2376
|
-
# # print(out2['APA'])
|
2377
|
-
# out2['res_posthoc']
|
2378
|
-
# out2['res_posthoc']['p-unc'][0]
|
2379
|
-
# out2['res_posthoc']['p-adjust'][0]
|
2380
|
-
# out2['res_posthoc']['p-corr'][0]
|
2381
|
-
|
2382
|
-
|
2383
|
-
# =============================================================================
|
2384
|
-
# # Interactions and ANOVA
|
2385
|
-
# https://www.statsmodels.org/dev/examples/notebooks/generated/interactions_anova.html
|
2386
|
-
# url = "http://stats191.stanford.edu/data/salary.table"
|
2387
|
-
# fh = urlopen(url)
|
2388
|
-
# df = pd.read_table(fh)
|
2389
|
-
# out1 = FuncMultiCmpt(pmc='pmc', pair='unpaired', data=df,
|
2390
|
-
# dv='S', factor=['X', 'E', 'M'], group='M')
|
2391
|
-
# # # two-way anova
|
2392
|
-
# # https://www.statology.org/two-way-anova-python/
|
2393
|
-
# # =============================================================================
|
2394
|
-
# # df = pd.DataFrame({'water': np.repeat(['daily', 'weekly'], 15),
|
2395
|
-
# # 'sun': np.tile(np.repeat(['low', 'med', 'high'], 5), 2),
|
2396
|
-
# # 'height': [6, 6, 6, 5, 6, 5, 5, 6, 4, 5,
|
2397
|
-
# # 6, 6, 7, 8, 7, 3, 4, 4, 4, 5,
|
2398
|
-
# # 4, 4, 4, 4, 4, 5, 6, 6, 7, 8]})
|
2399
|
-
# # out1 = FuncMultiCmpt(pmc='pmc', pair='unpaired', data=df,
|
2400
|
-
# # dv='height', factor=['water','sun'],group='water')
|
2401
|
-
|
2402
|
-
|
2403
|
-
# =============================================================================
|
2404
|
-
# # two way anova
|
2405
|
-
# https://www.geeksforgeeks.org/how-to-perform-a-two-way-anova-in-python/
|
2406
|
-
# =============================================================================
|
2407
|
-
# df1=pd.DataFrame({'Fertilizer': np.repeat(['daily', 'weekly'], 15),
|
2408
|
-
# 'Watering': np.repeat(['daily', 'weekly'], 15),
|
2409
|
-
# 'height': [14, 16, 15, 15, 16, 13, 12, 11,
|
2410
|
-
# 14, 15, 16, 16, 17, 18, 14, 13,
|
2411
|
-
# 14, 14, 14, 15, 16, 16, 17, 18,
|
2412
|
-
# 14, 13, 14, 14, 14, 15]})
|
2413
|
-
|
2414
|
-
# df1['subject'] = np.tile(range(0, 15), (1, 2)).T
|
2415
|
-
# out1 = FuncMultiCmpt(pmc='pmc', pair='unpaired', data=df1,
|
2416
|
-
# dv='height', factor=['Fertilizer','Watering'],group='Watering')
|
2417
|
-
# # print(out1['stat'])
|
2418
|
-
# # print(out1['res_tab'])
|
2419
|
-
|
2420
|
-
# =============================================================================
|
2421
|
-
# # welch anova
|
2422
|
-
# https://www.geeksforgeeks.org/how-to-perform-welchs-anova-in-python/
|
2423
|
-
# =============================================================================
|
2424
|
-
# df = pd.DataFrame({'score': [64, 66, 68, 75, 78, 94, 98, 79, 71, 80,
|
2425
|
-
# 91, 92, 93, 90, 97, 94, 82, 88, 95, 96,
|
2426
|
-
# 79, 78, 88, 94, 92, 85, 83, 85, 82, 81],
|
2427
|
-
# 'group': np.repeat(['strat1', 'strat2', 'strat3'],repeats=10)})
|
2428
|
-
# out1 = FuncMultiCmpt(pmc='auto',pair='unpaired',data=df, dv='score', factor='group', group='group')
|
2429
|
-
# =============================================================================
|
2430
|
-
# # two way anova
|
2431
|
-
# https://www.statology.org/two-way-anova-python/
|
2432
|
-
# =============================================================================
|
2433
|
-
# df = pd.DataFrame({'water': np.repeat(['daily', 'weekly'], 15),
|
2434
|
-
# 'sun': np.tile(np.repeat(['low', 'med', 'high'], 5), 2),
|
2435
|
-
# 'height': [6, 6, 6, 5, 6, 5, 5, 6, 4, 5,
|
2436
|
-
# 6, 6, 7, 8, 7, 3, 4, 4, 4, 5,
|
2437
|
-
# 4, 4, 4, 4, 4, 5, 6, 6, 7, 8]})
|
2438
|
-
# df['subject'] = np.tile(range(0, 15), (1, 2)).T
|
2439
|
-
# out1 = FuncMultiCmpt(pmc='pmc', pair='unpaired', data=df,
|
2440
|
-
# dv='height', factor=['water', 'sun'], subject='subject', group='water')
|
2441
|
-
# # print(out1['stat'])
|
2442
|
-
# # print(out1['res_tab'])
|
2443
|
-
|
2444
|
-
# =============================================================================
|
2445
|
-
# # 3-way ANOVA
|
2446
|
-
# =============================================================================
|
2447
|
-
# df = pd.DataFrame({'program': np.repeat([1, 2], 20),
|
2448
|
-
# 'gender': np.tile(np.repeat(['M', 'F'], 10), 2),
|
2449
|
-
# 'division': np.tile(np.repeat([1, 2], 5), 4),
|
2450
|
-
# 'height': [7, 7, 8, 8, 7, 6, 6, 5, 6, 5,
|
2451
|
-
# 5, 5, 4, 5, 4, 3, 3, 4, 3, 3,
|
2452
|
-
# 6, 6, 5, 4, 5, 4, 5, 4, 4, 3,
|
2453
|
-
# 2, 2, 1, 4, 4, 2, 1, 1, 2, 1]})
|
2454
|
-
# df['subject'] = np.tile(range(0, 20), (1, 2)).T
|
2455
|
-
# out1 = FuncMultiCmpt(pmc='pmc', pair='unpaired', data=df,
|
2456
|
-
# dv='height', factor=['gender', 'program', 'division'], subject='subject', group='program')
|
2457
|
-
# # print(out1['stat'])
|
2458
|
-
# # print(out1['res_tab'])
|
2459
|
-
|
2460
|
-
# =============================================================================
|
2461
|
-
# # Repeated Measures ANOVA in Python
|
2462
|
-
# =============================================================================
|
2463
|
-
# df = pd.DataFrame({'patient': np.repeat([1, 2, 3, 4, 5], 4),
|
2464
|
-
# 'drug': np.tile([1, 2, 3, 4], 5),
|
2465
|
-
# 'response': [30, 28, 16, 34,
|
2466
|
-
# 14, 18, 10, 22,
|
2467
|
-
# 24, 20, 18, 30,
|
2468
|
-
# 38, 34, 20, 44,
|
2469
|
-
# 26, 28, 14, 30]})
|
2470
|
-
# # df['subject'] = np.tile(range(0, 20), (1, 2)).T
|
2471
|
-
# out1 = FuncMultiCmpt(pmc='pmc', pair='paired', data=df,
|
2472
|
-
# dv='response', factor=['drug'], subject='patient', group='drug')
|
2473
|
-
# print(out1['stat'])
|
2474
|
-
# print(out1['res_tab'])
|
2475
|
-
# print(out1['APA'])
|
2476
|
-
|
2477
|
-
# =============================================================================
|
2478
|
-
# # repeated anova
|
2479
|
-
# https://www.geeksforgeeks.org/how-to-perform-a-repeated-measures-anova-in-python/
|
2480
|
-
# =============================================================================
|
2481
|
-
# df = pd.DataFrame({'Cars': np.repeat([1, 2, 3, 4, 5], 4),
|
2482
|
-
# 'Engine Oil': np.tile([1, 2, 3, 4], 5),
|
2483
|
-
# 'Mileage': [36, 38, 30, 29,
|
2484
|
-
# 34, 38, 30, 29,
|
2485
|
-
# 34, 28, 38, 32,
|
2486
|
-
# 38, 34, 20, 44,
|
2487
|
-
# 26, 28, 34, 50]})
|
2488
|
-
# out1 = FuncMultiCmpt(pmc='pmc', pair='paired', data=df,
|
2489
|
-
# dv='Mileage', factor=['Engine Oil'], subject='Cars', group='Cars')
|
2490
|
-
# =============================================================================
|
2491
|
-
# #two-way repeated anova
|
2492
|
-
# =============================================================================
|
2493
|
-
# df = pd.read_csv(
|
2494
|
-
# "https://reneshbedre.github.io/assets/posts/anova/plants_leaves_two_within.csv")
|
2495
|
-
# df
|
2496
|
-
# # df['subject'] = np.tile(range(0, 20), (1, 2)).T
|
2497
|
-
# out1 = FuncMultiCmpt(pmc='pmc', pair='paired', data=df,
|
2498
|
-
# dv='num_leaves', factor=['year', 'time'], subject='plants', group='year')
|
2499
|
-
# print(out1['stat'])
|
2500
|
-
# print(out1['res_tab'])
|
2501
|
-
# print(out1['APA'])
|
2502
|
-
|
2503
|
-
# =============================================================================
|
2504
|
-
# # repeated anova
|
2505
|
-
# =============================================================================
|
2506
|
-
# df = pd.read_csv('/Users/macjianfeng/Desktop/test.csv')
|
2507
|
-
# df.head()
|
2508
|
-
# df.loc[df['animal'].str.contains('Sleep'), 'experiment'] = 'sleep'
|
2509
|
-
# df.loc[df['animal'].str.contains('Wake'), 'experiment'] = 'wake'
|
2510
|
-
# df.loc[df['variable'].str.contains('hypo'), 'region'] = 'hypo'
|
2511
|
-
# df.loc[df['variable'].str.contains('cort'), 'region'] = 'cort'
|
2512
|
-
# df
|
2513
|
-
# for i in range(4):
|
2514
|
-
# match i:
|
2515
|
-
# case 0:
|
2516
|
-
# prot_name = 'A1'
|
2517
|
-
# case 1:
|
2518
|
-
# prot_name = 'A2'
|
2519
|
-
# case 2:
|
2520
|
-
# prot_name = '845'
|
2521
|
-
# case 3:
|
2522
|
-
# prot_name = '831'
|
2523
|
-
# df_tmp = df[df["variable"].str.contains(prot_name)]
|
2524
|
-
# df_tmp['protein'] = prot_name
|
2525
|
-
# df_tmp = df_tmp.reset_index()
|
2526
|
-
# print(df_tmp)
|
2527
|
-
|
2528
|
-
# out1 = FuncMultiCmpt(pmc='pmc', pair='mix', data=df_tmp,
|
2529
|
-
# dv='value', between='experiment', within='region', subject='animal', group='experiment')
|
2530
|
-
# print(out1['stat'])
|
2531
|
-
# print(out1['res_tab'])
|
2532
|
-
# # =============================================================================
|
2533
|
-
# One-way ANOVA
|
2534
|
-
# df1 = pd.read_csv('/Users/macjianfeng/Desktop/Book2.csv')
|
2535
|
-
# df2 = df1.melt()
|
2536
|
-
# out1 = FuncMultiCmpt(pmc='npmc', pair='unpaired', data=df2,
|
2537
|
-
# dv='libido', factor=['brand x', 'brand y', 'brand z'], subject='participant')
|
2538
|
-
# print(out1['stat'])
|
2539
|
-
# print(out1['res_tab'])
|
2540
|
-
# =============================================================================
|
2541
|
-
|
2542
|
-
|
2543
|
-
# =============================================================================
|
2544
|
-
# # #One-way ANOVA new example: https://www.pythonfordatascience.org/anova-python/
|
2545
|
-
# =============================================================================
|
2546
|
-
# df1 = pd.read_csv(
|
2547
|
-
# "https://raw.githubusercontent.com/researchpy/Data-sets/master/difficile.csv")
|
2548
|
-
# df1.drop('person', axis=1, inplace=True)
|
2549
|
-
# # Recoding value from numeric to string
|
2550
|
-
# df1['dose'].replace({1: 'placebo', 2: 'low', 3: 'high'}, inplace=True)
|
2551
|
-
# df1.head(10)
|
2552
|
-
|
2553
|
-
# out3= FuncMultiCmpt(pmc='pmc', data=df1, dv='libido', factor='dose')
|
2554
|
-
# # print(out3['res_tab'])
|
2555
|
-
# # # print(out3['res_posthoc'])
|
2556
|
-
# # print(out3['APA'])
|
2557
|
-
|
2558
|
-
# =============================================================================
|
2559
|
-
# https://lifewithdata.com/2023/06/08/how-to-perform-a-two-way-anova-in-python/
|
2560
|
-
# =============================================================================
|
2561
|
-
# data = {
|
2562
|
-
# 'Diet': ['A', 'A', 'A', 'A', 'B', 'B', 'B', 'B', 'C', 'C', 'C', 'C'],
|
2563
|
-
# 'Workout': ['Low', 'Medium', 'High', 'Low', 'Medium', 'High', 'Low', 'Medium', 'High', 'Low', 'Medium', 'High'],
|
2564
|
-
# 'WeightLoss': [3, 4, 5, 3.2, 5, 6, 5.2, 6, 5.5, 4, 5.5, 6.2]
|
2565
|
-
# }
|
2566
|
-
# df = pd.DataFrame(data)
|
2567
|
-
# out4= FuncMultiCmpt(pmc='pmc', pair='unpaired',data=df, dv='WeightLoss', factor=['Diet','Workout'],group='Diet')
|
2568
|
-
|
2569
|
-
# =============================================================================
|
2570
|
-
# # convert to list to string
|
2571
|
-
# =============================================================================
|
2572
1569
|
def list2str(x_str):
|
2573
1570
|
s = ''.join(str(x) for x in x_str)
|
2574
1571
|
return s
|
@@ -2580,18 +1577,14 @@ def str2list(str_):
|
|
2580
1577
|
def load_img(fpath):
|
2581
1578
|
"""
|
2582
1579
|
Load an image from the specified file path.
|
2583
|
-
|
2584
1580
|
Args:
|
2585
1581
|
fpath (str): The file path to the image.
|
2586
|
-
|
2587
1582
|
Returns:
|
2588
1583
|
PIL.Image: The loaded image.
|
2589
|
-
|
2590
1584
|
Raises:
|
2591
1585
|
FileNotFoundError: If the specified file is not found.
|
2592
1586
|
OSError: If the specified file cannot be opened or is not a valid image file.
|
2593
1587
|
"""
|
2594
|
-
|
2595
1588
|
try:
|
2596
1589
|
img = Image.open(fpath)
|
2597
1590
|
return img
|
@@ -2604,12 +1597,10 @@ def apply_filter(img, *args):
|
|
2604
1597
|
# def apply_filter(img, filter_name, filter_value=None):
|
2605
1598
|
"""
|
2606
1599
|
Apply the specified filter to the image.
|
2607
|
-
|
2608
1600
|
Args:
|
2609
1601
|
img (PIL.Image): The input image.
|
2610
1602
|
filter_name (str): The name of the filter to apply.
|
2611
1603
|
**kwargs: Additional parameters specific to the filter.
|
2612
|
-
|
2613
1604
|
Returns:
|
2614
1605
|
PIL.Image: The filtered image.
|
2615
1606
|
"""
|
@@ -2813,10 +1804,8 @@ def imgsets(
|
|
2813
1804
|
def auto_enhance(img):
|
2814
1805
|
"""
|
2815
1806
|
Automatically enhances the image based on its characteristics.
|
2816
|
-
|
2817
1807
|
Args:
|
2818
1808
|
img (PIL.Image): The input image.
|
2819
|
-
|
2820
1809
|
Returns:
|
2821
1810
|
dict: A dictionary containing the optimal enhancement values.
|
2822
1811
|
"""
|
@@ -2832,47 +1821,33 @@ def imgsets(
|
|
2832
1821
|
bit_depth = 16
|
2833
1822
|
else:
|
2834
1823
|
raise ValueError("Unsupported image mode")
|
2835
|
-
|
2836
1824
|
# Calculate the brightness and contrast for each channel
|
2837
1825
|
num_channels = len(img.getbands())
|
2838
1826
|
brightness_factors = []
|
2839
1827
|
contrast_factors = []
|
2840
1828
|
for channel in range(num_channels):
|
2841
1829
|
channel_histogram = img.split()[channel].histogram()
|
2842
|
-
brightness = sum(i * w for i, w in enumerate(channel_histogram))
|
2843
|
-
channel_histogram
|
2844
|
-
)
|
1830
|
+
brightness = sum(i * w for i, w in enumerate(channel_histogram))/sum(channel_histogram)
|
2845
1831
|
channel_min, channel_max = img.split()[channel].getextrema()
|
2846
1832
|
contrast = channel_max - channel_min
|
2847
|
-
|
2848
1833
|
# Adjust calculations based on bit depth
|
2849
1834
|
normalization_factor = 2**bit_depth - 1 # Max value for the given bit depth
|
2850
|
-
brightness_factor = (
|
2851
|
-
|
2852
|
-
)
|
2853
|
-
contrast_factor = (
|
2854
|
-
1.0 + (contrast - normalization_factor / 2) / normalization_factor
|
2855
|
-
)
|
2856
|
-
|
1835
|
+
brightness_factor = (1.0 + (brightness - normalization_factor / 2) / normalization_factor)
|
1836
|
+
contrast_factor = (1.0 + (contrast - normalization_factor / 2) / normalization_factor)
|
2857
1837
|
brightness_factors.append(brightness_factor)
|
2858
1838
|
contrast_factors.append(contrast_factor)
|
2859
|
-
|
2860
1839
|
# Calculate the average brightness and contrast factors across channels
|
2861
1840
|
avg_brightness_factor = sum(brightness_factors) / num_channels
|
2862
1841
|
avg_contrast_factor = sum(contrast_factors) / num_channels
|
2863
|
-
|
2864
1842
|
return {"brightness": avg_brightness_factor, "contrast": avg_contrast_factor}
|
2865
|
-
|
2866
1843
|
# Load image if input is a file path
|
2867
1844
|
if isinstance(img, str):
|
2868
1845
|
img = load_img(img)
|
2869
|
-
|
2870
1846
|
img_update = img.copy()
|
2871
1847
|
# Auto-enhance image if requested
|
2872
1848
|
if auto:
|
2873
1849
|
auto_params = auto_enhance(img_update)
|
2874
1850
|
sets.update(auto_params)
|
2875
|
-
|
2876
1851
|
if sets is None:
|
2877
1852
|
sets = {}
|
2878
1853
|
for k, value in sets.items():
|
@@ -2947,12 +1922,9 @@ def imgsets(
|
|
2947
1922
|
if len(value)==3:
|
2948
1923
|
value+=(255,)
|
2949
1924
|
img_update = remove(img_update, bgcolor=value)
|
2950
|
-
|
2951
1925
|
if filter_kws:
|
2952
1926
|
for filter_name, filter_value in filter_kws.items():
|
2953
1927
|
img_update = apply_filter(img_update, filter_name, filter_value)
|
2954
|
-
|
2955
|
-
|
2956
1928
|
# Display the image if requested
|
2957
1929
|
if show:
|
2958
1930
|
if figsize is None:
|
@@ -2961,7 +1933,6 @@ def imgsets(
|
|
2961
1933
|
plt.figure(figsize=figsize, dpi=dpi)
|
2962
1934
|
plt.imshow(img_update)
|
2963
1935
|
plt.axis("on") if show_axis else plt.axis("off")
|
2964
|
-
|
2965
1936
|
return img_update
|
2966
1937
|
# # usage:
|
2967
1938
|
# img = imgsets(
|
@@ -2982,7 +1953,6 @@ def figsets(*args):
|
|
2982
1953
|
"scatter","ieee","no-latex","std-colors","high-vis","bright","dark_background","science",
|
2983
1954
|
"high-vis","vibrant","muted","retro","grid","high-contrast","light","cjk-tc-font","cjk-kr-font",
|
2984
1955
|
]
|
2985
|
-
|
2986
1956
|
def sets_priority(ax,key, value):
|
2987
1957
|
if ("fo" in key) and (("size" in key) or ("sz" in key)):
|
2988
1958
|
fontsize=value
|