py2ls 0.1.4.8__py3-none-any.whl → 0.1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. py2ls/.git/COMMIT_EDITMSG +1 -1
  2. py2ls/.git/FETCH_HEAD +1 -1
  3. py2ls/.git/index +0 -0
  4. py2ls/.git/logs/HEAD +3 -0
  5. py2ls/.git/logs/refs/heads/main +3 -0
  6. py2ls/.git/logs/refs/remotes/origin/HEAD +5 -0
  7. py2ls/.git/logs/refs/remotes/origin/main +3 -0
  8. py2ls/.git/objects/01/d5bd8065e6860c0bd23ff9fa57161806a099e1 +0 -0
  9. py2ls/.git/objects/09/08da26de58c114225ad81f484b80bf5d351b34 +0 -0
  10. py2ls/.git/objects/1c/3f92adda34344bcbbbf9d409c79855ae2aaea8 +2 -0
  11. py2ls/.git/objects/32/fd627b62fad7cf3b2f9e34ab9777126a0987ad +0 -0
  12. py2ls/.git/objects/39/7ead045fbbcfb17c62019eb18fe21ed05dbee5 +0 -0
  13. py2ls/.git/objects/4f/7afb40dff2153d857fc85748c2eecb85125042 +0 -0
  14. py2ls/.git/objects/62/4488173ed2c8936fa5cea3cf5dd3f26a30b86e +0 -0
  15. py2ls/.git/objects/6d/ee29dbdcc84edeeacede105110446f3ccac963 +0 -0
  16. py2ls/.git/objects/b7/2c9e75ab7d0afe594664650aa8f6c772f5ac64 +0 -0
  17. py2ls/.git/objects/bb/81ccc0513f18fc160b54a82861e9a80d23f4f6 +0 -0
  18. py2ls/.git/objects/cd/822b3574a88ebdd1ed82fd6983f37e626d52b4 +0 -0
  19. py2ls/.git/objects/d8/4688b54c0040a30976b3a6540bc47adf7ce680 +0 -0
  20. py2ls/.git/objects/f1/e50757fddc28b445545dc7e2759b54cdd0f42e +0 -0
  21. py2ls/.git/refs/heads/main +1 -1
  22. py2ls/.git/refs/remotes/origin/main +1 -1
  23. py2ls/__init__.py +2 -12
  24. py2ls/data/.DS_Store +0 -0
  25. py2ls/data/db2ls_sql_chtsht.json +39 -0
  26. py2ls/data/lang_code_iso639.json +97 -0
  27. py2ls/db2ls.py +356 -0
  28. py2ls/ips.py +160 -1190
  29. py2ls/stats.py +810 -0
  30. py2ls/stdshade.py +173 -0
  31. py2ls/translator.py +6 -99
  32. {py2ls-0.1.4.8.dist-info → py2ls-0.1.5.0.dist-info}/METADATA +1 -1
  33. {py2ls-0.1.4.8.dist-info → py2ls-0.1.5.0.dist-info}/RECORD +34 -16
  34. py2ls/dbhandler.py +0 -97
  35. {py2ls-0.1.4.8.dist-info → py2ls-0.1.5.0.dist-info}/WHEEL +0 -0
py2ls/ips.py CHANGED
@@ -1,51 +1,47 @@
1
- from scipy.ndimage import convolve1d
2
1
  import numpy as np
3
2
  import pandas as pd
3
+
4
4
  import json
5
- import matplotlib.pyplot as plt
6
- import seaborn as sns
7
- # import scienceplots
8
5
  import matplotlib
9
- import sys
10
- import os
11
- from scipy.signal import savgol_filter
12
- import pingouin as pg
13
- from scipy import stats
6
+ import matplotlib.pyplot as plt
14
7
  import matplotlib.ticker as tck
8
+ from mpl_toolkits.mplot3d import Axes3D
9
+ # import seaborn as sns
10
+
11
+ import sys, os,shutil,re, yaml,json
15
12
  from cycler import cycler
16
- import re
13
+ import time
14
+ from dateutil import parser
15
+ from datetime import datetime
16
+
17
17
  from PIL import Image,ImageEnhance, ImageOps,ImageFilter
18
18
  from rembg import remove,new_session
19
- from mpl_toolkits.mplot3d import Axes3D
19
+
20
20
  import docx
21
- import pandas as pd
22
21
  from fpdf import FPDF
23
- import yaml
24
22
  from lxml import etree
25
23
  from docx import Document
26
24
  from PyPDF2 import PdfReader
27
25
  from pdf2image import convert_from_path, pdfinfo_from_path
28
- from nltk.tokenize import sent_tokenize,word_tokenize
26
+ from nltk.tokenize import sent_tokenize, word_tokenize
29
27
  import nltk # nltk.download("punkt")
30
28
  from docx2pdf import convert
31
29
  import img2pdf as image2pdf
32
- import pprint
30
+ import nbformat
31
+ from nbconvert import MarkdownExporter
32
+
33
33
  from itertools import pairwise
34
- import time
35
34
  from box import Box, BoxList
36
35
  from numerizer import numerize
37
36
  from tqdm import tqdm
38
37
  import mimetypes
39
38
  from pprint import pp
40
- from dateutil import parser
41
- from datetime import datetime
42
39
  from collections import Counter
43
40
  from fuzzywuzzy import fuzz,process
44
- from py2ls import netfinder
45
41
  from langdetect import detect
46
- import shutil
47
42
  from duckduckgo_search import DDGS
48
43
 
44
+ from py2ls import netfinder
49
45
 
50
46
  dir_save='/Users/macjianfeng/Dropbox/Downloads/'
51
47
 
@@ -58,6 +54,45 @@ def rm_folder(folder_path, verbose=True):
58
54
  if verbose:
59
55
  print(f'Failed to delete {folder_path}. Reason: {e}')
60
56
 
57
+ def fremove(path, verbose=True):
58
+ """
59
+ Remove a folder and all its contents or a single file.
60
+ Parameters:
61
+ path (str): The path to the folder or file to remove.
62
+ verbose (bool): If True, print success or failure messages. Default is True.
63
+ """
64
+ try:
65
+ if os.path.isdir(path):
66
+ shutil.rmtree(path)
67
+ if verbose:
68
+ print(f'Successfully deleted folder {path}')
69
+ elif os.path.isfile(path):
70
+ os.remove(path)
71
+ if verbose:
72
+ print(f'Successfully deleted file {path}')
73
+ else:
74
+ if verbose:
75
+ print(f'Path {path} does not exist')
76
+ except Exception as e:
77
+ if verbose:
78
+ print(f'Failed to delete {path}. Reason: {e}')
79
+
80
+
81
+ def get_cwd(verbose:bool = True):
82
+ """
83
+ get_cwd: to get the current working directory
84
+ Args:
85
+ verbose (bool, optional): to show which function is use. Defaults to True.
86
+ """
87
+ try:
88
+ script_dir = os.path.dirname(os.path.abspath(__file__))
89
+ if verbose:
90
+ print("os.path.dirname(os.path.abspath(__file__)):", script_dir)
91
+ except NameError:
92
+ # This works in an interactive environment (like a Jupyter notebook)
93
+ script_dir = os.getcwd()
94
+ if verbose:
95
+ print("os.getcwd():", script_dir)
61
96
 
62
97
  def search(query, limit=5, kind='text', output='df',verbose=False,download=True, dir_save=dir_save):
63
98
  from duckduckgo_search import DDGS
@@ -157,102 +192,10 @@ def ai(*args, **kwargs):
157
192
  return echo(**kwargs)
158
193
 
159
194
  def detect_lang(text, output='lang',verbose=True):
160
- lang_code_iso639={'Abkhazian': 'ab',
161
- 'Afar': 'aa',
162
- 'Afrikaans': 'af',
163
- 'Akan': 'ak',
164
- 'Albanian': 'sq',
165
- 'Amharic': 'am',
166
- 'Arabic': 'ar',
167
- 'Armenian': 'hy',
168
- 'Assamese': 'as',
169
- # 'Avaric': 'av',
170
- 'Aymara': 'ay',
171
- 'Azerbaijani': 'az',
172
- 'Bashkir': 'ba',
173
- 'Basque': 'eu',
174
- 'Belarusian': 'be',
175
- 'Bislama': 'bi',
176
- 'Breton': 'br',
177
- 'Burmese': 'my',
178
- 'Catalan, Valencian': 'ca',
179
- 'Chamorro': 'ch',
180
- 'Chichewa, Chewa, Nyanja': 'ny',
181
- 'Chinese': 'zh',
182
- 'Corsican': 'co',
183
- 'Cree': 'cr',
184
- 'Croatian': 'hr',
185
- 'Danish': 'da',
186
- 'Dutch, Flemish': 'nl',
187
- 'Dzongkha': 'dz',
188
- 'English': 'en',
189
- 'Finnish': 'fi',
190
- 'French': 'fr',
191
- 'Galician': 'gl',
192
- 'Georgian': 'ka',
193
- 'German': 'de',
194
- 'Greek, Modern (1453–)': 'el',
195
- 'Gujarati': 'gu',
196
- 'Hausa': 'ha',
197
- 'Hebrew': 'he',
198
- 'Hindi': 'hi',
199
- 'Hungarian': 'hu',
200
- 'Icelandic': 'is',
201
- 'Italian': 'it',
202
- 'Kikuyu, Gikuyu': 'ki',
203
- 'Korean': 'ko',
204
- 'Kurdish': 'ku',
205
- 'Latin': 'la',
206
- 'Limburgan, Limburger, Limburgish': 'li',
207
- 'Luba-Katanga': 'lu',
208
- 'Macedonian': 'mk',
209
- 'Malay': 'ms',
210
- 'Nauru': 'na',
211
- 'North Ndebele': 'nd',
212
- 'Nepali': 'ne',
213
- 'Norwegian': 'no',
214
- 'Norwegian Nynorsk': 'nn',
215
- 'Sichuan Yi, Nuosu': 'ii',
216
- 'Occitan': 'oc',
217
- 'Ojibwa': 'oj',
218
- 'Oriya': 'or',
219
- 'Ossetian, Ossetic': 'os',
220
- 'Persian': 'fa',
221
- 'Punjabi, Panjabi': 'pa',
222
- 'Quechua': 'qu',
223
- 'Romanian, Moldavian, Moldovan': 'ro',
224
- 'Russian': 'ru',
225
- 'Samoan': 'sm',
226
- 'Sanskrit': 'sa',
227
- 'Serbian': 'sr',
228
- 'Shona': 'sn',
229
- 'Sinhala, Sinhalese': 'si',
230
- 'Slovenian': 'sl',
231
- 'Somali': 'so',
232
- 'Sundanese': 'su',
233
- 'Swahili': 'sw',
234
- 'Swati': 'ss',
235
- 'Tajik': 'tg',
236
- 'Tamil': 'ta',
237
- 'Telugu': 'te',
238
- 'Thai': 'th',
239
- 'Tibetan': 'bo',
240
- 'Tigrinya': 'ti',
241
- 'Tonga (Tonga Islands)': 'to',
242
- 'Tsonga': 'ts',
243
- 'Twi': 'tw',
244
- 'Ukrainian': 'uk',
245
- 'Urdu': 'ur',
246
- 'Uzbek': 'uz',
247
- 'Venda': 've',
248
- 'Vietnamese': 'vi',
249
- 'Volapük': 'vo',
250
- 'Welsh': 'cy',
251
- 'Wolof': 'wo',
252
- 'Xhosa': 'xh',
253
- 'Yiddish': 'yi',
254
- 'Yoruba': 'yo',
255
- 'Zulu': 'zu'}
195
+ dir_curr_script=os.path.dirname(os.path.abspath(__file__))
196
+ dir_lang_code=dir_curr_script+"/data/lang_code_iso639.json"
197
+ print(dir_curr_script,os.getcwd(),dir_lang_code)
198
+ lang_code_iso639=fload(dir_lang_code)
256
199
  l_lang,l_code = [],[]
257
200
  [[l_lang.append(v),l_code.append(k)] for v,k in lang_code_iso639.items()]
258
201
  try:
@@ -340,21 +283,7 @@ def counter(list_, verbose=True):
340
283
  # print(f"Return a list of the n most common elements:\n{c.most_common()}")
341
284
  # print(f"Compute the sum of the counts:\n{c.total()}")
342
285
 
343
- def is_num(s):
344
- """
345
- Check if a string can be converted to a number (int or float).
346
- Parameters:
347
- - s (str): The string to check.
348
- Returns:
349
- - bool: True if the string can be converted to a number, False otherwise.
350
- """
351
- try:
352
- float(s) # Try converting the string to a float
353
- return True
354
- except ValueError:
355
- return False
356
- def isnum(s):
357
- return is_num(s)
286
+
358
287
 
359
288
  def str2time(time_str, fmt='24'):
360
289
  """
@@ -600,6 +529,7 @@ def paper_size(paper_type_str='a4'):
600
529
  if not paper_type:
601
530
  paper_type='a4' # default
602
531
  return df[paper_type].tolist()
532
+
603
533
  def docx2pdf(dir_docx, dir_pdf=None):
604
534
  if dir_pdf:
605
535
  convert(dir_docx,dir_pdf)
@@ -815,7 +745,7 @@ def pdf2img(dir_pdf, dir_save=None, page=None, kind="png",verbose=True, **kws):
815
745
  df_dir_img_single_page = pd.DataFrame()
816
746
  dir_single_page = []
817
747
  if verbose:
818
- pprint.pp(pdfinfo_from_path(dir_pdf))
748
+ pp(pdfinfo_from_path(dir_pdf))
819
749
  if isinstance(page, tuple) and page:
820
750
  page = list(page)
821
751
  if isinstance(page,int):
@@ -888,7 +818,14 @@ def fload(fpath, kind=None, **kwargs):
888
818
  def load_xlsx(fpath, **kwargs):
889
819
  df = pd.read_excel(fpath, **kwargs)
890
820
  return df
891
-
821
+ def load_ipynb(fpath,**kwargs):
822
+ as_version=kwargs.get("as_version",4)
823
+ with open(fpath, "r") as file:
824
+ nb = nbformat.read(file, as_version=as_version)
825
+ md_exporter = MarkdownExporter()
826
+ md_body, _ = md_exporter.from_notebook_node(nb)
827
+ return md_body
828
+
892
829
  def load_pdf(fpath, page='all', verbose=False, **kwargs):
893
830
  """
894
831
  Parameters:
@@ -950,7 +887,7 @@ def fload(fpath, kind=None, **kwargs):
950
887
 
951
888
  kind = kind.lstrip('.').lower()
952
889
  img_types=[ 'bmp','eps', 'gif', 'icns', 'ico', 'im', 'jpg','jpeg', 'jpeg2000','msp', 'pcx', 'png', 'ppm', 'sgi', 'spider', 'tga','tiff','webp',"json"]
953
- doc_types = ["docx", "txt", "md", "html", "json", "yaml", "xml", "csv", "xlsx", "pdf"]
890
+ doc_types = ["docx", "txt", "md", "html", "json", "yaml", "xml", "csv", "xlsx", "pdf","ipynb"]
954
891
  supported_types = [*doc_types, *img_types]
955
892
  if kind not in supported_types:
956
893
  raise ValueError(f"Error:\n{kind} is not in the supported list {supported_types}")
@@ -970,6 +907,8 @@ def fload(fpath, kind=None, **kwargs):
970
907
  return load_csv(fpath, **kwargs)
971
908
  elif kind == "xlsx":
972
909
  return load_xlsx(fpath, **kwargs)
910
+ elif kind == "ipynb":
911
+ return load_ipynb(fpath, **kwargs)
973
912
  elif kind == "pdf":
974
913
  print('usage:load_pdf(fpath, page="all", verbose=False)')
975
914
  return load_pdf(fpath, **kwargs)
@@ -1093,7 +1032,25 @@ def fsave(
1093
1032
  df = pd.DataFrame(data)
1094
1033
  df.to_excel(fpath, **kwargs)
1095
1034
 
1035
+ def save_ipynb(fpath,data,**kwargs):
1036
+ # Split the content by code fences to distinguish between code and markdown
1037
+ parts = data.split('```')
1038
+ cells = []
1096
1039
 
1040
+ for i, part in enumerate(parts):
1041
+ if i % 2 == 0:
1042
+ # Even index: markdown content
1043
+ cells.append(nbf.v4.new_markdown_cell(part.strip()))
1044
+ else:
1045
+ # Odd index: code content
1046
+ cells.append(nbf.v4.new_code_cell(part.strip()))
1047
+ # Create a new notebook
1048
+ nb = nbformat.v4.new_notebook()
1049
+ nb['cells'] = cells
1050
+ # Write the notebook to a file
1051
+ with open(fpath, 'w', encoding='utf-8') as ipynb_file:
1052
+ nbf.write(fpath, ipynb_file)
1053
+
1097
1054
  # def save_json(fpath, data, **kwargs):
1098
1055
  # with open(fpath, "w") as file:
1099
1056
  # json.dump(data, file, **kwargs)
@@ -1152,6 +1109,7 @@ def fsave(
1152
1109
  "json",
1153
1110
  "xml",
1154
1111
  "yaml",
1112
+ "ipynb"
1155
1113
  ]:
1156
1114
  print(
1157
1115
  f"Warning:\n{kind} is not in the supported list ['docx', 'txt', 'md', 'html', 'pdf', 'csv', 'xlsx', 'json', 'xml', 'yaml']"
@@ -1168,19 +1126,17 @@ def fsave(
1168
1126
  elif kind == "pdf":
1169
1127
  save_pdf(fpath, content, font_name, font_size)
1170
1128
  elif kind == "csv":
1171
- save_csv(
1172
- fpath, content, **kwargs
1173
- ) # Assuming content is in tabular form (list of dicts or DataFrame)
1129
+ save_csv(fpath, content, **kwargs)
1174
1130
  elif kind == "xlsx":
1175
- save_xlsx(
1176
- fpath, content, **kwargs
1177
- ) # Assuming content is in tabular form (list of dicts or DataFrame)
1131
+ save_xlsx(fpath, content, **kwargs)
1178
1132
  elif kind == "json":
1179
- save_json(fpath, content) # Assuming content is a serializable object
1133
+ save_json(fpath, content)
1180
1134
  elif kind == "xml":
1181
- save_xml(fpath, content) # Assuming content is a dictionary
1135
+ save_xml(fpath, content)
1182
1136
  elif kind == "yaml":
1183
- save_yaml(fpath, content, **kwargs) # Assuming content is a serializable object
1137
+ save_yaml(fpath, content, **kwargs)
1138
+ elif kind == "ipynb":
1139
+ save_ipynb(fpath, content, **kwargs)
1184
1140
  else:
1185
1141
  try:
1186
1142
  netfinder.downloader(url=content, dir_save=dirname(fpath), kind=kind)
@@ -1285,42 +1241,19 @@ def isa(*args,**kwargs):
1285
1241
  elif 'zip' in contains.lower():
1286
1242
  return is_zip(fpath)
1287
1243
  elif 'dir' in contains.lower() or ('f' in contains.lower() and 'd' in contains.lower()):
1288
- return bool(('/' in fpath) or ('\\' in fpath))
1244
+ return os.path.isdir(fpath)
1289
1245
  elif 'fi' in contains.lower():#file
1290
1246
  return os.path.isfile(fpath)
1247
+ elif 'num' in contains.lower():#file
1248
+ return os.path.isfile(fpath)
1249
+ elif 'text' in contains.lower() or 'txt' in contains.lower():#file
1250
+ return is_text(fpath)
1251
+ elif 'color' in contains.lower():#file
1252
+ return is_str_color(fpath)
1291
1253
  else:
1292
1254
  print(f"{contains} was not set up correctly")
1293
1255
  return False
1294
1256
 
1295
- def is_image(fpath):
1296
- mime_type, _ = mimetypes.guess_type(fpath)
1297
- if mime_type and mime_type.startswith('image'):
1298
- return True
1299
- else:
1300
- return False
1301
-
1302
- def is_document(fpath):
1303
- mime_type, _ = mimetypes.guess_type(fpath)
1304
- if mime_type and (
1305
- mime_type.startswith('text/') or
1306
- mime_type == 'application/pdf' or
1307
- mime_type == 'application/msword' or
1308
- mime_type == 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' or
1309
- mime_type == 'application/vnd.ms-excel' or
1310
- mime_type == 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' or
1311
- mime_type == 'application/vnd.ms-powerpoint' or
1312
- mime_type == 'application/vnd.openxmlformats-officedocument.presentationml.presentation'
1313
- ):
1314
- return True
1315
- else:
1316
- return False
1317
-
1318
- def is_zip(fpath):
1319
- mime_type, _ = mimetypes.guess_type(fpath)
1320
- if mime_type == 'application/zip':
1321
- return True
1322
- else:
1323
- return False
1324
1257
  def listdir(
1325
1258
  rootdir,
1326
1259
  kind="folder",
@@ -1428,21 +1361,15 @@ def list_func(lib_name, opt="call"):
1428
1361
  funcs = dir(lib_name)
1429
1362
  return funcs
1430
1363
  def func_list(lib_name, opt="call"):
1431
- if opt == "call":
1432
- funcs = [func for func in dir(lib_name) if callable(getattr(lib_name, func))]
1433
- else:
1434
- funcs = dir(lib_name)
1435
- return funcs
1364
+ return list_func(lib_name, opt=opt)
1436
1365
 
1437
1366
  def newfolder(*args, **kwargs):
1438
1367
  """
1439
1368
  newfolder(pardir, chdir)
1440
-
1441
1369
  Args:
1442
1370
  pardir (dir): parent dir
1443
1371
  chdir (str): children dir
1444
1372
  overwrite (bool): overwrite?
1445
-
1446
1373
  Returns:
1447
1374
  mkdir, giving a option if exists_ok or not
1448
1375
  """
@@ -1501,33 +1428,29 @@ def newfolder(*args, **kwargs):
1501
1428
  return rootdir
1502
1429
 
1503
1430
  def figsave(*args,dpi=300):
1504
- DirSave = None
1431
+ dir_save = None
1505
1432
  fname = None
1506
-
1507
1433
  for arg in args:
1508
1434
  if isinstance(arg, str):
1509
1435
  if '/' in arg or '\\' in arg:
1510
- DirSave = arg
1436
+ dir_save = arg
1511
1437
  elif '/' not in arg and '\\' not in arg:
1512
1438
  fname = arg
1513
-
1514
1439
  # Backup original values
1515
- if '/' in DirSave:
1516
- if DirSave[-1] != '/':
1517
- DirSave = DirSave + '/'
1518
- elif '\\' in DirSave:
1519
- if DirSave[-1] != '\\':
1520
- DirSave = DirSave + '\\'
1440
+ if '/' in dir_save:
1441
+ if dir_save[-1] != '/':
1442
+ dir_save = dir_save + '/'
1443
+ elif '\\' in dir_save:
1444
+ if dir_save[-1] != '\\':
1445
+ dir_save = dir_save + '\\'
1521
1446
  else:
1522
- raise ValueError('Check the Path of DirSave Directory')
1523
-
1447
+ raise ValueError('Check the Path of dir_save Directory')
1524
1448
  ftype = fname.split('.')[-1]
1525
1449
  if len(fname.split('.')) == 1:
1526
1450
  ftype = 'nofmt'
1527
- fname = DirSave + fname + '.' + ftype
1451
+ fname = dir_save + fname + '.' + ftype
1528
1452
  else:
1529
- fname = DirSave + fname
1530
-
1453
+ fname = dir_save + fname
1531
1454
  # Save figure based on file type
1532
1455
  if ftype.lower() == 'eps':
1533
1456
  plt.savefig(fname, format='eps', bbox_inches='tight')
@@ -1552,295 +1475,55 @@ def figsave(*args,dpi=300):
1552
1475
  plt.savefig(fname, format='emf', dpi=dpi, bbox_inches='tight')
1553
1476
  elif ftype.lower() == 'fig':
1554
1477
  plt.savefig(fname, format='pdf', bbox_inches='tight',dpi=dpi)
1555
-
1556
1478
  print(f'\nSaved @: dpi={dpi}\n{fname}')
1557
1479
 
1558
1480
 
1559
- # ==============FuncStars(ax,x1=1,x2=2, yscale=0.9, pval=0.01)====================================================
1560
- # Usage:
1561
- # FuncStars(ax, x1=2, x2=3, yscale=0.99, pval=0.02)
1562
- # =============================================================================
1563
-
1564
- # FuncStars --v 0.1.1
1565
- def FuncStars(ax,
1566
- pval=None,
1567
- Ylim=None,
1568
- Xlim=None,
1569
- symbol='*',
1570
- yscale=0.95,
1571
- x1=0,
1572
- x2=1,
1573
- alpha=0.05,
1574
- fontsize=14,
1575
- fontsize_note=6,
1576
- rotation=0,
1577
- fontname='Arial',
1578
- values_below=None,
1579
- linego=True,
1580
- linestyle='-',
1581
- linecolor='k',
1582
- linewidth=.8,
1583
- nsshow='off',
1584
- symbolcolor='k',
1585
- tailindicator=[0.06, 0.06],
1586
- report=None,
1587
- report_scale=-0.1,
1588
- report_loc=None):
1589
-
1590
-
1591
- if ax is None:
1592
- ax = plt.gca()
1593
- if Ylim is None:
1594
- Ylim = plt.gca().get_ylim()
1595
- if Xlim is None:
1596
- Xlim = ax.get_xlim()
1597
- if report_loc is None and report is not None:
1598
- report_loc = np.min(Ylim) + report_scale*np.abs(np.diff(Ylim))
1599
- if report_scale > 0:
1600
- report_scale = -np.abs(report_scale)
1601
-
1602
- yscale = np.float64(yscale)
1603
- y_loc = np.min(Ylim) + yscale*(np.max(Ylim)-np.min(Ylim))
1604
- xcenter = np.mean([x1, x2])
1605
-
1606
- # ns / *
1607
- if alpha < pval:
1608
- if nsshow == 'on':
1609
- ns_str = f'p={round(pval, 3)}' if pval < 0.9 else 'ns'
1610
- color = 'm' if pval < 0.1 else 'k'
1611
- plt.text(xcenter, y_loc, ns_str,
1612
- ha='center', va='bottom', # 'center_baseline',
1613
- fontsize=fontsize-6 if fontsize > 6 else fontsize,
1614
- fontname=fontname, color=color, rotation=rotation
1615
- # bbox=dict(facecolor=None, edgecolor=None, color=None, linewidth=None)
1616
- )
1617
- elif 0.01 < pval <= alpha:
1618
- plt.text(xcenter, y_loc, symbol,
1619
- ha='center', va='center_baseline',
1620
- fontsize=fontsize, fontname=fontname, color=symbolcolor)
1621
- elif 0.001 < pval <= 0.01:
1622
- plt.text(xcenter, y_loc, symbol * 2,
1623
- ha='center', va='center_baseline',
1624
- fontsize=fontsize, fontname=fontname, color=symbolcolor)
1625
- elif 0 < pval <= 0.001:
1626
- plt.text(xcenter, y_loc, symbol * 3,
1627
- ha='center', va='center_baseline',
1628
- fontsize=fontsize, fontname=fontname, color=symbolcolor)
1629
-
1630
- # lines indicators
1631
- if linego: # and 0 < pval <= 0.05:
1632
- print(pval)
1633
- print(linego)
1634
- # horizontal line
1635
- if yscale < 0.99:
1636
- plt.plot([x1 + np.abs(np.diff(Xlim)) * 0.01,
1637
- x2 - np.abs(np.diff(Xlim)) * 0.01],
1638
- [y_loc - np.abs(np.diff(Ylim)) * .03,
1639
- y_loc - np.abs(np.diff(Ylim)) * .03],
1640
- linestyle=linestyle, color=linecolor, linewidth=linewidth)
1641
- # vertical line
1642
- plt.plot([x1 + np.abs(np.diff(Xlim)) * 0.01,
1643
- x1 + np.abs(np.diff(Xlim)) * 0.01],
1644
- [y_loc - np.abs(np.diff(Ylim)) * tailindicator[0],
1645
- y_loc - np.abs(np.diff(Ylim)) * .03],
1646
- linestyle=linestyle, color=linecolor, linewidth=linewidth)
1647
- plt.plot([x2 - np.abs(np.diff(Xlim)) * 0.01,
1648
- x2 - np.abs(np.diff(Xlim)) * 0.01],
1649
- [y_loc - np.abs(np.diff(Ylim)) * tailindicator[1],
1650
- y_loc - np.abs(np.diff(Ylim)) * .03],
1651
- linestyle=linestyle, color=linecolor, linewidth=linewidth)
1652
- else:
1653
- plt.plot([x1 + np.abs(np.diff(Xlim)) * 0.01,
1654
- x2 - np.abs(np.diff(Xlim)) * 0.01],
1655
- [np.min(Ylim) + 0.95*(np.max(Ylim)-np.min(Ylim)) - np.abs(np.diff(Ylim)) * 0.002,
1656
- np.min(Ylim) + 0.95*(np.max(Ylim)-np.min(Ylim)) - np.abs(np.diff(Ylim)) * 0.002],
1657
- linestyle=linestyle, color=linecolor, linewidth=linewidth)
1658
- # vertical line
1659
- plt.plot([x1 + np.abs(np.diff(Xlim)) * 0.01,
1660
- x1 + np.abs(np.diff(Xlim)) * 0.01],
1661
- [np.min(Ylim) + 0.95*(np.max(Ylim)-np.min(Ylim)) - np.abs(np.diff(Ylim)) * tailindicator[0],
1662
- np.min(Ylim) + 0.95*(np.max(Ylim)-np.min(Ylim)) - np.abs(np.diff(Ylim)) * 0.002],
1663
- linestyle=linestyle, color=linecolor, linewidth=linewidth)
1664
- plt.plot([x2 - np.abs(np.diff(Xlim)) * 0.01,
1665
- x2 - np.abs(np.diff(Xlim)) * 0.01],
1666
- [np.min(Ylim) + 0.95*(np.max(Ylim)-np.min(Ylim)) - np.abs(np.diff(Ylim)) * tailindicator[1],
1667
- np.min(Ylim) + 0.95*(np.max(Ylim)-np.min(Ylim)) - np.abs(np.diff(Ylim)) * 0.002],
1668
- linestyle=linestyle, color=linecolor, linewidth=linewidth)
1669
-
1670
- if values_below is not None:
1671
- plt.text(xcenter, y_loc * (-0.1), values_below,
1672
- ha='center', va='bottom', # 'center_baseline', rotation=rotation,
1673
- fontsize=fontsize_note, fontname=fontname, color='k')
1674
-
1675
- # report / comments
1676
- if report is not None:
1677
- plt.text(xcenter, report_loc, report,
1678
- ha='left', va='bottom', # 'center_baseline', rotation=rotation,
1679
- fontsize=fontsize_note, fontname=fontname, color='.7')
1680
1481
  def is_str_color(s):
1681
1482
  # Regular expression pattern for hexadecimal color codes
1682
1483
  color_code_pattern = r"^#([A-Fa-f0-9]{6}|[A-Fa-f0-9]{8})$"
1683
1484
  return re.match(color_code_pattern, s) is not None
1684
-
1685
- def stdshade(ax=None,*args, **kwargs):
1686
- if (
1687
- isinstance(ax, np.ndarray)
1688
- and ax.ndim == 2
1689
- and min(ax.shape) > 1
1690
- and max(ax.shape) > 1
1691
- ):
1692
- y = ax
1693
- ax = plt.gca()
1694
- if ax is None:
1695
- ax = plt.gca()
1696
- alpha = 0.5
1697
- acolor = "k"
1698
- paraStdSem = "sem"
1699
- plotStyle = "-"
1700
- plotMarker = "none"
1701
- smth = 1
1702
- l_c_one = ["r", "g", "b", "m", "c", "y", "k", "w"]
1703
- l_style2 = ["--", "-."]
1704
- l_style1 = ["-", ":"]
1705
- l_mark = ["o", "+", "*", ".", "x", "_", "|", "s", "d", "^", "v", ">", "<", "p", "h"]
1706
-
1707
- # Check each argument
1708
- for iarg in range(len(args)):
1709
- if (
1710
- isinstance(args[iarg], np.ndarray)
1711
- and args[iarg].ndim == 2
1712
- and min(args[iarg].shape) > 1
1713
- and max(args[iarg].shape) > 1
1714
- ):
1715
- y = args[iarg]
1716
- # Except y, continuous data is 'F'
1717
- if (isinstance(args[iarg], np.ndarray) and args[iarg].ndim == 1) or isinstance(
1718
- args[iarg], range
1719
- ):
1720
- x = args[iarg]
1721
- if isinstance(x, range):
1722
- x = np.arange(start=x.start, stop=x.stop, step=x.step)
1723
- # Only one number( 0~1), 'alpha' / color
1724
- if isinstance(args[iarg], (int, float)):
1725
- if np.size(args[iarg]) == 1 and 0 <= args[iarg] <= 1:
1726
- alpha = args[iarg]
1727
- if isinstance(args[iarg], (list, tuple)) and np.size(args[iarg]) == 3:
1728
- acolor = args[iarg]
1729
- acolor = tuple(acolor) if isinstance(acolor, list) else acolor
1730
- # Color / plotStyle /
1731
- if (
1732
- isinstance(args[iarg], str)
1733
- and len(args[iarg]) == 1
1734
- and args[iarg] in l_c_one
1735
- ):
1736
- acolor = args[iarg]
1737
- else:
1738
- if isinstance(args[iarg], str):
1739
- if args[iarg] in ["sem", "std"]:
1740
- paraStdSem = args[iarg]
1741
- if args[iarg].startswith("#"):
1742
- acolor=hue2rgb(args[iarg])
1743
- if str2list(args[iarg])[0] in l_c_one:
1744
- if len(args[iarg]) == 3:
1745
- k = [i for i in str2list(args[iarg]) if i in l_c_one]
1746
- if k != []:
1747
- acolor = k[0]
1748
- st = [i for i in l_style2 if i in args[iarg]]
1749
- if st != []:
1750
- plotStyle = st[0]
1751
- elif len(args[iarg]) == 2:
1752
- k = [i for i in str2list(args[iarg]) if i in l_c_one]
1753
- if k != []:
1754
- acolor = k[0]
1755
- mk = [i for i in str2list(args[iarg]) if i in l_mark]
1756
- if mk != []:
1757
- plotMarker = mk[0]
1758
- st = [i for i in l_style1 if i in args[iarg]]
1759
- if st != []:
1760
- plotStyle = st[0]
1761
- if len(args[iarg]) == 1:
1762
- k = [i for i in str2list(args[iarg]) if i in l_c_one]
1763
- if k != []:
1764
- acolor = k[0]
1765
- mk = [i for i in str2list(args[iarg]) if i in l_mark]
1766
- if mk != []:
1767
- plotMarker = mk[0]
1768
- st = [i for i in l_style1 if i in args[iarg]]
1769
- if st != []:
1770
- plotStyle = st[0]
1771
- if len(args[iarg]) == 2:
1772
- st = [i for i in l_style2 if i in args[iarg]]
1773
- if st != []:
1774
- plotStyle = st[0]
1775
- # smth
1776
- if (
1777
- isinstance(args[iarg], (int, float))
1778
- and np.size(args[iarg]) == 1
1779
- and args[iarg] >= 1
1780
- ):
1781
- smth = args[iarg]
1782
-
1783
- if "x" not in locals() or x is None:
1784
- x = np.arange(1, y.shape[1] + 1)
1785
- elif len(x) < y.shape[1]:
1786
- y = y[:, x]
1787
- nRow = y.shape[0]
1788
- nCol = y.shape[1]
1789
- print(f"y was corrected, please confirm that {nRow} row, {nCol} col")
1485
+ def is_num(s):
1486
+ """
1487
+ Check if a string can be converted to a number (int or float).
1488
+ Parameters:
1489
+ - s (str): The string to check.
1490
+ Returns:
1491
+ - bool: True if the string can be converted to a number, False otherwise.
1492
+ """
1493
+ try:
1494
+ float(s) # Try converting the string to a float
1495
+ return True
1496
+ except ValueError:
1497
+ return False
1498
+ def isnum(s):
1499
+ return is_num(s)
1500
+ def is_image(fpath):
1501
+ mime_type, _ = mimetypes.guess_type(fpath)
1502
+ if mime_type and mime_type.startswith('image'):
1503
+ return True
1790
1504
  else:
1791
- x = np.arange(1, y.shape[1] + 1)
1792
-
1793
- if x.shape[0] != 1:
1794
- x = x.T
1795
- yMean = np.nanmean(y, axis=0)
1796
- if smth > 1:
1797
- yMean = savgol_filter(np.nanmean(y, axis=0), smth, 1)
1505
+ return False
1506
+ def is_document(fpath):
1507
+ mime_type, _ = mimetypes.guess_type(fpath)
1508
+ if mime_type and (
1509
+ mime_type.startswith('text/') or
1510
+ mime_type == 'application/pdf' or
1511
+ mime_type == 'application/msword' or
1512
+ mime_type == 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' or
1513
+ mime_type == 'application/vnd.ms-excel' or
1514
+ mime_type == 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' or
1515
+ mime_type == 'application/vnd.ms-powerpoint' or
1516
+ mime_type == 'application/vnd.openxmlformats-officedocument.presentationml.presentation'
1517
+ ):
1518
+ return True
1798
1519
  else:
1799
- yMean = np.nanmean(y, axis=0)
1800
- if paraStdSem == "sem":
1801
- if smth > 1:
1802
- wings = savgol_filter(np.nanstd(y, axis=0) / np.sqrt(y.shape[0]), smth, 1)
1803
- else:
1804
- wings = np.nanstd(y, axis=0) / np.sqrt(y.shape[0])
1805
- elif paraStdSem == "std":
1806
- if smth > 1:
1807
- wings = savgol_filter(np.nanstd(y, axis=0), smth, 1)
1808
- else:
1809
- wings = np.nanstd(y, axis=0)
1810
-
1811
- fill_kws = kwargs.get('fill_kws', {})
1812
- line_kws = kwargs.get('line_kws', {})
1813
- fill = ax.fill_between(x, yMean + wings, yMean - wings, color=acolor, alpha=alpha, lw=0,**fill_kws)
1814
- if line_kws != {} and not any(key.lower() in ['lw', 'linewidth'] for key in line_kws.keys()):
1815
- line = ax.plot(x, yMean, color=acolor, lw=1.5, ls=plotStyle, marker=plotMarker, **line_kws)
1520
+ return False
1521
+ def is_zip(fpath):
1522
+ mime_type, _ = mimetypes.guess_type(fpath)
1523
+ if mime_type == 'application/zip':
1524
+ return True
1816
1525
  else:
1817
- line = ax.plot(x, yMean, color=acolor, ls=plotStyle, marker=plotMarker, **line_kws)
1818
- return line[0], fill
1819
-
1820
-
1821
- # =============================================================================
1822
- # # for plot figures {Qiu et al.2023}
1823
- # =============================================================================
1824
- # =============================================================================
1825
- # plt.rcParams.update({'figure.max_open_warning': 0})
1826
- # # Output matplotlib figure to SVG with text as text, not curves
1827
- # plt.rcParams['svg.fonttype'] = 'none'
1828
- # plt.rcParams['pdf.fonttype'] = 42
1829
- #
1830
- # plt.rc('text', usetex=False)
1831
- # # plt.style.use('ggplot')
1832
- # plt.style.use('science')
1833
- # plt.rc('font', family='serif')
1834
- # plt.rcParams.update({
1835
- # "font.family": "serif", # specify font family here
1836
- # "font.serif": ["Arial"], # specify font here
1837
- # "font.size": 11})
1838
- # # plt.tight_layout()
1839
- # =============================================================================
1840
- # =============================================================================
1841
- # # axis spine
1842
- # # use it like: adjust_spines(ax, ['left', 'bottom'])
1843
- # =============================================================================
1526
+ return False
1844
1527
 
1845
1528
 
1846
1529
  def adjust_spines(ax=None, spines=['left', 'bottom'],distance=2):
@@ -1883,692 +1566,6 @@ def add_colorbar(im, width=None, pad=None, **kwargs):
1883
1566
  # =============================================================================
1884
1567
 
1885
1568
 
1886
- def FuncCmpt(X1, X2, pmc='auto', pair='unpaired'):
1887
- # output = {}
1888
-
1889
- # pmc correction: 'parametric'/'non-parametric'/'auto'
1890
- # meawhile get the opposite setting (to compare the results)
1891
- def corr_pmc(pmc):
1892
- cfg_pmc = None
1893
- if pmc.lower() in {'pmc', 'parametric'} and pmc.lower() not in {'npmc', 'nonparametric', 'non-parametric'}:
1894
- cfg_pmc = 'parametric'
1895
- elif pmc.lower() in {'npmc', 'nonparametric', 'non-parametric'} and pmc.lower() not in {'pmc', 'parametric'}:
1896
- cfg_pmc = 'non-parametric'
1897
- else:
1898
- cfg_pmc = 'auto'
1899
- return cfg_pmc
1900
-
1901
- def corr_pair(pair):
1902
- cfg_pair = None
1903
- if 'pa' in pair.lower() and 'np' not in pair.lower():
1904
- cfg_pair = 'paired'
1905
- elif 'np' in pair.lower():
1906
- cfg_pair = 'unpaired'
1907
- return cfg_pair
1908
-
1909
- def check_normality(data):
1910
- stat_shapiro, pval_shapiro = stats.shapiro(data)
1911
- if pval_shapiro > 0.05:
1912
- Normality = True
1913
- else:
1914
- Normality = False
1915
- print(f'\n normally distributed\n') if Normality else print(
1916
- f'\n NOT normally distributed\n')
1917
- return Normality
1918
-
1919
- def sub_cmpt_2group(X1, X2, cfg_pmc='pmc', pair='unpaired'):
1920
- output = {}
1921
- nX1 = np.sum(~np.isnan(X1))
1922
- nX2 = np.sum(~np.isnan(X2))
1923
- if cfg_pmc == 'parametric' or cfg_pmc == 'auto':
1924
- # VarType correction by checking variance Type via "levene"
1925
- stat_lev, pval_lev = stats.levene(
1926
- X1, X2, center='median', proportiontocut=0.05)
1927
- VarType = True if pval_lev > 0.05 and nX1 == nX2 else False
1928
-
1929
- if 'np' in pair: # 'unpaired'
1930
- if VarType and Normality:
1931
- # The independent t-test requires that the dependent variable is approximately normally
1932
- # distributed within each group
1933
- # Note: Technically, it is the residuals that need to be normally distributed, but for
1934
- # an independent t-test, both will give you the same result.
1935
- stat_value, pval= stats.ttest_ind(
1936
- X1, X2, axis=0, equal_var=True, nan_policy='omit', alternative='two-sided')
1937
- notes_stat = 'unpaired t test'
1938
- notes_APA = f't({nX1+nX2-2})={round(stat_value, 5)},p={round(pval, 5)}'
1939
- else:
1940
- # If the Levene's Test for Equality of Variances is statistically significant,
1941
- # which indicates that the group variances are unequal in the population, you
1942
- # can correct for this violation by not using the pooled estimate for the error
1943
- # term for the t-statistic, but instead using an adjustment to the degrees of
1944
- # freedom using the Welch-Satterthwaite method
1945
- stat_value, pval= stats.ttest_ind(
1946
- X1, X2, axis=0, equal_var=False, nan_policy='omit', alternative='two-sided')
1947
- notes_stat = 'Welchs t-test'
1948
- # note: APA FORMAT
1949
- notes_APA = f't({nX1+nX2-2})={round(stat_value, 5)},p={round(pval, 5)}'
1950
- elif 'pa' in pair and 'np' not in pair: # 'paired'
1951
- # the paired-samples t-test is considered “robust” in handling violations of normality
1952
- # to some extent. It can still yield valid results even if the data is not normally
1953
- # distributed. Therefore, this test typically requires only approximately normal data
1954
- stat_value, pval= stats.ttest_rel(
1955
- X1, X2, axis=0, nan_policy='omit', alternative='two-sided')
1956
- notes_stat = 'paired t test'
1957
- # note: APA FORMAT
1958
- notes_APA = f't({sum([nX1-1])})={round(stat_value, 5)},p={round(pval, 5)}'
1959
- elif cfg_pmc == 'non-parametric':
1960
- if 'np' in pair: # Perform Mann-Whitney
1961
- stat_value, pval = stats.mannwhitneyu(
1962
- X1, X2, method='exact', nan_policy='omit')
1963
- notes_stat = 'Mann-Whitney U'
1964
- if nX1 == nX2:
1965
- notes_APA = f'U(n={nX1})={round(stat_value, 5)},p={round(pval, 5)}'
1966
- else:
1967
- notes_APA = f'U(n1={nX1},n2={nX2})={round(stat_value, 5)},p={round(pval, 5)}'
1968
- elif 'pa' in pair and 'np' not in pair: # Wilcoxon signed-rank test
1969
- stat_value, pval = stats.wilcoxon(
1970
- X1, X2, method='exact', nan_policy='omit')
1971
- notes_stat = 'Wilcoxon signed-rank'
1972
- if nX1 == nX2:
1973
- notes_APA = f'Z(n={nX1})={round(stat_value, 5)},p={round(pval, 5)}'
1974
- else:
1975
- notes_APA = f'Z(n1={nX1},n2={nX2})={round(stat_value, 5)},p={round(pval, 5)}'
1976
-
1977
- # filling output
1978
- output['stat'] = stat_value
1979
- output['pval'] = pval
1980
- output['method'] = notes_stat
1981
- output['APA'] = notes_APA
1982
-
1983
- print(f"{output['method']}\n {notes_APA}\n\n")
1984
-
1985
- return output, pval
1986
-
1987
- Normality1 = check_normality(X1)
1988
- Normality2 = check_normality(X2)
1989
- Normality = True if all([Normality1, Normality2]) else False
1990
-
1991
- nX1 = np.sum(~np.isnan(X1))
1992
- nX2 = np.sum(~np.isnan(X2))
1993
-
1994
- cfg_pmc = corr_pmc(pmc)
1995
- cfg_pair = corr_pair(pair)
1996
-
1997
- output, p = sub_cmpt_2group(
1998
- X1, X2, cfg_pmc=cfg_pmc, pair=cfg_pair)
1999
- return p, output
2000
-
2001
-
2002
- # ======compare 2 group test===================================================
2003
- # # Example
2004
- # X1 = [19, 22, 16, 29, 24]
2005
- # X2 = [20, 11, 17, 12, 22]
2006
-
2007
- # p, res= FuncCmpt(X1, X2, pmc='pmc', pair='unparrr')
2008
-
2009
- # =============================================================================
2010
-
2011
- # =============================================================================
2012
- # # method = ['anova', # 'One-way and N-way ANOVA',
2013
- # # 'rm_anova', # 'One-way and two-way repeated measures ANOVA',
2014
- # # 'mixed_anova', # 'Two way mixed ANOVA',
2015
- # # 'welch_anova', # 'One-way Welch ANOVA',
2016
- # # 'kruskal', # 'Non-parametric one-way ANOVA'
2017
- # # 'friedman', # Non-parametric one-way repeated measures ANOVA
2018
- # # ]
2019
- # =============================================================================
2020
-
2021
-
2022
- # =============================================================================
2023
- # # method = ['anova', # 'One-way and N-way ANOVA',
2024
- # # 'rm_anova', # 'One-way and two-way repeated measures ANOVA',
2025
- # # 'mixed_anova', # 'Two way mixed ANOVA',
2026
- # # 'welch_anova', # 'One-way Welch ANOVA',
2027
- # # 'kruskal', # 'Non-parametric one-way ANOVA'
2028
- # # 'friedman', # Non-parametric one-way repeated measures ANOVA
2029
- # # ]
2030
- # =============================================================================
2031
- def df_wide_long(df):
2032
- rows, columns = df.shape
2033
- if columns > rows:
2034
- return "Wide"
2035
- elif rows > columns:
2036
- return "Long"
2037
-
2038
- def FuncMultiCmpt(pmc='pmc', pair='unpair', data=None, dv=None, factor=None,
2039
- ss_type=2, detailed=True, effsize='np2',
2040
- correction='auto', between=None, within=None,
2041
- subject=None, group=None
2042
- ):
2043
-
2044
- def corr_pair(pair):
2045
- cfg_pair = None
2046
- if 'pa' in pair.lower() and 'np' not in pair.lower():
2047
- cfg_pair = 'paired'
2048
- elif 'np' in pair.lower():
2049
- cfg_pair = 'unpaired'
2050
- elif 'mix' in pair.lower():
2051
- cfg_pair = 'mix'
2052
- return cfg_pair
2053
-
2054
- def check_normality(data):
2055
- stat_shapiro, pval_shapiro = stats.shapiro(data)
2056
- if pval_shapiro > 0.05:
2057
- Normality = True
2058
- else:
2059
- Normality = False
2060
- print(f'\n normally distributed\n') if Normality else print(
2061
- f'\n NOT normally distributed\n')
2062
- return Normality
2063
-
2064
- def corr_pmc(pmc):
2065
- cfg_pmc = None
2066
- if pmc.lower() in {'pmc', 'parametric'} and pmc.lower() not in {'upmc', 'npmc', 'nonparametric', 'non-parametric'}:
2067
- cfg_pmc = 'parametric'
2068
- elif pmc.lower() in {'upmc', 'npmc', 'nonparametric', 'non-parametric'} and pmc.lower() not in {'pmc', 'parametric'}:
2069
- cfg_pmc = 'non-parametric'
2070
- else:
2071
- cfg_pmc = 'auto'
2072
- return cfg_pmc
2073
-
2074
- def extract_apa(res_tab):
2075
- notes_APA = []
2076
- if "ddof1" in res_tab:
2077
- for irow in range(res_tab.shape[0]):
2078
- note_tmp = f'{res_tab.Source[irow]}:F{round(res_tab.ddof1[irow]),round(res_tab.ddof2[irow])}={round(res_tab.F[irow], 5)},p={round(res_tab["p-unc"][irow], 5)}'
2079
- notes_APA.append([note_tmp])
2080
- elif "DF" in res_tab:
2081
- print(res_tab.shape[0])
2082
- for irow in range(res_tab.shape[0]-1):
2083
- note_tmp = f'{res_tab.Source[irow]}:F{round(res_tab.DF[irow]),round(res_tab.DF[res_tab.shape[0]-1])}={round(res_tab.F[irow], 5)},p={round(res_tab["p-unc"][irow], 5)}'
2084
- notes_APA.append([note_tmp])
2085
- notes_APA.append(['NaN'])
2086
- elif "DF1" in res_tab: # in 'mix' case
2087
- for irow in range(res_tab.shape[0]):
2088
- note_tmp = f'{res_tab.Source[irow]}:F{round(res_tab.DF1[irow]),round(res_tab.DF2[irow])}={round(res_tab.F[irow], 5)},p={round(res_tab["p-unc"][irow], 5)}'
2089
- notes_APA.append([note_tmp])
2090
- return notes_APA
2091
-
2092
- def anovatable(res_tab):
2093
- if 'df' in res_tab: # statsmodels
2094
- res_tab['mean_sq'] = res_tab[:]['sum_sq']/res_tab[:]['df']
2095
- res_tab['est_sq'] = res_tab[:-1]['sum_sq'] / \
2096
- sum(res_tab['sum_sq'])
2097
- res_tab['omega_sq'] = (res_tab[:-1]['sum_sq']-(res_tab[:-1]['df'] *
2098
- res_tab['mean_sq'][-1]))/(sum(res_tab['sum_sq'])+res_tab['mean_sq'][-1])
2099
- elif 'DF' in res_tab:
2100
- res_tab['MS'] = res_tab[:]['SS']/res_tab[:]['DF']
2101
- res_tab['est_sq'] = res_tab[:-1]['SS']/sum(res_tab['SS'])
2102
- res_tab['omega_sq'] = (res_tab[:-1]['SS']-(res_tab[:-1]['DF'] *
2103
- res_tab['MS'][1]))/(sum(res_tab['SS'])+res_tab['MS'][1])
2104
- if 'p-unc' in res_tab:
2105
- if 'np2' in res_tab:
2106
- res_tab['est_sq'] = res_tab['np2']
2107
- if 'p-unc' in res_tab:
2108
- res_tab['PR(>F)'] = res_tab['p-unc']
2109
- return res_tab
2110
-
2111
- def run_anova(data, dv, factor, ss_type=2, detailed=True, effsize='np2'):
2112
- # perform ANOVA
2113
- # =============================================================================
2114
- # # # ANOVA (input: formula, dataset)
2115
- # =============================================================================
2116
- # # note: if the data is balanced (equal sample size for each group), Type 1, 2, and 3 sums of squares
2117
- # # (typ parameter) will produce similar results.
2118
- # lm = ols("values ~ C(group)", data=df).fit()
2119
- # res_tab = anova_lm(lm, typ=ss_type)
2120
-
2121
- # # however, it does not provide any effect size measures to tell if the
2122
- # # statistical significance is meaningful. The function below calculates
2123
- # # eta-squared () and omega-squared (). A quick note, is the exact same
2124
- # # thing as except when coming from the ANOVA framework people call it ;
2125
- # # is considered a better measure of effect size since it is unbiased in
2126
- # # it's calculation by accounting for the degrees of freedom in the model.
2127
- # # note: No effect sizes are calculated when using statsmodels.
2128
- # # to calculate eta squared, use the sum of squares from the table
2129
- # res_tab = anovatable(res_tab)
2130
-
2131
- # =============================================================================
2132
- # # alternativ for ANOVA
2133
- # =============================================================================
2134
- res_tab = pg.anova(dv=dv, between=factor, data=data,
2135
- detailed=detailed, ss_type=ss_type, effsize=effsize)
2136
- res_tab = anovatable(res_tab)
2137
- return res_tab
2138
-
2139
- def run_rmanova(data, dv, factor, subject, correction='auto', detailed=True, effsize='ng2'):
2140
- # One-way repeated-measures ANOVA using a long-format dataset.
2141
- res_tab = pg.rm_anova(data=data, dv=dv, within=factor,
2142
- subject=subject, detailed=detailed, effsize=effsize)
2143
- return res_tab
2144
-
2145
- def run_welchanova(data, dv, factor):
2146
- # When the groups are balanced and have equal variances, the optimal
2147
- # post-hoc test is the Tukey-HSD test (pingouin.pairwise_tukey()). If the
2148
- # groups have unequal variances, the Games-Howell test is more adequate
2149
- # (pingouin.pairwise_gameshowell()). Results have been tested against R.
2150
- res_tab = pg.welch_anova(data=data, dv=dv, between=factor)
2151
- res_tab = anovatable(res_tab)
2152
- return res_tab
2153
-
2154
- def run_mixedanova(data, dv, between, within, subject, correction='auto', effsize='np2'):
2155
- # Notes
2156
- # Data are expected to be in long-format (even the repeated measures).
2157
- # If your data is in wide-format, you can use the pandas.melt() function
2158
- # to convert from wide to long format.
2159
-
2160
- # Warning
2161
- # If the between-subject groups are unbalanced(=unequal sample sizes), a
2162
- # type II ANOVA will be computed. Note however that SPSS, JAMOVI and JASP
2163
- # by default return a type III ANOVA, which may lead to slightly different
2164
- # results.
2165
- res_tab = pg.mixed_anova(data=data, dv=dv, within=within, subject=subject,
2166
- between=between, correction=correction, effsize=effsize)
2167
- res_tab = anovatable(res_tab)
2168
- return res_tab
2169
-
2170
- def run_friedman(data, dv, factor, subject, method='chisq'):
2171
- # Friedman test for repeated measurements
2172
- # The Friedman test is used for non-parametric (rank-based) one-way
2173
- # repeated measures ANOVA
2174
-
2175
- # check df form ('long' or 'wide')
2176
- # df_long = data.melt(ignore_index=False).reset_index()
2177
- # if data.describe().shape[1] >= df_long.describe().shape[1]:
2178
- # res_tab = pg.friedman(data, method=method)
2179
- # else:
2180
- # res_tab = pg.friedman(data=df_long, dv='value',
2181
- # within="variable", subject="index", method=method)
2182
- if "Wide" in df_wide_long(data):
2183
- df_long = data.melt(ignore_index=False).reset_index()
2184
- res_tab = pg.friedman(data=df_long, dv='value',
2185
- within="variable", subject="index", method=method)
2186
- else:
2187
- res_tab = pg.friedman(data, dv=dv, within=factor, subject=subject,method=method)
2188
- res_tab = anovatable(res_tab)
2189
- return res_tab
2190
-
2191
- def run_kruskal(data, dv, factor):
2192
- # Kruskal-Wallis H-test for independent samples
2193
- res_tab = pg.kruskal(data=data, dv=dv, between=factor)
2194
- res_tab = anovatable(res_tab)
2195
- return res_tab
2196
-
2197
- # Normality Check:
2198
- # Conduct normality tests (Shapiro-Wilk) for each group.
2199
- # If the data is approximately normally distributed, ANOVA is robust to
2200
- # moderate departures from normality, especially with larger sample sizes.
2201
-
2202
- # print(data[factor])
2203
- # print(type(data[factor]))
2204
- # print(len(data[factor].columns))
2205
- # print(data[factor].nunique())
2206
- # print(data[factor[0]])
2207
- # print(data[factor[0]].unique())
2208
- if group is None:
2209
- group = factor
2210
-
2211
- # print(f'\ngroup is :\n{data[group]},\ndv is :\n{dv}\n')
2212
- norm_array = []
2213
- for sub_group in data[group].unique():
2214
- norm_curr = check_normality(
2215
- data.loc[data[group] == sub_group, dv])
2216
- norm_array.append(norm_curr)
2217
- norm_all = True if all(norm_array) else False
2218
-
2219
- # Homogeneity of Variances:
2220
- # Check for homogeneity of variances (homoscedasticity) among groups.
2221
- # Levene's test or Bartlett's test can be used for this purpose.
2222
- # If variances are significantly different, consider transformations or use a
2223
- # robust ANOVA method.
2224
-
2225
- # # =============================================================================
2226
- # # # method1: stats.levene
2227
- # # =============================================================================
2228
- # # data_array = []
2229
- # # for sub_group in df["group"].unique():
2230
- # # data_array.append(df.loc[df['group'] == sub_group, 'values'].values)
2231
- # # print(data_array)
2232
- # # variance_all = stats.levene(data_array[0],data_array[1],data_array[2])
2233
-
2234
- # =============================================================================
2235
- # # method2: pingouin.homoscedasticity
2236
- # =============================================================================
2237
- res_levene = None
2238
- variance_all = pg.homoscedasticity(
2239
- data, dv=dv, group=group, method='levene', alpha=0.05)
2240
- res_levene = True if variance_all.iloc[0,1] > 0.05 else False
2241
- # =============================================================================
2242
- # # ANOVA Assumptions:
2243
- # # Ensure that the assumptions of independence, homogeneity of variances, and
2244
- # # normality are reasonably met before proceeding.
2245
- # =============================================================================
2246
- notes_norm = 'normally' if norm_all else 'NOT-normally'
2247
- notes_variance = 'equal' if res_levene else 'unequal'
2248
- print(f'Data is {notes_norm} distributed, shows {notes_variance} variance')
2249
-
2250
- cfg_pmc = corr_pmc(pmc)
2251
- cfg_pair = corr_pair(pair)
2252
- output = {}
2253
- if (cfg_pmc == 'parametric') or (cfg_pmc == 'auto'):
2254
- if 'np' in cfg_pair: # 'unpaired'
2255
- if cfg_pmc == 'auto':
2256
- if norm_all:
2257
- if res_levene:
2258
- res_tab = run_anova(data, dv, factor, ss_type=ss_type,
2259
- detailed=True, effsize='np2')
2260
- notes_stat = f'{data[factor].nunique()} Way ANOVA'
2261
- notes_APA = extract_apa(res_tab)
2262
-
2263
- else:
2264
- res_tab = run_welchanova(data, dv, factor)
2265
- notes_stat = f'{data[factor].nunique()} Way Welch ANOVA'
2266
- notes_APA = extract_apa(res_tab)
2267
-
2268
- else:
2269
-
2270
- res_tab = run_kruskal(data, dv, factor)
2271
- notes_stat = f'Non-parametric Kruskal: {data[factor].nunique()} Way ANOVA'
2272
- notes_APA = extract_apa(res_tab)
2273
-
2274
- elif cfg_pmc == 'parametric':
2275
- res_tab = run_anova(data, dv, factor, ss_type=ss_type,
2276
- detailed=True, effsize='np2')
2277
- notes_stat = f'{data[factor].nunique()} Way ANOVA'
2278
- notes_APA = extract_apa(res_tab)
2279
-
2280
- elif 'pa' in cfg_pair and 'np' not in cfg_pair: # 'paired'
2281
- res_tab = run_rmanova(data, dv, factor, subject, correction='auto',
2282
- detailed=True, effsize='ng2')
2283
- notes_stat = f'{data[factor].nunique()} Way Repeated measures ANOVA'
2284
- notes_APA = extract_apa(res_tab)
2285
-
2286
- elif 'mix' in cfg_pair or 'both' in cfg_pair:
2287
- res_tab = run_mixedanova(data, dv, between, within, subject)
2288
- # notes_stat = f'{len(sum(len(between)+sum(len(within))))} Way Mixed ANOVA'
2289
- notes_stat = ""
2290
- # n_inter = res_tab.loc(res_tab["Source"] == "Interaction")
2291
- # print(n_inter)
2292
- notes_APA = extract_apa(res_tab)
2293
-
2294
- elif cfg_pmc == 'non-parametric':
2295
- if 'np' in cfg_pair: # 'unpaired'
2296
- res_tab = run_kruskal(data, dv, factor)
2297
- notes_stat = f'Non-parametric Kruskal: {data[factor].nunique()} Way ANOVA'
2298
- notes_APA = f'H({res_tab.ddof1[0]},n={data.shape[0]})={round(res_tab.H[0], 5)},p={round(res_tab["p-unc"][0], 5)}'
2299
-
2300
- elif 'pa' in cfg_pair and 'np' not in cfg_pair: # 'paired'
2301
- res_tab = run_friedman(data, dv, factor, subject, method='chisq')
2302
- notes_stat = f'Non-parametric {data[factor].nunique()} Way Friedman repeated measures ANOVA'
2303
- notes_APA = f'X^2({res_tab.ddof1[0]})={round(res_tab.Q[0], 5)},p={round(res_tab["p-unc"][0], 5)}'
2304
-
2305
- # =============================================================================
2306
- # # Post-hoc
2307
- # Post-Hoc Tests (if significant):
2308
- # If ANOVA indicates significant differences, perform post-hoc tests (e.g.,
2309
- # Tukey's HSD, Bonferroni, or Scheffé) to identify which groups differ from each other.
2310
- # # https://pingouin-stats.org/build/html/generated/pingouin.pairwise_tests.html
2311
- # =============================================================================
2312
- go_pmc = True if cfg_pmc == 'parametric' else False
2313
- go_subject = subject if ('pa' in cfg_pair) and (
2314
- 'np' not in cfg_pair) else None
2315
- go_mix_between = between if ('mix' in cfg_pair) or (
2316
- 'both' in cfg_pair) else None
2317
- go_mix_between = None if ('pa' in cfg_pair) or (
2318
- 'np' not in cfg_pair) else factor
2319
- go_mix_within = within if ('mix' in cfg_pair) or (
2320
- 'both' in cfg_pair) else None
2321
- go_mix_within = factor if ('pa' in cfg_pair) or (
2322
- 'np' not in cfg_pair) else None
2323
-
2324
- if res_tab['p-unc'][0] <= .05:
2325
- # Pairwise Comparisons
2326
- method_post_hoc = [
2327
- "bonf", # 'bonferroni', # : one-step correction
2328
- "sidak", # one-step correction
2329
- "holm", # step-down method using Bonferroni adjustments
2330
- "fdr_bh", # Benjamini/Hochberg (non-negative)
2331
- "fdr_by", # Benjamini/Yekutieli (negative)
2332
- ]
2333
- res_posthoc = pd.DataFrame()
2334
- for met in method_post_hoc:
2335
- post_curr = pg.pairwise_tests(data=data, dv=dv, between=go_mix_between, within=go_mix_within, subject=go_subject, parametric=go_pmc, marginal=True, alpha=0.05, alternative='two-sided',
2336
- padjust=met)
2337
-
2338
- res_posthoc = pd.concat([res_posthoc, post_curr],
2339
- ignore_index=True)
2340
- else:
2341
- res_posthoc = None
2342
- output['res_posthoc'] = res_posthoc
2343
- # =============================================================================
2344
- # # filling output
2345
- # =============================================================================
2346
-
2347
- pd.set_option('display.max_columns', None)
2348
- output['stat'] = notes_stat
2349
- # print(output['APA'])
2350
- output['APA'] = notes_APA
2351
- output['pval'] = res_tab['p-unc']
2352
- output['res_tab'] = res_tab
2353
- if res_tab.shape[0] == len(notes_APA):
2354
- output['res_tab']['APA'] = output['APA'] # note APA in the table
2355
- # print(output['stat'])
2356
- # print(output['res_tab'])
2357
-
2358
- return output
2359
-
2360
-
2361
- # =============================================================================
2362
- # # One-way ANOVA
2363
- # =============================================================================
2364
- # url = "http://stats191.stanford.edu/data/rehab.csv"
2365
- # rehab_table = pd.read_table(url, delimiter=",")
2366
- # rehab_table.to_csv("rehab.table")
2367
- # fig, ax = plt.subplots(figsize=(8, 6))
2368
- # fig = rehab_table.boxplot("Time", "Fitness", ax=ax, grid=False)
2369
- # # fig, ax = plt.subplots(figsize=(8, 6))
2370
- # # set_pub()
2371
- # # sns.boxenplot(x="Time",y="Fitness",data = rehab_table)
2372
-
2373
- # out2 = FuncMultiCmpt(pmc='pmc', pair='unpair',
2374
- # data=rehab_table, dv='Time', factor='Fitness')
2375
- # # print(out2['res_tab'])
2376
- # # print(out2['APA'])
2377
- # out2['res_posthoc']
2378
- # out2['res_posthoc']['p-unc'][0]
2379
- # out2['res_posthoc']['p-adjust'][0]
2380
- # out2['res_posthoc']['p-corr'][0]
2381
-
2382
-
2383
- # =============================================================================
2384
- # # Interactions and ANOVA
2385
- # https://www.statsmodels.org/dev/examples/notebooks/generated/interactions_anova.html
2386
- # url = "http://stats191.stanford.edu/data/salary.table"
2387
- # fh = urlopen(url)
2388
- # df = pd.read_table(fh)
2389
- # out1 = FuncMultiCmpt(pmc='pmc', pair='unpaired', data=df,
2390
- # dv='S', factor=['X', 'E', 'M'], group='M')
2391
- # # # two-way anova
2392
- # # https://www.statology.org/two-way-anova-python/
2393
- # # =============================================================================
2394
- # # df = pd.DataFrame({'water': np.repeat(['daily', 'weekly'], 15),
2395
- # # 'sun': np.tile(np.repeat(['low', 'med', 'high'], 5), 2),
2396
- # # 'height': [6, 6, 6, 5, 6, 5, 5, 6, 4, 5,
2397
- # # 6, 6, 7, 8, 7, 3, 4, 4, 4, 5,
2398
- # # 4, 4, 4, 4, 4, 5, 6, 6, 7, 8]})
2399
- # # out1 = FuncMultiCmpt(pmc='pmc', pair='unpaired', data=df,
2400
- # # dv='height', factor=['water','sun'],group='water')
2401
-
2402
-
2403
- # =============================================================================
2404
- # # two way anova
2405
- # https://www.geeksforgeeks.org/how-to-perform-a-two-way-anova-in-python/
2406
- # =============================================================================
2407
- # df1=pd.DataFrame({'Fertilizer': np.repeat(['daily', 'weekly'], 15),
2408
- # 'Watering': np.repeat(['daily', 'weekly'], 15),
2409
- # 'height': [14, 16, 15, 15, 16, 13, 12, 11,
2410
- # 14, 15, 16, 16, 17, 18, 14, 13,
2411
- # 14, 14, 14, 15, 16, 16, 17, 18,
2412
- # 14, 13, 14, 14, 14, 15]})
2413
-
2414
- # df1['subject'] = np.tile(range(0, 15), (1, 2)).T
2415
- # out1 = FuncMultiCmpt(pmc='pmc', pair='unpaired', data=df1,
2416
- # dv='height', factor=['Fertilizer','Watering'],group='Watering')
2417
- # # print(out1['stat'])
2418
- # # print(out1['res_tab'])
2419
-
2420
- # =============================================================================
2421
- # # welch anova
2422
- # https://www.geeksforgeeks.org/how-to-perform-welchs-anova-in-python/
2423
- # =============================================================================
2424
- # df = pd.DataFrame({'score': [64, 66, 68, 75, 78, 94, 98, 79, 71, 80,
2425
- # 91, 92, 93, 90, 97, 94, 82, 88, 95, 96,
2426
- # 79, 78, 88, 94, 92, 85, 83, 85, 82, 81],
2427
- # 'group': np.repeat(['strat1', 'strat2', 'strat3'],repeats=10)})
2428
- # out1 = FuncMultiCmpt(pmc='auto',pair='unpaired',data=df, dv='score', factor='group', group='group')
2429
- # =============================================================================
2430
- # # two way anova
2431
- # https://www.statology.org/two-way-anova-python/
2432
- # =============================================================================
2433
- # df = pd.DataFrame({'water': np.repeat(['daily', 'weekly'], 15),
2434
- # 'sun': np.tile(np.repeat(['low', 'med', 'high'], 5), 2),
2435
- # 'height': [6, 6, 6, 5, 6, 5, 5, 6, 4, 5,
2436
- # 6, 6, 7, 8, 7, 3, 4, 4, 4, 5,
2437
- # 4, 4, 4, 4, 4, 5, 6, 6, 7, 8]})
2438
- # df['subject'] = np.tile(range(0, 15), (1, 2)).T
2439
- # out1 = FuncMultiCmpt(pmc='pmc', pair='unpaired', data=df,
2440
- # dv='height', factor=['water', 'sun'], subject='subject', group='water')
2441
- # # print(out1['stat'])
2442
- # # print(out1['res_tab'])
2443
-
2444
- # =============================================================================
2445
- # # 3-way ANOVA
2446
- # =============================================================================
2447
- # df = pd.DataFrame({'program': np.repeat([1, 2], 20),
2448
- # 'gender': np.tile(np.repeat(['M', 'F'], 10), 2),
2449
- # 'division': np.tile(np.repeat([1, 2], 5), 4),
2450
- # 'height': [7, 7, 8, 8, 7, 6, 6, 5, 6, 5,
2451
- # 5, 5, 4, 5, 4, 3, 3, 4, 3, 3,
2452
- # 6, 6, 5, 4, 5, 4, 5, 4, 4, 3,
2453
- # 2, 2, 1, 4, 4, 2, 1, 1, 2, 1]})
2454
- # df['subject'] = np.tile(range(0, 20), (1, 2)).T
2455
- # out1 = FuncMultiCmpt(pmc='pmc', pair='unpaired', data=df,
2456
- # dv='height', factor=['gender', 'program', 'division'], subject='subject', group='program')
2457
- # # print(out1['stat'])
2458
- # # print(out1['res_tab'])
2459
-
2460
- # =============================================================================
2461
- # # Repeated Measures ANOVA in Python
2462
- # =============================================================================
2463
- # df = pd.DataFrame({'patient': np.repeat([1, 2, 3, 4, 5], 4),
2464
- # 'drug': np.tile([1, 2, 3, 4], 5),
2465
- # 'response': [30, 28, 16, 34,
2466
- # 14, 18, 10, 22,
2467
- # 24, 20, 18, 30,
2468
- # 38, 34, 20, 44,
2469
- # 26, 28, 14, 30]})
2470
- # # df['subject'] = np.tile(range(0, 20), (1, 2)).T
2471
- # out1 = FuncMultiCmpt(pmc='pmc', pair='paired', data=df,
2472
- # dv='response', factor=['drug'], subject='patient', group='drug')
2473
- # print(out1['stat'])
2474
- # print(out1['res_tab'])
2475
- # print(out1['APA'])
2476
-
2477
- # =============================================================================
2478
- # # repeated anova
2479
- # https://www.geeksforgeeks.org/how-to-perform-a-repeated-measures-anova-in-python/
2480
- # =============================================================================
2481
- # df = pd.DataFrame({'Cars': np.repeat([1, 2, 3, 4, 5], 4),
2482
- # 'Engine Oil': np.tile([1, 2, 3, 4], 5),
2483
- # 'Mileage': [36, 38, 30, 29,
2484
- # 34, 38, 30, 29,
2485
- # 34, 28, 38, 32,
2486
- # 38, 34, 20, 44,
2487
- # 26, 28, 34, 50]})
2488
- # out1 = FuncMultiCmpt(pmc='pmc', pair='paired', data=df,
2489
- # dv='Mileage', factor=['Engine Oil'], subject='Cars', group='Cars')
2490
- # =============================================================================
2491
- # #two-way repeated anova
2492
- # =============================================================================
2493
- # df = pd.read_csv(
2494
- # "https://reneshbedre.github.io/assets/posts/anova/plants_leaves_two_within.csv")
2495
- # df
2496
- # # df['subject'] = np.tile(range(0, 20), (1, 2)).T
2497
- # out1 = FuncMultiCmpt(pmc='pmc', pair='paired', data=df,
2498
- # dv='num_leaves', factor=['year', 'time'], subject='plants', group='year')
2499
- # print(out1['stat'])
2500
- # print(out1['res_tab'])
2501
- # print(out1['APA'])
2502
-
2503
- # =============================================================================
2504
- # # repeated anova
2505
- # =============================================================================
2506
- # df = pd.read_csv('/Users/macjianfeng/Desktop/test.csv')
2507
- # df.head()
2508
- # df.loc[df['animal'].str.contains('Sleep'), 'experiment'] = 'sleep'
2509
- # df.loc[df['animal'].str.contains('Wake'), 'experiment'] = 'wake'
2510
- # df.loc[df['variable'].str.contains('hypo'), 'region'] = 'hypo'
2511
- # df.loc[df['variable'].str.contains('cort'), 'region'] = 'cort'
2512
- # df
2513
- # for i in range(4):
2514
- # match i:
2515
- # case 0:
2516
- # prot_name = 'A1'
2517
- # case 1:
2518
- # prot_name = 'A2'
2519
- # case 2:
2520
- # prot_name = '845'
2521
- # case 3:
2522
- # prot_name = '831'
2523
- # df_tmp = df[df["variable"].str.contains(prot_name)]
2524
- # df_tmp['protein'] = prot_name
2525
- # df_tmp = df_tmp.reset_index()
2526
- # print(df_tmp)
2527
-
2528
- # out1 = FuncMultiCmpt(pmc='pmc', pair='mix', data=df_tmp,
2529
- # dv='value', between='experiment', within='region', subject='animal', group='experiment')
2530
- # print(out1['stat'])
2531
- # print(out1['res_tab'])
2532
- # # =============================================================================
2533
- # One-way ANOVA
2534
- # df1 = pd.read_csv('/Users/macjianfeng/Desktop/Book2.csv')
2535
- # df2 = df1.melt()
2536
- # out1 = FuncMultiCmpt(pmc='npmc', pair='unpaired', data=df2,
2537
- # dv='libido', factor=['brand x', 'brand y', 'brand z'], subject='participant')
2538
- # print(out1['stat'])
2539
- # print(out1['res_tab'])
2540
- # =============================================================================
2541
-
2542
-
2543
- # =============================================================================
2544
- # # #One-way ANOVA new example: https://www.pythonfordatascience.org/anova-python/
2545
- # =============================================================================
2546
- # df1 = pd.read_csv(
2547
- # "https://raw.githubusercontent.com/researchpy/Data-sets/master/difficile.csv")
2548
- # df1.drop('person', axis=1, inplace=True)
2549
- # # Recoding value from numeric to string
2550
- # df1['dose'].replace({1: 'placebo', 2: 'low', 3: 'high'}, inplace=True)
2551
- # df1.head(10)
2552
-
2553
- # out3= FuncMultiCmpt(pmc='pmc', data=df1, dv='libido', factor='dose')
2554
- # # print(out3['res_tab'])
2555
- # # # print(out3['res_posthoc'])
2556
- # # print(out3['APA'])
2557
-
2558
- # =============================================================================
2559
- # https://lifewithdata.com/2023/06/08/how-to-perform-a-two-way-anova-in-python/
2560
- # =============================================================================
2561
- # data = {
2562
- # 'Diet': ['A', 'A', 'A', 'A', 'B', 'B', 'B', 'B', 'C', 'C', 'C', 'C'],
2563
- # 'Workout': ['Low', 'Medium', 'High', 'Low', 'Medium', 'High', 'Low', 'Medium', 'High', 'Low', 'Medium', 'High'],
2564
- # 'WeightLoss': [3, 4, 5, 3.2, 5, 6, 5.2, 6, 5.5, 4, 5.5, 6.2]
2565
- # }
2566
- # df = pd.DataFrame(data)
2567
- # out4= FuncMultiCmpt(pmc='pmc', pair='unpaired',data=df, dv='WeightLoss', factor=['Diet','Workout'],group='Diet')
2568
-
2569
- # =============================================================================
2570
- # # convert to list to string
2571
- # =============================================================================
2572
1569
  def list2str(x_str):
2573
1570
  s = ''.join(str(x) for x in x_str)
2574
1571
  return s
@@ -2580,18 +1577,14 @@ def str2list(str_):
2580
1577
  def load_img(fpath):
2581
1578
  """
2582
1579
  Load an image from the specified file path.
2583
-
2584
1580
  Args:
2585
1581
  fpath (str): The file path to the image.
2586
-
2587
1582
  Returns:
2588
1583
  PIL.Image: The loaded image.
2589
-
2590
1584
  Raises:
2591
1585
  FileNotFoundError: If the specified file is not found.
2592
1586
  OSError: If the specified file cannot be opened or is not a valid image file.
2593
1587
  """
2594
-
2595
1588
  try:
2596
1589
  img = Image.open(fpath)
2597
1590
  return img
@@ -2604,12 +1597,10 @@ def apply_filter(img, *args):
2604
1597
  # def apply_filter(img, filter_name, filter_value=None):
2605
1598
  """
2606
1599
  Apply the specified filter to the image.
2607
-
2608
1600
  Args:
2609
1601
  img (PIL.Image): The input image.
2610
1602
  filter_name (str): The name of the filter to apply.
2611
1603
  **kwargs: Additional parameters specific to the filter.
2612
-
2613
1604
  Returns:
2614
1605
  PIL.Image: The filtered image.
2615
1606
  """
@@ -2813,10 +1804,8 @@ def imgsets(
2813
1804
  def auto_enhance(img):
2814
1805
  """
2815
1806
  Automatically enhances the image based on its characteristics.
2816
-
2817
1807
  Args:
2818
1808
  img (PIL.Image): The input image.
2819
-
2820
1809
  Returns:
2821
1810
  dict: A dictionary containing the optimal enhancement values.
2822
1811
  """
@@ -2832,47 +1821,33 @@ def imgsets(
2832
1821
  bit_depth = 16
2833
1822
  else:
2834
1823
  raise ValueError("Unsupported image mode")
2835
-
2836
1824
  # Calculate the brightness and contrast for each channel
2837
1825
  num_channels = len(img.getbands())
2838
1826
  brightness_factors = []
2839
1827
  contrast_factors = []
2840
1828
  for channel in range(num_channels):
2841
1829
  channel_histogram = img.split()[channel].histogram()
2842
- brightness = sum(i * w for i, w in enumerate(channel_histogram)) / sum(
2843
- channel_histogram
2844
- )
1830
+ brightness = sum(i * w for i, w in enumerate(channel_histogram))/sum(channel_histogram)
2845
1831
  channel_min, channel_max = img.split()[channel].getextrema()
2846
1832
  contrast = channel_max - channel_min
2847
-
2848
1833
  # Adjust calculations based on bit depth
2849
1834
  normalization_factor = 2**bit_depth - 1 # Max value for the given bit depth
2850
- brightness_factor = (
2851
- 1.0 + (brightness - normalization_factor / 2) / normalization_factor
2852
- )
2853
- contrast_factor = (
2854
- 1.0 + (contrast - normalization_factor / 2) / normalization_factor
2855
- )
2856
-
1835
+ brightness_factor = (1.0 + (brightness - normalization_factor / 2) / normalization_factor)
1836
+ contrast_factor = (1.0 + (contrast - normalization_factor / 2) / normalization_factor)
2857
1837
  brightness_factors.append(brightness_factor)
2858
1838
  contrast_factors.append(contrast_factor)
2859
-
2860
1839
  # Calculate the average brightness and contrast factors across channels
2861
1840
  avg_brightness_factor = sum(brightness_factors) / num_channels
2862
1841
  avg_contrast_factor = sum(contrast_factors) / num_channels
2863
-
2864
1842
  return {"brightness": avg_brightness_factor, "contrast": avg_contrast_factor}
2865
-
2866
1843
  # Load image if input is a file path
2867
1844
  if isinstance(img, str):
2868
1845
  img = load_img(img)
2869
-
2870
1846
  img_update = img.copy()
2871
1847
  # Auto-enhance image if requested
2872
1848
  if auto:
2873
1849
  auto_params = auto_enhance(img_update)
2874
1850
  sets.update(auto_params)
2875
-
2876
1851
  if sets is None:
2877
1852
  sets = {}
2878
1853
  for k, value in sets.items():
@@ -2947,12 +1922,9 @@ def imgsets(
2947
1922
  if len(value)==3:
2948
1923
  value+=(255,)
2949
1924
  img_update = remove(img_update, bgcolor=value)
2950
-
2951
1925
  if filter_kws:
2952
1926
  for filter_name, filter_value in filter_kws.items():
2953
1927
  img_update = apply_filter(img_update, filter_name, filter_value)
2954
-
2955
-
2956
1928
  # Display the image if requested
2957
1929
  if show:
2958
1930
  if figsize is None:
@@ -2961,7 +1933,6 @@ def imgsets(
2961
1933
  plt.figure(figsize=figsize, dpi=dpi)
2962
1934
  plt.imshow(img_update)
2963
1935
  plt.axis("on") if show_axis else plt.axis("off")
2964
-
2965
1936
  return img_update
2966
1937
  # # usage:
2967
1938
  # img = imgsets(
@@ -2982,7 +1953,6 @@ def figsets(*args):
2982
1953
  "scatter","ieee","no-latex","std-colors","high-vis","bright","dark_background","science",
2983
1954
  "high-vis","vibrant","muted","retro","grid","high-contrast","light","cjk-tc-font","cjk-kr-font",
2984
1955
  ]
2985
-
2986
1956
  def sets_priority(ax,key, value):
2987
1957
  if ("fo" in key) and (("size" in key) or ("sz" in key)):
2988
1958
  fontsize=value