py2ls 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. py2ls/.git/COMMIT_EDITMSG +1 -0
  2. py2ls/.git/FETCH_HEAD +1 -0
  3. py2ls/.git/HEAD +1 -0
  4. py2ls/.git/config +15 -0
  5. py2ls/.git/description +1 -0
  6. py2ls/.git/hooks/applypatch-msg.sample +15 -0
  7. py2ls/.git/hooks/commit-msg.sample +24 -0
  8. py2ls/.git/hooks/fsmonitor-watchman.sample +174 -0
  9. py2ls/.git/hooks/post-update.sample +8 -0
  10. py2ls/.git/hooks/pre-applypatch.sample +14 -0
  11. py2ls/.git/hooks/pre-commit.sample +49 -0
  12. py2ls/.git/hooks/pre-merge-commit.sample +13 -0
  13. py2ls/.git/hooks/pre-push.sample +53 -0
  14. py2ls/.git/hooks/pre-rebase.sample +169 -0
  15. py2ls/.git/hooks/pre-receive.sample +24 -0
  16. py2ls/.git/hooks/prepare-commit-msg.sample +42 -0
  17. py2ls/.git/hooks/push-to-checkout.sample +78 -0
  18. py2ls/.git/hooks/update.sample +128 -0
  19. py2ls/.git/index +0 -0
  20. py2ls/.git/info/exclude +6 -0
  21. py2ls/.git/logs/HEAD +1 -0
  22. py2ls/.git/logs/refs/heads/main +1 -0
  23. py2ls/.git/logs/refs/remotes/origin/HEAD +1 -0
  24. py2ls/.git/logs/refs/remotes/origin/main +1 -0
  25. py2ls/.git/objects/25/b796accd261b9135fd32a2c00785f68edf6c46 +0 -0
  26. py2ls/.git/objects/36/b4a1b7403abc6c360f8fe2cb656ab945254971 +0 -0
  27. py2ls/.git/objects/3f/d6561300938afbb3d11976cf9c8f29549280d9 +0 -0
  28. py2ls/.git/objects/58/20a729045d4dc7e37ccaf8aa8eec126850afe2 +0 -0
  29. py2ls/.git/objects/60/f273eb1c412d916fa3f11318a7da7a9911b52a +0 -0
  30. py2ls/.git/objects/61/570cec8c061abe74121f27f5face6c69b98f99 +0 -0
  31. py2ls/.git/objects/69/13c452ca319f7cbf6a0836dc10a5bb033c84e4 +0 -0
  32. py2ls/.git/objects/78/3d4167bc95c9d2175e0df03ef1c1c880ba75ab +0 -0
  33. py2ls/.git/objects/79/7ae089b2212a937840e215276005ce76881307 +0 -0
  34. py2ls/.git/objects/7e/5956c806b5edc344d46dab599dec337891ba1f +1 -0
  35. py2ls/.git/objects/8e/55a7d2b96184030211f20c9b9af201eefcac82 +0 -0
  36. py2ls/.git/objects/91/c69ad88fe0ba94aa7859fb5f7edac5e6f1a3f7 +0 -0
  37. py2ls/.git/objects/b0/56be4be89ba6b76949dd641df45bb7036050c8 +0 -0
  38. py2ls/.git/objects/b0/9cd7856d58590578ee1a4f3ad45d1310a97f87 +0 -0
  39. py2ls/.git/objects/d9/005f2cc7fc4e65f14ed5518276007c08cf2fd0 +0 -0
  40. py2ls/.git/objects/df/e0770424b2a19faf507a501ebfc23be8f54e7b +0 -0
  41. py2ls/.git/objects/e9/391ffe371f1cc43b42ef09b705d9c767c2e14f +0 -0
  42. py2ls/.git/objects/fc/292e793ecfd42240ac43be407023bd731fa9e7 +0 -0
  43. py2ls/.git/refs/heads/main +1 -0
  44. py2ls/.git/refs/remotes/origin/HEAD +1 -0
  45. py2ls/.git/refs/remotes/origin/main +1 -0
  46. py2ls/.gitattributes +2 -0
  47. py2ls/.gitignore +152 -0
  48. py2ls/LICENSE +201 -0
  49. py2ls/README.md +409 -0
  50. py2ls/__init__.py +17 -0
  51. py2ls/brain_atlas.py +145 -0
  52. py2ls/correlators.py +475 -0
  53. py2ls/dbhandler.py +97 -0
  54. py2ls/freqanalysis.py +800 -0
  55. py2ls/internet_finder.py +405 -0
  56. py2ls/ips.py +2844 -0
  57. py2ls/netfinder.py +780 -0
  58. py2ls/sleep_events_detectors.py +1350 -0
  59. py2ls/translator.py +686 -0
  60. py2ls/version.py +1 -0
  61. py2ls/wb_detector.py +169 -0
  62. py2ls-0.1.0.dist-info/METADATA +12 -0
  63. py2ls-0.1.0.dist-info/RECORD +64 -0
  64. py2ls-0.1.0.dist-info/WHEEL +4 -0
py2ls/ips.py ADDED
@@ -0,0 +1,2844 @@
1
+ from scipy.ndimage import convolve1d
2
+ import numpy as np
3
+ import pandas as pd
4
+ import json
5
+ import matplotlib.pyplot as plt
6
+ # from functools import partial
7
+ import seaborn as sns
8
+ import scienceplots
9
+ import matplotlib
10
+ import sys
11
+ import os
12
+ from scipy.signal import savgol_filter
13
+ import pingouin as pg
14
+ from scipy import stats
15
+ import matplotlib.ticker as tck
16
+ from cycler import cycler
17
+ import re
18
+ from PIL import ImageEnhance, ImageOps,ImageFilter
19
+ from rembg import remove,new_session
20
+ from mpl_toolkits.mplot3d import Axes3D
21
+ import docx
22
+ import pandas as pd
23
+ from fpdf import FPDF
24
+ import yaml
25
+ from lxml import etree
26
+ from docx import Document
27
+ from PyPDF2 import PdfReader
28
+ from pdf2image import convert_from_path, pdfinfo_from_path
29
+ from nltk.tokenize import sent_tokenize,word_tokenize
30
+ import nltk # nltk.download("punkt")
31
+ from docx2pdf import convert
32
+ import img2pdf as image2pdf
33
+ import pprint
34
+ from itertools import pairwise
35
+ import time
36
+ from box import Box, BoxList
37
+ from numerizer import numerize
38
+ from tqdm import tqdm
39
+
40
+ def str2num(s, *args):
41
+ delimiter = None
42
+ round_digits = None
43
+
44
+ for arg in args:
45
+ if isinstance(arg, str):
46
+ delimiter = arg
47
+ elif isinstance(arg, int):
48
+ round_digits = arg
49
+
50
+ try:
51
+ num = int(s)
52
+ except ValueError:
53
+ try:
54
+ num = float(s)
55
+ except ValueError:
56
+ try:
57
+ numerized = numerize(s)
58
+ num = int(numerized) if '.' not in numerized else float(numerized)
59
+ except Exception as e:
60
+ # Attempt to handle multiple number segments
61
+ try:
62
+ number_segments = ssplit(s,by='number_strings')
63
+ nums = []
64
+ for segment in number_segments:
65
+ try:
66
+ nums.append(str2num(segment))
67
+ except ValueError:
68
+ continue
69
+ if len(nums) == 1:
70
+ num = nums[0]
71
+ else:
72
+ raise ValueError("Multiple number segments found, cannot determine single numeric value")
73
+ except Exception as e:
74
+ raise ValueError(f"Cannot convert {s} to a number: {e}")
75
+
76
+ # Apply rounding if specified
77
+ if round_digits is not None:
78
+ num_adj = num + 1e-11  # tiny upward nudge so halfway values round up rather than to even
79
+ num = round(num_adj, round_digits)
80
+
81
+ # Apply delimiter formatting if specified
82
+ if delimiter is not None:
83
+ num_str = f"{num:,}".replace(",", delimiter)
84
+ return num_str
85
+
86
+ return num
87
+ # Examples
88
+ # print(str2num("123")) # Output: 123
89
+ # print(str2num("123.456", 2)) # Output: 123.46
90
+ # print(str2num("one hundred and twenty three")) # Output: 123
91
+ # print(str2num("seven million")) # Output: 7000000
92
+ # print(str2num('one thousand thirty one',',')) # Output: 1,031
93
+ # print(str2num("12345.6789", ",")) # Output: 12,345.6789
94
+ # print(str2num("12345.6789", " ", 2)) # Output: 12 345.68
95
+ # print(str2num('111113.34555',3,',')) # Output: 111,113.346
96
+ # print(str2num("123.55555 sec miniuets",3)) # Output: 1.3
97
+ def num2str(num, *args):
98
+ delimiter = None
99
+ round_digits = None
100
+
101
+ # Parse additional arguments
102
+ for arg in args:
103
+ if isinstance(arg, str):
104
+ delimiter = arg
105
+ elif isinstance(arg, int):
106
+ round_digits = arg
107
+
108
+ # Apply rounding if specified
109
+ if round_digits is not None:
110
+ num = round(num, round_digits)
111
+
112
+ # Convert number to string
113
+ num_str = f"{num}"
114
+
115
+ # Apply delimiter if specified
116
+ if delimiter is not None:
117
+ num_str = num_str.replace(".", ",") # Replace decimal point with comma
118
+ num_str_parts = num_str.split(",")
119
+ if len(num_str_parts) > 1:
120
+ integer_part = num_str_parts[0]
121
+ decimal_part = num_str_parts[1]
122
+ integer_part = "{:,}".format(int(integer_part))
123
+ num_str = integer_part + "." + decimal_part
124
+ else:
125
+ num_str = "{:,}".format(int(num_str_parts[0]))
126
+
127
+ return num_str
128
+ # Examples
129
+ # print(num2str(123),type(num2str(123))) # Output: "123"
130
+ # print(num2str(123.456, 2),type(num2str(123.456, 2))) # Output: "123.46"
131
+ # print(num2str(7000.125, 2),type(num2str(7000.125, 2))) # Output: "7000.12" (round half to even)
132
+ # print(num2str(12345.6789, ","),type(num2str(12345.6789, ","))) # Output: "12,345.6789"
133
+ # print(num2str(7000.00, ","),type(num2str(7000.00, ","))) # Output: "7,000.00"
134
+ def sreplace(text, dict_replace=None, robust=True):
135
+ """
136
+ Replace specified substrings in the input text with provided replacements.
137
+ Args:
138
+ text (str): The input text where replacements will be made.
139
+ dict_replace (dict, optional): A dictionary containing substrings to be replaced as keys
140
+ and their corresponding replacements as values. Defaults to None, i.e. no user-defined replacements.
141
+ robust (bool, optional): If True, additional default replacements for newline and tab characters will be applied.
142
+ Default is True.
143
+ Returns:
144
+ str: The text after replacements have been made.
145
+ """
146
+ # Default replacements for newline and tab characters
147
+ default_replacements = {
148
+ "\a": "",
149
+ "\b": "",
150
+ "\f": "",
151
+ "\n": "",
152
+ "\r": "",
153
+ "\t": "",
154
+ "\v": "",
155
+ "\\": "", # Corrected here
156
+ # "\?": "",
157
+ "�": "",
158
+ "\\x": "", # Corrected here
159
+ "\\x hhhh": "",
160
+ "\\ ooo": "", # Corrected here
161
+ "\xa0": "",
162
+ " ": " ",
163
+ }
164
+
165
+ # If dict_replace is None, use the default dictionary
166
+ if dict_replace is None:
167
+ dict_replace = {}
168
+
169
+ # If robust is True, update the dictionary with default replacements
170
+ if robust:
171
+ dict_replace.update(default_replacements)
172
+
173
+ # Iterate over each key-value pair in the dictionary and replace substrings accordingly
174
+ for k, v in dict_replace.items():
175
+ text = text.replace(k, v)
176
+ return text
177
+ # usage:
178
+ # sreplace(text, dict_replace=dict(old_str='new_str'), robust=True)
179
+
180
+ def paper_size(paper_type_str='a4'):
181
+ df=pd.DataFrame({'a0':[841,1189],'a1':[594,841],'a2':[420,594],'a3':[297,420],'a4':[210,297],'a5':[148,210],'a6':[105,148],'a7':[74,105],
182
+ 'b0':[1028,1456],'b1':[707,1000],'b2':[514,728],'b3':[364,514],'b4':[257,364],'b5':[182,257],'b6':[128,182],
183
+ 'letter': [215.9, 279.4],'legal':[215.9, 355.6],'business card':[85.6, 53.98],
184
+ 'photo china passport':[33,48],'passport single':[125,88],'visa':[105,74],'sim':[25,15]})
185
+ paper_type = None
+ for name in df.columns:
186
+ if paper_type_str in name.lower():
187
+ paper_type=name
188
+ if not paper_type:
189
+ paper_type='a4' # default
190
+ return df[paper_type].tolist()
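+ # usage (values come straight from the table above, in mm):
+ # paper_size('a4')      # [210, 297]
+ # paper_size('letter')  # [215.9, 279.4]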
191
+ def docx2pdf(dir_docx, dir_pdf=None):
192
+ if dir_pdf:
193
+ convert(dir_docx,dir_pdf)
194
+ else:
195
+ convert(dir_docx)
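+ # usage (paths are illustrative; docx2pdf relies on Microsoft Word being installed):
+ # docx2pdf('./report.docx')                 # pdf lands next to the docx
+ # docx2pdf('./report.docx', './report.pdf') # explicit output path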
196
+
197
+ def img2pdf(dir_img, kind="jpeg",page=None, dir_save=None, page_size="a4", dpi=300):
198
+ def mm_to_point(size):
199
+ return (image2pdf.mm_to_pt(size[0]),image2pdf.mm_to_pt(size[1]))
200
+ def set_dpi(x):
201
+ dpix=dpiy=x
202
+ return image2pdf.get_fixed_dpi_layout_fun((dpix, dpiy))
203
+ if not kind.startswith("."):
204
+ kind="."+kind
205
+ if dir_save is None:
206
+ dir_save = dir_img.replace(kind,'.pdf')
207
+ imgs = []
208
+ if os.path.isdir(dir_img):
209
+ if not dir_save.endswith(".pdf"):
210
+ dir_save+="#merged_img2pdf.pdf"
211
+ if page is None:
212
+ select_range = listdir(dir_img,kind=kind).fpath
213
+ else:
214
+ if not isinstance(page, (np.ndarray,list,range)):
215
+ page=[page]
216
+ select_range = listdir(dir_img,kind=kind)['fpath'][page]
217
+ for fname in select_range:
218
+ if not fname.endswith(kind):
219
+ continue
220
+ path = os.path.join(dir_img, fname)
221
+ if os.path.isdir(path):
222
+ continue
223
+ imgs.append(path)
224
+ else:
225
+ imgs = [dir_img]  # a single image file
226
+
227
+ if page_size:
228
+ if isinstance(page_size,str):
229
+ pdf_in_mm=mm_to_point(paper_size(page_size))
230
+ else:
+ pdf_in_mm = mm_to_point(page_size)
+ print(f"page_size was set to {page_size} (width, height) in mm")
234
+ p_size= image2pdf.get_layout_fun(pdf_in_mm)
235
+ else:
236
+ p_size = set_dpi(dpi)
237
+ with open(dir_save,"wb") as f:
238
+ f.write(image2pdf.convert(imgs, layout_fun=p_size))
239
+ # usage:
240
+ # dir_img="/Users/macjianfeng/Dropbox/00-Personal/2015-History/2012-2015_兰州大学/120901-大学课件/生物统计学 陆卫/复习题/"
241
+ # img2pdf(dir_img,kind='tif', page=range(3,7,2))
242
+ def ssplit(text, by="space", verbose=False, **kws):
243
+ if isinstance(text, list):
244
+ nested_list= [ssplit(i,by=by,verbose=verbose,**kws) for i in text]
245
+ flat_list = [item for sublist in nested_list for item in sublist]
246
+ return flat_list
247
+ def split_by_word_length(text, length):
248
+ return [word for word in text.split() if len(word) == length]
249
+
250
+ def split_by_multiple_delimiters(text, delimiters):
251
+ regex_pattern = "|".join(map(re.escape, delimiters))
252
+ return re.split(regex_pattern, text)
253
+
254
+ def split_by_camel_case(text):
255
+ return re.findall(r"[A-Z](?:[a-z]+|[A-Z]*(?=[A-Z]|$))", text)
256
+
257
+ def split_at_upper_fl_lower(text):
258
+ return re.findall(r"[A-Z](?:[a-z]+|[A-Z]+(?=[A-Z]|$))", text)
259
+
260
+ def split_at_lower_fl_upper(text):
261
+ split_text = re.split(r"(?<=[a-z])(?=[A-Z])", text)
262
+ return split_text
263
+
264
+ def split_at_upper(text):
265
+ split_text = re.split(r"(?=[A-Z])", text)
266
+ split_text = [part for part in split_text if part]
267
+ return split_text
268
+
269
+ def split_by_regex_lookahead(text, pattern):
270
+ return re.split(f'(?<={pattern})', text)
271
+
272
+ def split_by_regex_end(text, pattern):
273
+ return re.split(f'(?={pattern})', text)
274
+
275
+ # def split_by_sentence_endings(text):
276
+ # return re.split(r"(?<=[.!?])", text)
277
+ def split_non_ascii(text):
278
+ # return re.split(r"([^\x00-\x7F\w\s,.!?:\"'()\-]+)", text)
279
+ # return re.split(r"[^\x00-\x7F]+", text)
280
+ return re.split(r"([^\x00-\x7F]+)", text)
281
+ def split_by_consecutive_non_alphanumeric(text):
282
+ return re.split(r"\W+", text)
283
+
284
+ def split_by_fixed_length_chunks(text, length):
285
+ return [text[i : i + length] for i in range(0, len(text), length)]
286
+ def split_by_sent_num(text,n=10):
287
+ # split text into sentences
288
+ text_split_by_sent=sent_tokenize(text)
289
+ cut_loc_array=np.arange(0,len(text_split_by_sent),n)
290
+ if cut_loc_array[-1]!=len(text_split_by_sent):
291
+ cut_loc=np.append(cut_loc_array,len(text_split_by_sent))
292
+ else:
293
+ cut_loc = cut_loc_array
294
+ # get text in section (e.g., every 10 sentences)
295
+ text_section=[]
296
+ for i,j in pairwise(cut_loc):
297
+ text_section.append(text_split_by_sent[i:j])
298
+ return text_section
299
+ def split_general(text, by, verbose=False, ignore_case=False):
300
+ if ignore_case:
301
+ if verbose:
302
+ print(f"used {by} to split, ignore_case=True")
303
+ pattern = re.compile(re.escape(by), re.IGNORECASE)
304
+ split_text = pattern.split(text)
305
+ return split_text
306
+ else:
307
+ if verbose:
308
+ print(f"used {by} to split, ignore_case=False")
309
+ return text.split(by)
310
+ def reg_split(text, pattern):
311
+ return re.split(pattern, text)
312
+ if "sp" in by or "white" in by:
313
+ if verbose:
314
+ print(f"splited by space")
315
+ return text.split()
316
+ elif "word" in by and "len" in by:
317
+ if verbose:
318
+ print(f"split_by_word_length(text, length)")
319
+ return split_by_word_length(text, **kws) # split_by_word_length(text, length)
320
+ elif "," in by:
321
+ if verbose:
322
+ print(f"splited by ','")
323
+ return text.split(",")
324
+ elif isinstance(by, list):
325
+ if verbose:
326
+ print(f"split_by_multiple_delimiters: ['|','&']")
327
+ return split_by_multiple_delimiters(text, by)
328
+ elif all([("digi" in by or "num" in by),not 'sent' in by, not 'str' in by]):
329
+ if verbose:
330
+ print(f"splited by digital (numbers)")
331
+ return re.split(r"(\d+)", text)
332
+ elif all([("digi" in by or "num" in by), 'str' in by]):
333
+ if verbose:
334
+ print(f"Splitting by (number strings)")
335
+ pattern = re.compile(r'\b((?:one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty|thirty|forty|fifty|sixty|seventy|eighty|ninety|hundred|thousand|million|billion|trillion|and|[\d,]+(?:\.\d+)?)(?:[-\s]?(?:one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty|thirty|forty|fifty|sixty|seventy|eighty|ninety|hundred|thousand|million|billion|trillion|and|[\d,]+(?:\.\d+)?))*)\b', re.IGNORECASE)
336
+ return re.split(pattern, text)
337
+ elif "pun" in by:
338
+ if verbose:
339
+ print(f"splited by 标点('.!?;')")
340
+ return re.split(r"[.!?;]", text)
341
+ elif "\n" in by or "li" in by:
342
+ if verbose:
343
+ print(f"splited by lines('\n')")
344
+ return text.splitlines()
345
+ elif "cam" in by:
346
+ if verbose:
347
+ print(f"splited by camel_case")
348
+ return split_by_camel_case(text)
349
+ elif "word" in by:
350
+ if verbose:
351
+ print(f"splited by word")
352
+ return word_tokenize(text)
353
+ elif "sen" in by and not 'num' in by:
354
+ if verbose:
355
+ print(f"splited by sentence")
356
+ return sent_tokenize(text)
357
+ elif 'sen' in by and 'num' in by:
358
+ return split_by_sent_num(text,**kws)
359
+ elif "cha" in by:
360
+ if verbose:
361
+ print(f"splited by chracters")
362
+ return list(text)
363
+ elif ("up" in by or "cap" in by) and "l" not in by:
364
+ if verbose:
365
+ print(f"splited by upper case")
366
+ return split_at_upper(text)
367
+ elif "u" in by and "l" in by:
368
+ if by.find("u") < by.find("l"):
369
+ if verbose:
370
+ print(f"splited by upper followed by lower case")
371
+ return split_at_upper_fl_lower(text)
372
+ else:
373
+ if verbose:
374
+ print(f"splited by lower followed by upper case")
375
+ return split_at_lower_fl_upper(text)
376
+ elif "start" in by or "head" in by:
377
+ if verbose:
378
+ print(f"splited by lookahead")
379
+ return split_by_regex_lookahead(text, **kws)
380
+ elif "end" in by or "tail" in by:
381
+ if verbose:
382
+ print(f"splited by endings")
383
+ return split_by_regex_end(text, **kws)
384
+ elif "other" in by or "non_alp" in by:
385
+ if verbose:
386
+ print(f"splited by non_alphanumeric")
387
+ return split_by_consecutive_non_alphanumeric(text)
388
+ elif "len" in by:
389
+ if verbose:
390
+ print(f"splited by fixed length")
391
+ return split_by_fixed_length_chunks(text, **kws)
392
+ elif "re" in by or "cus" in by or "cos" in by:
393
+ if verbose:
394
+ print(f"splited by customed, re; => {by}")
395
+ return reg_split(text, **kws)
396
+ elif 'lang' in by or 'eng' in by:
397
+ return split_non_ascii(text)
398
+ else:
399
+ return split_general(text, by, verbose=verbose, **kws)
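+ # usage (a few illustrative calls; dispatch follows the keyword rules above):
+ # ssplit('Hello World', by='space')      # ['Hello', 'World']
+ # ssplit('HelloWorldAgain', by='camel')  # ['Hello', 'World', 'Again']
+ # ssplit('a1b22c', by='digit')           # ['a', '1', 'b', '22', 'c']
+ # ssplit('one. two! three?', by='punct') # ['one', ' two', ' three', '']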
400
+
401
+
402
+ def pdf2img(dir_pdf, dir_save=None, page=None, kind="png",verbose=True, **kws):
403
+ df_dir_img_single_page = pd.DataFrame()
404
+ dir_single_page = []
405
+ if verbose:
406
+ pprint.pp(pdfinfo_from_path(dir_pdf))
407
+ if isinstance(page, tuple) and page:
408
+ page = list(page)
409
+ if isinstance(page,int):
410
+ page=[page]
411
+ if page is None:
412
+ page = [1, pdfinfo_from_path(dir_pdf)["Pages"]]  # default: convert all pages
+ if len(page) == 1:  # a single page number was given
+ page = [page[0], page[0]]
417
+ pages = convert_from_path(dir_pdf, first_page=page[0], last_page=page[1], **kws)
418
+ if dir_save is None:
419
+ dir_save = newfolder(dirname(dir_pdf), basename(dir_pdf).split(".")[0] + "_img")
420
+ for i, page_img in enumerate(pages):  # page_img: avoid shadowing the page range
421
+ if verbose:
422
+ print(f"processing page: {i+1}")
423
+ if i < 9:
424
+ dir_img_each_page = dir_save + f"page_0{i+1}.png"
425
+ else:
426
+ dir_img_each_page = dir_save + f"page_{i+1}.png"
427
+ dir_single_page.append(dir_img_each_page)
428
+ page_img.save(dir_img_each_page, kind.upper())
429
+ df_dir_img_single_page["fpath"] = dir_single_page
430
+ return df_dir_img_single_page
431
+
432
+ # dir_pdf = "/Users/macjianfeng/Dropbox/github/python/240308_Python Data Science Handbook.pdf"
433
+ # df_page = pdf2img(dir_pdf, page=[1, 5],dpi=300)
434
+
435
+
436
+
437
+ def fload(fpath, kind=None, **kwargs):
438
+ """
439
+ Load content from a file with specified file type.
440
+ Parameters:
441
+ fpath (str): The file path from which content will be loaded.
442
+ kind (str): The file type to load. Supported options: 'docx', 'txt', 'md', 'html', 'json', 'yaml', 'xml', 'csv', 'xlsx', 'pdf', plus common image formats (e.g. 'png', 'jpg', 'tiff').
443
+ **kwargs: Additional parameters for 'csv' and 'xlsx' file types.
444
+ Returns:
445
+ content: The content loaded from the file.
446
+ """
447
+ def load_txt_md(fpath):
448
+ with open(fpath, "r") as file:
449
+ content = file.read()
450
+ return content
451
+
452
+ def load_html(fpath):
453
+ with open(fpath, "r") as file:
454
+ content = file.read()
455
+ return content
456
+
457
+ def load_json(fpath):
458
+ with open(fpath, "r") as file:
459
+ content = json.load(file)
460
+ return content
461
+
462
+ def load_yaml(fpath):
463
+ with open(fpath, "r") as file:
464
+ content = yaml.safe_load(file)
465
+ return content
466
+
467
+ def load_xml(fpath):
468
+ tree = etree.parse(fpath)
469
+ root = tree.getroot()
470
+ return etree.tostring(root, pretty_print=True).decode()
471
+
472
+ def load_csv(fpath, **kwargs):
473
+ df = pd.read_csv(fpath, **kwargs)
474
+ return df
475
+
476
+ def load_xlsx(fpath, **kwargs):
477
+ df = pd.read_excel(fpath, **kwargs)
478
+ return df
479
+
480
+ def load_pdf(fpath, page='all', verbose=False, **kwargs):
481
+ """
482
+ Parameters:
483
+ fpath: The path to the PDF file to be loaded.
484
+ page (optional):
485
+ Specifies which page or pages to extract text from. By default, it's set to "all", which means text from all
486
+ pages will be returned. It can also be an integer to specify a single page number or a list of integers to
487
+ specify multiple pages.
488
+ verbose (optional):
489
+ If True, prints the total number of pages processed.
490
+ Functionality:
491
+ It initializes an empty dictionary text_dict to store page numbers as keys and their corresponding text as values.
492
+ It iterates through each page of the PDF file using a for loop.
493
+ For each page, it extracts the text using PyPDF2's extract_text() method and stores it in text_dict with the page number incremented by 1 as the key.
494
+ If the page parameter is an integer, it converts it into a list containing that single page number to ensure consistency in handling.
495
+ If the page parameter is a NumPy array, it converts it to a list using the tolist() method to ensure compatibility with list operations.
496
+ If verbose is True, it prints the total number of pages processed.
497
+ If page is a list, it combines the text of the specified pages into a single string combined_text and returns it.
498
+ If page is set to "all", it returns the entire text_dict containing text of all pages.
499
+ If page is an integer, it returns the text of the specified page number.
500
+ If the specified page is not found, it returns the string "Page is not found".
501
+ """
502
+ text_dict = {}
503
+ with open(fpath, "rb") as file:
504
+ pdf_reader = PdfReader(file)
505
+ num_pages = len(pdf_reader.pages)
506
+ for page_num in range(num_pages):
507
+ if verbose:
508
+ print(f"processing page {page_num}")
509
+ page_ = pdf_reader.pages[page_num]
510
+ text_dict[page_num + 1] = page_.extract_text()
511
+ if isinstance(page, int):
512
+ page = [page]
513
+ elif isinstance(page, np.ndarray):
514
+ page = page.tolist()
515
+ if verbose:
516
+ print(f"total pages: {page_num}")
517
+ if isinstance(page, list):
518
+ combined_text = ""
519
+ for page_num in page:
520
+ combined_text += text_dict.get(page_num, "")
521
+ return combined_text
522
+ elif "all" in page.lower():
523
+ combined_text = ""
524
+ for i in text_dict.values():
525
+ combined_text += i
526
+ return combined_text
527
+ else:
528
+ return text_dict.get(int(page), "Page is not found")
529
+
530
+ def load_docx(fpath):
531
+ doc = Document(fpath)
532
+ content = [para.text for para in doc.paragraphs]
533
+ return content
534
+
535
+ if kind is None:
536
+ _, kind = os.path.splitext(fpath)
537
+ kind = kind.lower()
538
+
539
+ kind = kind.lstrip('.').lower()
540
+ img_types=[ 'bmp','eps', 'gif', 'icns', 'ico', 'im', 'jpg','jpeg', 'jpeg2000','msp', 'pcx', 'png', 'ppm', 'sgi', 'spider', 'tga','tiff','webp']
541
+ doc_types = ["docx", "txt", "md", "html", "json", "yaml", "xml", "csv", "xlsx", "pdf"]
542
+ supported_types = [*doc_types, *img_types]
543
+ if kind not in supported_types:
544
+ raise ValueError(f"Error:\n{kind} is not in the supported list {supported_types}")
545
+ if kind == "docx":
546
+ return load_docx(fpath)
547
+ elif kind == "txt" or kind == "md":
548
+ return load_txt_md(fpath)
549
+ elif kind == "html":
550
+ return load_html(fpath)
551
+ elif kind == "json":
552
+ return load_json(fpath)
553
+ elif kind == "yaml":
554
+ return load_yaml(fpath)
555
+ elif kind == "xml":
556
+ return load_xml(fpath)
557
+ elif kind == "csv":
558
+ return load_csv(fpath, **kwargs)
559
+ elif kind == "xlsx":
560
+ return load_xlsx(fpath, **kwargs)
561
+ elif kind == "pdf":
562
+ print('usage:load_pdf(fpath, page="all", verbose=False)')
563
+ return load_pdf(fpath, **kwargs)
564
+ elif kind.lower() in img_types:
565
+ print(f'Image ".{kind}" is loaded.')
566
+ return load_img(fpath)
567
+ else:
568
+ raise ValueError(f"Error:\n{kind} is not in the supported list {supported_types}")
569
+
570
+ # Example usage
571
+ # txt_content = fload('sample.txt')
572
+ # md_content = fload('sample.md')
573
+ # html_content = fload('sample.html')
574
+ # json_content = fload('sample.json')
575
+ # yaml_content = fload('sample.yaml')
576
+ # xml_content = fload('sample.xml')
577
+ # csv_content = fload('sample.csv')
578
+ # xlsx_content = fload('sample.xlsx')
579
+ # docx_content = fload('sample.docx')
580
+
581
+ def fsave(
582
+ fpath,
583
+ content,
584
+ kind=None,
585
+ font_name="Times",
586
+ font_size=10,
587
+ spacing=6,
588
+ **kwargs,
589
+ ):
590
+ """
591
+ Save content into a file with specified file type and formatting.
592
+ Parameters:
593
+ fpath (str): The file path where content will be saved.
594
+ content (list of str or dict): The content to be saved, where each string represents a paragraph or a dictionary for tabular data.
595
+ kind (str): The file type to save. Supported options: 'docx', 'txt', 'md', 'html', 'pdf', 'csv', 'xlsx', 'json', 'xml', 'yaml'.
596
+ font_name (str): The font name for text formatting (only applicable for 'docx', 'html', and 'pdf').
597
+ font_size (int): The font size for text formatting (only applicable for 'docx', 'html', and 'pdf').
598
+ spacing (int): The space after each paragraph (only applicable for 'docx').
599
+ **kwargs: Additional parameters for 'csv', 'xlsx', 'json', 'yaml' file types.
600
+ Returns:
601
+ None
602
+ """
603
+ def save_content(fpath, content):
604
+ with open(fpath, "w", encoding='utf-8') as file:
605
+ file.write(content)
606
+
607
+
608
+ def save_docx(fpath, content, font_name, font_size, spacing):
609
+ if isinstance(content, str):
610
+ content = content.split(". ")
611
+ doc = docx.Document()
612
+ for i, paragraph_text in enumerate(content):
613
+ paragraph = doc.add_paragraph()
614
+ run = paragraph.add_run(paragraph_text)
615
+ font = run.font
616
+ font.name = font_name
617
+ font.size = docx.shared.Pt(font_size)
618
+ if i != len(content) - 1: # Add spacing for all but the last paragraph
619
+ paragraph.paragraph_format.space_after = docx.shared.Pt(spacing)
620
+ doc.save(fpath)
621
+
622
+
623
+ def save_txt_md(fpath, content, sep="\n"):
624
+ # Ensure content is a single string
625
+ if isinstance(content, list):
626
+ content = sep.join(content)
627
+ save_content(fpath, content)
628
+
629
+
630
+ def save_html(fpath, content, font_name, font_size):
631
+ html_content = "<html><body>"
632
+ for paragraph_text in content:
633
+ html_content += f'<p style="font-family:{font_name}; font-size:{font_size}px;">{paragraph_text}</p>'
634
+ html_content += "</body></html>"
635
+ save_content(fpath, html_content)
636
+
637
+
638
+ def save_pdf(fpath, content, font_name, font_size):
639
+ pdf = FPDF()
640
+ pdf.add_page()
641
+ # pdf.add_font('Arial','',r'/System/Library/Fonts/Supplemental/Arial.ttf',uni=True)
642
+ pdf.set_font(font_name, '',font_size)
643
+ for paragraph_text in content:
644
+ pdf.multi_cell(0, 10, paragraph_text)
645
+ pdf.ln(h = '')
646
+ pdf.output(fpath,'F')
647
+
648
+
649
+ def save_csv(fpath, data, **kwargs):
650
+ df = pd.DataFrame(data)
651
+ df.to_csv(fpath, **kwargs)
652
+
653
+
654
+ def save_xlsx(fpath, data, **kwargs):
655
+ df = pd.DataFrame(data)
656
+ df.to_excel(fpath, **kwargs)
657
+
658
+
659
+ # def save_json(fpath, data, **kwargs):
660
+ # with open(fpath, "w") as file:
661
+ # json.dump(data, file, **kwargs)
662
+
663
+ def save_json(fpath_fname, var_dict_or_df):
664
+ with open(fpath_fname, "w") as f_json:
665
+ # Check if var_dict_or_df is a DataFrame
666
+ if isinstance(var_dict_or_df, pd.DataFrame):
667
+ # Convert DataFrame to a list of dictionaries
668
+ var_dict_or_df = var_dict_or_df.to_dict(orient="dict")
669
+
670
+ # Check if var_dict_or_df is a dictionary
671
+ if isinstance(var_dict_or_df, dict):
672
+ # Convert NumPy arrays to lists
673
+ for key, value in var_dict_or_df.items():
674
+ if isinstance(value, np.ndarray):
675
+ var_dict_or_df[key] = value.tolist()
676
+
677
+ # Save the dictionary or list of dictionaries to a JSON file
678
+ json.dump(var_dict_or_df, f_json, indent=4)
679
+ # # Example usage:
680
+ # sets = {"title": "mse_path_ MSE"}
681
+ # jsonsave("/.json", sets)
682
+ # # setss = jsonload("/.json")
683
+
684
+ def save_yaml(fpath, data, **kwargs):
685
+ with open(fpath, "w") as file:
686
+ yaml.dump(data, file, **kwargs)
687
+
688
+
689
+ def save_xml(fpath, data):
690
+ root = etree.Element("root")
691
+ if isinstance(data, dict):
692
+ for key, val in data.items():
693
+ child = etree.SubElement(root, key)
694
+ child.text = str(val)
695
+ else:
696
+ raise ValueError("XML saving only supports dictionary data")
697
+ tree = etree.ElementTree(root)
698
+ tree.write(fpath, pretty_print=True, xml_declaration=True, encoding="UTF-8")
699
+
700
+ if kind is None:
701
+ _, kind = os.path.splitext(fpath)
702
+ kind = kind.lower()
703
+
704
+ kind = kind.lstrip(".").lower()
705
+
706
+ if kind not in [
707
+ "docx",
708
+ "txt",
709
+ "md",
710
+ "html",
711
+ "pdf",
712
+ "csv",
713
+ "xlsx",
714
+ "json",
715
+ "xml",
716
+ "yaml",
717
+ ]:
718
+ raise ValueError(
719
+ f"Error:\n{kind} is not in the supported list ['docx', 'txt', 'md', 'html', 'pdf', 'csv', 'xlsx', 'json', 'xml', 'yaml']"
720
+ )
721
+
722
+ if kind == "docx" or kind=="doc":
723
+ save_docx(fpath, content, font_name, font_size, spacing)
724
+ elif kind == "txt":
725
+ save_txt_md(fpath, content, sep="")
726
+ elif kind == "md":
727
+ save_txt_md(fpath, content, sep="")
728
+ elif kind == "html":
729
+ save_html(fpath, content, font_name, font_size)
730
+ elif kind == "pdf":
731
+ save_pdf(fpath, content, font_name, font_size)
732
+ elif kind == "csv":
733
+ save_csv(
734
+ fpath, content, **kwargs
735
+ ) # Assuming content is in tabular form (list of dicts or DataFrame)
736
+ elif kind == "xlsx":
737
+ save_xlsx(
738
+ fpath, content, **kwargs
739
+ ) # Assuming content is in tabular form (list of dicts or DataFrame)
740
+ elif kind == "json":
741
+ save_json(fpath, content) # Assuming content is a serializable object
742
+ elif kind == "xml":
743
+ save_xml(fpath, content) # Assuming content is a dictionary
744
+ elif kind == "yaml":
745
+ save_yaml(fpath, content, **kwargs) # Assuming content is a serializable object
746
+ else:
747
+ raise ValueError(
748
+ f"Error:\n{kind} is not in the supported list ['docx', 'txt', 'md', 'html', 'pdf', 'csv', 'xlsx', 'json', 'xml', 'yaml']"
749
+ )
750
+
751
+
752
+ # # Example usage
753
+ # text_content = ["Hello, this is a sample text file.", "This is the second paragraph."]
754
+ # tabular_content = {"Name": ["Alice", "Bob"], "Age": [24, 30]}
755
+ # json_content = {"name": "Alice", "age": 24}
756
+ # yaml_content = {"Name": "Alice", "Age": 24}
757
+ # xml_content = {"Name": "Alice", "Age": 24}
758
+ # dir_save = "/Users/macjianfeng/Dropbox/Downloads/"
759
+ # fsave(dir_save + "sample.txt", text_content)
760
+ # fsave(dir_save + "sample.md", text_content)
761
+ # fsave(dir_save + "sample.html", text_content)
762
+ # fsave(dir_save + "sample.pdf", text_content)
763
+ # fsave(dir_save + "sample.docx", text_content)
764
+ # fsave(dir_save + "sample.csv", tabular_content, index=False)
765
+ # fsave(dir_save + "sample.xlsx", tabular_content, sheet_name="Sheet1", index=False)
766
+ # fsave(dir_save + "sample.json", json_content, indent=4)
767
+ # fsave(dir_save + "sample.yaml", yaml_content)
768
+ # fsave(dir_save + "sample.xml", xml_content)
769
+
770
+ def addpath(fpath):
771
+ sys.path.insert(0, fpath)
772
+ def dirname(fpath):
773
+ dirname_=os.path.dirname(fpath)
774
+ if not dirname_.endswith('/'):
775
+ dirname_=dirname_+"/"
776
+ return dirname_
777
+ def dir_name(fpath):
778
+ return dirname(fpath)
779
+ def basename(fpath):
780
+ return os.path.basename(fpath)
781
+
782
+ def finfo(fpath):
783
+ fname, fmt = os.path.splitext(fpath)
784
+ dir_par = os.path.dirname(fpath) + '/'
785
+ data = {
786
+ "size": round(os.path.getsize(fpath) / 1024 / 1024, 3),
787
+ "creation_time": time.ctime(os.path.getctime(fpath)),
788
+ "ctime": time.ctime(os.path.getctime(fpath)),
789
+ "mod_time": time.ctime(os.path.getmtime(fpath)),
790
+ "mtime": time.ctime(os.path.getmtime(fpath)),
791
+ "parent_dir": dir_par,
792
+ "fname": fname.replace(dir_par, ""),
793
+ "kind": fmt
794
+ }
795
+ extra_info = {}
796
+ if data["kind"] == ".pdf":
797
+ extra_info = pdfinfo_from_path(fpath)
798
+
799
+ return FileInfo(
800
+ size=data["size"],
801
+ creation_time=data["creation_time"],
802
+ ctime=data["ctime"],
803
+ mod_time=data["mod_time"],
804
+ mtime=data["mtime"],
805
+ parent_dir=data["parent_dir"],
806
+ fname=data["fname"],
807
+ kind=data["kind"],
808
+ extra_info=extra_info
809
+ )
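+ # Note: FileInfo is not defined in this excerpt; it is presumably declared
+ # elsewhere in the module. A minimal sketch of the interface finfo() assumes:
+ # class FileInfo:
+ #     def __init__(self, size, creation_time, ctime, mod_time, mtime,
+ #                  parent_dir, fname, kind, extra_info=None):
+ #         ...  # store each field as an attribute
+ # usage: info = finfo('./sample.pdf'); print(info.size, info.kind)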
810
+
811
+ def listdir(
812
+ rootdir,
813
+ kind="folder",
814
+ sort_by="name",
815
+ ascending=True,
816
+ contains=None,
817
+ orient = "list",
818
+ output='df'
819
+ ):
820
+ def sort_kind(df, by="name", ascending=True):
821
+ if df[by].dtype == 'object': # Check if the column contains string values
822
+ if ascending:
823
+ sorted_index = df[by].str.lower().argsort()
824
+ else:
825
+ sorted_index = df[by].str.lower().argsort()[::-1]
826
+ else:
827
+ if ascending:
828
+ sorted_index = df[by].argsort()
829
+ else:
830
+ sorted_index = df[by].argsort()[::-1]
831
+ sorted_df = df.iloc[sorted_index].reset_index(drop=True)
832
+ return sorted_df
833
+
834
+ if not kind.startswith("."):
835
+ kind = "." + kind
836
+
837
+ if os.path.isdir(rootdir):
838
+ ls = os.listdir(rootdir)
839
+ fd = [".fd", ".fld", ".fol", ".fd", ".folder"]
840
+ i = 0
841
+ f = {
842
+ "name": [],
843
+ "length": [],
844
+ "path": [],
845
+ "created_time": [],
846
+ "modified_time": [],
847
+ "last_open_time":[],
848
+ "size": [],
849
+ "fname": [],
850
+ "fpath": [],
851
+ }
852
+ for item in ls:
853
+ item_path = os.path.join(rootdir, item)
854
+ if item.startswith("."):
855
+ continue
856
+ filename, file_extension = os.path.splitext(item)
857
+ is_folder = kind.lower() in fd and os.path.isdir(item_path)
858
+ is_file = kind.lower() in file_extension.lower() and (
859
+ os.path.isfile(item_path)
860
+ )
861
+ if not is_folder and not is_file:
862
+ continue
863
+ f["name"].append(filename)
864
+ f["length"].append(len(filename))
865
+ f["path"].append(os.path.join(os.path.dirname(item_path), item))
866
+ fpath=os.path.join(os.path.dirname(item_path), item)
867
+ f["size"].append(round(os.path.getsize(fpath) / 1024 / 1024, 3))
868
+ f["created_time"].append(pd.to_datetime(os.path.getctime(item_path),unit='s'))
869
+ f["modified_time"].append(pd.to_datetime(os.path.getmtime(item_path),unit='s'))
870
+ f['last_open_time'].append(pd.to_datetime(os.path.getatime(item_path),unit='s'))
871
+ f["fname"].append(filename) # will be removed
872
+ f["fpath"].append(fpath) # will be removed
873
+ i += 1
874
+
875
+ f["num"] = i
876
+ f["rootdir"] = rootdir
877
+ f["os"] = os.uname().machine
878
+ else:
879
+ raise FileNotFoundError(
880
+ 'The directory "{}" does NOT exist. Please check the directory "rootdir".'.format(
881
+ rootdir
882
+ )
883
+ )
884
+ f = pd.DataFrame(f)
885
+ if contains is not None:
886
+ f = f[f["name"].str.contains(contains,case=False)]
887
+ if "nam" in sort_by.lower():
888
+ # f.sort_values(by="name", ascending=ascending, ignore_index=True, inplace=True)
889
+ f=sort_kind(f, by="name", ascending=ascending)
890
+ elif "crea" in sort_by.lower():
891
+ f=sort_kind(f, by="created_time", ascending=ascending)
892
+ elif "modi" in sort_by.lower():
893
+ f=sort_kind(f, by="modified_time", ascending=ascending)
894
+ elif "s" in sort_by.lower() and "z" in sort_by.lower():
895
+ f=sort_kind(f, by="size", ascending=ascending)
896
+ if 'df' in output:
897
+ return f
898
+ else:
899
+ if 'l' in orient.lower(): # list # default
900
+ res_output = Box(f.to_dict(orient="list"))
901
+ return res_output
902
+ if 'd' in orient.lower(): # dict
903
+ return Box(f.to_dict(orient="dict"))
904
+ if 'r' in orient.lower(): # records
905
+ return Box(f.to_dict(orient="records"))
906
+ if 'in' in orient.lower(): # records
907
+ return Box(f.to_dict(orient="index"))
908
+ if 'se' in orient.lower(): # records
909
+ return Box(f.to_dict(orient="series"))
910
+
911
+ # Example usage:
912
+ # result = listdir('your_root_directory')
913
+ # print(result)
914
+ # df=listdir("/", contains='sss',sort_by='name',ascending=False)
915
+ # print(df.fname.to_list(),"\n",df.fpath.to_list())
916
+ def list_func(lib_name, opt="call"):
917
+ if opt == "call":
918
+ funcs = [func for func in dir(lib_name) if callable(getattr(lib_name, func))]
919
+ else:
920
+ funcs = dir(lib_name)
921
+ return funcs
922
+ def func_list(lib_name, opt="call"):
923
+ if opt == "call":
924
+ funcs = [func for func in dir(lib_name) if callable(getattr(lib_name, func))]
925
+ else:
926
+ funcs = dir(lib_name)
927
+ return funcs
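+ # usage (with any imported module; os is just an example):
+ # func_list(os)           # names of the callables in os
+ # func_list(os, opt=None) # everything dir(os) returns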
928
+
929
+ def newfolder(pardir, chdir):
930
+ import os
931
+ rootdir = []
932
+ # Convert string to list
933
+ if isinstance(chdir, str):
934
+ chdir = [chdir]
935
+
936
+ # Subfoldername should be unique
937
+ chdir = list(set(chdir))
938
+
939
+ if isinstance(pardir, str): # Dir_parents should be 'str' type
940
+ pardir = os.path.normpath(pardir)
941
+
942
+ # Get the slash type: "/" or "\"
943
+ stype = '/' if '/' in pardir else '\\'
944
+
945
+ # Check if the parent directory exists and is a directory path
946
+ if os.path.isdir(pardir):
947
+ os.chdir(pardir) # Set current path
948
+
949
+ # Check if subdirectories are not empty
950
+ if chdir:
951
+ chdir.sort()
952
+ # Create multiple subdirectories at once
953
+ for folder in chdir:
954
+ # Check if the subfolder already exists
955
+ child_tmp = os.path.join(pardir, folder)
956
+ if not os.path.isdir(child_tmp):
957
+ os.mkdir('./' + folder)
958
+ print(f'\n {folder} was created successfully!\n')
959
+ else:
960
+ print(f'\n {folder} already exists! \n')
961
+
962
+ rootdir.append(child_tmp + stype) # Note down
963
+
964
+ else:
965
+ print('\nWarning: Dir_child doesn\'t exist\n')
966
+
967
+ else:
968
+ print('\nWarning: Dir_parent is not a directory path\n')
969
+
970
+ # Dir is the main output, if only one dir, then str type is inconvenient
971
+ if len(rootdir) == 1:
972
+ rootdir = rootdir[0]
973
+
974
+ return rootdir
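+ # usage (paths are illustrative; note that newfolder os.chdir()s into pardir):
+ # newfolder('/tmp', 'data')            # -> '/tmp/data/'
+ # newfolder('/tmp', ['raw', 'clean'])  # -> ['/tmp/clean/', '/tmp/raw/'] (sorted)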
975
+
976
+
977
+ def figsave(*args,dpi=300):
978
+ DirSave = None
979
+ fname = None
980
+
981
+ for arg in args:
982
+ if isinstance(arg, str):
983
+ if '/' in arg or '\\' in arg:
984
+ DirSave = arg
985
+ elif '/' not in arg and '\\' not in arg:
986
+ fname = arg
987
+
988
+ # Normalize inputs; default to the current directory if none was given
+ if fname is None:
+ raise ValueError('figsave requires a file name, e.g. figsave("fig1.pdf", dir_save)')
+ if DirSave is None:
+ DirSave = './'
989
+ if '/' in DirSave:
990
+ if DirSave[-1] != '/':
991
+ DirSave = DirSave + '/'
992
+ elif '\\' in DirSave:
993
+ if DirSave[-1] != '\\':
994
+ DirSave = DirSave + '\\'
995
+ else:
996
+ raise ValueError('Check the Path of DirSave Directory')
997
+
998
+ ftype = fname.split('.')[-1]
999
+ if len(fname.split('.')) == 1:
1000
+ ftype = 'nofmt'
1001
+ fname = DirSave + fname + '.' + ftype
1002
+ else:
1003
+ fname = DirSave + fname
1004
+
1005
+ # Save figure based on file type
1006
+ if ftype.lower() == 'eps':
1007
+ plt.savefig(fname, format='eps', bbox_inches='tight')
1008
+ plt.savefig(fname.replace('.eps', '.pdf'),
1009
+ format='pdf', bbox_inches='tight',dpi=dpi)
1010
+ elif ftype.lower() == 'nofmt': # default: both "tif" and "pdf"
1011
+ fname_corr=fname.replace('nofmt','pdf')
1012
+ plt.savefig(fname_corr, format='pdf', bbox_inches='tight',dpi=dpi)
1013
+ fname=fname.replace('nofmt','tif')
1014
+ plt.savefig(fname, format='tiff', dpi=dpi, bbox_inches='tight')
1015
+ print(f"default saving filetype: both 'tif' and 'pdf")
1016
+ elif ftype.lower() == 'pdf':
1017
+ plt.savefig(fname, format='pdf', bbox_inches='tight',dpi=dpi)
1018
+ elif ftype.lower() in ['jpg', 'jpeg']:
1019
+ plt.savefig(fname, format='jpeg', dpi=dpi, bbox_inches='tight')
1020
+ elif ftype.lower() == 'png':
1021
+ plt.savefig(fname, format='png', dpi=dpi,
1022
+ bbox_inches='tight', transparent=True)
1023
+ elif ftype.lower() in ['tiff', 'tif']:
1024
+ plt.savefig(fname, format='tiff', dpi=dpi, bbox_inches='tight')
1025
+ elif ftype.lower() == 'emf':
1026
+ plt.savefig(fname, format='emf', dpi=dpi, bbox_inches='tight')
1027
+ elif ftype.lower() == 'fig':
1028
+ plt.savefig(fname, format='pdf', bbox_inches='tight',dpi=dpi)
1029
+
1030
+ print(f'\nSaved @: dpi={dpi}\n{fname}')
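+ # usage (directory and file name are illustrative):
+ # figsave('/tmp/figs/', 'fig1.pdf')       # save the current figure as pdf
+ # figsave('/tmp/figs/', 'fig1', dpi=600)  # no extension -> both tif and pdf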
1031
+
1032
+
1033
+ # ==============FuncStars(ax,x1=1,x2=2, yscale=0.9, pval=0.01)====================================================
1034
+ # Usage:
1035
+ # FuncStars(ax, x1=2, x2=3, yscale=0.99, pval=0.02)
1036
+ # =============================================================================
1037
+
1038
+ # FuncStars --v 0.1.1
1039
+ def FuncStars(ax,
1040
+ pval=None,
1041
+ Ylim=None,
1042
+ Xlim=None,
1043
+ symbol='*',
1044
+ yscale=0.95,
1045
+ x1=0,
1046
+ x2=1,
1047
+ alpha=0.05,
1048
+ fontsize=14,
1049
+ fontsize_note=6,
1050
+ rotation=0,
1051
+ fontname='Arial',
1052
+ values_below=None,
1053
+ linego=True,
1054
+ linestyle='-',
1055
+ linecolor='k',
1056
+ linewidth=.8,
1057
+ nsshow='off',
1058
+ symbolcolor='k',
1059
+ tailindicator=[0.06, 0.06],
1060
+ report=None,
1061
+ report_scale=-0.1,
1062
+ report_loc=None):
1063
+
1064
+
1065
+ if ax is None:
1066
+ ax = plt.gca()
1067
+ if Ylim is None:
1068
+ Ylim = plt.gca().get_ylim()
1069
+ if Xlim is None:
1070
+ Xlim = ax.get_xlim()
1071
+ if report_loc is None and report is not None:
1072
+ report_loc = np.min(Ylim) + report_scale*np.abs(np.diff(Ylim))
1073
+ if report_scale > 0:
1074
+ report_scale = -np.abs(report_scale)
1075
+
1076
+ yscale = np.float64(yscale)
1077
+ y_loc = np.min(Ylim) + yscale*(np.max(Ylim)-np.min(Ylim))
1078
+ xcenter = np.mean([x1, x2])
1079
+
1080
+ # ns / *
1081
+ if alpha < pval:
1082
+ if nsshow == 'on':
1083
+ ns_str = f'p={round(pval, 3)}' if pval < 0.9 else 'ns'
1084
+ color = 'm' if pval < 0.1 else 'k'
1085
+ plt.text(xcenter, y_loc, ns_str,
1086
+ ha='center', va='bottom', # 'center_baseline',
1087
+ fontsize=fontsize-6 if fontsize > 6 else fontsize,
1088
+ fontname=fontname, color=color, rotation=rotation
1089
+ # bbox=dict(facecolor=None, edgecolor=None, color=None, linewidth=None)
1090
+ )
1091
+ elif 0.01 < pval <= alpha:
1092
+ plt.text(xcenter, y_loc, symbol,
1093
+ ha='center', va='center_baseline',
1094
+ fontsize=fontsize, fontname=fontname, color=symbolcolor)
1095
+ elif 0.001 < pval <= 0.01:
1096
+ plt.text(xcenter, y_loc, symbol * 2,
1097
+ ha='center', va='center_baseline',
1098
+ fontsize=fontsize, fontname=fontname, color=symbolcolor)
1099
+ elif 0 < pval <= 0.001:
1100
+ plt.text(xcenter, y_loc, symbol * 3,
1101
+ ha='center', va='center_baseline',
1102
+ fontsize=fontsize, fontname=fontname, color=symbolcolor)
1103
+
1104
+ # lines indicators
1105
+ if linego: # and 0 < pval <= 0.05:
1108
+ # horizontal line
1109
+ if yscale < 0.99:
1110
+ plt.plot([x1 + np.abs(np.diff(Xlim)) * 0.01,
1111
+ x2 - np.abs(np.diff(Xlim)) * 0.01],
1112
+ [y_loc - np.abs(np.diff(Ylim)) * .03,
1113
+ y_loc - np.abs(np.diff(Ylim)) * .03],
1114
+ linestyle=linestyle, color=linecolor, linewidth=linewidth)
1115
+ # vertical line
1116
+ plt.plot([x1 + np.abs(np.diff(Xlim)) * 0.01,
1117
+ x1 + np.abs(np.diff(Xlim)) * 0.01],
1118
+ [y_loc - np.abs(np.diff(Ylim)) * tailindicator[0],
1119
+ y_loc - np.abs(np.diff(Ylim)) * .03],
1120
+ linestyle=linestyle, color=linecolor, linewidth=linewidth)
1121
+ plt.plot([x2 - np.abs(np.diff(Xlim)) * 0.01,
1122
+ x2 - np.abs(np.diff(Xlim)) * 0.01],
1123
+ [y_loc - np.abs(np.diff(Ylim)) * tailindicator[1],
1124
+ y_loc - np.abs(np.diff(Ylim)) * .03],
1125
+ linestyle=linestyle, color=linecolor, linewidth=linewidth)
1126
+ else:
1127
+ plt.plot([x1 + np.abs(np.diff(Xlim)) * 0.01,
1128
+ x2 - np.abs(np.diff(Xlim)) * 0.01],
1129
+ [np.min(Ylim) + 0.95*(np.max(Ylim)-np.min(Ylim)) - np.abs(np.diff(Ylim)) * 0.002,
1130
+ np.min(Ylim) + 0.95*(np.max(Ylim)-np.min(Ylim)) - np.abs(np.diff(Ylim)) * 0.002],
1131
+ linestyle=linestyle, color=linecolor, linewidth=linewidth)
1132
+ # vertical line
1133
+ plt.plot([x1 + np.abs(np.diff(Xlim)) * 0.01,
1134
+ x1 + np.abs(np.diff(Xlim)) * 0.01],
1135
+ [np.min(Ylim) + 0.95*(np.max(Ylim)-np.min(Ylim)) - np.abs(np.diff(Ylim)) * tailindicator[0],
1136
+ np.min(Ylim) + 0.95*(np.max(Ylim)-np.min(Ylim)) - np.abs(np.diff(Ylim)) * 0.002],
1137
+ linestyle=linestyle, color=linecolor, linewidth=linewidth)
1138
+ plt.plot([x2 - np.abs(np.diff(Xlim)) * 0.01,
1139
+ x2 - np.abs(np.diff(Xlim)) * 0.01],
1140
+ [np.min(Ylim) + 0.95*(np.max(Ylim)-np.min(Ylim)) - np.abs(np.diff(Ylim)) * tailindicator[1],
1141
+ np.min(Ylim) + 0.95*(np.max(Ylim)-np.min(Ylim)) - np.abs(np.diff(Ylim)) * 0.002],
1142
+ linestyle=linestyle, color=linecolor, linewidth=linewidth)
1143
+
1144
+ if values_below is not None:
1145
+ plt.text(xcenter, y_loc * (-0.1), values_below,
1146
+ ha='center', va='bottom', # 'center_baseline', rotation=rotation,
1147
+ fontsize=fontsize_note, fontname=fontname, color='k')
1148
+
1149
+ # report / comments
1150
+ if report is not None:
1151
+ plt.text(xcenter, report_loc, report,
1152
+ ha='left', va='bottom', # 'center_baseline', rotation=rotation,
1153
+ fontsize=fontsize_note, fontname=fontname, color='.7')
1154
+ def is_str_color(s):
1155
+ # Regular expression pattern for hexadecimal color codes
1156
+ color_code_pattern = r"^#([A-Fa-f0-9]{6}|[A-Fa-f0-9]{8})$"
1157
+ return re.match(color_code_pattern, s) is not None
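+ # usage:
+ # is_str_color('#1f77b4')   # True (6-digit hex)
+ # is_str_color('#1f77b4ff') # True (8-digit hex with alpha)
+ # is_str_color('blue')      # False (only hex codes are matched)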
1158
+
1159
+ def stdshade(ax=None,*args, **kwargs):
1160
+ if (
1161
+ isinstance(ax, np.ndarray)
1162
+ and ax.ndim == 2
1163
+ and min(ax.shape) > 1
1164
+ and max(ax.shape) > 1
1165
+ ):
1166
+ y = ax
1167
+ ax = plt.gca()
1168
+ if ax is None:
1169
+ ax = plt.gca()
1170
+ alpha = 0.5
1171
+ acolor = "k"
1172
+ paraStdSem = "sem"
1173
+ plotStyle = "-"
1174
+ plotMarker = "none"
1175
+ smth = 1
1176
+ l_c_one = ["r", "g", "b", "m", "c", "y", "k", "w"]
1177
+ l_style2 = ["--", "-."]
1178
+ l_style1 = ["-", ":"]
1179
+ l_mark = ["o", "+", "*", ".", "x", "_", "|", "s", "d", "^", "v", ">", "<", "p", "h"]
1180
+
1181
+ # Check each argument
1182
+ for iarg in range(len(args)):
1183
+ if (
1184
+ isinstance(args[iarg], np.ndarray)
1185
+ and args[iarg].ndim == 2
1186
+ and min(args[iarg].shape) > 1
1187
+ and max(args[iarg].shape) > 1
1188
+ ):
1189
+ y = args[iarg]
1190
+ # Apart from y, a 1-D array or range is treated as the x-axis values
1191
+ if (isinstance(args[iarg], np.ndarray) and args[iarg].ndim == 1) or isinstance(
1192
+ args[iarg], range
1193
+ ):
1194
+ x = args[iarg]
1195
+ if isinstance(x, range):
1196
+ x = np.arange(start=x.start, stop=x.stop, step=x.step)
1197
+ # Only one number( 0~1), 'alpha' / color
1198
+ if isinstance(args[iarg], (int, float)):
1199
+ if np.size(args[iarg]) == 1 and 0 <= args[iarg] <= 1:
1200
+ alpha = args[iarg]
1201
+ if isinstance(args[iarg], (list, tuple)) and np.size(args[iarg]) == 3:
1202
+ acolor = args[iarg]
1203
+ acolor = tuple(acolor) if isinstance(acolor, list) else acolor
1204
+ # Color / plotStyle /
1205
+ if (
1206
+ isinstance(args[iarg], str)
1207
+ and len(args[iarg]) == 1
1208
+ and args[iarg] in l_c_one
1209
+ ):
1210
+ acolor = args[iarg]
1211
+ else:
1212
+ if isinstance(args[iarg], str):
1213
+ if args[iarg] in ["sem", "std"]:
1214
+ paraStdSem = args[iarg]
1215
+ if args[iarg].startswith("#"):
1216
+ acolor=hue2rgb(args[iarg])
1217
+ if str2list(args[iarg])[0] in l_c_one:
1218
+ if len(args[iarg]) == 3:
1219
+ k = [i for i in str2list(args[iarg]) if i in l_c_one]
1220
+ if k != []:
1221
+ acolor = k[0]
1222
+ st = [i for i in l_style2 if i in args[iarg]]
1223
+ if st != []:
1224
+ plotStyle = st[0]
1225
+ elif len(args[iarg]) == 2:
1226
+ k = [i for i in str2list(args[iarg]) if i in l_c_one]
1227
+ if k != []:
1228
+ acolor = k[0]
1229
+ mk = [i for i in str2list(args[iarg]) if i in l_mark]
1230
+ if mk != []:
1231
+ plotMarker = mk[0]
1232
+ st = [i for i in l_style1 if i in args[iarg]]
1233
+ if st != []:
1234
+ plotStyle = st[0]
1235
+ if len(args[iarg]) == 1:
1236
+ k = [i for i in str2list(args[iarg]) if i in l_c_one]
1237
+ if k != []:
1238
+ acolor = k[0]
1239
+ mk = [i for i in str2list(args[iarg]) if i in l_mark]
1240
+ if mk != []:
1241
+ plotMarker = mk[0]
1242
+ st = [i for i in l_style1 if i in args[iarg]]
1243
+ if st != []:
1244
+ plotStyle = st[0]
1245
+ if len(args[iarg]) == 2:
1246
+ st = [i for i in l_style2 if i in args[iarg]]
1247
+ if st != []:
1248
+ plotStyle = st[0]
1249
+ # smth
1250
+ if (
1251
+ isinstance(args[iarg], (int, float))
1252
+ and np.size(args[iarg]) == 1
1253
+ and args[iarg] >= 1
1254
+ ):
1255
+ smth = args[iarg]
1256
+
1257
+ if "x" not in locals() or x is None:
1258
+ x = np.arange(1, y.shape[1] + 1)
1259
+ elif len(x) < y.shape[1]:
1260
+ y = y[:, x]
1261
+ nRow = y.shape[0]
1262
+ nCol = y.shape[1]
1263
+ print(f"y was corrected, please confirm that {nRow} row, {nCol} col")
1264
+ else:
1265
+ x = np.arange(1, y.shape[1] + 1)
1266
+
1267
+ if x.shape[0] != 1:
1268
+ x = x.T
1269
+ yMean = np.nanmean(y, axis=0)
1270
+ if smth > 1:
1271
+ yMean = savgol_filter(np.nanmean(y, axis=0), smth, 1)
1272
+ else:
1273
+ yMean = np.nanmean(y, axis=0)
1274
+ if paraStdSem == "sem":
1275
+ if smth > 1:
1276
+ wings = savgol_filter(np.nanstd(y, axis=0) / np.sqrt(y.shape[0]), smth, 1)
1277
+ else:
1278
+ wings = np.nanstd(y, axis=0) / np.sqrt(y.shape[0])
1279
+ elif paraStdSem == "std":
1280
+ if smth > 1:
1281
+ wings = savgol_filter(np.nanstd(y, axis=0), smth, 1)
1282
+ else:
1283
+ wings = np.nanstd(y, axis=0)
1284
+
1285
+ fill_kws = kwargs.get('fill_kws', {})
1286
+ line_kws = kwargs.get('line_kws', {})
1287
+ fill = ax.fill_between(x, yMean + wings, yMean - wings, color=acolor, alpha=alpha, lw=0,**fill_kws)
1288
+ if line_kws != {} and not any(key.lower() in ['lw', 'linewidth'] for key in line_kws.keys()):
1289
+ line = ax.plot(x, yMean, color=acolor, lw=1.5, ls=plotStyle, marker=plotMarker, **line_kws)
1290
+ else:
1291
+ line = ax.plot(x, yMean, color=acolor, ls=plotStyle, marker=plotMarker, **line_kws)
1292
+ return line[0], fill
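+ # usage (a sketch with random data; str2list/hue2rgb referenced above are
+ # assumed to be defined elsewhere in this module):
+ # y = np.random.randn(20, 100)             # 20 trials x 100 timepoints
+ # stdshade(plt.gca(), y, 0.3, 'r', 'sem')  # red mean line with sem shading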
1293
+
1294
+
1295
+ # =============================================================================
1296
+ # # for plot figures {Qiu et al.2023}
1297
+ # =============================================================================
1298
+ # =============================================================================
1299
+ # plt.rcParams.update({'figure.max_open_warning': 0})
1300
+ # # Output matplotlib figure to SVG with text as text, not curves
1301
+ # plt.rcParams['svg.fonttype'] = 'none'
1302
+ # plt.rcParams['pdf.fonttype'] = 42
1303
+ #
1304
+ # plt.rc('text', usetex=False)
1305
+ # # plt.style.use('ggplot')
1306
+ # plt.style.use('science')
1307
+ # plt.rc('font', family='serif')
1308
+ # plt.rcParams.update({
1309
+ # "font.family": "serif", # specify font family here
1310
+ # "font.serif": ["Arial"], # specify font here
1311
+ # "font.size": 11})
1312
+ # # plt.tight_layout()
1313
+ # =============================================================================
1314
+ # =============================================================================
1315
+ # # axis spine
1316
+ # # use it like: adjust_spines(ax, ['left', 'bottom'])
1317
+ # =============================================================================
1318
+
1319
+
1320
+ def adjust_spines(ax=None, spines=['left', 'bottom'],distance=2):
1321
+ if ax is None:
1322
+ ax = plt.gca()
1323
+ for loc, spine in ax.spines.items():
1324
+ if loc in spines:
1325
+ spine.set_position(('outward', distance)) # outward by 2 points
1326
+ # spine.set_smart_bounds(True)
1327
+ else:
1328
+ spine.set_color('none') # don't draw spine
1329
+ # turn off ticks where there is no spine
1330
+ if 'left' in spines:
1331
+ ax.yaxis.set_ticks_position('left')
1332
+ else:
1333
+ ax.yaxis.set_ticks([])
1334
+ if 'bottom' in spines:
1335
+ ax.xaxis.set_ticks_position('bottom')
1336
+ else:
1337
+ # no xaxis ticks
1338
+ ax.xaxis.set_ticks([])
1339
+ # And then plot the data:
1340
+
1341
+ def add_colorbar(im, width=None, pad=None, **kwargs):
1342
+ # usage: add_colorbar(im, width=0.01, pad=0.005, label="PSD (dB)", shrink=0.8)
1343
+ l, b, w, h = im.axes.get_position().bounds # get boundaries
1344
+ width = width or 0.1 * w # get width of the colorbar
1345
+ pad = pad or width # get pad between im and cbar
1346
+ fig = im.axes.figure # get figure of image
1347
+ cax = fig.add_axes([l + w + pad, b, width, h]) # define cbar Axes
1348
+ return fig.colorbar(im, cax=cax, **kwargs) # draw cbar
1349
+ # =============================================================================
1350
+ # # for plot figures: setting rcParams
1351
+ # usage: set_pub()
1352
+ # or by setting sns.set_theme...see below:
1353
+ # sns.set_theme(style="ticks", rc=params) # 白色无刻度线,有坐标轴标度
1354
+ # # sns.set_theme(style="whitegrid", rc=params)# 白色+刻度线,无坐标轴标度
1355
+ # # sns.set_theme(style="white", rc=params) # 白色无刻度线,无坐标轴标度
1356
+ # # sns.set_theme(style="dark", rc=params) # 深色无刻度线,无坐标轴标度
1357
+ # =============================================================================
1358
+
1359
+
1360
+ def FuncCmpt(X1, X2, pmc='auto', pair='unpaired'):
+     # pmc correction: 'parametric' / 'non-parametric' / 'auto'
+     # meanwhile get the opposite setting (to compare the results)
+     def corr_pmc(pmc):
+         if pmc.lower() in {'pmc', 'parametric'}:
+             cfg_pmc = 'parametric'
+         elif pmc.lower() in {'npmc', 'nonparametric', 'non-parametric'}:
+             cfg_pmc = 'non-parametric'
+         else:
+             cfg_pmc = 'auto'
+         return cfg_pmc
+
+     def corr_pair(pair):
+         cfg_pair = None
+         if 'pa' in pair.lower() and 'np' not in pair.lower():
+             cfg_pair = 'paired'
+         elif 'np' in pair.lower():
+             cfg_pair = 'unpaired'
+         return cfg_pair
+
+     def check_normality(data):
+         stat_shapiro, pval_shapiro = stats.shapiro(data)
+         Normality = pval_shapiro > 0.05
+         print('\n normally distributed\n') if Normality else print('\n NOT normally distributed\n')
+         return Normality
+
+     def sub_cmpt_2group(X1, X2, cfg_pmc='pmc', pair='unpaired'):
+         # note: 'Normality' is read from the enclosing scope; it is set below before this is called
+         output = {}
+         nX1 = np.sum(~np.isnan(X1))
+         nX2 = np.sum(~np.isnan(X2))
+         if cfg_pmc == 'parametric' or cfg_pmc == 'auto':
+             # check the variance type via Levene's test
+             stat_lev, pval_lev = stats.levene(X1, X2, center='median', proportiontocut=0.05)
+             VarType = True if pval_lev > 0.05 and nX1 == nX2 else False
+
+             if 'np' in pair:  # 'unpaired'
+                 if VarType and Normality:
+                     # The independent t-test requires that the dependent variable be approximately
+                     # normally distributed within each group.
+                     # Note: technically it is the residuals that need to be normally distributed,
+                     # but for an independent t-test both give the same result.
+                     stat_value, pval = stats.ttest_ind(
+                         X1, X2, axis=0, equal_var=True, nan_policy='omit', alternative='two-sided')
+                     notes_stat = 'unpaired t test'
+                     notes_APA = f't({nX1+nX2-2})={round(stat_value, 5)},p={round(pval, 5)}'
+                 else:
+                     # If Levene's test for equality of variances is statistically significant,
+                     # indicating that the group variances are unequal in the population, correct
+                     # for this violation by not using the pooled estimate for the error term of
+                     # the t-statistic, and adjust the degrees of freedom using the
+                     # Welch-Satterthwaite method.
+                     stat_value, pval = stats.ttest_ind(
+                         X1, X2, axis=0, equal_var=False, nan_policy='omit', alternative='two-sided')
+                     notes_stat = "Welch's t-test"
+                     # note: APA format (for Welch's test the exact df would come from the
+                     # Welch-Satterthwaite correction)
+                     notes_APA = f't({nX1+nX2-2})={round(stat_value, 5)},p={round(pval, 5)}'
+             elif 'pa' in pair and 'np' not in pair:  # 'paired'
+                 # The paired-samples t-test is considered robust against violations of normality
+                 # to some extent: it can still yield valid results even if the data are not
+                 # normally distributed, so it typically requires only approximately normal data.
+                 stat_value, pval = stats.ttest_rel(
+                     X1, X2, axis=0, nan_policy='omit', alternative='two-sided')
+                 notes_stat = 'paired t test'
+                 # note: APA format
+                 notes_APA = f't({nX1-1})={round(stat_value, 5)},p={round(pval, 5)}'
+         elif cfg_pmc == 'non-parametric':
+             if 'np' in pair:  # perform the Mann-Whitney U test
+                 stat_value, pval = stats.mannwhitneyu(
+                     X1, X2, method='exact', nan_policy='omit')
+                 notes_stat = 'Mann-Whitney U'
+                 if nX1 == nX2:
+                     notes_APA = f'U(n={nX1})={round(stat_value, 5)},p={round(pval, 5)}'
+                 else:
+                     notes_APA = f'U(n1={nX1},n2={nX2})={round(stat_value, 5)},p={round(pval, 5)}'
+             elif 'pa' in pair and 'np' not in pair:  # Wilcoxon signed-rank test
+                 stat_value, pval = stats.wilcoxon(
+                     X1, X2, method='exact', nan_policy='omit')
+                 notes_stat = 'Wilcoxon signed-rank'
+                 if nX1 == nX2:
+                     notes_APA = f'Z(n={nX1})={round(stat_value, 5)},p={round(pval, 5)}'
+                 else:
+                     notes_APA = f'Z(n1={nX1},n2={nX2})={round(stat_value, 5)},p={round(pval, 5)}'
+
+         # fill the output
+         output['stat'] = stat_value
+         output['pval'] = pval
+         output['method'] = notes_stat
+         output['APA'] = notes_APA
+
+         print(f"{output['method']}\n {notes_APA}\n\n")
+
+         return output, pval
+
+     Normality1 = check_normality(X1)
+     Normality2 = check_normality(X2)
+     Normality = True if all([Normality1, Normality2]) else False
+
+     nX1 = np.sum(~np.isnan(X1))
+     nX2 = np.sum(~np.isnan(X2))
+
+     cfg_pmc = corr_pmc(pmc)
+     cfg_pair = corr_pair(pair)
+
+     output, p = sub_cmpt_2group(X1, X2, cfg_pmc=cfg_pmc, pair=cfg_pair)
+     return p, output
+
+
+ # ====== compare 2 groups =====================================================
+ # # Example
+ # X1 = [19, 22, 16, 29, 24]
+ # X2 = [20, 11, 17, 12, 22]
+
+ # p, res = FuncCmpt(X1, X2, pmc='pmc', pair='unpaired')
+
+ # =============================================================================
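+ # a further usage sketch (hedged): the same made-up data routed through the
+ # paired, non-parametric branch (Wilcoxon signed-rank) of FuncCmpt above
+ # p, res = FuncCmpt(X1, X2, pmc='npmc', pair='paired')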
+
+ # =============================================================================
+ # # method = ['anova',        # 'One-way and N-way ANOVA',
+ # #           'rm_anova',     # 'One-way and two-way repeated measures ANOVA',
+ # #           'mixed_anova',  # 'Two-way mixed ANOVA',
+ # #           'welch_anova',  # 'One-way Welch ANOVA',
+ # #           'kruskal',      # 'Non-parametric one-way ANOVA',
+ # #           'friedman',     # 'Non-parametric one-way repeated measures ANOVA',
+ # #           ]
+ # =============================================================================
+
+
+ def df_wide_long(df):
+     rows, columns = df.shape
+     if columns > rows:
+         return "Wide"
+     else:
+         # rows >= columns; square frames are treated as "Long" so callers always get a string
+         return "Long"
+
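+ # usage sketch (hedged; illustrative frame): a quick orientation check, as used
+ # by run_friedman() inside FuncMultiCmpt below
+ # df = pd.DataFrame(np.random.rand(10, 3))  # 10 rows x 3 columns
+ # df_wide_long(df)  # -> "Long"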
+ def FuncMultiCmpt(pmc='pmc', pair='unpair', data=None, dv=None, factor=None,
+                   ss_type=2, detailed=True, effsize='np2',
+                   correction='auto', between=None, within=None,
+                   subject=None, group=None
+                   ):
+
+     def corr_pair(pair):
+         cfg_pair = None
+         if 'pa' in pair.lower() and 'np' not in pair.lower():
+             cfg_pair = 'paired'
+         elif 'np' in pair.lower():
+             cfg_pair = 'unpaired'
+         elif 'mix' in pair.lower():
+             cfg_pair = 'mix'
+         return cfg_pair
+
+     def check_normality(data):
+         stat_shapiro, pval_shapiro = stats.shapiro(data)
+         Normality = pval_shapiro > 0.05
+         print('\n normally distributed\n') if Normality else print('\n NOT normally distributed\n')
+         return Normality
+
+     def corr_pmc(pmc):
+         if pmc.lower() in {'pmc', 'parametric'}:
+             cfg_pmc = 'parametric'
+         elif pmc.lower() in {'upmc', 'npmc', 'nonparametric', 'non-parametric'}:
+             cfg_pmc = 'non-parametric'
+         else:
+             cfg_pmc = 'auto'
+         return cfg_pmc
+
+     def extract_apa(res_tab):
+         notes_APA = []
+         if "ddof1" in res_tab:
+             for irow in range(res_tab.shape[0]):
+                 note_tmp = f'{res_tab.Source[irow]}:F({round(res_tab.ddof1[irow])},{round(res_tab.ddof2[irow])})={round(res_tab.F[irow], 5)},p={round(res_tab["p-unc"][irow], 5)}'
+                 notes_APA.append([note_tmp])
+         elif "DF" in res_tab:
+             for irow in range(res_tab.shape[0] - 1):
+                 note_tmp = f'{res_tab.Source[irow]}:F({round(res_tab.DF[irow])},{round(res_tab.DF[res_tab.shape[0] - 1])})={round(res_tab.F[irow], 5)},p={round(res_tab["p-unc"][irow], 5)}'
+                 notes_APA.append([note_tmp])
+             notes_APA.append(['NaN'])
+         elif "DF1" in res_tab:  # in the 'mix' case
+             for irow in range(res_tab.shape[0]):
+                 note_tmp = f'{res_tab.Source[irow]}:F({round(res_tab.DF1[irow])},{round(res_tab.DF2[irow])})={round(res_tab.F[irow], 5)},p={round(res_tab["p-unc"][irow], 5)}'
+                 notes_APA.append([note_tmp])
+         return notes_APA
+
+     def anovatable(res_tab):
+         if 'df' in res_tab:  # statsmodels
+             res_tab['mean_sq'] = res_tab[:]['sum_sq'] / res_tab[:]['df']
+             res_tab['est_sq'] = res_tab[:-1]['sum_sq'] / sum(res_tab['sum_sq'])
+             res_tab['omega_sq'] = (res_tab[:-1]['sum_sq'] - (res_tab[:-1]['df'] *
+                                    res_tab['mean_sq'][-1])) / (sum(res_tab['sum_sq']) + res_tab['mean_sq'][-1])
+         elif 'DF' in res_tab:
+             res_tab['MS'] = res_tab[:]['SS'] / res_tab[:]['DF']
+             res_tab['est_sq'] = res_tab[:-1]['SS'] / sum(res_tab['SS'])
+             res_tab['omega_sq'] = (res_tab[:-1]['SS'] - (res_tab[:-1]['DF'] *
+                                    res_tab['MS'][1])) / (sum(res_tab['SS']) + res_tab['MS'][1])
+         if 'p-unc' in res_tab:
+             if 'np2' in res_tab:
+                 res_tab['est_sq'] = res_tab['np2']
+             res_tab['PR(>F)'] = res_tab['p-unc']
+         return res_tab
+
+     def run_anova(data, dv, factor, ss_type=2, detailed=True, effsize='np2'):
+         # perform ANOVA
+         # =============================================================================
+         # # # ANOVA (input: formula, dataset)
+         # =============================================================================
+         # # note: if the data are balanced (equal sample size per group), Type 1, 2 and 3
+         # # sums of squares (the typ parameter) produce similar results.
+         # lm = ols("values ~ C(group)", data=df).fit()
+         # res_tab = anova_lm(lm, typ=ss_type)
+
+         # # However, statsmodels does not provide any effect-size measure to tell whether
+         # # the statistical significance is meaningful. anovatable() above calculates
+         # # eta-squared and omega-squared from the sums of squares; omega-squared is
+         # # considered the better measure of effect size since its calculation is
+         # # unbiased, accounting for the degrees of freedom in the model.
+         # res_tab = anovatable(res_tab)
+
+         # =============================================================================
+         # # alternative for ANOVA
+         # =============================================================================
+         res_tab = pg.anova(dv=dv, between=factor, data=data,
+                            detailed=detailed, ss_type=ss_type, effsize=effsize)
+         res_tab = anovatable(res_tab)
+         return res_tab
+
+     def run_rmanova(data, dv, factor, subject, correction='auto', detailed=True, effsize='ng2'):
+         # One-way repeated-measures ANOVA using a long-format dataset.
+         res_tab = pg.rm_anova(data=data, dv=dv, within=factor,
+                               subject=subject, detailed=detailed, effsize=effsize)
+         return res_tab
+
+     def run_welchanova(data, dv, factor):
+         # When the groups are balanced and have equal variances, the optimal
+         # post-hoc test is the Tukey-HSD test (pingouin.pairwise_tukey()). If the
+         # groups have unequal variances, the Games-Howell test is more adequate
+         # (pingouin.pairwise_gameshowell()). Results have been tested against R.
+         res_tab = pg.welch_anova(data=data, dv=dv, between=factor)
+         res_tab = anovatable(res_tab)
+         return res_tab
+
+     def run_mixedanova(data, dv, between, within, subject, correction='auto', effsize='np2'):
+         # Notes
+         # Data are expected to be in long format (even the repeated measures).
+         # If your data are in wide format, you can use pandas.melt() to convert
+         # from wide to long format.
+
+         # Warning
+         # If the between-subject groups are unbalanced (= unequal sample sizes), a
+         # type II ANOVA will be computed. Note however that SPSS, JAMOVI and JASP
+         # by default return a type III ANOVA, which may lead to slightly different
+         # results.
+         res_tab = pg.mixed_anova(data=data, dv=dv, within=within, subject=subject,
+                                  between=between, correction=correction, effsize=effsize)
+         res_tab = anovatable(res_tab)
+         return res_tab
+
+     def run_friedman(data, dv, factor, subject, method='chisq'):
+         # Friedman test for repeated measurements.
+         # The Friedman test is used as a non-parametric (rank-based) one-way
+         # repeated measures ANOVA.
+
+         # check the DataFrame form ('Wide' or 'Long')
+         if "Wide" in df_wide_long(data):
+             df_long = data.melt(ignore_index=False).reset_index()
+             res_tab = pg.friedman(data=df_long, dv='value',
+                                   within="variable", subject="index", method=method)
+         else:
+             res_tab = pg.friedman(data, dv=dv, within=factor, subject=subject, method=method)
+         res_tab = anovatable(res_tab)
+         return res_tab
+
+     def run_kruskal(data, dv, factor):
+         # Kruskal-Wallis H-test for independent samples
+         res_tab = pg.kruskal(data=data, dv=dv, between=factor)
+         res_tab = anovatable(res_tab)
+         return res_tab
+
+     # Normality check:
+     # Conduct normality tests (Shapiro-Wilk) for each group.
+     # If the data are approximately normally distributed, ANOVA is robust to
+     # moderate departures from normality, especially with larger sample sizes.
+     if group is None:
+         group = factor
+
+     norm_array = []
+     for sub_group in data[group].unique():
+         norm_curr = check_normality(data.loc[data[group] == sub_group, dv])
+         norm_array.append(norm_curr)
+     norm_all = True if all(norm_array) else False
+
+     # Homogeneity of variances:
+     # Check for homogeneity of variances (homoscedasticity) among groups.
+     # Levene's test or Bartlett's test can be used for this purpose.
+     # If variances are significantly different, consider transformations or use a
+     # robust ANOVA method.
+
+     # # =============================================================================
+     # # # method 1: stats.levene
+     # # =============================================================================
+     # # data_array = []
+     # # for sub_group in df["group"].unique():
+     # #     data_array.append(df.loc[df['group'] == sub_group, 'values'].values)
+     # # variance_all = stats.levene(data_array[0], data_array[1], data_array[2])
+
+     # =============================================================================
+     # # method 2: pingouin.homoscedasticity
+     # =============================================================================
+     variance_all = pg.homoscedasticity(
+         data, dv=dv, group=group, method='levene', alpha=0.05)
+     res_levene = True if variance_all.iloc[0, 1] > 0.05 else False
+     # =============================================================================
+     # # ANOVA assumptions:
+     # # Ensure that the assumptions of independence, homogeneity of variances and
+     # # normality are reasonably met before proceeding.
+     # =============================================================================
+     notes_norm = 'normally' if norm_all else 'NOT-normally'
+     notes_variance = 'equal' if res_levene else 'unequal'
+     print(f'Data is {notes_norm} distributed, shows {notes_variance} variance')
+
+     cfg_pmc = corr_pmc(pmc)
+     cfg_pair = corr_pair(pair)
+     output = {}
+     if (cfg_pmc == 'parametric') or (cfg_pmc == 'auto'):
+         if 'np' in cfg_pair:  # 'unpaired'
+             if cfg_pmc == 'auto':
+                 if norm_all:
+                     if res_levene:
+                         res_tab = run_anova(data, dv, factor, ss_type=ss_type,
+                                             detailed=True, effsize='np2')
+                         notes_stat = f'{data[factor].nunique()} Way ANOVA'
+                         notes_APA = extract_apa(res_tab)
+                     else:
+                         res_tab = run_welchanova(data, dv, factor)
+                         notes_stat = f'{data[factor].nunique()} Way Welch ANOVA'
+                         notes_APA = extract_apa(res_tab)
+                 else:
+                     res_tab = run_kruskal(data, dv, factor)
+                     notes_stat = f'Non-parametric Kruskal: {data[factor].nunique()} Way ANOVA'
+                     notes_APA = extract_apa(res_tab)
+             elif cfg_pmc == 'parametric':
+                 res_tab = run_anova(data, dv, factor, ss_type=ss_type,
+                                     detailed=True, effsize='np2')
+                 notes_stat = f'{data[factor].nunique()} Way ANOVA'
+                 notes_APA = extract_apa(res_tab)
+         elif 'pa' in cfg_pair and 'np' not in cfg_pair:  # 'paired'
+             res_tab = run_rmanova(data, dv, factor, subject, correction='auto',
+                                   detailed=True, effsize='ng2')
+             notes_stat = f'{data[factor].nunique()} Way Repeated measures ANOVA'
+             notes_APA = extract_apa(res_tab)
+         elif 'mix' in cfg_pair or 'both' in cfg_pair:
+             res_tab = run_mixedanova(data, dv, between, within, subject)
+             notes_stat = ""
+             notes_APA = extract_apa(res_tab)
+     elif cfg_pmc == 'non-parametric':
+         if 'np' in cfg_pair:  # 'unpaired'
+             res_tab = run_kruskal(data, dv, factor)
+             notes_stat = f'Non-parametric Kruskal: {data[factor].nunique()} Way ANOVA'
+             notes_APA = f'H({res_tab.ddof1[0]},n={data.shape[0]})={round(res_tab.H[0], 5)},p={round(res_tab["p-unc"][0], 5)}'
+         elif 'pa' in cfg_pair and 'np' not in cfg_pair:  # 'paired'
+             res_tab = run_friedman(data, dv, factor, subject, method='chisq')
+             notes_stat = f'Non-parametric {data[factor].nunique()} Way Friedman repeated measures ANOVA'
+             notes_APA = f'X^2({res_tab.ddof1[0]})={round(res_tab.Q[0], 5)},p={round(res_tab["p-unc"][0], 5)}'
+
+     # =============================================================================
+     # # Post-hoc tests (if significant):
+     # If the ANOVA indicates significant differences, perform post-hoc tests (e.g.
+     # Tukey's HSD, Bonferroni or Scheffé) to identify which groups differ from each other.
+     # # https://pingouin-stats.org/build/html/generated/pingouin.pairwise_tests.html
+     # =============================================================================
+     go_pmc = True if cfg_pmc == 'parametric' else False
+     # choose the between/within/subject arguments for the pairwise tests
+     # (the original pair of one-line assignments overwrote each other; this keeps the evident intent)
+     if ('mix' in cfg_pair) or ('both' in cfg_pair):
+         go_mix_between, go_mix_within = between, within
+     elif ('pa' in cfg_pair) and ('np' not in cfg_pair):  # paired
+         go_mix_between, go_mix_within = None, factor
+     else:  # unpaired
+         go_mix_between, go_mix_within = factor, None
+     go_subject = subject if go_mix_within is not None else None
+
+     if res_tab['p-unc'][0] <= .05:
+         # pairwise comparisons
+         method_post_hoc = [
+             "bonf",    # 'bonferroni': one-step correction
+             "sidak",   # one-step correction
+             "holm",    # step-down method using Bonferroni adjustments
+             "fdr_bh",  # Benjamini/Hochberg (non-negative)
+             "fdr_by",  # Benjamini/Yekutieli (negative)
+         ]
+         res_posthoc = pd.DataFrame()
+         for met in method_post_hoc:
+             post_curr = pg.pairwise_tests(data=data, dv=dv, between=go_mix_between,
+                                           within=go_mix_within, subject=go_subject,
+                                           parametric=go_pmc, marginal=True, alpha=0.05,
+                                           alternative='two-sided', padjust=met)
+             res_posthoc = pd.concat([res_posthoc, post_curr],
+                                     ignore_index=True)
+     else:
+         res_posthoc = None
+     output['res_posthoc'] = res_posthoc
+     # =============================================================================
+     # # fill the output
+     # =============================================================================
+     pd.set_option('display.max_columns', None)
+     output['stat'] = notes_stat
+     output['APA'] = notes_APA
+     output['pval'] = res_tab['p-unc']
+     output['res_tab'] = res_tab
+     if res_tab.shape[0] == len(notes_APA):
+         output['res_tab']['APA'] = output['APA']  # note APA in the table
+
+     return output
+
+
+ # =============================================================================
+ # # One-way ANOVA
+ # =============================================================================
+ # url = "http://stats191.stanford.edu/data/rehab.csv"
+ # rehab_table = pd.read_table(url, delimiter=",")
+ # rehab_table.to_csv("rehab.table")
+ # fig, ax = plt.subplots(figsize=(8, 6))
+ # fig = rehab_table.boxplot("Time", "Fitness", ax=ax, grid=False)
+ # # fig, ax = plt.subplots(figsize=(8, 6))
+ # # set_pub()
+ # # sns.boxenplot(x="Time", y="Fitness", data=rehab_table)
+
+ # out2 = FuncMultiCmpt(pmc='pmc', pair='unpair',
+ #                      data=rehab_table, dv='Time', factor='Fitness')
+ # # print(out2['res_tab'])
+ # # print(out2['APA'])
+ # out2['res_posthoc']
+ # out2['res_posthoc']['p-unc'][0]
+ # out2['res_posthoc']['p-adjust'][0]
+ # out2['res_posthoc']['p-corr'][0]
+
+
+ # =============================================================================
+ # # Interactions and ANOVA
+ # https://www.statsmodels.org/dev/examples/notebooks/generated/interactions_anova.html
+ # =============================================================================
+ # url = "http://stats191.stanford.edu/data/salary.table"
+ # fh = urlopen(url)
+ # df = pd.read_table(fh)
+ # out1 = FuncMultiCmpt(pmc='pmc', pair='unpaired', data=df,
+ #                      dv='S', factor=['X', 'E', 'M'], group='M')
+ # # # two-way anova
+ # # https://www.statology.org/two-way-anova-python/
+ # # =============================================================================
+ # # df = pd.DataFrame({'water': np.repeat(['daily', 'weekly'], 15),
+ # #                    'sun': np.tile(np.repeat(['low', 'med', 'high'], 5), 2),
+ # #                    'height': [6, 6, 6, 5, 6, 5, 5, 6, 4, 5,
+ # #                               6, 6, 7, 8, 7, 3, 4, 4, 4, 5,
+ # #                               4, 4, 4, 4, 4, 5, 6, 6, 7, 8]})
+ # # out1 = FuncMultiCmpt(pmc='pmc', pair='unpaired', data=df,
+ # #                      dv='height', factor=['water', 'sun'], group='water')
+
+
+ # =============================================================================
+ # # two-way anova
+ # https://www.geeksforgeeks.org/how-to-perform-a-two-way-anova-in-python/
+ # =============================================================================
+ # df1 = pd.DataFrame({'Fertilizer': np.repeat(['daily', 'weekly'], 15),
+ #                     'Watering': np.repeat(['daily', 'weekly'], 15),
+ #                     'height': [14, 16, 15, 15, 16, 13, 12, 11,
+ #                                14, 15, 16, 16, 17, 18, 14, 13,
+ #                                14, 14, 14, 15, 16, 16, 17, 18,
+ #                                14, 13, 14, 14, 14, 15]})
+
+ # df1['subject'] = np.tile(range(0, 15), (1, 2)).T
+ # out1 = FuncMultiCmpt(pmc='pmc', pair='unpaired', data=df1,
+ #                      dv='height', factor=['Fertilizer', 'Watering'], group='Watering')
+ # # print(out1['stat'])
+ # # print(out1['res_tab'])
+
+ # =============================================================================
+ # # welch anova
+ # https://www.geeksforgeeks.org/how-to-perform-welchs-anova-in-python/
+ # =============================================================================
+ # df = pd.DataFrame({'score': [64, 66, 68, 75, 78, 94, 98, 79, 71, 80,
+ #                              91, 92, 93, 90, 97, 94, 82, 88, 95, 96,
+ #                              79, 78, 88, 94, 92, 85, 83, 85, 82, 81],
+ #                    'group': np.repeat(['strat1', 'strat2', 'strat3'],
+ #                                       repeats=10)})
+ # out1 = FuncMultiCmpt(pmc='auto', pair='unpaired', data=df, dv='score', factor='group', group='group')
+ # =============================================================================
+ # # two-way anova
+ # https://www.statology.org/two-way-anova-python/
+ # =============================================================================
+ # df = pd.DataFrame({'water': np.repeat(['daily', 'weekly'], 15),
+ #                    'sun': np.tile(np.repeat(['low', 'med', 'high'], 5), 2),
+ #                    'height': [6, 6, 6, 5, 6, 5, 5, 6, 4, 5,
+ #                               6, 6, 7, 8, 7, 3, 4, 4, 4, 5,
+ #                               4, 4, 4, 4, 4, 5, 6, 6, 7, 8]})
+ # df['subject'] = np.tile(range(0, 15), (1, 2)).T
+ # out1 = FuncMultiCmpt(pmc='pmc', pair='unpaired', data=df,
+ #                      dv='height', factor=['water', 'sun'], subject='subject', group='water')
+ # # print(out1['stat'])
+ # # print(out1['res_tab'])
+
+ # =============================================================================
+ # # 3-way ANOVA
+ # =============================================================================
+ # df = pd.DataFrame({'program': np.repeat([1, 2], 20),
+ #                    'gender': np.tile(np.repeat(['M', 'F'], 10), 2),
+ #                    'division': np.tile(np.repeat([1, 2], 5), 4),
+ #                    'height': [7, 7, 8, 8, 7, 6, 6, 5, 6, 5,
+ #                               5, 5, 4, 5, 4, 3, 3, 4, 3, 3,
+ #                               6, 6, 5, 4, 5, 4, 5, 4, 4, 3,
+ #                               2, 2, 1, 4, 4, 2, 1, 1, 2, 1]})
+ # df['subject'] = np.tile(range(0, 20), (1, 2)).T
+ # out1 = FuncMultiCmpt(pmc='pmc', pair='unpaired', data=df,
+ #                      dv='height', factor=['gender', 'program', 'division'], subject='subject', group='program')
+ # # print(out1['stat'])
+ # # print(out1['res_tab'])
+
+ # =============================================================================
+ # # Repeated Measures ANOVA in Python
+ # =============================================================================
+ # df = pd.DataFrame({'patient': np.repeat([1, 2, 3, 4, 5], 4),
+ #                    'drug': np.tile([1, 2, 3, 4], 5),
+ #                    'response': [30, 28, 16, 34,
+ #                                 14, 18, 10, 22,
+ #                                 24, 20, 18, 30,
+ #                                 38, 34, 20, 44,
+ #                                 26, 28, 14, 30]})
+ # # df['subject'] = np.tile(range(0, 20), (1, 2)).T
+ # out1 = FuncMultiCmpt(pmc='pmc', pair='paired', data=df,
+ #                      dv='response', factor=['drug'], subject='patient', group='drug')
+ # print(out1['stat'])
+ # print(out1['res_tab'])
+ # print(out1['APA'])
+
+ # =============================================================================
+ # # repeated anova
+ # https://www.geeksforgeeks.org/how-to-perform-a-repeated-measures-anova-in-python/
+ # =============================================================================
+ # df = pd.DataFrame({'Cars': np.repeat([1, 2, 3, 4, 5], 4),
+ #                    'Engine Oil': np.tile([1, 2, 3, 4], 5),
+ #                    'Mileage': [36, 38, 30, 29,
+ #                                34, 38, 30, 29,
+ #                                34, 28, 38, 32,
+ #                                38, 34, 20, 44,
+ #                                26, 28, 34, 50]})
+ # out1 = FuncMultiCmpt(pmc='pmc', pair='paired', data=df,
+ #                      dv='Mileage', factor=['Engine Oil'], subject='Cars', group='Cars')
+ # =============================================================================
+ # # two-way repeated anova
+ # =============================================================================
+ # df = pd.read_csv(
+ #     "https://reneshbedre.github.io/assets/posts/anova/plants_leaves_two_within.csv")
+ # df
+ # # df['subject'] = np.tile(range(0, 20), (1, 2)).T
+ # out1 = FuncMultiCmpt(pmc='pmc', pair='paired', data=df,
+ #                      dv='num_leaves', factor=['year', 'time'], subject='plants', group='year')
+ # print(out1['stat'])
+ # print(out1['res_tab'])
+ # print(out1['APA'])
+
+ # =============================================================================
+ # # repeated anova
+ # =============================================================================
+ # df = pd.read_csv('/Users/macjianfeng/Desktop/test.csv')
+ # df.head()
+ # df.loc[df['animal'].str.contains('Sleep'), 'experiment'] = 'sleep'
+ # df.loc[df['animal'].str.contains('Wake'), 'experiment'] = 'wake'
+ # df.loc[df['variable'].str.contains('hypo'), 'region'] = 'hypo'
+ # df.loc[df['variable'].str.contains('cort'), 'region'] = 'cort'
+ # df
+ # for i in range(4):
+ #     match i:
+ #         case 0:
+ #             prot_name = 'A1'
+ #         case 1:
+ #             prot_name = 'A2'
+ #         case 2:
+ #             prot_name = '845'
+ #         case 3:
+ #             prot_name = '831'
+ #     df_tmp = df[df["variable"].str.contains(prot_name)]
+ #     df_tmp['protein'] = prot_name
+ #     df_tmp = df_tmp.reset_index()
+ #     print(df_tmp)
+
+ # out1 = FuncMultiCmpt(pmc='pmc', pair='mix', data=df_tmp,
+ #                      dv='value', between='experiment', within='region', subject='animal', group='experiment')
+ # print(out1['stat'])
+ # print(out1['res_tab'])
+ # # =============================================================================
+ # # One-way ANOVA
+ # df1 = pd.read_csv('/Users/macjianfeng/Desktop/Book2.csv')
+ # df2 = df1.melt()
+ # out1 = FuncMultiCmpt(pmc='npmc', pair='unpaired', data=df2,
+ #                      dv='libido', factor=['brand x', 'brand y', 'brand z'], subject='participant')
+ # print(out1['stat'])
+ # print(out1['res_tab'])
+ # =============================================================================
+
+
+ # =============================================================================
+ # # # One-way ANOVA, new example: https://www.pythonfordatascience.org/anova-python/
+ # =============================================================================
+ # df1 = pd.read_csv(
+ #     "https://raw.githubusercontent.com/researchpy/Data-sets/master/difficile.csv")
+ # df1.drop('person', axis=1, inplace=True)
+ # # recode the value from numeric to string
+ # df1['dose'].replace({1: 'placebo', 2: 'low', 3: 'high'}, inplace=True)
+ # df1.head(10)
+
+ # out3 = FuncMultiCmpt(pmc='pmc', data=df1, dv='libido', factor='dose')
+ # # print(out3['res_tab'])
+ # # # print(out3['res_posthoc'])
+ # # print(out3['APA'])
+
+ # =============================================================================
+ # https://lifewithdata.com/2023/06/08/how-to-perform-a-two-way-anova-in-python/
+ # =============================================================================
+ # data = {
+ #     'Diet': ['A', 'A', 'A', 'A', 'B', 'B', 'B', 'B', 'C', 'C', 'C', 'C'],
+ #     'Workout': ['Low', 'Medium', 'High', 'Low', 'Medium', 'High', 'Low', 'Medium', 'High', 'Low', 'Medium', 'High'],
+ #     'WeightLoss': [3, 4, 5, 3.2, 5, 6, 5.2, 6, 5.5, 4, 5.5, 6.2]
+ # }
+ # df = pd.DataFrame(data)
+ # out4 = FuncMultiCmpt(pmc='pmc', pair='unpaired', data=df, dv='WeightLoss', factor=['Diet', 'Workout'], group='Diet')
+
+ # =============================================================================
+ # # convert a list to a string, and a string to a list
+ # =============================================================================
+ def list2str(x_str):
+     s = ''.join(str(x) for x in x_str)
+     return s
+ def str2list(str_):
+     return list(str_)  # each character becomes one list element
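+ # usage sketch:
+ # list2str(['a', 'b', 'c'])  # -> 'abc'
+ # str2list('abc')            # -> ['a', 'b', 'c']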
+
+ def load_img(fpath):
+     """
+     Load an image from the specified file path.
+
+     Args:
+         fpath (str): The file path to the image.
+
+     Returns:
+         PIL.Image: The loaded image.
+
+     Raises:
+         FileNotFoundError: If the specified file is not found.
+         OSError: If the specified file cannot be opened or is not a valid image file.
+     """
+     from PIL import Image
+
+     try:
+         img = Image.open(fpath)
+         return img
+     except FileNotFoundError:
+         raise FileNotFoundError(f"The file '{fpath}' was not found.")
+     except OSError:
+         raise OSError(f"Unable to open file '{fpath}' or it is not a valid image file.")
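+ # usage sketch (hedged; the path below is a made-up placeholder):
+ # img = load_img("photo.png")
+ # img.size  # (width, height)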
+
+ def apply_filter(img, *args):
+     """
+     Apply the specified filter to the image.
+
+     Args:
+         img (PIL.Image): The input image.
+         *args: The filter name (str) and, optionally, a numeric filter
+                value (e.g. a radius), in any order.
+
+     Returns:
+         PIL.Image: The filtered image.
+     """
+     def correct_filter_name(filter_name):
+         if 'bl' in filter_name.lower() and 'box' not in filter_name.lower():
+             return 'BLUR'
+         elif 'cont' in filter_name.lower():
+             return 'CONTOUR'
+         elif 'det' in filter_name.lower():
+             return 'DETAIL'
+         elif 'edg' in filter_name.lower() and 'mo' not in filter_name.lower() and 'f' not in filter_name.lower():
+             return 'EDGE_ENHANCE'
+         elif 'edg' in filter_name.lower() and 'mo' in filter_name.lower():
+             return 'EDGE_ENHANCE_MORE'
+         elif 'emb' in filter_name.lower():
+             return 'EMBOSS'
+         elif 'edg' in filter_name.lower() and 'f' in filter_name.lower():
+             return 'FIND_EDGES'
+         elif 'sh' in filter_name.lower() and 'mo' not in filter_name.lower():
+             return 'SHARPEN'
+         elif 'sm' in filter_name.lower() and 'mo' not in filter_name.lower():
+             return 'SMOOTH'
+         elif 'sm' in filter_name.lower() and 'mo' in filter_name.lower():
+             return 'SMOOTH_MORE'
+         elif 'min' in filter_name.lower():
+             return 'MIN_FILTER'
+         elif 'max' in filter_name.lower():
+             return 'MAX_FILTER'
+         elif 'mod' in filter_name.lower():
+             return 'MODE_FILTER'
+         elif 'mul' in filter_name.lower():
+             return 'MULTIBAND_FILTER'
+         elif 'gau' in filter_name.lower():
+             return 'GAUSSIAN_BLUR'
+         elif 'box' in filter_name.lower():
+             return 'BOX_BLUR'
+         elif 'med' in filter_name.lower():
+             return 'MEDIAN_FILTER'
+         else:
+             supported_filters = [
+                 "BLUR", "CONTOUR", "DETAIL", "EDGE_ENHANCE", "EDGE_ENHANCE_MORE",
+                 "EMBOSS", "FIND_EDGES", "SHARPEN", "SMOOTH", "SMOOTH_MORE",
+                 "MIN_FILTER", "MAX_FILTER", "MODE_FILTER", "MULTIBAND_FILTER",
+                 "GAUSSIAN_BLUR", "BOX_BLUR", "MEDIAN_FILTER",
+             ]
+             raise ValueError(
+                 f"Unsupported filter: {filter_name}, should be one of: {supported_filters}"
+             )
+
+     filter_value = None  # fix: make sure this exists even when only a filter name is passed
+     for arg in args:
+         if isinstance(arg, str):
+             filter_name = correct_filter_name(arg)
+         else:
+             filter_value = arg
+     filter_name = filter_name.upper()  # ensure the filter name is uppercase
+
+     # supported filters
+     supported_filters = {
+         "BLUR": ImageFilter.BLUR,
+         "CONTOUR": ImageFilter.CONTOUR,
+         "DETAIL": ImageFilter.DETAIL,
+         "EDGE_ENHANCE": ImageFilter.EDGE_ENHANCE,
+         "EDGE_ENHANCE_MORE": ImageFilter.EDGE_ENHANCE_MORE,
+         "EMBOSS": ImageFilter.EMBOSS,
+         "FIND_EDGES": ImageFilter.FIND_EDGES,
+         "SHARPEN": ImageFilter.SHARPEN,
+         "SMOOTH": ImageFilter.SMOOTH,
+         "SMOOTH_MORE": ImageFilter.SMOOTH_MORE,
+         "MIN_FILTER": ImageFilter.MinFilter,
+         "MAX_FILTER": ImageFilter.MaxFilter,
+         "MODE_FILTER": ImageFilter.ModeFilter,
+         "MULTIBAND_FILTER": ImageFilter.MultibandFilter,
+         "GAUSSIAN_BLUR": ImageFilter.GaussianBlur,
+         "BOX_BLUR": ImageFilter.BoxBlur,
+         "MEDIAN_FILTER": ImageFilter.MedianFilter,
+     }
+     # check whether the filter name is supported
+     if filter_name not in supported_filters:
+         raise ValueError(
+             f"Unsupported filter: {filter_name}, should be one of: {[i.lower() for i in supported_filters.keys()]}"
+         )
+
+     # apply the filter
+     if filter_name in ["BOX_BLUR", "GAUSSIAN_BLUR", "MEDIAN_FILTER",
+                        "MIN_FILTER", "MAX_FILTER", "MODE_FILTER"]:
+         if filter_value is not None:
+             radius = filter_value
+         elif filter_name in ["BOX_BLUR", "GAUSSIAN_BLUR"]:
+             radius = 2
+         else:
+             radius = 3  # the rank filters (MIN/MAX/MODE/MEDIAN) expect an odd window size
+         return img.filter(supported_filters[filter_name](radius))
+     elif filter_name in ["MULTIBAND_FILTER"]:
+         bands = filter_value if filter_value is not None else None
+         return img.filter(supported_filters[filter_name](bands))
+     else:
+         if filter_value is not None:
+             print(f"{filter_name} does not take a value; {filter_value} is ignored")
+         return img.filter(supported_filters[filter_name])
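+ # usage sketch (hedged; builds on the placeholder image above): the fuzzy name
+ # matching in correct_filter_name() resolves the strings below
+ # blurred = apply_filter(img, "gaussian", 3)  # -> GAUSSIAN_BLUR with radius 3
+ # edges = apply_filter(img, "find_edges")     # -> FIND_EDGES, no value needed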
+
+
+ def imgsets(
+     img,
+     sets=None,
+     show=True,
+     show_axis=False,
+     size=None,
+     dpi=100,
+     figsize=None,
+     auto=False,
+     filter_kws=None,
+ ):
+     """
+     Apply various enhancements and filters to an image using PIL's ImageEnhance and ImageFilter modules.
+
+     Args:
+         img (PIL.Image): The input image.
+         sets (dict): A dictionary specifying the enhancements, filters, and their parameters.
+         show (bool): Whether to display the enhanced image.
+         show_axis (bool): Whether to display axes on the image plot.
+         size (tuple): The size of the thumbnail, cover, contain, or fit operation.
+         dpi (int): Dots per inch for the displayed image.
+         figsize (tuple): The size of the figure for displaying the image.
+         auto (bool): Whether to automatically enhance the image based on its characteristics.
+
+     Returns:
+         PIL.Image: The enhanced image.
+
+     Supported enhancements and filters:
+         - "sharpness": Adjusts the sharpness of the image. Values > 1 increase sharpness; values < 1 decrease it.
+         - "contrast": Adjusts the contrast of the image. Values > 1 increase contrast; values < 1 decrease it.
+         - "brightness": Adjusts the brightness of the image. Values > 1 increase brightness; values < 1 decrease it.
+         - "color": Adjusts the color saturation of the image. Values > 1 increase saturation; values < 1 decrease it.
+         - "rotate": Rotates the image by the specified angle.
+         - "crop" or "cut": Crops the image. The value should be a tuple specifying the crop box as (left, upper, right, lower).
+         - "size": Resizes the image to the specified dimensions.
+         - "thumbnail": Resizes the image to fit within the given size while preserving aspect ratio.
+         - "cover": Resizes and crops the image to fill the specified size.
+         - "contain": Resizes the image to fit within the specified size, adding borders if necessary.
+         - "fit": Resizes and pads the image to fit within the specified size.
+         - "filter": Applies various filters to the image (e.g., BLUR, CONTOUR, EDGE_ENHANCE).
+     """
+     def confirm_rembg_models(model_name):
+         models_support = [
+             "u2net",
+             "u2netp",
+             "u2net_human_seg",
+             "u2net_cloth_seg",
+             "silueta",
+             "isnet-general-use",
+             "isnet-anime",
+             "sam",
+         ]
+         if model_name in models_support:
+             print(f"model_name: {model_name}")
+             return model_name
+         else:
+             print(f"{model_name} cannot be found; check the name: {models_support}. The default ('isnet-general-use') has been used")
+             return "isnet-general-use"
+
+     def auto_enhance(img):
+         """
+         Automatically enhances the image based on its characteristics.
+
+         Args:
+             img (PIL.Image): The input image.
+
+         Returns:
+             dict: A dictionary containing the optimal enhancement values.
+         """
+         # determine the bit depth based on the image mode
+         if img.mode in ["1", "L", "P", "RGB", "YCbCr", "LAB", "HSV"]:
+             # 8-bit depth per channel
+             bit_depth = 8
+         elif img.mode in ["RGBA", "CMYK"]:
+             # 8-bit depth per channel + alpha (RGBA) or additional channels (CMYK)
+             bit_depth = 8
+         elif img.mode in ["I", "F"]:
+             # 16-bit depth per channel (integer or floating-point)
+             bit_depth = 16
+         else:
+             raise ValueError("Unsupported image mode")
+
+         # calculate the brightness and contrast for each channel
+         num_channels = len(img.getbands())
+         brightness_factors = []
+         contrast_factors = []
+         for channel in range(num_channels):
+             channel_histogram = img.split()[channel].histogram()
+             brightness = sum(i * w for i, w in enumerate(channel_histogram)) / sum(channel_histogram)
+             channel_min, channel_max = img.split()[channel].getextrema()
+             contrast = channel_max - channel_min
+
+             # adjust the calculations based on the bit depth
+             normalization_factor = 2 ** bit_depth - 1  # max value for the given bit depth
+             brightness_factor = 1.0 + (brightness - normalization_factor / 2) / normalization_factor
+             contrast_factor = 1.0 + (contrast - normalization_factor / 2) / normalization_factor
+
+             brightness_factors.append(brightness_factor)
+             contrast_factors.append(contrast_factor)
+
+         # average the brightness and contrast factors across channels
+         avg_brightness_factor = sum(brightness_factors) / num_channels
+         avg_contrast_factor = sum(contrast_factors) / num_channels
+
+         return {"brightness": avg_brightness_factor, "contrast": avg_contrast_factor}
+
+     # load the image if the input is a file path
+     if isinstance(img, str):
+         img = load_img(img)
+
+     img_update = img.copy()
+     if sets is None:
+         sets = {}  # fix: initialize before the auto-enhance step, which updates this dict
+     # auto-enhance the image if requested
+     if auto:
+         auto_params = auto_enhance(img_update)
+         sets.update(auto_params)
+
+     for k, value in sets.items():
+         if "shar" in k.lower():
+             enhancer = ImageEnhance.Sharpness(img_update)
+             img_update = enhancer.enhance(value)
+         elif "col" in k.lower() and 'bg' not in k.lower():
+             enhancer = ImageEnhance.Color(img_update)
+             img_update = enhancer.enhance(value)
+         elif "contr" in k.lower():
+             if value and isinstance(value, (float, int)):
+                 enhancer = ImageEnhance.Contrast(img_update)
+                 img_update = enhancer.enhance(value)
+             else:
+                 print('autocontrasted')
+                 img_update = ImageOps.autocontrast(img_update)
+         elif "bri" in k.lower():
+             enhancer = ImageEnhance.Brightness(img_update)
+             img_update = enhancer.enhance(value)
+         elif "cro" in k.lower() or "cut" in k.lower():
+             img_update = img_update.crop(value)
+         elif "rota" in k.lower():
+             img_update = img_update.rotate(value)
+         elif "si" in k.lower():
+             img_update = img_update.resize(value)
+         elif "thum" in k.lower():
+             img_update.thumbnail(value)
+         elif "cover" in k.lower():
+             img_update = ImageOps.cover(img_update, size=value)
+         elif "contain" in k.lower():
+             img_update = ImageOps.contain(img_update, size=value)
+         elif "fit" in k.lower():
+             img_update = ImageOps.fit(img_update, size=value)
+         elif "pad" in k.lower():
+             img_update = ImageOps.pad(img_update, size=value)
+         elif 'rem' in k.lower() or 'rm' in k.lower() or 'back' in k.lower():
+             if value and isinstance(value, (int, float, list)):
+                 print('example usage: {"rm": [alpha_matting_background_threshold(20), alpha_matting_foreground_threshold(270), alpha_matting_erode_size(11)]}')
+                 print("https://github.com/danielgatis/rembg/blob/main/USAGE.md")
+                 # ### parameters:
+                 # data (Union[bytes, PILImage, np.ndarray]): the input image data.
+                 # alpha_matting (bool, optional): whether to use alpha matting. Defaults to False.
+                 # alpha_matting_foreground_threshold (int, optional): foreground threshold for alpha matting. Defaults to 240.
+                 # alpha_matting_background_threshold (int, optional): background threshold for alpha matting. Defaults to 10.
+                 # alpha_matting_erode_size (int, optional): erosion size for alpha matting. Defaults to 10.
+                 # session (Optional[BaseSession], optional): a session object for the 'u2net' model. Defaults to None.
+                 # only_mask (bool, optional): whether to return only the binary mask. Defaults to False.
+                 # post_process_mask (bool, optional): whether to post-process the mask. Defaults to False.
+                 # bgcolor (Optional[Tuple[int, int, int, int]], optional): background color for the cutout image. Defaults to None.
+                 # ###
+                 if isinstance(value, (int, float)):
+                     value = [value]
+                 if len(value) < 2:
+                     img_update = remove(img_update, alpha_matting=True,
+                                         alpha_matting_background_threshold=value[0])  # fix: pass the number, not the list
+                 elif len(value) == 2:
+                     img_update = remove(img_update, alpha_matting=True,
+                                         alpha_matting_background_threshold=value[0],
+                                         alpha_matting_foreground_threshold=value[1])
+                 elif len(value) == 3:
+                     img_update = remove(img_update, alpha_matting=True,
+                                         alpha_matting_background_threshold=value[0],
+                                         alpha_matting_foreground_threshold=value[1],
+                                         alpha_matting_erode_size=value[2])
+             if isinstance(value, tuple):  # replace the background color
+                 if len(value) == 3:
+                     value += (255,)
+                 img_update = remove(img_update, bgcolor=value)
+             if isinstance(value, str):
+                 if confirm_rembg_models(value):
+                     img_update = remove(img_update, session=new_session(value))
+                 else:
+                     img_update = remove(img_update)
+         elif 'bgcolor' in k.lower():
+             if isinstance(value, list):
+                 value = tuple(value)
+             if isinstance(value, tuple):  # replace the background color
+                 if len(value) == 3:
+                     value += (255,)
+                 img_update = remove(img_update, bgcolor=value)
+
+     if filter_kws:
+         for filter_name, filter_value in filter_kws.items():
+             img_update = apply_filter(img_update, filter_name, filter_value)
+
+     # display the image if requested
+     if show:
+         if figsize is None:
+             plt.figure(dpi=dpi)
+         else:
+             plt.figure(figsize=figsize, dpi=dpi)
+         plt.imshow(img_update)
+         plt.axis("on") if show_axis else plt.axis("off")
+
+     return img_update
+ # # usage:
+ # img = imgsets(
+ #     fpath,
+ #     sets={"rota": -5},
+ #     dpi=200,
+ #     filter_kws={"EMBOSS": 5, "sharpen": 5, "EDGE_ENHANCE_MORE": 10},
+ #     show_axis=True,
+ # )
+
+ def figsets(*args):
+     fig = plt.gcf()
+     fontname = "Arial"
+     sns_themes = ["white", "whitegrid", "dark", "darkgrid", "ticks"]
+     sns_contexts = ["notebook", "talk", "poster"]  # "paper" is now also available
+     scienceplots_styles = ["science", "nature", "scatter", "ieee", "no-latex",
+                            "std-colors", "high-vis", "bright", "dark_background",
+                            "vibrant", "muted", "retro", "grid", "high-contrast",
+                            "light", "cjk-tc-font", "cjk-kr-font",
+                            ]
+
+     def sets_priority(ax, key, value):
+         if ("fo" in key) and (("size" in key) or ("sz" in key)):
+             plt.rcParams.update({"font.size": value})
+         # style
+         if "st" in key.lower() or "th" in key.lower():
+             if isinstance(value, str):
+                 if (value in plt.style.available) or (value in scienceplots_styles):
+                     plt.style.use(value)
+                 elif value in sns_themes:
+                     sns.set_style(value)
+                 elif value in sns_contexts:
+                     sns.set_context(value)
+                 else:
+                     print(
+                         f"\nWarning\n'{value}' is not a plt.style; select one of:\n{plt.style.available + sns_themes + sns_contexts + scienceplots_styles}"
+                     )
+             if isinstance(value, list):
+                 for i in value:
+                     if (i in plt.style.available) or (i in scienceplots_styles):
+                         plt.style.use(i)
+                     elif i in sns_themes:
+                         sns.set_style(i)
+                     elif i in sns_contexts:
+                         sns.set_context(i)
+                     else:
+                         print(
+                             f"\nWarning\n'{i}' is not a plt.style; select one of:\n{plt.style.available + sns_themes + sns_contexts + scienceplots_styles}"
+                         )
+         if "la" in key.lower():
+             if "loc" in key.lower() or "po" in key.lower():
+                 for i in value:
+                     if "l" in i.lower():
+                         ax.yaxis.set_label_position("left")
+                     if "r" in i.lower():
+                         ax.yaxis.set_label_position("right")
+                     if "t" in i.lower():
+                         ax.xaxis.set_label_position("top")
+                     if "b" in i.lower():
+                         ax.xaxis.set_label_position("bottom")
+             if ("x" in key.lower()) and ("tic" not in key.lower() and "tk" not in key.lower()):
+                 ax.set_xlabel(value, fontname=fontname)
+             if ("y" in key.lower()) and ("tic" not in key.lower() and "tk" not in key.lower()):
+                 ax.set_ylabel(value, fontname=fontname)
+             if ("z" in key.lower()) and ("tic" not in key.lower() and "tk" not in key.lower()):
+                 ax.set_zlabel(value, fontname=fontname)
+         # tick location
+         if "tic" in key.lower() or "tk" in key.lower():
+             if ("loc" in key.lower()) or ("po" in key.lower()):
+                 if isinstance(value, (str, list)):
+                     for i in value:
+                         if ("l" in i.lower()) and ("a" not in i.lower()):
+                             ax.yaxis.set_ticks_position("left")
+                         if "r" in i.lower():
+                             ax.yaxis.set_ticks_position("right")
+                         if "t" in i.lower():
+                             ax.xaxis.set_ticks_position("top")
+                         if "b" in i.lower():
+                             ax.xaxis.set_ticks_position("bottom")
+                         if i.lower() in ["a", "both", "all", "al", ":"]:
+                             ax.xaxis.set_ticks_position("both")
+                             ax.yaxis.set_ticks_position("both")
+                         if i.lower() in ["xnone", 'xoff', "none"]:
+                             ax.xaxis.set_ticks_position("none")
+                         if i.lower() in ["ynone", 'yoff', 'none']:
+                             ax.yaxis.set_ticks_position("none")
+             # ticks / labels
+             elif "x" in key.lower():
+                 if "la" not in key.lower():
+                     ax.set_xticks(value)
+                 if "la" in key.lower():
+                     ax.set_xticklabels(value)
+             elif "y" in key.lower():
+                 if "la" not in key.lower():
+                     ax.set_yticks(value)
+                 if "la" in key.lower():
+                     ax.set_yticklabels(value)
+             elif "z" in key.lower():
+                 if "la" not in key.lower():
+                     ax.set_zticks(value)
+                 if "la" in key.lower():
+                     ax.set_zticklabels(value)
+         # rotation
+         if "angle" in key.lower() or ("rot" in key.lower()):
+             if "x" in key.lower():
+                 ax.tick_params(axis="x", rotation=value)
+             if "y" in key.lower():
+                 ax.tick_params(axis="y", rotation=value)
+
+         if "bo" in key:  # was the no-op chained test `"bo" in key in key`
+             # print("'ticks' style is recommended")
+             if isinstance(value, (str, list)):
+                 locations = []
+                 for i in value:
+                     if "l" in i.lower():
+                         locations.append("left")
+                     if "r" in i.lower():
+                         locations.append("right")
+                     if "t" in i.lower():
+                         locations.append("top")
+                     if "b" in i.lower():
+                         locations.append("bottom")
+                     if i.lower() in ["a", "both", "all", "al", ":"]:
+                         [locations.append(x) for x in ["left", "right", "top", "bottom"]]
+                 for i in value:
+                     if i.lower() in "none":
+                         locations = []
+                 # check the spines
+                 for loc, spi in ax.spines.items():
+                     if loc in locations:
+                         spi.set_position(("outward", 0))
+                     else:
+                         spi.set_color("none")  # no spine
+         if key == "tick" or key == "ticks" or key == "ticks_para":
+             if isinstance(value, dict):
+                 for k, val in value.items():
+                     if "wh" in k.lower():
+                         ax.tick_params(which=val)  # {'major', 'minor', 'both'}, default: 'major'
+                     elif "dir" in k.lower():
+                         ax.tick_params(direction=val)  # {'in', 'out', 'inout'}
+                     elif "len" in k.lower():
+                         ax.tick_params(length=val)
+                     elif ("wid" in k.lower()) or ("wd" in k.lower()):
+                         ax.tick_params(width=val)
+                     elif "ax" in k.lower():
+                         ax.tick_params(axis=val)  # {'x', 'y', 'both'}, default: 'both'
+                     elif ("c" in k.lower()) and ("ect" not in k.lower()):
+                         ax.tick_params(colors=val)  # tick color
+                     elif "pad" in k.lower():
+                         ax.tick_params(pad=val)  # float, distance in points between tick and label
+                     elif ("lab" in k.lower()) and ("s" in k.lower()) and ("z" in k.lower()):
+                         ax.tick_params(labelsize=val)  # float, tick-label font size in points
+
+         if "mi" in key.lower() and "tic" in key.lower():
+             if "x" in value.lower() or "x" in key.lower():
+                 ax.xaxis.set_minor_locator(tck.AutoMinorLocator())  # ax.minorticks_on()
+             if "y" in value.lower() or "y" in key.lower():
+                 ax.yaxis.set_minor_locator(tck.AutoMinorLocator())  # ax.minorticks_off()
+             if value.lower() in ["both", ":", "all", "a", "b", "on"]:
+                 ax.minorticks_on()
+         if key == "colormap" or key == "cmap":
+             plt.set_cmap(value)
+
+     def sets_small(ax, key, value):
+         if key == "figsize":
+             pass
+         if key == "xlim":
+             ax.set_xlim(value)
+         if key == "ylim":
+             ax.set_ylim(value)
+         if key == "zlim":
+             ax.set_zlim(value)
+         if "sc" in key.lower():
+             if "x" in key.lower():
+                 ax.set_xscale(value)
+             if "y" in key.lower():
+                 ax.set_yscale(value)
+             if "z" in key.lower():
+                 ax.set_zscale(value)
+         if key == "grid":
+             if isinstance(value, dict):
+                 for k, val in value.items():
+                     if "wh" in k.lower():
+                         ax.grid(which=val)  # {'major', 'minor', 'both'}, default: 'major'
+                     elif "ax" in k.lower():
+                         ax.grid(axis=val)  # {'x', 'y', 'both'}, default: 'both'
+                     elif ("c" in k.lower()) and ("ect" not in k.lower()):
+                         ax.grid(color=val)  # grid-line color
+                     elif "l" in k.lower() and ("s" in k.lower()):
+                         ax.grid(linestyle=val)
+                     elif "l" in k.lower() and ("w" in k.lower()):
+                         ax.grid(linewidth=val)
+                     elif "al" in k.lower():
+                         ax.grid(alpha=val)
+             else:
+                 if value == "on" or value is True:
+                     ax.grid(visible=True)
+                 elif value == "off" or value is False:
+                     ax.grid(visible=False)
+         if "tit" in key.lower():
+             if "sup" in key.lower():
+                 plt.suptitle(value)
+             else:
+                 ax.set_title(value)
+         if key.lower() in ["spine", "adjust", "ad", "sp", "spi", "adj", "spines"]:
+             if isinstance(value, bool) or (value in ["go", "do", "ja", "yes"]):
+                 if value:
+                     adjust_spines(ax)  # default distance=2
+             if isinstance(value, (float, int)):
+                 adjust_spines(ax=ax, distance=value)
+         if "c" in key.lower() and ("sp" in key.lower() or "ax" in key.lower()):
+             for loc, spi in ax.spines.items():
+                 spi.set_color(value)
+
+     for arg in args:
+         if isinstance(arg, matplotlib.axes.Axes):
+             ax = arg
+             args = args[1:]
+     if 'ax' not in locals():
+         ax = plt.gca()
+
+     for arg in args:
+         if isinstance(arg, dict):
+             for k, val in arg.items():
+                 sets_priority(ax, k, val)
+             for k, val in arg.items():
+                 sets_small(ax, k, val)
+         else:
+             # MATLAB-style flat key/value pairs
+             Nargin = len(args) // 2
+             ax.labelFontSizeMultiplier = 1
+             ax.titleFontSizeMultiplier = 1
+             ax.set_facecolor("w")
+
+             for ip in range(Nargin):
+                 key = args[ip * 2].lower()
+                 value = args[ip * 2 + 1]
+                 sets_priority(ax, key, value)
+             for ip in range(Nargin):
+                 key = args[ip * 2].lower()
+                 value = args[ip * 2 + 1]
+                 sets_small(ax, key, value)
+     colors = [
+         "#474747",
+         "#FF2C00",
+         "#0C5DA5",
+         "#845B97",
+         "#58BBCC",
+         "#FF9500",
+         "#D57DBE",
+     ]
+     matplotlib.rcParams["axes.prop_cycle"] = cycler(color=colors)
+     if len(fig.get_axes()) > 1:
+         plt.tight_layout()
+         plt.gcf().align_labels()
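+ # usage sketch (hedged): dict-style configuration; the fuzzy keys below resolve
+ # to the xlabel/ylabel and minor-tick branches in sets_priority() above
+ # plt.plot([1, 2, 3], [2, 1, 3])
+ # figsets(plt.gca(), {"xlabel": "time (s)", "ylabel": "amplitude", "minortick": "both"})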
+
+ def read_mplstyle(style_file):
+     # load the style file
+     plt.style.use(style_file)
+
+     # get the current style properties as a plain dictionary
+     style_dict = dict(plt.rcParams)
+
+     # print the style dictionary
+     for i, j in style_dict.items():
+         print(f"\n{i}::::{j}")
+     return style_dict
+ # # example usage:
+ # style_file = "/ std-colors.mplstyle"
+ # style_dict = read_mplstyle(style_file)
+
+
+
+ # search for and find the directory of a library installed locally
+ def dir_lib(lib_oi):
+     import site
+
+     # get the site-packages directory
+     f = listdir(site.getsitepackages()[0], "folder")
+
+     # find the library's directory within site-packages
+     dir_list = []
+     for directory in f.fpath:
+         if lib_oi in directory.lower():
+             dir_list.append(directory)
+
+     if dir_list != []:
+         print(f"{lib_oi} directory:", dir_list)
+     else:
+         print(f"Cannot find {lib_oi} in the site-packages directory.")
+     return dir_list
+ # example usage:
+ # dir_lib("seaborn")
+
+ # set up the color list: give a number, or a colormap name
+ def get_color(n=1, cmap="auto", how="start"):
+     # extract a colormap as a list of hex colors
+     def cmap2hex(cmap_name):
+         cmap_ = matplotlib.pyplot.get_cmap(cmap_name)
+         colors = [cmap_(i) for i in range(cmap_.N)]
+         return [matplotlib.colors.rgb2hex(color) for color in colors]
+     # usage: clist = cmap2hex("viridis")
+
+     # cycle through the color list until n colors are collected (default n=10)
+     def cycle2list(colorlist, n=10):
+         cycler_ = cycler(tmp=colorlist)
+         clist = []
+         for i, c_ in zip(range(n), cycler_()):
+             clist.append(c_["tmp"])
+         return clist
+
+     def hue2rgb(hex_colors):
+         def hex_to_rgb(hex_color):
+             """Converts a hexadecimal color code to RGB values."""
+             hex_color = hex_color.lstrip("#")  # fix: strip the individual color, not the outer argument
+             return tuple(int(hex_color[i: i + 2], 16) / 255.0 for i in (0, 2, 4))
+         if isinstance(hex_colors, str):
+             return hex_to_rgb(hex_colors)
+         elif isinstance(hex_colors, list):
+             # converts a list of hexadecimal color codes to a list of RGB values
+             return [hex_to_rgb(hex_color) for hex_color in hex_colors]
+
+     if "aut" in cmap:
+         colorlist = [
+             "#474747",
+             "#FF2C00",
+             "#0C5DA5",
+             "#845B97",
+             "#58BBCC",
+             "#FF9500",
+             "#D57DBE",
+         ]
+     else:
+         colorlist = cmap2hex(cmap)
+     if "st" in how.lower() or "be" in how.lower():
+         # cycle it
+         clist = cycle2list(colorlist, n=n)
+     if "l" in how.lower() or "p" in how.lower():
+         clist = [colorlist[i]
+                  for i in [int(i) for i in np.linspace(0, len(colorlist) - 1, n)]]
+
+     return clist  # a color list
+ # example usage: clist = get_color(4, cmap="auto", how="start")  # get_color(4, cmap="hot", how="linspace")
+
+ """
+ # n = 7
+ # clist = get_color(n, cmap="auto", how="linspace")  # get_color(100)
+ # plt.figure(figsize=[8, 5], dpi=100)
+ # x = np.linspace(0, 2 * np.pi, 50) * 100
+ # y = np.sin(x)
+ # for i in range(1, n + 1):
+ #     plt.plot(x, y + i, c=clist[i - 1], lw=5, label=str(i))
+ # plt.legend()
+ # plt.ylim(-2, 20)
+ # figsets(plt.gca(), {"style": "whitegrid"}) """
+
+
+ class FileInfo:
+     def __init__(self, size, creation_time, ctime, mod_time, mtime, parent_dir, fname, kind, extra_info=None):
+         self.size = size
+         self.creation_time = creation_time
+         self.ctime = ctime
+         self.mod_time = mod_time
+         self.mtime = mtime
+         self.parent_dir = parent_dir
+         self.fname = fname
+         self.kind = kind
+         if extra_info:
+             for key, value in extra_info.items():
+                 setattr(self, key, value)
+         print("to show the result: 'finfo(fpath).show()'")
+
+     def __repr__(self):
+         return (f"FileInfo(size={self.size} MB, creation_time='{self.creation_time}', "
+                 f"ctime='{self.ctime}', mod_time='{self.mod_time}', mtime='{self.mtime}', "
+                 f"parent_dir='{self.parent_dir}', fname='{self.fname}', kind='{self.kind}')")
+
+     def __str__(self):
+         return (f"FileInfo:\n"
+                 f"  Size: {self.size} MB\n"
+                 f"  Creation Time: {self.creation_time}\n"
+                 f"  CTime: {self.ctime}\n"
+                 f"  Modification Time: {self.mod_time}\n"
+                 f"  MTime: {self.mtime}\n"
+                 f"  Parent Directory: {self.parent_dir}\n"
+                 f"  File Name: {self.fname}\n"
+                 f"  Kind: {self.kind}")
+
+     def show(self):
+         # convert the object to a dictionary, including any extra_info attributes
+         return {
+             "size": self.size,
+             "creation_time": self.creation_time,
+             "ctime": self.ctime,
+             "mod_time": self.mod_time,
+             "mtime": self.mtime,
+             "parent_dir": self.parent_dir,
+             "fname": self.fname,
+             "kind": self.kind,
+             **{key: getattr(self, key) for key in vars(self)
+                if key not in ["size", "creation_time", "ctime", "mod_time", "mtime", "parent_dir", "fname", "kind"]}
+         }
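+ # usage sketch (hedged): FileInfo is normally built by a finfo(fpath) helper
+ # (referenced in __init__ above but defined elsewhere in ips.py); constructing
+ # one by hand with made-up values works the same way:
+ # fi = FileInfo(size=1.2, creation_time='2024-01-01 10:00', ctime='...',
+ #               mod_time='2024-01-02 09:30', mtime='...',
+ #               parent_dir='/tmp', fname='demo.txt', kind='.txt')
+ # print(fi)   # pretty, multi-line form via __str__
+ # fi.show()   # plain dict of all attributes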