py2ls 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- py2ls/.git/COMMIT_EDITMSG +1 -0
- py2ls/.git/FETCH_HEAD +1 -0
- py2ls/.git/HEAD +1 -0
- py2ls/.git/config +15 -0
- py2ls/.git/description +1 -0
- py2ls/.git/hooks/applypatch-msg.sample +15 -0
- py2ls/.git/hooks/commit-msg.sample +24 -0
- py2ls/.git/hooks/fsmonitor-watchman.sample +174 -0
- py2ls/.git/hooks/post-update.sample +8 -0
- py2ls/.git/hooks/pre-applypatch.sample +14 -0
- py2ls/.git/hooks/pre-commit.sample +49 -0
- py2ls/.git/hooks/pre-merge-commit.sample +13 -0
- py2ls/.git/hooks/pre-push.sample +53 -0
- py2ls/.git/hooks/pre-rebase.sample +169 -0
- py2ls/.git/hooks/pre-receive.sample +24 -0
- py2ls/.git/hooks/prepare-commit-msg.sample +42 -0
- py2ls/.git/hooks/push-to-checkout.sample +78 -0
- py2ls/.git/hooks/update.sample +128 -0
- py2ls/.git/index +0 -0
- py2ls/.git/info/exclude +6 -0
- py2ls/.git/logs/HEAD +1 -0
- py2ls/.git/logs/refs/heads/main +1 -0
- py2ls/.git/logs/refs/remotes/origin/HEAD +1 -0
- py2ls/.git/logs/refs/remotes/origin/main +1 -0
- py2ls/.git/objects/25/b796accd261b9135fd32a2c00785f68edf6c46 +0 -0
- py2ls/.git/objects/36/b4a1b7403abc6c360f8fe2cb656ab945254971 +0 -0
- py2ls/.git/objects/3f/d6561300938afbb3d11976cf9c8f29549280d9 +0 -0
- py2ls/.git/objects/58/20a729045d4dc7e37ccaf8aa8eec126850afe2 +0 -0
- py2ls/.git/objects/60/f273eb1c412d916fa3f11318a7da7a9911b52a +0 -0
- py2ls/.git/objects/61/570cec8c061abe74121f27f5face6c69b98f99 +0 -0
- py2ls/.git/objects/69/13c452ca319f7cbf6a0836dc10a5bb033c84e4 +0 -0
- py2ls/.git/objects/78/3d4167bc95c9d2175e0df03ef1c1c880ba75ab +0 -0
- py2ls/.git/objects/79/7ae089b2212a937840e215276005ce76881307 +0 -0
- py2ls/.git/objects/7e/5956c806b5edc344d46dab599dec337891ba1f +1 -0
- py2ls/.git/objects/8e/55a7d2b96184030211f20c9b9af201eefcac82 +0 -0
- py2ls/.git/objects/91/c69ad88fe0ba94aa7859fb5f7edac5e6f1a3f7 +0 -0
- py2ls/.git/objects/b0/56be4be89ba6b76949dd641df45bb7036050c8 +0 -0
- py2ls/.git/objects/b0/9cd7856d58590578ee1a4f3ad45d1310a97f87 +0 -0
- py2ls/.git/objects/d9/005f2cc7fc4e65f14ed5518276007c08cf2fd0 +0 -0
- py2ls/.git/objects/df/e0770424b2a19faf507a501ebfc23be8f54e7b +0 -0
- py2ls/.git/objects/e9/391ffe371f1cc43b42ef09b705d9c767c2e14f +0 -0
- py2ls/.git/objects/fc/292e793ecfd42240ac43be407023bd731fa9e7 +0 -0
- py2ls/.git/refs/heads/main +1 -0
- py2ls/.git/refs/remotes/origin/HEAD +1 -0
- py2ls/.git/refs/remotes/origin/main +1 -0
- py2ls/.gitattributes +2 -0
- py2ls/.gitignore +152 -0
- py2ls/LICENSE +201 -0
- py2ls/README.md +409 -0
- py2ls/__init__.py +17 -0
- py2ls/brain_atlas.py +145 -0
- py2ls/correlators.py +475 -0
- py2ls/dbhandler.py +97 -0
- py2ls/freqanalysis.py +800 -0
- py2ls/internet_finder.py +405 -0
- py2ls/ips.py +2844 -0
- py2ls/netfinder.py +780 -0
- py2ls/sleep_events_detectors.py +1350 -0
- py2ls/translator.py +686 -0
- py2ls/version.py +1 -0
- py2ls/wb_detector.py +169 -0
- py2ls-0.1.0.dist-info/METADATA +12 -0
- py2ls-0.1.0.dist-info/RECORD +64 -0
- py2ls-0.1.0.dist-info/WHEEL +4 -0
py2ls/ips.py
ADDED
@@ -0,0 +1,2844 @@
|
|
1
|
+
from scipy.ndimage import convolve1d
|
2
|
+
import numpy as np
|
3
|
+
import pandas as pd
|
4
|
+
import json
|
5
|
+
import matplotlib.pyplot as plt
|
6
|
+
# from functools import partial
|
7
|
+
import seaborn as sns
|
8
|
+
import scienceplots
|
9
|
+
import matplotlib
|
10
|
+
import sys
|
11
|
+
import os
|
12
|
+
from scipy.signal import savgol_filter
|
13
|
+
import pingouin as pg
|
14
|
+
from scipy import stats
|
15
|
+
import matplotlib.ticker as tck
|
16
|
+
from cycler import cycler
|
17
|
+
import re
|
18
|
+
from PIL import ImageEnhance, ImageOps,ImageFilter
|
19
|
+
from rembg import remove,new_session
|
20
|
+
from mpl_toolkits.mplot3d import Axes3D
|
21
|
+
import docx
|
22
|
+
import pandas as pd
|
23
|
+
from fpdf import FPDF
|
24
|
+
import yaml
|
25
|
+
from lxml import etree
|
26
|
+
from docx import Document
|
27
|
+
from PyPDF2 import PdfReader
|
28
|
+
from pdf2image import convert_from_path, pdfinfo_from_path
|
29
|
+
from nltk.tokenize import sent_tokenize,word_tokenize
|
30
|
+
import nltk # nltk.download("punkt")
|
31
|
+
from docx2pdf import convert
|
32
|
+
import img2pdf as image2pdf
|
33
|
+
import pprint
|
34
|
+
from itertools import pairwise
|
35
|
+
import time
|
36
|
+
from box import Box, BoxList
|
37
|
+
from numerizer import numerize
|
38
|
+
from tqdm import tqdm
|
39
|
+
|
40
|
+
def str2num(s, *args):
    """
    Convert a string to a number (int or float).

    Falls back from int() to float() to word-number parsing via numerize()
    (e.g. "seven million" -> 7000000), and finally to extracting a single
    numeric segment out of mixed text (e.g. "123.5 sec" -> 123.5).

    Extra positional args (order-independent):
        str  -> thousands delimiter; the result is returned as a formatted
                string with that delimiter instead of a number.
        int  -> number of digits to round to.

    Returns:
        int | float | str: The parsed number, or a delimiter-formatted string.

    Raises:
        ValueError: If no single numeric value can be extracted from `s`.

    Examples:
        str2num("123")                      -> 123
        str2num("123.456", 2)               -> 123.46
        str2num("12345.6789", ",")          -> "12,345.6789"
    """
    delimiter = None
    round_digits = None
    for arg in args:
        if isinstance(arg, str):
            delimiter = arg
        elif isinstance(arg, int):
            round_digits = arg

    try:
        num = int(s)
    except ValueError:
        try:
            num = float(s)
        except ValueError:
            try:
                # numerize handles spelled-out numbers ("one hundred and two")
                numerized = numerize(s)
                num = int(numerized) if '.' not in numerized else float(numerized)
            except Exception as e:
                # Last resort: pull number-like segments out of mixed text and
                # accept the input only if exactly one of them parses.
                try:
                    number_segments = ssplit(s, by='number_strings')
                    nums = []
                    for segment in number_segments:
                        try:
                            nums.append(str2num(segment))
                        except ValueError:
                            continue
                    if len(nums) == 1:
                        num = nums[0]
                    else:
                        raise ValueError("Multiple number segments found, cannot determine single numeric value")
                except Exception as e:
                    raise ValueError(f"Cannot convert {s} to a number: {e}")

    if round_digits is not None:
        if isinstance(num, float):
            # The tiny epsilon nudges x.xx5 float values upward so results
            # match the documented half-up examples despite binary float
            # representation (e.g. 111113.34555 -> 111113.346 at 3 digits).
            num = round(num + 1e-11, round_digits)
        # BUG FIX: previously the epsilon was added to ints as well, so e.g.
        # str2num("123", 2) silently became the float 123.0; ints stay exact.

    if delimiter is not None:
        # Thousands-group the number, then swap the commas for the delimiter.
        return f"{num:,}".replace(",", delimiter)

    return num
def num2str(num, *args):
    """
    Convert a number to a string, optionally rounded and/or thousands-grouped.

    Extra positional args (order-independent):
        str  -> thousands delimiter to insert in the integer part.
        int  -> number of digits to round to first.

    Returns:
        str: The formatted number.

    Examples:
        num2str(123)                 -> "123"
        num2str(123.456, 2)          -> "123.46"
        num2str(12345.6789, ",")     -> "12,345.6789"
        num2str(12345.6789, " ")     -> "12 345.6789"
        num2str(7000.0, ",")         -> "7,000.0"
    """
    delimiter = None
    round_digits = None
    for arg in args:
        if isinstance(arg, str):
            delimiter = arg
        elif isinstance(arg, int):
            round_digits = arg

    # Round first so the string reflects the requested precision.
    if round_digits is not None:
        num = round(num, round_digits)

    num_str = f"{num}"

    if delimiter is not None:
        # Group the integer part with commas, then swap in the delimiter.
        # BUG FIX: the delimiter value was previously ignored — any non-None
        # delimiter always produced comma grouping.
        integer_part, _, decimal_part = num_str.partition(".")
        integer_part = "{:,}".format(int(integer_part)).replace(",", delimiter)
        num_str = integer_part + "." + decimal_part if decimal_part else integer_part

    return num_str
def sreplace(text, dict_replace=None, robust=True):
    """
    Replace specified substrings in the input text with provided replacements.

    Args:
        text (str): The input text where replacements will be made.
        dict_replace (dict, optional): Mapping of substring -> replacement.
            The caller's dict is never modified.
        robust (bool, optional): If True (default), a built-in set of cleanup
            replacements (control characters, stray backslashes, non-breaking
            spaces, double spaces) is applied in addition to `dict_replace`,
            taking precedence over it on key collisions.

    Returns:
        str: The text after all replacements have been made.

    Usage:
        sreplace(text, dict_replace=dict(old_str='new_str'), robust=True)
    """
    # Built-in cleanup: strip control/escape characters and normalise spaces.
    default_replacements = {
        "\a": "",
        "\b": "",
        "\f": "",
        "\n": "",
        "\r": "",
        "\t": "",
        "\v": "",
        "\\": "",
        # "\?": "",
        "�": "",          # replacement character from bad decoding
        "\\x": "",
        "\\x hhhh": "",
        "\\ ooo": "",
        "\xa0": "",        # non-breaking space
        "  ": " ",         # NOTE(review): collapses double spaces — confirm against callers
    }

    # BUG FIX: work on a copy so the caller's dict is not mutated when the
    # robust defaults are merged in (previously `dict_replace.update(...)`
    # wrote the defaults back into the caller's dictionary).
    replacements = dict(dict_replace) if dict_replace else {}
    if robust:
        # update() after the copy keeps the original precedence: defaults win.
        replacements.update(default_replacements)

    # Apply each replacement sequentially (later entries see earlier results).
    for k, v in replacements.items():
        text = text.replace(k, v)
    return text
def paper_size(paper_type_str='a4'):
    """
    Return [width_mm, height_mm] for a named paper size.

    The name is matched as a case-insensitive substring of the known size
    names (a0–a7, b0–b6, letter, legal, business card, photo china passport,
    passport single, visa, sim); when several names match, the last match
    wins. Unknown names fall back to A4.

    Args:
        paper_type_str (str): Paper-size name or fragment, e.g. 'a4', 'letter'.

    Returns:
        list[float]: [width, height] in millimetres.
    """
    df = pd.DataFrame({'a0': [841, 1189], 'a1': [594, 841], 'a2': [420, 594], 'a3': [297, 420],
                       'a4': [210, 297], 'a5': [148, 210], 'a6': [105, 148], 'a7': [74, 105],
                       'b0': [1028, 1456], 'b1': [707, 1000], 'b2': [514, 728], 'b3': [364, 514],
                       'b4': [257, 364], 'b5': [182, 257], 'b6': [128, 182],
                       'letter': [215.9, 279.4], 'legal': [215.9, 355.6], 'business card': [85.6, 53.98],
                       'photo china passport': [33, 48], 'passport single': [125, 88],
                       'visa': [105, 74], 'sim': [25, 15]})
    # BUG FIX: paper_type was previously unbound when nothing matched, so the
    # intended A4 fallback raised NameError instead.
    paper_type = None
    for name in df.columns:
        if paper_type_str in name.lower():
            paper_type = name
    if not paper_type:
        paper_type = 'a4'  # default
    return df[paper_type].tolist()
def docx2pdf(dir_docx, dir_pdf=None):
    """
    Convert a .docx file (or a folder of them) to PDF via docx2pdf.convert.

    Args:
        dir_docx (str): Path to the source .docx file or folder.
        dir_pdf (str, optional): Output path; when falsy, docx2pdf chooses
            the destination itself (alongside the source).
    """
    call_args = (dir_docx, dir_pdf) if dir_pdf else (dir_docx,)
    convert(*call_args)
def img2pdf(dir_img, kind="jpeg", page=None, dir_save=None, page_size="a4", dpi=300):
    """
    Combine image file(s) into a single PDF using the img2pdf library.

    Args:
        dir_img (str): A folder of images, or the path of a single image file.
        kind (str): Image suffix to collect from a folder, with or without the
            leading dot (e.g. "jpeg", ".tif").
        page: For folder input, which entries of the directory listing to use
            (int, list, range, or ndarray of indices); None means all files.
        dir_save (str, optional): Output PDF path. Defaults to the input path
            with the image suffix replaced by ".pdf"; for folders a
            "#merged_img2pdf.pdf" name is appended.
        page_size: Paper name (via paper_size()) or (width_mm, height_mm)
            tuple; falsy selects a fixed-DPI layout using `dpi` instead.
        dpi (int): DPI for the fixed-DPI layout when page_size is falsy.

    Usage:
        img2pdf(dir_img, kind='tif', page=range(3, 7, 2))
    """
    def mm_to_point(size):
        # (width, height) in millimetres -> PDF points
        return (image2pdf.mm_to_pt(size[0]), image2pdf.mm_to_pt(size[1]))

    def set_dpi(x):
        dpix = dpiy = x
        return image2pdf.get_fixed_dpi_layout_fun((dpix, dpiy))

    if not kind.startswith("."):
        kind = "." + kind
    if dir_save is None:
        dir_save = dir_img.replace(kind, '.pdf')

    imgs = []
    if os.path.isdir(dir_img):
        if not dir_save.endswith(".pdf"):
            dir_save += "#merged_img2pdf.pdf"
        # listdir is a sibling helper defined elsewhere in this module;
        # assumes it returns a DataFrame-like with an 'fpath' column — TODO confirm
        if page is None:
            select_range = listdir(dir_img, kind=kind).fpath
        else:
            if not isinstance(page, (np.ndarray, list, range)):
                page = [page]
            select_range = listdir(dir_img, kind=kind)['fpath'][page]
        for fname in select_range:
            if not fname.endswith(kind):
                continue
            path = os.path.join(dir_img, fname)
            if os.path.isdir(path):
                continue
            imgs.append(path)
    else:
        # BUG FIX: a single-file input was previously collected as
        # [os.path.isdir(dir_img), dir_img], injecting a bool into the image
        # list handed to image2pdf.convert; it is now just the file itself.
        imgs = [dir_img]

    if page_size:
        if isinstance(page_size, str):
            pdf_in_mm = mm_to_point(paper_size(page_size))
        else:
            print("default: page_size = (210,297)")
            pdf_in_mm = mm_to_point(page_size)
            print(f"page size was set to {page_size}")
        p_size = image2pdf.get_layout_fun(pdf_in_mm)
    else:
        p_size = set_dpi(dpi)

    with open(dir_save, "wb") as f:
        f.write(image2pdf.convert(imgs, layout_fun=p_size))
def ssplit(text, by="space", verbose=False, **kws):
    """
    Split `text` according to the strategy named by `by`.

    Args:
        text (str | list[str]): Input to split. A list is split element-wise
            and the per-element results are flattened into one list.
        by: Strategy selector — either a list of delimiter strings, or a
            string matched by *substring* against the strategy names below.
            Any unrecognised string falls through to a plain text.split(by).
        verbose (bool): Print which strategy was chosen.
        **kws: Extra arguments forwarded to strategies that need them
            (e.g. length=, pattern=, n=).

    Returns:
        list: The split fragments.

    NOTE(review): strategy selection is substring-based and order-sensitive
    (e.g. any `by` containing "sp" hits the whitespace branch first) —
    confirm intended precedence before adding new strategy names.
    """
    if isinstance(text, list):
        # Recurse over each element, then flatten the per-element results.
        nested_list= [ssplit(i,by=by,verbose=verbose,**kws) for i in text]
        flat_list = [item for sublist in nested_list for item in sublist]
        return flat_list

    # --- strategy helpers -------------------------------------------------
    def split_by_word_length(text, length):
        # whitespace-separated words of exactly `length` characters
        return [word for word in text.split() if len(word) == length]

    def split_by_multiple_delimiters(text, delimiters):
        # split on any of several literal delimiters
        regex_pattern = "|".join(map(re.escape, delimiters))
        return re.split(regex_pattern, text)

    def split_by_camel_case(text):
        # "CamelCaseText" -> ["Camel", "Case", "Text"]
        return re.findall(r"[A-Z](?:[a-z]+|[A-Z]*(?=[A-Z]|$))", text)

    def split_at_upper_fl_lower(text):
        # uppercase letter followed by lowercase run (or an all-caps run)
        return re.findall(r"[A-Z](?:[a-z]+|[A-Z]+(?=[A-Z]|$))", text)

    def split_at_lower_fl_upper(text):
        # split at each lower->upper transition, keeping all characters
        split_text = re.split(r"(?<=[a-z])(?=[A-Z])", text)
        return split_text

    def split_at_upper(text):
        # split before every uppercase letter; drop empty leading fragment
        split_text = re.split(r"(?=[A-Z])", text)
        split_text = [part for part in split_text if part]
        return split_text

    def split_by_regex_lookahead(text, pattern):
        # split *after* each match of `pattern`
        return re.split(f'(?<={pattern})', text)

    def split_by_regex_end(text, pattern):
        # split *before* each match of `pattern`
        return re.split(f'(?={pattern})', text)

    # def split_by_sentence_endings(text):
    #     return re.split(r"(?<=[.!?])", text)

    def split_non_ascii(text):
        # split on runs of non-ASCII characters, keeping the runs as items
        # return re.split(r"([^\x00-\x7F\w\s,.!?:\"'()\-]+)", text)
        # return re.split(r"[^\x00-\x7F]+", text)
        return re.split(r"([^\x00-\x7F]+)", text)

    def split_by_consecutive_non_alphanumeric(text):
        return re.split(r"\W+", text)

    def split_by_fixed_length_chunks(text, length):
        return [text[i : i + length] for i in range(0, len(text), length)]

    def split_by_sent_num(text,n=10):
        # split text into sentences, then group every n sentences together
        text_split_by_sent=sent_tokenize(text)
        cut_loc_array=np.arange(0,len(text_split_by_sent),n)
        if cut_loc_array[-1]!=len(text_split_by_sent):
            # make sure the final (shorter) group is not dropped
            cut_loc=np.append(cut_loc_array,len(text_split_by_sent))
        else:
            cut_loc = cut_loc_array
        # get text in sections (e.g., every 10 sentences)
        text_section=[]
        for i,j in pairwise(cut_loc):
            text_section.append(text_split_by_sent[i:j])
        return text_section

    def split_general(text, by, verbose=False, ignore_case=False):
        # fallback: treat `by` as a literal separator
        if ignore_case:
            if verbose:
                print(f"used {by} to split, ignore_case=True")
            pattern = re.compile(re.escape(by), re.IGNORECASE)
            split_text = pattern.split(text)
            return split_text
        else:
            if verbose:
                print(f"used {by} to split, ignore_case=False")
            return text.split(by)

    def reg_split(text, pattern):
        # split by a caller-supplied regular expression
        return re.split(pattern, text)

    # --- strategy dispatch (substring match, first hit wins) ---------------
    if "sp" in by or "white" in by:
        if verbose:
            print(f"splited by space")
        return text.split()
    elif "word" in by and "len" in by:
        if verbose:
            print(f"split_by_word_length(text, length)")
        return split_by_word_length(text, **kws) # split_by_word_length(text, length)
    elif "," in by:
        if verbose:
            print(f"splited by ','")
        return text.split(",")
    elif isinstance(by, list):
        if verbose:
            print(f"split_by_multiple_delimiters: ['|','&']")
        return split_by_multiple_delimiters(text, by)
    elif all([("digi" in by or "num" in by),not 'sent' in by, not 'str' in by]):
        if verbose:
            print(f"splited by digital (numbers)")
        return re.split(r"(\d+)", text)
    elif all([("digi" in by or "num" in by), 'str' in by]):
        if verbose:
            print(f"Splitting by (number strings)")
        # matches digits and spelled-out numbers ("twenty-one", "7 million"),
        # kept as capture groups so the numbers survive the split
        pattern = re.compile(r'\b((?:one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty|thirty|forty|fifty|sixty|seventy|eighty|ninety|hundred|thousand|million|billion|trillion|and|[\d,]+(?:\.\d+)?)(?:[-\s]?(?:one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty|thirty|forty|fifty|sixty|seventy|eighty|ninety|hundred|thousand|million|billion|trillion|and|[\d,]+(?:\.\d+)?))*)\b', re.IGNORECASE)
        return re.split(pattern, text)
    elif "pun" in by:
        if verbose:
            print(f"splited by 标点('.!?;')")
        return re.split(r"[.!?;]", text)
    elif "\n" in by or "li" in by:
        if verbose:
            print(f"splited by lines('\n')")
        return text.splitlines()
    elif "cam" in by:
        if verbose:
            print(f"splited by camel_case")
        return split_by_camel_case(text)
    elif "word" in by:
        if verbose:
            print(f"splited by word")
        return word_tokenize(text)
    elif "sen" in by and not 'num' in by:
        if verbose:
            print(f"splited by sentence")
        return sent_tokenize(text)
    elif 'sen' in by and 'num' in by:
        return split_by_sent_num(text,**kws)
    elif "cha" in by:
        if verbose:
            print(f"splited by chracters")
        return list(text)
    elif ("up" in by or "cap" in by) and "l" not in by:
        if verbose:
            print(f"splited by upper case")
        return split_at_upper(text)
    elif "u" in by and "l" in by:
        # the order of 'u' and 'l' in `by` selects the transition direction
        if by.find("u") < by.find("l"):
            if verbose:
                print(f"splited by upper followed by lower case")
            return split_at_upper_fl_lower(text)
        else:
            if verbose:
                print(f"splited by lower followed by upper case")
            return split_at_lower_fl_upper(text)
    elif "start" in by or "head" in by:
        if verbose:
            print(f"splited by lookahead")
        return split_by_regex_lookahead(text, **kws)
    elif "end" in by or "tail" in by:
        if verbose:
            print(f"splited by endings")
        return split_by_regex_end(text, **kws)
    elif "other" in by or "non_alp" in by:
        if verbose:
            print(f"splited by non_alphanumeric")
        return split_by_consecutive_non_alphanumeric(text)
    elif "len" in by:
        if verbose:
            print(f"splited by fixed length")
        return split_by_fixed_length_chunks(text, **kws)
    elif "re" in by or "cus" in by or "cos" in by:
        if verbose:
            print(f"splited by customed, re; => {by}")
        return reg_split(text, **kws)
    elif 'lang' in by or 'eng' in by:
        return split_non_ascii(text)
    else:
        return split_general(text, by, verbose=verbose, **kws)
def pdf2img(dir_pdf, dir_save=None, page=None, kind="png", verbose=True, **kws):
    """
    Render PDF pages to image files.

    Args:
        dir_pdf (str): Path to the source PDF.
        dir_save (str, optional): Output folder; defaults to a sibling folder
            named "<pdfname>_img" created via the module helper `newfolder`.
        page: None -> all pages; int -> that single page; list/tuple
            [first, last] -> that inclusive 1-based range (a 1-element list
            means that single page).
        kind (str): Image format/extension, e.g. "png".
        verbose (bool): Print PDF info and per-page progress.
        **kws: Forwarded to pdf2image.convert_from_path (e.g. dpi=300).

    Returns:
        pd.DataFrame: A single column "fpath" with the saved image paths.

    Usage:
        df_page = pdf2img(dir_pdf, page=[1, 5], dpi=300)
    """
    df_dir_img_single_page = pd.DataFrame()
    dir_single_page = []
    if verbose:
        pprint.pp(pdfinfo_from_path(dir_pdf))
    total_pages = pdfinfo_from_path(dir_pdf)["Pages"]

    # Normalise `page` to an inclusive [first, last] pair.
    # BUG FIX: the original compared a *list* against the page count (always
    # unequal), so page=None rendered only the last page and an explicit
    # [first, last] collapsed to [1, first].
    if page is None:
        page = [1, total_pages]
    elif isinstance(page, int):
        page = [page, page]
    else:
        page = list(page)
        if len(page) == 1:
            page = [page[0], page[0]]
        else:
            page = [page[0], page[-1]]

    pages = convert_from_path(dir_pdf, first_page=page[0], last_page=page[1], **kws)
    if dir_save is None:
        # newfolder/dirname/basename are sibling helpers defined elsewhere in
        # this module; assumes newfolder returns the created folder's path —
        # TODO confirm
        dir_save = newfolder(dirname(dir_pdf), basename(dir_pdf).split(".")[0] + "_img")

    ext = kind.lstrip(".").lower()
    for i, page_img in enumerate(pages):
        if verbose:
            print(f"processing page: {i+1}")
        # BUG FIX: join with the output folder and honour `kind` — the name
        # was previously concatenated without a path separator and hard-coded
        # to ".png" while the image was saved in `kind`'s format.
        dir_img_each_page = os.path.join(dir_save, f"page_{i+1:02d}.{ext}")
        dir_single_page.append(dir_img_each_page)
        page_img.save(dir_img_each_page, ext.upper())

    df_dir_img_single_page["fpath"] = dir_single_page
    return df_dir_img_single_page
def fload(fpath, kind=None, **kwargs):
    """
    Load content from a file, dispatching on the file type.

    Parameters:
        fpath (str): The file path from which content will be loaded.
        kind (str, optional): File type override: 'docx', 'txt', 'md',
            'html', 'json', 'yaml', 'xml', 'csv', 'xlsx', 'pdf', or an image
            suffix. When None, it is derived from the file extension.
        **kwargs: Extra options forwarded to the pandas readers ('csv',
            'xlsx') or to the internal PDF loader (page=, verbose=).

    Returns:
        The loaded content: str for text-like types, dict for json/yaml,
        DataFrame for csv/xlsx, list of paragraph texts for docx, str for
        pdf, and an image object for image suffixes.

    Raises:
        ValueError: If the (derived) kind is not supported.
    """
    def load_txt_md(fpath):
        # plain-text read; .txt and .md share this loader
        with open(fpath, "r") as file:
            content = file.read()
        return content

    def load_html(fpath):
        # raw HTML source as a string (no parsing)
        with open(fpath, "r") as file:
            content = file.read()
        return content

    def load_json(fpath):
        with open(fpath, "r") as file:
            content = json.load(file)
        return content

    def load_yaml(fpath):
        # safe_load: never executes arbitrary YAML tags
        with open(fpath, "r") as file:
            content = yaml.safe_load(file)
        return content

    def load_xml(fpath):
        tree = etree.parse(fpath)
        root = tree.getroot()
        return etree.tostring(root, pretty_print=True).decode()

    def load_csv(fpath, **kwargs):
        df = pd.read_csv(fpath, **kwargs)
        return df

    def load_xlsx(fpath, **kwargs):
        df = pd.read_excel(fpath, **kwargs)
        return df

    def load_pdf(fpath, page='all', verbose=False, **kwargs):
        """
        Extract text from a PDF via PyPDF2.

        page: "all" (default) -> concatenated text of every page;
              int or list of ints -> concatenated text of those 1-based pages.
        verbose: print per-page progress and the page count.

        Returns str, or "Page is not found" for an unknown single page given
        as a non-"all" string.
        """
        text_dict = {}
        with open(fpath, "rb") as file:
            pdf_reader = PdfReader(file)
            num_pages = len(pdf_reader.pages)
            for page_num in range(num_pages):
                if verbose:
                    print(f"processing page {page_num}")
                page_ = pdf_reader.pages[page_num]
                # keys are 1-based page numbers
                text_dict[page_num + 1] = page_.extract_text()
        if isinstance(page, int):
            page = [page]
        elif isinstance(page, np.ndarray):
            page = page.tolist()
        if verbose:
            print(f"total pages: {page_num}")
        if isinstance(page, list):
            combined_text = ""
            for page_num in page:
                combined_text += text_dict.get(page_num, "")
            return combined_text
        elif "all" in page.lower():
            combined_text = ""
            for i in text_dict.values():
                combined_text += i
            return combined_text
        else:
            return text_dict.get(int(page), "Page is not found")

    def load_docx(fpath):
        doc = Document(fpath)
        content = [para.text for para in doc.paragraphs]
        return content

    if kind is None:
        _, kind = os.path.splitext(fpath)
    kind = kind.lstrip('.').lower()

    # BUG FIX: "json" was previously also listed among the image suffixes;
    # it is a document type handled by load_json below.
    img_types = ['bmp', 'eps', 'gif', 'icns', 'ico', 'im', 'jpg', 'jpeg',
                 'jpeg2000', 'msp', 'pcx', 'png', 'ppm', 'sgi', 'spider',
                 'tga', 'tiff', 'webp']
    doc_types = ["docx", "txt", "md", "html", "json", "yaml", "xml", "csv", "xlsx", "pdf"]
    supported_types = [*doc_types, *img_types]
    if kind not in supported_types:
        raise ValueError(f"Error:\n{kind} is not in the supported list {supported_types}")

    if kind == "docx":
        return load_docx(fpath)
    elif kind == "txt" or kind == "md":
        return load_txt_md(fpath)
    elif kind == "html":
        return load_html(fpath)
    elif kind == "json":
        return load_json(fpath)
    elif kind == "yaml":
        return load_yaml(fpath)
    elif kind == "xml":
        return load_xml(fpath)
    elif kind == "csv":
        return load_csv(fpath, **kwargs)
    elif kind == "xlsx":
        return load_xlsx(fpath, **kwargs)
    elif kind == "pdf":
        print('usage:load_pdf(fpath, page="all", verbose=False)')
        return load_pdf(fpath, **kwargs)
    elif kind in img_types:
        # load_img is a sibling helper defined elsewhere in this module
        print(f'Image ".{kind}" is loaded.')
        return load_img(fpath)
    else:
        raise ValueError(f"Error:\n{kind} is not in the supported list {supported_types}")
def fsave(
|
582
|
+
fpath,
|
583
|
+
content,
|
584
|
+
kind=None,
|
585
|
+
font_name="Times",
|
586
|
+
font_size=10,
|
587
|
+
spacing=6,
|
588
|
+
**kwargs,
|
589
|
+
):
|
590
|
+
"""
|
591
|
+
Save content into a file with specified file type and formatting.
|
592
|
+
Parameters:
|
593
|
+
fpath (str): The file path where content will be saved.
|
594
|
+
content (list of str or dict): The content to be saved, where each string represents a paragraph or a dictionary for tabular data.
|
595
|
+
kind (str): The file type to save. Supported options: 'docx', 'txt', 'md', 'html', 'pdf', 'csv', 'xlsx', 'json', 'xml', 'yaml'.
|
596
|
+
font_name (str): The font name for text formatting (only applicable for 'docx', 'html', and 'pdf').
|
597
|
+
font_size (int): The font size for text formatting (only applicable for 'docx', 'html', and 'pdf').
|
598
|
+
spacing (int): The space after each paragraph (only applicable for 'docx').
|
599
|
+
**kwargs: Additional parameters for 'csv', 'xlsx', 'json', 'yaml' file types.
|
600
|
+
Returns:
|
601
|
+
None
|
602
|
+
"""
|
603
|
+
def save_content(fpath, content):
|
604
|
+
with open(fpath, "w", encoding='utf-8') as file:
|
605
|
+
file.write(content)
|
606
|
+
|
607
|
+
|
608
|
+
def save_docx(fpath, content, font_name, font_size, spacing):
|
609
|
+
if isinstance(content, str):
|
610
|
+
content = content.split(". ")
|
611
|
+
doc = docx.Document()
|
612
|
+
for i, paragraph_text in enumerate(content):
|
613
|
+
paragraph = doc.add_paragraph()
|
614
|
+
run = paragraph.add_run(paragraph_text)
|
615
|
+
font = run.font
|
616
|
+
font.name = font_name
|
617
|
+
font.size = docx.shared.Pt(font_size)
|
618
|
+
if i != len(content) - 1: # Add spacing for all but the last paragraph
|
619
|
+
paragraph.space_after = docx.shared.Pt(spacing)
|
620
|
+
doc.save(fpath)
|
621
|
+
|
622
|
+
|
623
|
+
def save_txt_md(fpath, content, sep="\n"):
|
624
|
+
# Ensure content is a single string
|
625
|
+
if isinstance(content, list):
|
626
|
+
content = sep.join(content)
|
627
|
+
save_content(fpath, sep.join(content))
|
628
|
+
|
629
|
+
|
630
|
+
def save_html(fpath, content, font_name, font_size):
|
631
|
+
html_content = "<html><body>"
|
632
|
+
for paragraph_text in content:
|
633
|
+
html_content += f'<p style="font-family:{font_name}; font-size:{font_size}px;">{paragraph_text}</p>'
|
634
|
+
html_content += "</body></html>"
|
635
|
+
save_content(fpath, html_content)
|
636
|
+
|
637
|
+
|
638
|
+
def save_pdf(fpath, content, font_name, font_size):
|
639
|
+
pdf = FPDF()
|
640
|
+
pdf.add_page()
|
641
|
+
# pdf.add_font('Arial','',r'/System/Library/Fonts/Supplemental/Arial.ttf',uni=True)
|
642
|
+
pdf.set_font(font_name, '',font_size)
|
643
|
+
for paragraph_text in content:
|
644
|
+
pdf.multi_cell(0, 10, paragraph_text)
|
645
|
+
pdf.ln(h = '')
|
646
|
+
pdf.output(fpath,'F')
|
647
|
+
|
648
|
+
|
649
|
+
def save_csv(fpath, data, **kwargs):
|
650
|
+
df = pd.DataFrame(data)
|
651
|
+
df.to_csv(fpath, **kwargs)
|
652
|
+
|
653
|
+
|
654
|
+
def save_xlsx(fpath, data, **kwargs):
|
655
|
+
df = pd.DataFrame(data)
|
656
|
+
df.to_excel(fpath, **kwargs)
|
657
|
+
|
658
|
+
|
659
|
+
# def save_json(fpath, data, **kwargs):
|
660
|
+
# with open(fpath, "w") as file:
|
661
|
+
# json.dump(data, file, **kwargs)
|
662
|
+
|
663
|
+
def save_json(fpath_fname, var_dict_or_df):
|
664
|
+
with open(fpath_fname, "w") as f_json:
|
665
|
+
# Check if var_dict_or_df is a DataFrame
|
666
|
+
if isinstance(var_dict_or_df, pd.DataFrame):
|
667
|
+
# Convert DataFrame to a list of dictionaries
|
668
|
+
var_dict_or_df = var_dict_or_df.to_dict(orient="dict")
|
669
|
+
|
670
|
+
# Check if var_dict_or_df is a dictionary
|
671
|
+
if isinstance(var_dict_or_df, dict):
|
672
|
+
# Convert NumPy arrays to lists
|
673
|
+
for key, value in var_dict_or_df.items():
|
674
|
+
if isinstance(value, np.ndarray):
|
675
|
+
var_dict_or_df[key] = value.tolist()
|
676
|
+
|
677
|
+
# Save the dictionary or list of dictionaries to a JSON file
|
678
|
+
json.dump(var_dict_or_df, f_json, indent=4)
|
679
|
+
# # Example usage:
|
680
|
+
# sets = {"title": "mse_path_ MSE"}
|
681
|
+
# jsonsave("/.json", sets)
|
682
|
+
# # setss = jsonload("/.json")
|
683
|
+
|
684
|
+
def save_yaml(fpath, data, **kwargs):
    """Dump *data* to a YAML file at *fpath*.

    Extra keyword arguments are forwarded verbatim to :func:`yaml.dump`
    (e.g. ``default_flow_style=False``).
    """
    with open(fpath, "w") as fh:
        yaml.dump(data, fh, **kwargs)
|
687
|
+
|
688
|
+
|
689
|
+
def save_xml(fpath, data):
    """Write a flat dict to an XML file as ``<root><key>value</key>...</root>``.

    Raises
    ------
    ValueError
        If *data* is not a dictionary (the only supported shape).
    """
    if not isinstance(data, dict):
        raise ValueError("XML saving only supports dictionary data")
    root = etree.Element("root")
    for key, val in data.items():
        node = etree.SubElement(root, key)
        node.text = str(val)
    etree.ElementTree(root).write(
        fpath, pretty_print=True, xml_declaration=True, encoding="UTF-8"
    )
|
699
|
+
|
700
|
+
if kind is None:
|
701
|
+
_, kind = os.path.splitext(fpath)
|
702
|
+
kind = kind.lower()
|
703
|
+
|
704
|
+
kind = kind.lstrip(".").lower()
|
705
|
+
|
706
|
+
if kind not in [
|
707
|
+
"docx",
|
708
|
+
"txt",
|
709
|
+
"md",
|
710
|
+
"html",
|
711
|
+
"pdf",
|
712
|
+
"csv",
|
713
|
+
"xlsx",
|
714
|
+
"json",
|
715
|
+
"xml",
|
716
|
+
"yaml",
|
717
|
+
]:
|
718
|
+
raise ValueError(
|
719
|
+
f"Error:\n{kind} is not in the supported list ['docx', 'txt', 'md', 'html', 'pdf', 'csv', 'xlsx', 'json', 'xml', 'yaml']"
|
720
|
+
)
|
721
|
+
|
722
|
+
if kind == "docx" or kind=="doc":
|
723
|
+
save_docx(fpath, content, font_name, font_size, spacing)
|
724
|
+
elif kind == "txt":
|
725
|
+
save_txt_md(fpath, content, sep="")
|
726
|
+
elif kind == "md":
|
727
|
+
save_txt_md(fpath, content, sep="")
|
728
|
+
elif kind == "html":
|
729
|
+
save_html(fpath, content, font_name, font_size)
|
730
|
+
elif kind == "pdf":
|
731
|
+
save_pdf(fpath, content, font_name, font_size)
|
732
|
+
elif kind == "csv":
|
733
|
+
save_csv(
|
734
|
+
fpath, content, **kwargs
|
735
|
+
) # Assuming content is in tabular form (list of dicts or DataFrame)
|
736
|
+
elif kind == "xlsx":
|
737
|
+
save_xlsx(
|
738
|
+
fpath, content, **kwargs
|
739
|
+
) # Assuming content is in tabular form (list of dicts or DataFrame)
|
740
|
+
elif kind == "json":
|
741
|
+
save_json(fpath, content) # Assuming content is a serializable object
|
742
|
+
elif kind == "xml":
|
743
|
+
save_xml(fpath, content) # Assuming content is a dictionary
|
744
|
+
elif kind == "yaml":
|
745
|
+
save_yaml(fpath, content, **kwargs) # Assuming content is a serializable object
|
746
|
+
else:
|
747
|
+
raise ValueError(
|
748
|
+
f"Error:\n{kind} is not in the supported list ['docx', 'txt', 'md', 'html', 'pdf', 'csv', 'xlsx', 'json', 'xml', 'yaml']"
|
749
|
+
)
|
750
|
+
|
751
|
+
|
752
|
+
# # Example usage
|
753
|
+
# text_content = ["Hello, this is a sample text file.", "This is the second paragraph."]
|
754
|
+
# tabular_content = {"Name": ["Alice", "Bob"], "Age": [24, 30]}
|
755
|
+
# json_content = {"name": "Alice", "age": 24}
|
756
|
+
# yaml_content = {"Name": "Alice", "Age": 24}
|
757
|
+
# xml_content = {"Name": "Alice", "Age": 24}
|
758
|
+
# dir_save = "/Users/macjianfeng/Dropbox/Downloads/"
|
759
|
+
# fsave(dir_save + "sample.txt", text_content)
|
760
|
+
# fsave(dir_save + "sample.md", text_content)
|
761
|
+
# fsave(dir_save + "sample.html", text_content)
|
762
|
+
# fsave(dir_save + "sample.pdf", text_content)
|
763
|
+
# fsave(dir_save + "sample.docx", text_content)
|
764
|
+
# fsave(dir_save + "sample.csv", tabular_content, index=False)
|
765
|
+
# fsave(dir_save + "sample.xlsx", tabular_content, sheet_name="Sheet1", index=False)
|
766
|
+
# fsave(dir_save + "sample.json", json_content, indent=4)
|
767
|
+
# fsave(dir_save + "sample.yaml", yaml_content)
|
768
|
+
# fsave(dir_save + "sample.xml", xml_content)
|
769
|
+
|
770
|
+
def addpath(fpath):
    """Prepend *fpath* to ``sys.path`` so modules there are found first.

    Fix over the original: it inserted the *builtin* ``dir`` function
    (``sys.path.insert(0, dir)``) instead of the *fpath* argument, which
    both ignored the caller's path and polluted ``sys.path`` with a
    non-string entry.
    """
    sys.path.insert(0, fpath)
|
772
|
+
def dirname(fpath):
    """Return the parent directory of *fpath*, guaranteed to end with '/'."""
    parent = os.path.dirname(fpath)
    return parent if parent.endswith('/') else parent + "/"
|
777
|
+
def dir_name(fpath):
    """Alias of :func:`dirname` kept for API compatibility."""
    return dirname(fpath)
|
779
|
+
def basename(fpath):
    """Return the final path component of *fpath* (thin os.path wrapper)."""
    return os.path.basename(fpath)
|
781
|
+
|
782
|
+
def finfo(fpath):
    """Collect metadata about the file at *fpath* and return it as a FileInfo.

    Gathers size (in MB, 3 decimals), creation/modification timestamps
    (duplicated under two key names each), the parent directory (with a
    trailing '/'), the bare file name, and the extension. For PDFs, extra
    metadata is pulled via ``pdfinfo_from_path``.

    NOTE(review): ``getctime`` is creation time only on some platforms
    (e.g. Windows/macOS); on Linux it is inode-change time — confirm the
    intended semantics.
    """
    fname, fmt = os.path.splitext(fpath)
    dir_par = os.path.dirname(fpath) + '/'
    data = {
        # bytes -> MB, rounded to 3 decimals
        "size": round(os.path.getsize(fpath) / 1024 / 1024, 3),
        # creation time exposed under both a long and a short key
        "creation_time": time.ctime(os.path.getctime(fpath)),
        "ctime": time.ctime(os.path.getctime(fpath)),
        # modification time, likewise duplicated
        "mod_time": time.ctime(os.path.getmtime(fpath)),
        "mtime": time.ctime(os.path.getmtime(fpath)),
        "parent_dir": dir_par,
        # strip the directory prefix to leave just the (extension-less) name
        "fname": fname.replace(dir_par, ""),
        "kind": fmt
    }
    extra_info = {}
    # PDFs get additional metadata from pdf2image's pdfinfo helper
    if data["kind"] == ".pdf":
        extra_info = pdfinfo_from_path(fpath)

    return FileInfo(
        size=data["size"],
        creation_time=data["creation_time"],
        ctime=data["ctime"],
        mod_time=data["mod_time"],
        mtime=data["mtime"],
        parent_dir=data["parent_dir"],
        fname=data["fname"],
        kind=data["kind"],
        extra_info=extra_info
    )
|
810
|
+
|
811
|
+
def listdir(
    rootdir,
    kind="folder",
    sort_by="name",
    ascending=True,
    contains=None,
    orient="list",
    output='df'
):
    """List entries of *rootdir* filtered by kind, as a DataFrame or Box.

    Parameters
    ----------
    rootdir : str
        Directory to scan (non-recursive). Raises FileNotFoundError if absent.
    kind : str
        Extension to match (".csv" or "csv"), or one of the folder aliases
        ".fd"/".fld"/".fol"/".folder" to list sub-directories.
    sort_by : str
        Fuzzy-matched: "nam*" -> name, "crea*" -> created_time,
        "modi*" -> modified_time, contains both "s" and "z" -> size.
    ascending : bool
        Sort direction.
    contains : str or None
        Case-insensitive substring filter on the entry name.
    orient : str
        Box orientation when output is not a DataFrame
        ("list"/"dict"/"records"/"index"/"series", matched by prefix).
    output : str
        "df" returns the DataFrame; anything else returns a Box.
    """
    def sort_kind(df, by="name", ascending=True):
        # Case-insensitive ordering for string columns, plain argsort otherwise
        if df[by].dtype == 'object':  # Check if the column contains string values
            if ascending:
                sorted_index = df[by].str.lower().argsort()
            else:
                sorted_index = df[by].str.lower().argsort()[::-1]
        else:
            if ascending:
                sorted_index = df[by].argsort()
            else:
                sorted_index = df[by].argsort()[::-1]
        sorted_df = df.iloc[sorted_index].reset_index(drop=True)
        return sorted_df

    # Normalize "csv" -> ".csv" so extension comparison works
    if not kind.startswith("."):
        kind = "." + kind

    if os.path.isdir(rootdir):
        ls = os.listdir(rootdir)
        # aliases that mean "list folders instead of files"
        fd = [".fd", ".fld", ".fol", ".fd", ".folder"]
        i = 0
        f = {
            "name": [],
            "length": [],
            "path": [],
            "created_time": [],
            "modified_time": [],
            "last_open_time": [],
            "size": [],
            "fname": [],
            "fpath": [],
        }
        for item in ls:
            item_path = os.path.join(rootdir, item)
            # skip hidden entries
            if item.startswith("."):
                continue
            filename, file_extension = os.path.splitext(item)
            is_folder = kind.lower() in fd and os.path.isdir(item_path)
            # NOTE(review): substring match — kind ".cs" would also match ".csv"
            is_file = kind.lower() in file_extension.lower() and (
                os.path.isfile(item_path)
            )
            if not is_folder and not is_file:
                continue
            f["name"].append(filename)
            f["length"].append(len(filename))
            f["path"].append(os.path.join(os.path.dirname(item_path), item))
            fpath = os.path.join(os.path.dirname(item_path), item)
            # size in MB
            f["size"].append(round(os.path.getsize(fpath) / 1024 / 1024, 3))
            f["created_time"].append(pd.to_datetime(os.path.getctime(item_path), unit='s'))
            f["modified_time"].append(pd.to_datetime(os.path.getmtime(item_path), unit='s'))
            f['last_open_time'].append(pd.to_datetime(os.path.getatime(item_path), unit='s'))
            f["fname"].append(filename)  # will be removed
            f["fpath"].append(fpath)  # will be removed
            i += 1

        # scalar columns broadcast over all rows when the DataFrame is built
        f["num"] = i
        f["rootdir"] = rootdir
        f["os"] = os.uname().machine  # NOTE(review): os.uname is POSIX-only
    else:
        raise FileNotFoundError(
            'The directory "{}" does NOT exist. Please check the directory "rootdir".'.format(
                rootdir
            )
        )
    f = pd.DataFrame(f)
    if contains is not None:
        f = f[f["name"].str.contains(contains, case=False)]
    # fuzzy sort-key matching
    if "nam" in sort_by.lower():
        # f.sort_values(by="name", ascending=ascending, ignore_index=True, inplace=True)
        f = sort_kind(f, by="name", ascending=ascending)
    elif "crea" in sort_by.lower():
        f = sort_kind(f, by="created_time", ascending=ascending)
    elif "modi" in sort_by.lower():
        f = sort_kind(f, by="modified_time", ascending=ascending)
    elif "s" in sort_by.lower() and "z" in sort_by.lower():
        f = sort_kind(f, by="size", ascending=ascending)
    if 'df' in output:
        return f
    else:
        if 'l' in orient.lower():  # list # default
            res_output = Box(f.to_dict(orient="list"))
            return res_output
        if 'd' in orient.lower():  # dict
            return Box(f.to_dict(orient="dict"))
        if 'r' in orient.lower():  # records
            return Box(f.to_dict(orient="records"))
        if 'in' in orient.lower():  # index
            return Box(f.to_dict(orient="index"))
        if 'se' in orient.lower():  # series
            return Box(f.to_dict(orient="series"))
|
910
|
+
|
911
|
+
# Example usage:
|
912
|
+
# result = listdir('your_root_directory')
|
913
|
+
# print(result)
|
914
|
+
# df=listdir("/", contains='sss',sort_by='name',ascending=False)
|
915
|
+
# print(df.fname.to_list(),"\n",df.fpath.to_list())
|
916
|
+
def list_func(lib_name, opt="call"):
    """List attribute names of *lib_name*.

    With ``opt == "call"`` (default) only callable attributes are returned;
    otherwise the full ``dir()`` listing is returned.
    """
    if opt == "call":
        return [name for name in dir(lib_name) if callable(getattr(lib_name, name))]
    return dir(lib_name)
|
922
|
+
def func_list(lib_name, opt="call"):
    """Same contract as ``list_func``: attribute names of *lib_name*,
    restricted to callables when ``opt == "call"``."""
    names = dir(lib_name)
    if opt == "call":
        names = [n for n in names if callable(getattr(lib_name, n))]
    return names
|
928
|
+
|
929
|
+
def newfolder(pardir, chdir):
    """Create one or more sub-folders under *pardir* and return their paths.

    Parameters
    ----------
    pardir : str
        Existing parent directory. The process cwd is changed to it.
    chdir : str or list of str
        Sub-folder name(s) to create; duplicates are dropped and the
        remaining names sorted before creation.

    Returns
    -------
    str or list of str
        Created (or pre-existing) sub-folder path(s), each with a trailing
        separator; a single path is returned bare, not in a list.

    NOTE(review): this function calls ``os.chdir`` as a side effect and does
    not restore the previous working directory.
    """
    import os
    rootdir = []
    # Convert string to list
    if isinstance(chdir, str):
        chdir = [chdir]

    # Subfoldername should be unique
    chdir = list(set(chdir))

    if isinstance(pardir, str):  # Dir_parents should be 'str' type
        pardir = os.path.normpath(pardir)

    # Get the slash type: "/" or "\"
    stype = '/' if '/' in pardir else '\\'

    # Check if the parent directory exists and is a directory path
    if os.path.isdir(pardir):
        os.chdir(pardir)  # Set current path

        # Check if subdirectories are not empty
        if chdir:
            chdir.sort()
            # Create multiple subdirectories at once
            for folder in chdir:
                # Check if the subfolder already exists
                child_tmp = os.path.join(pardir, folder)
                if not os.path.isdir(child_tmp):
                    # relative mkdir — relies on the os.chdir above
                    os.mkdir('./' + folder)
                    print(f'\n {folder} was created successfully!\n')
                else:
                    print(f'\n {folder} already exists! \n')

                rootdir.append(child_tmp + stype)  # Note down

        else:
            print('\nWarning: Dir_child doesn\'t exist\n')

    else:
        print('\nWarning: Dir_parent is not a directory path\n')

    # Dir is the main output, if only one dir, then str type is inconvenient
    if len(rootdir) == 1:
        rootdir = rootdir[0]

    return rootdir
|
975
|
+
|
976
|
+
|
977
|
+
def figsave(*args, dpi=300):
    """Save the current matplotlib figure.

    Positional string arguments are classified by content: any argument
    containing a path separator is taken as the save directory, any other
    string as the file name. The extension selects the backend format; a
    name without an extension is saved as both '.tif' and '.pdf'.

    Parameters
    ----------
    *args : str
        Save directory (contains '/' or '\\') and/or file name.
    dpi : int
        Resolution passed to ``plt.savefig`` (default 300).

    Raises
    ------
    ValueError
        If no usable save directory was supplied.

    Fixes over the original:
    - a missing directory argument previously crashed with
      ``TypeError: argument of type 'NoneType' is not iterable`` on
      ``'/' in DirSave``; it now raises the intended ValueError.
    - the "default saving filetype" message had a broken literal
      (unclosed quote around 'pdf') and a pointless f-prefix.
    """
    DirSave = None
    fname = None

    for arg in args:
        if isinstance(arg, str):
            if '/' in arg or '\\' in arg:
                DirSave = arg
            elif '/' not in arg and '\\' not in arg:
                fname = arg

    # Guard: a directory argument must have been supplied
    if DirSave is None:
        raise ValueError('Check the Path of DirSave Directory')

    # Ensure the directory ends with its own separator style
    if '/' in DirSave:
        if DirSave[-1] != '/':
            DirSave = DirSave + '/'
    elif '\\' in DirSave:
        if DirSave[-1] != '\\':
            DirSave = DirSave + '\\'
    else:
        raise ValueError('Check the Path of DirSave Directory')

    ftype = fname.split('.')[-1]
    if len(fname.split('.')) == 1:
        # no extension given -> placeholder, handled below
        ftype = 'nofmt'
        fname = DirSave + fname + '.' + ftype
    else:
        fname = DirSave + fname

    # Save figure based on file type
    if ftype.lower() == 'eps':
        plt.savefig(fname, format='eps', bbox_inches='tight')
        # EPS also gets a PDF sibling
        plt.savefig(fname.replace('.eps', '.pdf'),
                    format='pdf', bbox_inches='tight', dpi=dpi)
    elif ftype.lower() == 'nofmt':  # default: both "tif" and "pdf"
        fname_corr = fname.replace('nofmt', 'pdf')
        plt.savefig(fname_corr, format='pdf', bbox_inches='tight', dpi=dpi)
        fname = fname.replace('nofmt', 'tif')
        plt.savefig(fname, format='tiff', dpi=dpi, bbox_inches='tight')
        print("default saving filetype: both 'tif' and 'pdf'")
    elif ftype.lower() == 'pdf':
        plt.savefig(fname, format='pdf', bbox_inches='tight', dpi=dpi)
    elif ftype.lower() in ['jpg', 'jpeg']:
        plt.savefig(fname, format='jpeg', dpi=dpi, bbox_inches='tight')
    elif ftype.lower() == 'png':
        plt.savefig(fname, format='png', dpi=dpi,
                    bbox_inches='tight', transparent=True)
    elif ftype.lower() in ['tiff', 'tif']:
        plt.savefig(fname, format='tiff', dpi=dpi, bbox_inches='tight')
    elif ftype.lower() == 'emf':
        plt.savefig(fname, format='emf', dpi=dpi, bbox_inches='tight')
    elif ftype.lower() == 'fig':
        # '.fig' is treated as a PDF under the given name
        plt.savefig(fname, format='pdf', bbox_inches='tight', dpi=dpi)

    print(f'\nSaved @: dpi={dpi}\n{fname}')
|
1031
|
+
|
1032
|
+
|
1033
|
+
# ==============FuncStars(ax,x1=1,x2=2, yscale=0.9, pval=0.01)====================================================
|
1034
|
+
# Usage:
|
1035
|
+
# FuncStars(ax, x1=2, x2=3, yscale=0.99, pval=0.02)
|
1036
|
+
# =============================================================================
|
1037
|
+
|
1038
|
+
# FuncStars --v 0.1.1
|
1039
|
+
def FuncStars(ax,
              pval=None,
              Ylim=None,
              Xlim=None,
              symbol='*',
              yscale=0.95,
              x1=0,
              x2=1,
              alpha=0.05,
              fontsize=14,
              fontsize_note=6,
              rotation=0,
              fontname='Arial',
              values_below=None,
              linego=True,
              linestyle='-',
              linecolor='k',
              linewidth=.8,
              nsshow='off',
              symbolcolor='k',
              # NOTE(review): mutable default list — shared across calls if mutated
              tailindicator=[0.06, 0.06],
              report=None,
              report_scale=-0.1,
              report_loc=None):
    """Draw significance stars (or 'ns') with bracket lines between x1 and x2.

    The star count follows the usual convention: * for pval <= alpha,
    ** for pval <= 0.01, *** for pval <= 0.001; non-significant results
    are annotated only when ``nsshow='on'``. Optional extras: a note under
    the annotation (*values_below*) and a free-text *report* below the axis.

    NOTE(review): pval has default None but is compared with ``alpha < pval``
    unguarded — calling without a numeric pval raises TypeError on 3.x;
    confirm callers always pass pval.
    """

    if ax is None:
        ax = plt.gca()
    if Ylim is None:
        Ylim = plt.gca().get_ylim()
    if Xlim is None:
        Xlim = ax.get_xlim()
    # default report position: report_scale fraction below the axis bottom
    if report_loc is None and report is not None:
        report_loc = np.min(Ylim) + report_scale*np.abs(np.diff(Ylim))
    if report_scale > 0:
        report_scale = -np.abs(report_scale)

    yscale = np.float64(yscale)
    # vertical anchor of the annotation, as a fraction of the y-range
    y_loc = np.min(Ylim) + yscale*(np.max(Ylim)-np.min(Ylim))
    xcenter = np.mean([x1, x2])

    # ns / *
    if alpha < pval:
        if nsshow == 'on':
            ns_str = f'p={round(pval, 3)}' if pval < 0.9 else 'ns'
            color = 'm' if pval < 0.1 else 'k'
            plt.text(xcenter, y_loc, ns_str,
                     ha='center', va='bottom',  # 'center_baseline',
                     fontsize=fontsize-6 if fontsize > 6 else fontsize,
                     fontname=fontname, color=color, rotation=rotation
                     # bbox=dict(facecolor=None, edgecolor=None, color=None, linewidth=None)
                     )
    elif 0.01 < pval <= alpha:
        plt.text(xcenter, y_loc, symbol,
                 ha='center', va='center_baseline',
                 fontsize=fontsize, fontname=fontname, color=symbolcolor)
    elif 0.001 < pval <= 0.01:
        plt.text(xcenter, y_loc, symbol * 2,
                 ha='center', va='center_baseline',
                 fontsize=fontsize, fontname=fontname, color=symbolcolor)
    elif 0 < pval <= 0.001:
        plt.text(xcenter, y_loc, symbol * 3,
                 ha='center', va='center_baseline',
                 fontsize=fontsize, fontname=fontname, color=symbolcolor)

    # lines indicators
    if linego:  # and 0 < pval <= 0.05:
        # NOTE(review): debug prints left in — consider removing upstream
        print(pval)
        print(linego)
        # horizontal line
        if yscale < 0.99:
            plt.plot([x1 + np.abs(np.diff(Xlim)) * 0.01,
                      x2 - np.abs(np.diff(Xlim)) * 0.01],
                     [y_loc - np.abs(np.diff(Ylim)) * .03,
                      y_loc - np.abs(np.diff(Ylim)) * .03],
                     linestyle=linestyle, color=linecolor, linewidth=linewidth)
            # vertical line
            plt.plot([x1 + np.abs(np.diff(Xlim)) * 0.01,
                      x1 + np.abs(np.diff(Xlim)) * 0.01],
                     [y_loc - np.abs(np.diff(Ylim)) * tailindicator[0],
                      y_loc - np.abs(np.diff(Ylim)) * .03],
                     linestyle=linestyle, color=linecolor, linewidth=linewidth)
            plt.plot([x2 - np.abs(np.diff(Xlim)) * 0.01,
                      x2 - np.abs(np.diff(Xlim)) * 0.01],
                     [y_loc - np.abs(np.diff(Ylim)) * tailindicator[1],
                      y_loc - np.abs(np.diff(Ylim)) * .03],
                     linestyle=linestyle, color=linecolor, linewidth=linewidth)
        else:
            # annotation near the very top: clamp the bracket to 95% height
            plt.plot([x1 + np.abs(np.diff(Xlim)) * 0.01,
                      x2 - np.abs(np.diff(Xlim)) * 0.01],
                     [np.min(Ylim) + 0.95*(np.max(Ylim)-np.min(Ylim)) - np.abs(np.diff(Ylim)) * 0.002,
                      np.min(Ylim) + 0.95*(np.max(Ylim)-np.min(Ylim)) - np.abs(np.diff(Ylim)) * 0.002],
                     linestyle=linestyle, color=linecolor, linewidth=linewidth)
            # vertical line
            plt.plot([x1 + np.abs(np.diff(Xlim)) * 0.01,
                      x1 + np.abs(np.diff(Xlim)) * 0.01],
                     [np.min(Ylim) + 0.95*(np.max(Ylim)-np.min(Ylim)) - np.abs(np.diff(Ylim)) * tailindicator[0],
                      np.min(Ylim) + 0.95*(np.max(Ylim)-np.min(Ylim)) - np.abs(np.diff(Ylim)) * 0.002],
                     linestyle=linestyle, color=linecolor, linewidth=linewidth)
            plt.plot([x2 - np.abs(np.diff(Xlim)) * 0.01,
                      x2 - np.abs(np.diff(Xlim)) * 0.01],
                     [np.min(Ylim) + 0.95*(np.max(Ylim)-np.min(Ylim)) - np.abs(np.diff(Ylim)) * tailindicator[1],
                      np.min(Ylim) + 0.95*(np.max(Ylim)-np.min(Ylim)) - np.abs(np.diff(Ylim)) * 0.002],
                     linestyle=linestyle, color=linecolor, linewidth=linewidth)

    if values_below is not None:
        plt.text(xcenter, y_loc * (-0.1), values_below,
                 ha='center', va='bottom',  # 'center_baseline', rotation=rotation,
                 fontsize=fontsize_note, fontname=fontname, color='k')

    # report / comments
    if report is not None:
        plt.text(xcenter, report_loc, report,
                 ha='left', va='bottom',  # 'center_baseline', rotation=rotation,
                 fontsize=fontsize_note, fontname=fontname, color='.7')
|
1154
|
+
def is_str_color(s):
    """Return True iff *s* is a hex colour code: '#RRGGBB' or '#RRGGBBAA'."""
    return bool(re.match(r"^#([A-Fa-f0-9]{6}|[A-Fa-f0-9]{8})$", s))
|
1158
|
+
|
1159
|
+
def stdshade(ax=None, *args, **kwargs):
    """Plot the column-wise mean of a 2-D array with a shaded SEM/STD band.

    MATLAB-style positional argument sniffing: each extra positional is
    classified by type/shape —
      * 2-D ndarray (both dims > 1)  -> the data matrix ``y`` (rows = trials)
      * 1-D ndarray or range         -> the x axis
      * float in [0, 1]              -> band alpha
      * 3-element list/tuple         -> RGB colour
      * str                          -> 'sem'/'std', '#RRGGBB', or a compact
                                        colour/marker/linestyle spec like 'r--'
      * number >= 1                  -> Savitzky-Golay smoothing window
    kwargs ``fill_kws`` / ``line_kws`` are forwarded to fill_between / plot.

    Returns ``(line, fill)`` — the Line2D and the PolyCollection.
    """
    # A 2-D array passed in the *ax* slot is actually the data matrix
    if (
        isinstance(ax, np.ndarray)
        and ax.ndim == 2
        and min(ax.shape) > 1
        and max(ax.shape) > 1
    ):
        y = ax
        ax = plt.gca()
    if ax is None:
        ax = plt.gca()
    # defaults
    alpha = 0.5
    acolor = "k"
    paraStdSem = "sem"
    plotStyle = "-"
    plotMarker = "none"
    smth = 1
    l_c_one = ["r", "g", "b", "m", "c", "y", "k", "w"]
    l_style2 = ["--", "-."]
    l_style1 = ["-", ":"]
    l_mark = ["o", "+", "*", ".", "x", "_", "|", "s", "d", "^", "v", ">", "<", "p", "h"]

    # Check each argument
    for iarg in range(len(args)):
        if (
            isinstance(args[iarg], np.ndarray)
            and args[iarg].ndim == 2
            and min(args[iarg].shape) > 1
            and max(args[iarg].shape) > 1
        ):
            y = args[iarg]
        # Except y, continuous data is 'F'
        if (isinstance(args[iarg], np.ndarray) and args[iarg].ndim == 1) or isinstance(
            args[iarg], range
        ):
            x = args[iarg]
            if isinstance(x, range):
                x = np.arange(start=x.start, stop=x.stop, step=x.step)
        # Only one number( 0~1), 'alpha' / color
        if isinstance(args[iarg], (int, float)):
            if np.size(args[iarg]) == 1 and 0 <= args[iarg] <= 1:
                alpha = args[iarg]
        if isinstance(args[iarg], (list, tuple)) and np.size(args[iarg]) == 3:
            acolor = args[iarg]
            acolor = tuple(acolor) if isinstance(acolor, list) else acolor
        # Color / plotStyle /
        if (
            isinstance(args[iarg], str)
            and len(args[iarg]) == 1
            and args[iarg] in l_c_one
        ):
            acolor = args[iarg]
        else:
            if isinstance(args[iarg], str):
                if args[iarg] in ["sem", "std"]:
                    paraStdSem = args[iarg]
                if args[iarg].startswith("#"):
                    # hue2rgb: project helper converting hex to RGB
                    acolor = hue2rgb(args[iarg])
                # compact spec like 'r--' / 'go' / 'b:' (str2list: project helper)
                if str2list(args[iarg])[0] in l_c_one:
                    if len(args[iarg]) == 3:
                        k = [i for i in str2list(args[iarg]) if i in l_c_one]
                        if k != []:
                            acolor = k[0]
                        st = [i for i in l_style2 if i in args[iarg]]
                        if st != []:
                            plotStyle = st[0]
                    elif len(args[iarg]) == 2:
                        k = [i for i in str2list(args[iarg]) if i in l_c_one]
                        if k != []:
                            acolor = k[0]
                        mk = [i for i in str2list(args[iarg]) if i in l_mark]
                        if mk != []:
                            plotMarker = mk[0]
                        st = [i for i in l_style1 if i in args[iarg]]
                        if st != []:
                            plotStyle = st[0]
                if len(args[iarg]) == 1:
                    k = [i for i in str2list(args[iarg]) if i in l_c_one]
                    if k != []:
                        acolor = k[0]
                    mk = [i for i in str2list(args[iarg]) if i in l_mark]
                    if mk != []:
                        plotMarker = mk[0]
                    st = [i for i in l_style1 if i in args[iarg]]
                    if st != []:
                        plotStyle = st[0]
                if len(args[iarg]) == 2:
                    st = [i for i in l_style2 if i in args[iarg]]
                    if st != []:
                        plotStyle = st[0]
        # smth
        if (
            isinstance(args[iarg], (int, float))
            and np.size(args[iarg]) == 1
            and args[iarg] >= 1
        ):
            smth = args[iarg]

    # derive / validate the x axis against the data width
    if "x" not in locals() or x is None:
        x = np.arange(1, y.shape[1] + 1)
    elif len(x) < y.shape[1]:
        # x selects a column subset of y
        y = y[:, x]
        nRow = y.shape[0]
        nCol = y.shape[1]
        print(f"y was corrected, please confirm that {nRow} row, {nCol} col")
    else:
        x = np.arange(1, y.shape[1] + 1)

    if x.shape[0] != 1:
        x = x.T
    yMean = np.nanmean(y, axis=0)
    # optional Savitzky-Golay smoothing of the mean trace
    if smth > 1:
        yMean = savgol_filter(np.nanmean(y, axis=0), smth, 1)
    else:
        yMean = np.nanmean(y, axis=0)
    # band half-width: SEM or STD, optionally smoothed
    if paraStdSem == "sem":
        if smth > 1:
            wings = savgol_filter(np.nanstd(y, axis=0) / np.sqrt(y.shape[0]), smth, 1)
        else:
            wings = np.nanstd(y, axis=0) / np.sqrt(y.shape[0])
    elif paraStdSem == "std":
        if smth > 1:
            wings = savgol_filter(np.nanstd(y, axis=0), smth, 1)
        else:
            wings = np.nanstd(y, axis=0)

    fill_kws = kwargs.get('fill_kws', {})
    line_kws = kwargs.get('line_kws', {})
    fill = ax.fill_between(x, yMean + wings, yMean - wings, color=acolor, alpha=alpha, lw=0, **fill_kws)
    # only force lw=1.5 when the caller did not specify a line width
    if line_kws != {} and not any(key.lower() in ['lw', 'linewidth'] for key in line_kws.keys()):
        line = ax.plot(x, yMean, color=acolor, lw=1.5, ls=plotStyle, marker=plotMarker, **line_kws)
    else:
        line = ax.plot(x, yMean, color=acolor, ls=plotStyle, marker=plotMarker, **line_kws)
    return line[0], fill
|
1293
|
+
|
1294
|
+
|
1295
|
+
# =============================================================================
|
1296
|
+
# # for plot figures {Qiu et al.2023}
|
1297
|
+
# =============================================================================
|
1298
|
+
# =============================================================================
|
1299
|
+
# plt.rcParams.update({'figure.max_open_warning': 0})
|
1300
|
+
# # Output matplotlib figure to SVG with text as text, not curves
|
1301
|
+
# plt.rcParams['svg.fonttype'] = 'none'
|
1302
|
+
# plt.rcParams['pdf.fonttype'] = 42
|
1303
|
+
#
|
1304
|
+
# plt.rc('text', usetex=False)
|
1305
|
+
# # plt.style.use('ggplot')
|
1306
|
+
# plt.style.use('science')
|
1307
|
+
# plt.rc('font', family='serif')
|
1308
|
+
# plt.rcParams.update({
|
1309
|
+
# "font.family": "serif", # specify font family here
|
1310
|
+
# "font.serif": ["Arial"], # specify font here
|
1311
|
+
# "font.size": 11})
|
1312
|
+
# # plt.tight_layout()
|
1313
|
+
# =============================================================================
|
1314
|
+
# =============================================================================
|
1315
|
+
# # axis spine
|
1316
|
+
# # use it like: adjust_spines(ax, ['left', 'bottom'])
|
1317
|
+
# =============================================================================
|
1318
|
+
|
1319
|
+
|
1320
|
+
def adjust_spines(ax=None, spines=['left', 'bottom'], distance=2):
    """Offset the listed spines outward by *distance* points and hide the rest.

    Tick marks follow: y-ticks stay only when 'left' is kept, x-ticks only
    when 'bottom' is kept. Usage: ``adjust_spines(ax, ['left', 'bottom'])``.
    """
    if ax is None:
        ax = plt.gca()
    keep = set(spines)
    for loc, spine in ax.spines.items():
        if loc in keep:
            spine.set_position(('outward', distance))
        else:
            # don't draw this spine
            spine.set_color('none')
    # turn off ticks where there is no spine
    if 'left' in keep:
        ax.yaxis.set_ticks_position('left')
    else:
        ax.yaxis.set_ticks([])
    if 'bottom' in keep:
        ax.xaxis.set_ticks_position('bottom')
    else:
        ax.xaxis.set_ticks([])
|
1339
|
+
# And then plot the data:
|
1340
|
+
|
1341
|
+
def add_colorbar(im, width=None, pad=None, **kwargs):
    """Attach a colorbar in a new axes just right of *im*'s axes.

    *width* defaults to 10% of the image axes' width; *pad* defaults to
    *width*. Remaining kwargs go to ``fig.colorbar``.
    Usage: ``add_colorbar(im, width=0.01, pad=0.005, label="PSD (dB)")``.
    """
    left, bottom, w, h = im.axes.get_position().bounds  # image axes bounds
    width = width or 0.1 * w
    pad = pad or width
    fig = im.axes.figure
    cax = fig.add_axes([left + w + pad, bottom, width, h])  # colorbar axes
    return fig.colorbar(im, cax=cax, **kwargs)
|
1349
|
+
# =============================================================================
|
1350
|
+
# # for plot figures: setting rcParams
|
1351
|
+
# usage: set_pub()
|
1352
|
+
# or by setting sns.set_theme...see below:
|
1353
|
+
# sns.set_theme(style="ticks", rc=params) # 白色无刻度线,有坐标轴标度
|
1354
|
+
# # sns.set_theme(style="whitegrid", rc=params)# 白色+刻度线,无坐标轴标度
|
1355
|
+
# # sns.set_theme(style="white", rc=params) # 白色无刻度线,无坐标轴标度
|
1356
|
+
# # sns.set_theme(style="dark", rc=params) # 深色无刻度线,无坐标轴标度
|
1357
|
+
# =============================================================================
|
1358
|
+
|
1359
|
+
|
1360
|
+
def FuncCmpt(X1, X2, pmc='auto', pair='unpaired'):
    """Compare two samples and return the p-value plus a result summary.

    Args:
        X1, X2: array-like numeric samples (NaNs are ignored by the tests).
        pmc (str): 'pmc'/'parametric', 'npmc'/'non-parametric', or 'auto'
            (default); 'auto' behaves like parametric here, with a Levene
            check deciding between Student's and Welch's t-test.
        pair (str): paired vs. unpaired design (substring matched, so e.g.
            'unpair' also works). BUGFIX: unrecognized values used to yield
            cfg_pair=None and crash inside sub_cmpt_2group ('np' in None /
            NameError on stat_value); they now fall back to 'unpaired'.

    Returns:
        tuple: (pval, output) where output is a dict with keys
        'stat', 'pval', 'method' and 'APA'.
    """

    def corr_pmc(pmc):
        # Normalize the parametric / non-parametric choice. The two option
        # sets are disjoint, so the original redundant "not in" checks were
        # dropped without changing behavior.
        low = pmc.lower()
        if low in {'pmc', 'parametric'}:
            return 'parametric'
        if low in {'npmc', 'nonparametric', 'non-parametric'}:
            return 'non-parametric'
        return 'auto'

    def corr_pair(pair):
        # Normalize the paired / unpaired choice; default to 'unpaired'
        # for unrecognized input instead of returning None (bug fix).
        if 'pa' in pair.lower() and 'np' not in pair.lower():
            return 'paired'
        return 'unpaired'

    def check_normality(data):
        # Shapiro-Wilk; p > .05 is treated as "normally distributed".
        stat_shapiro, pval_shapiro = stats.shapiro(data)
        normality = pval_shapiro > 0.05
        if normality:
            print('\n normally distributed\n')
        else:
            print('\n NOT normally distributed\n')
        return normality

    def sub_cmpt_2group(X1, X2, cfg_pmc='pmc', pair='unpaired'):
        # Run the appropriate two-sample test and build an APA-style note.
        # Relies on the enclosing-scope `Normality` flag (set below, before
        # this helper is called).
        output = {}
        nX1 = np.sum(~np.isnan(X1))
        nX2 = np.sum(~np.isnan(X2))
        if cfg_pmc == 'parametric' or cfg_pmc == 'auto':
            # Check homogeneity of variance via Levene's test; only trust
            # the pooled-variance t-test for equal-sized, equal-variance,
            # normal samples.
            stat_lev, pval_lev = stats.levene(
                X1, X2, center='median', proportiontocut=0.05)
            VarType = pval_lev > 0.05 and nX1 == nX2

            if 'np' in pair:  # 'unpaired'
                if VarType and Normality:
                    # Independent t-test: requires approximate normality
                    # within each group (technically of the residuals).
                    stat_value, pval = stats.ttest_ind(
                        X1, X2, axis=0, equal_var=True, nan_policy='omit', alternative='two-sided')
                    notes_stat = 'unpaired t test'
                else:
                    # Unequal variances: Welch's correction (adjusted df via
                    # Welch-Satterthwaite inside scipy).
                    stat_value, pval = stats.ttest_ind(
                        X1, X2, axis=0, equal_var=False, nan_policy='omit', alternative='two-sided')
                    notes_stat = 'Welchs t-test'
                # NOTE(review): the pooled df (n1+n2-2) is reported even for
                # Welch's test, matching the original output format; the true
                # Welch df differs — confirm before relying on this string.
                notes_APA = f't({nX1+nX2-2})={round(stat_value, 5)},p={round(pval, 5)}'
            else:  # 'paired'
                # The paired t-test is fairly robust to normality violations.
                stat_value, pval = stats.ttest_rel(
                    X1, X2, axis=0, nan_policy='omit', alternative='two-sided')
                notes_stat = 'paired t test'
                notes_APA = f't({nX1-1})={round(stat_value, 5)},p={round(pval, 5)}'
        elif cfg_pmc == 'non-parametric':
            if 'np' in pair:  # Mann-Whitney U for independent samples
                stat_value, pval = stats.mannwhitneyu(
                    X1, X2, method='exact', nan_policy='omit')
                notes_stat = 'Mann-Whitney U'
                if nX1 == nX2:
                    notes_APA = f'U(n={nX1})={round(stat_value, 5)},p={round(pval, 5)}'
                else:
                    notes_APA = f'U(n1={nX1},n2={nX2})={round(stat_value, 5)},p={round(pval, 5)}'
            else:  # Wilcoxon signed-rank for paired samples
                stat_value, pval = stats.wilcoxon(
                    X1, X2, method='exact', nan_policy='omit')
                notes_stat = 'Wilcoxon signed-rank'
                if nX1 == nX2:
                    notes_APA = f'Z(n={nX1})={round(stat_value, 5)},p={round(pval, 5)}'
                else:
                    notes_APA = f'Z(n1={nX1},n2={nX2})={round(stat_value, 5)},p={round(pval, 5)}'

        # filling output
        output['stat'] = stat_value
        output['pval'] = pval
        output['method'] = notes_stat
        output['APA'] = notes_APA

        print(f"{output['method']}\n {notes_APA}\n\n")

        return output, pval

    Normality1 = check_normality(X1)
    Normality2 = check_normality(X2)
    Normality = all([Normality1, Normality2])

    cfg_pmc = corr_pmc(pmc)
    cfg_pair = corr_pair(pair)

    output, p = sub_cmpt_2group(
        X1, X2, cfg_pmc=cfg_pmc, pair=cfg_pair)
    return p, output
|
1474
|
+
|
1475
|
+
|
1476
|
+
# ======compare 2 group test===================================================
|
1477
|
+
# # Example
|
1478
|
+
# X1 = [19, 22, 16, 29, 24]
|
1479
|
+
# X2 = [20, 11, 17, 12, 22]
|
1480
|
+
|
1481
|
+
# p, res= FuncCmpt(X1, X2, pmc='pmc', pair='unparrr')
|
1482
|
+
|
1483
|
+
# =============================================================================
|
1484
|
+
|
1485
|
+
# =============================================================================
|
1486
|
+
# # method = ['anova', # 'One-way and N-way ANOVA',
|
1487
|
+
# # 'rm_anova', # 'One-way and two-way repeated measures ANOVA',
|
1488
|
+
# # 'mixed_anova', # 'Two way mixed ANOVA',
|
1489
|
+
# # 'welch_anova', # 'One-way Welch ANOVA',
|
1490
|
+
# # 'kruskal', # 'Non-parametric one-way ANOVA'
|
1491
|
+
# # 'friedman', # Non-parametric one-way repeated measures ANOVA
|
1492
|
+
# # ]
|
1493
|
+
# =============================================================================
|
1494
|
+
|
1495
|
+
|
1496
|
+
# =============================================================================
|
1497
|
+
# # method = ['anova', # 'One-way and N-way ANOVA',
|
1498
|
+
# # 'rm_anova', # 'One-way and two-way repeated measures ANOVA',
|
1499
|
+
# # 'mixed_anova', # 'Two way mixed ANOVA',
|
1500
|
+
# # 'welch_anova', # 'One-way Welch ANOVA',
|
1501
|
+
# # 'kruskal', # 'Non-parametric one-way ANOVA'
|
1502
|
+
# # 'friedman', # Non-parametric one-way repeated measures ANOVA
|
1503
|
+
# # ]
|
1504
|
+
# =============================================================================
|
1505
|
+
def df_wide_long(df):
|
1506
|
+
rows, columns = df.shape
|
1507
|
+
if columns > rows:
|
1508
|
+
return "Wide"
|
1509
|
+
elif rows > columns:
|
1510
|
+
return "Long"
|
1511
|
+
|
1512
|
+
def FuncMultiCmpt(pmc='pmc', pair='unpair', data=None, dv=None, factor=None,
                  ss_type=2, detailed=True, effsize='np2',
                  correction='auto', between=None, within=None,
                  subject=None, group=None
                  ):
    """Multi-group comparison (ANOVA family) with automatic test selection.

    Args:
        pmc (str): 'pmc'/'parametric', 'upmc'/'npmc'/'non-parametric';
            anything else selects 'auto' (normality + variance checks decide).
        pair (str): 'unpaired', 'paired' or 'mix' (substring matched).
        data (pandas.DataFrame): input data; long format expected by the
            pingouin calls (friedman melts wide input itself).
        dv (str): name of the dependent-variable column.
        factor: between/within factor column name(s).
        ss_type (int): sums-of-squares type forwarded to pingouin.anova.
        detailed (bool): forwarded to pingouin.
        effsize (str): effect-size measure forwarded to pingouin.
        correction: sphericity correction for repeated-measures designs.
        between, within, subject (str): column names for mixed/repeated designs.
        group (str): column used for the per-group normality and Levene
            checks; defaults to `factor`.

    Returns:
        dict: keys 'stat' (test name), 'APA' (formatted strings), 'pval',
        'res_tab' (pingouin result table) and 'res_posthoc' (pairwise
        comparisons over several p-adjustments, or None when the omnibus
        test is not significant).
    """

    def corr_pair(pair):
        # Map the free-form `pair` string onto paired/unpaired/mix.
        # NOTE(review): falls through to None for unmatched input — verify
        # callers always pass a recognizable value.
        cfg_pair = None
        if 'pa' in pair.lower() and 'np' not in pair.lower():
            cfg_pair = 'paired'
        elif 'np' in pair.lower():
            cfg_pair = 'unpaired'
        elif 'mix' in pair.lower():
            cfg_pair = 'mix'
        return cfg_pair

    def check_normality(data):
        # Shapiro-Wilk; p > .05 is treated as "normally distributed".
        stat_shapiro, pval_shapiro = stats.shapiro(data)
        if pval_shapiro > 0.05:
            Normality = True
        else:
            Normality = False
        print(f'\n normally distributed\n') if Normality else print(
            f'\n NOT normally distributed\n')
        return Normality

    def corr_pmc(pmc):
        # Map the free-form `pmc` string onto parametric/non-parametric/auto.
        cfg_pmc = None
        if pmc.lower() in {'pmc', 'parametric'} and pmc.lower() not in {'upmc', 'npmc', 'nonparametric', 'non-parametric'}:
            cfg_pmc = 'parametric'
        elif pmc.lower() in {'upmc', 'npmc', 'nonparametric', 'non-parametric'} and pmc.lower() not in {'pmc', 'parametric'}:
            cfg_pmc = 'non-parametric'
        else:
            cfg_pmc = 'auto'
        return cfg_pmc

    def extract_apa(res_tab):
        # Build APA-style "Source:F(df1, df2)=...,p=..." strings, one per
        # table row. The degrees-of-freedom column names differ between
        # pingouin result layouts, hence the three branches.
        notes_APA = []
        if "ddof1" in res_tab:
            for irow in range(res_tab.shape[0]):
                note_tmp = f'{res_tab.Source[irow]}:F{round(res_tab.ddof1[irow]),round(res_tab.ddof2[irow])}={round(res_tab.F[irow], 5)},p={round(res_tab["p-unc"][irow], 5)}'
                notes_APA.append([note_tmp])
        elif "DF" in res_tab:
            print(res_tab.shape[0])
            # Last row is the residual/error term, so it gets no APA line.
            for irow in range(res_tab.shape[0]-1):
                note_tmp = f'{res_tab.Source[irow]}:F{round(res_tab.DF[irow]),round(res_tab.DF[res_tab.shape[0]-1])}={round(res_tab.F[irow], 5)},p={round(res_tab["p-unc"][irow], 5)}'
                notes_APA.append([note_tmp])
            notes_APA.append(['NaN'])
        elif "DF1" in res_tab:  # in 'mix' case
            for irow in range(res_tab.shape[0]):
                note_tmp = f'{res_tab.Source[irow]}:F{round(res_tab.DF1[irow]),round(res_tab.DF2[irow])}={round(res_tab.F[irow], 5)},p={round(res_tab["p-unc"][irow], 5)}'
                notes_APA.append([note_tmp])
        return notes_APA

    def anovatable(res_tab):
        # Add effect-size columns (eta-squared-like 'est_sq', omega-squared)
        # computed from the sums of squares, for either a statsmodels- or a
        # pingouin-shaped result table.
        if 'df' in res_tab:  # statsmodels layout
            res_tab['mean_sq'] = res_tab[:]['sum_sq']/res_tab[:]['df']
            res_tab['est_sq'] = res_tab[:-1]['sum_sq'] / \
                sum(res_tab['sum_sq'])
            res_tab['omega_sq'] = (res_tab[:-1]['sum_sq']-(res_tab[:-1]['df'] *
                                   res_tab['mean_sq'][-1]))/(sum(res_tab['sum_sq'])+res_tab['mean_sq'][-1])
        elif 'DF' in res_tab:  # pingouin layout
            res_tab['MS'] = res_tab[:]['SS']/res_tab[:]['DF']
            res_tab['est_sq'] = res_tab[:-1]['SS']/sum(res_tab['SS'])
            res_tab['omega_sq'] = (res_tab[:-1]['SS']-(res_tab[:-1]['DF'] *
                                   res_tab['MS'][1]))/(sum(res_tab['SS'])+res_tab['MS'][1])
        # Prefer pingouin's np2 as the effect size when available.
        # NOTE(review): the 'p-unc' condition is checked twice in a row —
        # looks like one of the two was meant to guard something else.
        if 'p-unc' in res_tab:
            if 'np2' in res_tab:
                res_tab['est_sq'] = res_tab['np2']
        if 'p-unc' in res_tab:
            res_tab['PR(>F)'] = res_tab['p-unc']
        return res_tab

    def run_anova(data, dv, factor, ss_type=2, detailed=True, effsize='np2'):
        # One-way / N-way ANOVA via pingouin.
        # note: if the data is balanced (equal sample size for each group),
        # Type 1, 2, and 3 sums of squares (typ parameter) will produce
        # similar results. A statsmodels alternative would be:
        #   lm = ols("values ~ C(group)", data=df).fit()
        #   res_tab = anova_lm(lm, typ=ss_type)
        # but statsmodels provides no effect sizes, so anovatable() adds
        # eta-squared / omega-squared from the sums of squares instead.
        res_tab = pg.anova(dv=dv, between=factor, data=data,
                           detailed=detailed, ss_type=ss_type, effsize=effsize)
        res_tab = anovatable(res_tab)
        return res_tab

    def run_rmanova(data, dv, factor, subject, correction='auto', detailed=True, effsize='ng2'):
        # One-way repeated-measures ANOVA using a long-format dataset.
        res_tab = pg.rm_anova(data=data, dv=dv, within=factor,
                              subject=subject, detailed=detailed, effsize=effsize)
        return res_tab

    def run_welchanova(data, dv, factor):
        # When the groups are balanced and have equal variances, the optimal
        # post-hoc test is the Tukey-HSD test (pingouin.pairwise_tukey()). If the
        # groups have unequal variances, the Games-Howell test is more adequate
        # (pingouin.pairwise_gameshowell()). Results have been tested against R.
        res_tab = pg.welch_anova(data=data, dv=dv, between=factor)
        res_tab = anovatable(res_tab)
        return res_tab

    def run_mixedanova(data, dv, between, within, subject, correction='auto', effsize='np2'):
        # Notes
        # Data are expected to be in long-format (even the repeated measures).
        # If your data is in wide-format, you can use the pandas.melt() function
        # to convert from wide to long format.

        # Warning
        # If the between-subject groups are unbalanced(=unequal sample sizes), a
        # type II ANOVA will be computed. Note however that SPSS, JAMOVI and JASP
        # by default return a type III ANOVA, which may lead to slightly different
        # results.
        res_tab = pg.mixed_anova(data=data, dv=dv, within=within, subject=subject,
                                 between=between, correction=correction, effsize=effsize)
        res_tab = anovatable(res_tab)
        return res_tab

    def run_friedman(data, dv, factor, subject, method='chisq'):
        # Friedman test for repeated measurements: the non-parametric
        # (rank-based) one-way repeated-measures ANOVA.
        # Wide-format input is melted to long format first.
        # NOTE(review): df_wide_long() returns None for a square frame,
        # which would make the `in` test raise — confirm inputs are never
        # square here.
        if "Wide" in df_wide_long(data):
            df_long = data.melt(ignore_index=False).reset_index()
            res_tab = pg.friedman(data=df_long, dv='value',
                                  within="variable", subject="index", method=method)
        else:
            res_tab = pg.friedman(data, dv=dv, within=factor, subject=subject, method=method)
        res_tab = anovatable(res_tab)
        return res_tab

    def run_kruskal(data, dv, factor):
        # Kruskal-Wallis H-test for independent samples
        res_tab = pg.kruskal(data=data, dv=dv, between=factor)
        res_tab = anovatable(res_tab)
        return res_tab

    # Normality Check:
    # Conduct normality tests (Shapiro-Wilk) for each group.
    # If the data is approximately normally distributed, ANOVA is robust to
    # moderate departures from normality, especially with larger sample sizes.
    if group is None:
        group = factor

    norm_array = []
    for sub_group in data[group].unique():
        norm_curr = check_normality(
            data.loc[data[group] == sub_group, dv])
        norm_array.append(norm_curr)
    norm_all = True if all(norm_array) else False

    # Homogeneity of Variances:
    # Check for homogeneity of variances (homoscedasticity) among groups via
    # Levene's test (pingouin.homoscedasticity). If variances are
    # significantly different, a robust ANOVA method (Welch) is used instead.
    res_levene = None
    variance_all = pg.homoscedasticity(
        data, dv=dv, group=group, method='levene', alpha=0.05)
    res_levene = True if variance_all.iloc[0, 1] > 0.05 else False

    # Summarize the assumption checks before choosing the test.
    notes_norm = 'normally' if norm_all else 'NOT-normally'
    notes_variance = 'equal' if res_levene else 'unequal'
    print(f'Data is {notes_norm} distributed, shows {notes_variance} variance')

    cfg_pmc = corr_pmc(pmc)
    cfg_pair = corr_pair(pair)
    output = {}
    if (cfg_pmc == 'parametric') or (cfg_pmc == 'auto'):
        if 'np' in cfg_pair:  # 'unpaired'
            if cfg_pmc == 'auto':
                # 'auto': pick ANOVA / Welch / Kruskal from the checks above.
                if norm_all:
                    if res_levene:
                        res_tab = run_anova(data, dv, factor, ss_type=ss_type,
                                            detailed=True, effsize='np2')
                        notes_stat = f'{data[factor].nunique()} Way ANOVA'
                        notes_APA = extract_apa(res_tab)

                    else:
                        res_tab = run_welchanova(data, dv, factor)
                        notes_stat = f'{data[factor].nunique()} Way Welch ANOVA'
                        notes_APA = extract_apa(res_tab)

                else:

                    res_tab = run_kruskal(data, dv, factor)
                    notes_stat = f'Non-parametric Kruskal: {data[factor].nunique()} Way ANOVA'
                    notes_APA = extract_apa(res_tab)

            elif cfg_pmc == 'parametric':
                res_tab = run_anova(data, dv, factor, ss_type=ss_type,
                                    detailed=True, effsize='np2')
                notes_stat = f'{data[factor].nunique()} Way ANOVA'
                notes_APA = extract_apa(res_tab)

        elif 'pa' in cfg_pair and 'np' not in cfg_pair:  # 'paired'
            res_tab = run_rmanova(data, dv, factor, subject, correction='auto',
                                  detailed=True, effsize='ng2')
            notes_stat = f'{data[factor].nunique()} Way Repeated measures ANOVA'
            notes_APA = extract_apa(res_tab)

        elif 'mix' in cfg_pair or 'both' in cfg_pair:
            res_tab = run_mixedanova(data, dv, between, within, subject)
            notes_stat = ""
            notes_APA = extract_apa(res_tab)

    elif cfg_pmc == 'non-parametric':
        if 'np' in cfg_pair:  # 'unpaired'
            res_tab = run_kruskal(data, dv, factor)
            notes_stat = f'Non-parametric Kruskal: {data[factor].nunique()} Way ANOVA'
            notes_APA = f'H({res_tab.ddof1[0]},n={data.shape[0]})={round(res_tab.H[0], 5)},p={round(res_tab["p-unc"][0], 5)}'

        elif 'pa' in cfg_pair and 'np' not in cfg_pair:  # 'paired'
            res_tab = run_friedman(data, dv, factor, subject, method='chisq')
            notes_stat = f'Non-parametric {data[factor].nunique()} Way Friedman repeated measures ANOVA'
            notes_APA = f'X^2({res_tab.ddof1[0]})={round(res_tab.Q[0], 5)},p={round(res_tab["p-unc"][0], 5)}'

    # Post-Hoc Tests (if significant):
    # If the omnibus test indicates significant differences, perform pairwise
    # comparisons (several p-adjustment methods) to identify which groups differ.
    # https://pingouin-stats.org/build/html/generated/pingouin.pairwise_tests.html
    go_pmc = True if cfg_pmc == 'parametric' else False
    go_subject = subject if ('pa' in cfg_pair) and (
        'np' not in cfg_pair) else None
    # NOTE(review): each of the two pairs of assignments below overwrites the
    # first value, so the 'mix'/'both' branch of go_mix_between/go_mix_within
    # never survives — confirm whether the second assignments were meant to
    # be conditional on something else.
    go_mix_between = between if ('mix' in cfg_pair) or (
        'both' in cfg_pair) else None
    go_mix_between = None if ('pa' in cfg_pair) or (
        'np' not in cfg_pair) else factor
    go_mix_within = within if ('mix' in cfg_pair) or (
        'both' in cfg_pair) else None
    go_mix_within = factor if ('pa' in cfg_pair) or (
        'np' not in cfg_pair) else None

    if res_tab['p-unc'][0] <= .05:
        # Pairwise Comparisons
        method_post_hoc = [
            "bonf",  # 'bonferroni', # : one-step correction
            "sidak",  # one-step correction
            "holm",  # step-down method using Bonferroni adjustments
            "fdr_bh",  # Benjamini/Hochberg (non-negative)
            "fdr_by",  # Benjamini/Yekutieli (negative)
        ]
        res_posthoc = pd.DataFrame()
        for met in method_post_hoc:
            post_curr = pg.pairwise_tests(data=data, dv=dv, between=go_mix_between, within=go_mix_within, subject=go_subject, parametric=go_pmc, marginal=True, alpha=0.05, alternative='two-sided',
                                          padjust=met)

            res_posthoc = pd.concat([res_posthoc, post_curr],
                                    ignore_index=True)
    else:
        res_posthoc = None
    output['res_posthoc'] = res_posthoc

    # filling output
    pd.set_option('display.max_columns', None)
    output['stat'] = notes_stat
    output['APA'] = notes_APA
    output['pval'] = res_tab['p-unc']
    output['res_tab'] = res_tab
    if res_tab.shape[0] == len(notes_APA):
        output['res_tab']['APA'] = output['APA']  # note APA in the table

    return output
|
1833
|
+
|
1834
|
+
|
1835
|
+
# =============================================================================
|
1836
|
+
# # One-way ANOVA
|
1837
|
+
# =============================================================================
|
1838
|
+
# url = "http://stats191.stanford.edu/data/rehab.csv"
|
1839
|
+
# rehab_table = pd.read_table(url, delimiter=",")
|
1840
|
+
# rehab_table.to_csv("rehab.table")
|
1841
|
+
# fig, ax = plt.subplots(figsize=(8, 6))
|
1842
|
+
# fig = rehab_table.boxplot("Time", "Fitness", ax=ax, grid=False)
|
1843
|
+
# # fig, ax = plt.subplots(figsize=(8, 6))
|
1844
|
+
# # set_pub()
|
1845
|
+
# # sns.boxenplot(x="Time",y="Fitness",data = rehab_table)
|
1846
|
+
|
1847
|
+
# out2 = FuncMultiCmpt(pmc='pmc', pair='unpair',
|
1848
|
+
# data=rehab_table, dv='Time', factor='Fitness')
|
1849
|
+
# # print(out2['res_tab'])
|
1850
|
+
# # print(out2['APA'])
|
1851
|
+
# out2['res_posthoc']
|
1852
|
+
# out2['res_posthoc']['p-unc'][0]
|
1853
|
+
# out2['res_posthoc']['p-adjust'][0]
|
1854
|
+
# out2['res_posthoc']['p-corr'][0]
|
1855
|
+
|
1856
|
+
|
1857
|
+
# =============================================================================
|
1858
|
+
# # Interactions and ANOVA
|
1859
|
+
# https://www.statsmodels.org/dev/examples/notebooks/generated/interactions_anova.html
|
1860
|
+
# url = "http://stats191.stanford.edu/data/salary.table"
|
1861
|
+
# fh = urlopen(url)
|
1862
|
+
# df = pd.read_table(fh)
|
1863
|
+
# out1 = FuncMultiCmpt(pmc='pmc', pair='unpaired', data=df,
|
1864
|
+
# dv='S', factor=['X', 'E', 'M'], group='M')
|
1865
|
+
# # # two-way anova
|
1866
|
+
# # https://www.statology.org/two-way-anova-python/
|
1867
|
+
# # =============================================================================
|
1868
|
+
# # df = pd.DataFrame({'water': np.repeat(['daily', 'weekly'], 15),
|
1869
|
+
# # 'sun': np.tile(np.repeat(['low', 'med', 'high'], 5), 2),
|
1870
|
+
# # 'height': [6, 6, 6, 5, 6, 5, 5, 6, 4, 5,
|
1871
|
+
# # 6, 6, 7, 8, 7, 3, 4, 4, 4, 5,
|
1872
|
+
# # 4, 4, 4, 4, 4, 5, 6, 6, 7, 8]})
|
1873
|
+
# # out1 = FuncMultiCmpt(pmc='pmc', pair='unpaired', data=df,
|
1874
|
+
# # dv='height', factor=['water','sun'],group='water')
|
1875
|
+
|
1876
|
+
|
1877
|
+
# =============================================================================
|
1878
|
+
# # two way anova
|
1879
|
+
# https://www.geeksforgeeks.org/how-to-perform-a-two-way-anova-in-python/
|
1880
|
+
# =============================================================================
|
1881
|
+
# df1=pd.DataFrame({'Fertilizer': np.repeat(['daily', 'weekly'], 15),
|
1882
|
+
# 'Watering': np.repeat(['daily', 'weekly'], 15),
|
1883
|
+
# 'height': [14, 16, 15, 15, 16, 13, 12, 11,
|
1884
|
+
# 14, 15, 16, 16, 17, 18, 14, 13,
|
1885
|
+
# 14, 14, 14, 15, 16, 16, 17, 18,
|
1886
|
+
# 14, 13, 14, 14, 14, 15]})
|
1887
|
+
|
1888
|
+
# df1['subject'] = np.tile(range(0, 15), (1, 2)).T
|
1889
|
+
# out1 = FuncMultiCmpt(pmc='pmc', pair='unpaired', data=df1,
|
1890
|
+
# dv='height', factor=['Fertilizer','Watering'],group='Watering')
|
1891
|
+
# # print(out1['stat'])
|
1892
|
+
# # print(out1['res_tab'])
|
1893
|
+
|
1894
|
+
# =============================================================================
|
1895
|
+
# # welch anova
|
1896
|
+
# https://www.geeksforgeeks.org/how-to-perform-welchs-anova-in-python/
|
1897
|
+
# =============================================================================
|
1898
|
+
# df = pd.DataFrame({'score': [64, 66, 68, 75, 78, 94, 98, 79, 71, 80,
|
1899
|
+
# 91, 92, 93, 90, 97, 94, 82, 88, 95, 96,
|
1900
|
+
# 79, 78, 88, 94, 92, 85, 83, 85, 82, 81],
|
1901
|
+
# 'group': np.repeat(['strat1', 'strat2', 'strat3'],
|
1902
|
+
# repeats=10)})
|
1903
|
+
# out1 = FuncMultiCmpt(pmc='auto',pair='unpaired',data=df, dv='score', factor='group', group='group')
|
1904
|
+
# =============================================================================
|
1905
|
+
# # two way anova
|
1906
|
+
# https://www.statology.org/two-way-anova-python/
|
1907
|
+
# =============================================================================
|
1908
|
+
# df = pd.DataFrame({'water': np.repeat(['daily', 'weekly'], 15),
|
1909
|
+
# 'sun': np.tile(np.repeat(['low', 'med', 'high'], 5), 2),
|
1910
|
+
# 'height': [6, 6, 6, 5, 6, 5, 5, 6, 4, 5,
|
1911
|
+
# 6, 6, 7, 8, 7, 3, 4, 4, 4, 5,
|
1912
|
+
# 4, 4, 4, 4, 4, 5, 6, 6, 7, 8]})
|
1913
|
+
# df['subject'] = np.tile(range(0, 15), (1, 2)).T
|
1914
|
+
# out1 = FuncMultiCmpt(pmc='pmc', pair='unpaired', data=df,
|
1915
|
+
# dv='height', factor=['water', 'sun'], subject='subject', group='water')
|
1916
|
+
# # print(out1['stat'])
|
1917
|
+
# # print(out1['res_tab'])
|
1918
|
+
|
1919
|
+
# =============================================================================
|
1920
|
+
# # 3-way ANOVA
|
1921
|
+
# =============================================================================
|
1922
|
+
# df = pd.DataFrame({'program': np.repeat([1, 2], 20),
|
1923
|
+
# 'gender': np.tile(np.repeat(['M', 'F'], 10), 2),
|
1924
|
+
# 'division': np.tile(np.repeat([1, 2], 5), 4),
|
1925
|
+
# 'height': [7, 7, 8, 8, 7, 6, 6, 5, 6, 5,
|
1926
|
+
# 5, 5, 4, 5, 4, 3, 3, 4, 3, 3,
|
1927
|
+
# 6, 6, 5, 4, 5, 4, 5, 4, 4, 3,
|
1928
|
+
# 2, 2, 1, 4, 4, 2, 1, 1, 2, 1]})
|
1929
|
+
# df['subject'] = np.tile(range(0, 20), (1, 2)).T
|
1930
|
+
# out1 = FuncMultiCmpt(pmc='pmc', pair='unpaired', data=df,
|
1931
|
+
# dv='height', factor=['gender', 'program', 'division'], subject='subject', group='program')
|
1932
|
+
# # print(out1['stat'])
|
1933
|
+
# # print(out1['res_tab'])
|
1934
|
+
|
1935
|
+
# =============================================================================
|
1936
|
+
# # Repeated Measures ANOVA in Python
|
1937
|
+
# =============================================================================
|
1938
|
+
# df = pd.DataFrame({'patient': np.repeat([1, 2, 3, 4, 5], 4),
|
1939
|
+
# 'drug': np.tile([1, 2, 3, 4], 5),
|
1940
|
+
# 'response': [30, 28, 16, 34,
|
1941
|
+
# 14, 18, 10, 22,
|
1942
|
+
# 24, 20, 18, 30,
|
1943
|
+
# 38, 34, 20, 44,
|
1944
|
+
# 26, 28, 14, 30]})
|
1945
|
+
# # df['subject'] = np.tile(range(0, 20), (1, 2)).T
|
1946
|
+
# out1 = FuncMultiCmpt(pmc='pmc', pair='paired', data=df,
|
1947
|
+
# dv='response', factor=['drug'], subject='patient', group='drug')
|
1948
|
+
# print(out1['stat'])
|
1949
|
+
# print(out1['res_tab'])
|
1950
|
+
# print(out1['APA'])
|
1951
|
+
|
1952
|
+
# =============================================================================
|
1953
|
+
# # repeated anova
|
1954
|
+
# https://www.geeksforgeeks.org/how-to-perform-a-repeated-measures-anova-in-python/
|
1955
|
+
# =============================================================================
|
1956
|
+
# df = pd.DataFrame({'Cars': np.repeat([1, 2, 3, 4, 5], 4),
|
1957
|
+
# 'Engine Oil': np.tile([1, 2, 3, 4], 5),
|
1958
|
+
# 'Mileage': [36, 38, 30, 29,
|
1959
|
+
# 34, 38, 30, 29,
|
1960
|
+
# 34, 28, 38, 32,
|
1961
|
+
# 38, 34, 20, 44,
|
1962
|
+
# 26, 28, 34, 50]})
|
1963
|
+
# out1 = FuncMultiCmpt(pmc='pmc', pair='paired', data=df,
|
1964
|
+
# dv='Mileage', factor=['Engine Oil'], subject='Cars', group='Cars')
|
1965
|
+
# =============================================================================
|
1966
|
+
# #two-way repeated anova
|
1967
|
+
# =============================================================================
|
1968
|
+
# df = pd.read_csv(
|
1969
|
+
# "https://reneshbedre.github.io/assets/posts/anova/plants_leaves_two_within.csv")
|
1970
|
+
# df
|
1971
|
+
# # df['subject'] = np.tile(range(0, 20), (1, 2)).T
|
1972
|
+
# out1 = FuncMultiCmpt(pmc='pmc', pair='paired', data=df,
|
1973
|
+
# dv='num_leaves', factor=['year', 'time'], subject='plants', group='year')
|
1974
|
+
# print(out1['stat'])
|
1975
|
+
# print(out1['res_tab'])
|
1976
|
+
# print(out1['APA'])
|
1977
|
+
|
1978
|
+
# =============================================================================
|
1979
|
+
# # repeated anova
|
1980
|
+
# =============================================================================
|
1981
|
+
# df = pd.read_csv('/Users/macjianfeng/Desktop/test.csv')
|
1982
|
+
# df.head()
|
1983
|
+
# df.loc[df['animal'].str.contains('Sleep'), 'experiment'] = 'sleep'
|
1984
|
+
# df.loc[df['animal'].str.contains('Wake'), 'experiment'] = 'wake'
|
1985
|
+
# df.loc[df['variable'].str.contains('hypo'), 'region'] = 'hypo'
|
1986
|
+
# df.loc[df['variable'].str.contains('cort'), 'region'] = 'cort'
|
1987
|
+
# df
|
1988
|
+
# for i in range(4):
|
1989
|
+
# match i:
|
1990
|
+
# case 0:
|
1991
|
+
# prot_name = 'A1'
|
1992
|
+
# case 1:
|
1993
|
+
# prot_name = 'A2'
|
1994
|
+
# case 2:
|
1995
|
+
# prot_name = '845'
|
1996
|
+
# case 3:
|
1997
|
+
# prot_name = '831'
|
1998
|
+
# df_tmp = df[df["variable"].str.contains(prot_name)]
|
1999
|
+
# df_tmp['protein'] = prot_name
|
2000
|
+
# df_tmp = df_tmp.reset_index()
|
2001
|
+
# print(df_tmp)
|
2002
|
+
|
2003
|
+
# out1 = FuncMultiCmpt(pmc='pmc', pair='mix', data=df_tmp,
|
2004
|
+
# dv='value', between='experiment', within='region', subject='animal', group='experiment')
|
2005
|
+
# print(out1['stat'])
|
2006
|
+
# print(out1['res_tab'])
|
2007
|
+
# # =============================================================================
|
2008
|
+
# One-way ANOVA
|
2009
|
+
# df1 = pd.read_csv('/Users/macjianfeng/Desktop/Book2.csv')
|
2010
|
+
# df2 = df1.melt()
|
2011
|
+
# out1 = FuncMultiCmpt(pmc='npmc', pair='unpaired', data=df2,
|
2012
|
+
# dv='libido', factor=['brand x', 'brand y', 'brand z'], subject='participant')
|
2013
|
+
# print(out1['stat'])
|
2014
|
+
# print(out1['res_tab'])
|
2015
|
+
# =============================================================================
|
2016
|
+
|
2017
|
+
|
2018
|
+
# =============================================================================
|
2019
|
+
# # #One-way ANOVA new example: https://www.pythonfordatascience.org/anova-python/
|
2020
|
+
# =============================================================================
|
2021
|
+
# df1 = pd.read_csv(
|
2022
|
+
# "https://raw.githubusercontent.com/researchpy/Data-sets/master/difficile.csv")
|
2023
|
+
# df1.drop('person', axis=1, inplace=True)
|
2024
|
+
# # Recoding value from numeric to string
|
2025
|
+
# df1['dose'].replace({1: 'placebo', 2: 'low', 3: 'high'}, inplace=True)
|
2026
|
+
# df1.head(10)
|
2027
|
+
|
2028
|
+
# out3= FuncMultiCmpt(pmc='pmc', data=df1, dv='libido', factor='dose')
|
2029
|
+
# # print(out3['res_tab'])
|
2030
|
+
# # # print(out3['res_posthoc'])
|
2031
|
+
# # print(out3['APA'])
|
2032
|
+
|
2033
|
+
# =============================================================================
|
2034
|
+
# https://lifewithdata.com/2023/06/08/how-to-perform-a-two-way-anova-in-python/
|
2035
|
+
# =============================================================================
|
2036
|
+
# data = {
|
2037
|
+
# 'Diet': ['A', 'A', 'A', 'A', 'B', 'B', 'B', 'B', 'C', 'C', 'C', 'C'],
|
2038
|
+
# 'Workout': ['Low', 'Medium', 'High', 'Low', 'Medium', 'High', 'Low', 'Medium', 'High', 'Low', 'Medium', 'High'],
|
2039
|
+
# 'WeightLoss': [3, 4, 5, 3.2, 5, 6, 5.2, 6, 5.5, 4, 5.5, 6.2]
|
2040
|
+
# }
|
2041
|
+
# df = pd.DataFrame(data)
|
2042
|
+
# out4= FuncMultiCmpt(pmc='pmc', pair='unpaired',data=df, dv='WeightLoss', factor=['Diet','Workout'],group='Diet')
|
2043
|
+
|
2044
|
+
# =============================================================================
|
2045
|
+
# # convert to list to string
|
2046
|
+
# =============================================================================
|
2047
|
+
def list2str(x_str):
    """Concatenate the string form of every element of *x_str* into one string."""
    return "".join(map(str, x_str))
|
2050
|
+
def str2list(str_):
    """Return the items of *str_* (typically a string's characters) as a list.

    Args:
        str_: Any iterable, usually a string.

    Returns:
        list: One element per item of *str_*.
    """
    # list() replaces the previous side-effect comprehension
    # ([l.append(x) for x in str_]), which built and discarded a list of Nones.
    return list(str_)
|
2054
|
+
|
2055
|
+
def load_img(fpath):
    """
    Load an image from the specified file path.

    Args:
        fpath (str): The file path to the image.

    Returns:
        PIL.Image: The loaded image.

    Raises:
        FileNotFoundError: If the specified file is not found.
        OSError: If the specified file cannot be opened or is not a valid image file.
    """
    from PIL import Image

    try:
        return Image.open(fpath)
    except FileNotFoundError as e:
        # Chain the original exception so the real cause stays in the traceback.
        raise FileNotFoundError(f"The file '{fpath}' was not found.") from e
    except OSError as e:
        raise OSError(f"Unable to open file '{fpath}' or it is not a valid image file.") from e
|
2078
|
+
|
2079
|
+
def apply_filter(img, *args):
    """
    Apply the specified filter to the image.

    Args:
        img (PIL.Image): The input image.
        *args: A filter name (str) and, optionally, a numeric value for filters
            that take one (e.g. radius for blur filters). Order-independent:
            the string argument is taken as the filter name, any non-string
            argument as the filter value.

    Returns:
        PIL.Image: The filtered image.

    Raises:
        ValueError: If no filter name was given, or the name is not supported.
    """
    def correct_filter_name(filter_name):
        # Map loose, partial spellings onto canonical filter names.
        name = filter_name.lower()
        if 'bl' in name and 'box' not in name:
            return 'BLUR'
        elif 'cont' in name:
            return 'Contour'
        elif 'det' in name:
            return 'Detail'
        elif 'edg' in name and 'mo' not in name and 'f' not in name:
            return 'EDGE_ENHANCE'
        elif 'edg' in name and 'mo' in name:
            return 'EDGE_ENHANCE_MORE'
        elif 'emb' in name:
            return 'EMBOSS'
        elif 'edg' in name and 'f' in name:
            return 'FIND_EDGES'
        elif 'sh' in name and 'mo' not in name:
            return 'SHARPEN'
        elif 'sm' in name and 'mo' not in name:
            return 'SMOOTH'
        elif 'sm' in name and 'mo' in name:
            return 'SMOOTH_MORE'
        elif 'min' in name:
            return 'MIN_FILTER'
        elif 'max' in name:
            return 'MAX_FILTER'
        elif 'mod' in name:
            return 'MODE_FILTER'
        elif 'mul' in name:
            return 'MULTIBAND_FILTER'
        elif 'gau' in name:
            return 'GAUSSIAN_BLUR'
        elif 'box' in name:
            return 'BOX_BLUR'
        elif 'med' in name:
            return 'MEDIAN_FILTER'
        else:
            supported_filters = [
                "BLUR",
                "CONTOUR",
                "DETAIL",
                "EDGE_ENHANCE",
                "EDGE_ENHANCE_MORE",
                "EMBOSS",
                "FIND_EDGES",
                "SHARPEN",
                "SMOOTH",
                "SMOOTH_MORE",
                "MIN_FILTER",
                "MAX_FILTER",
                "MODE_FILTER",
                "MULTIBAND_FILTER",
                "GAUSSIAN_BLUR",
                "BOX_BLUR",
                "MEDIAN_FILTER",
            ]
            raise ValueError(
                f"Unsupported filter: {filter_name}, should be one of: {supported_filters}"
            )

    # BUG FIX: both variables were previously left unbound when the caller
    # passed only a filter name (no value) or no string at all, causing
    # UnboundLocalError/NameError further down.
    filter_name = None
    filter_value = None
    for arg in args:
        if isinstance(arg, str):
            filter_name = correct_filter_name(arg)
        else:
            filter_value = arg
    if filter_name is None:
        raise ValueError("apply_filter requires a filter name (str) in *args")
    filter_name = filter_name.upper()  # Ensure filter name is uppercase

    # Supported filters
    supported_filters = {
        "BLUR": ImageFilter.BLUR,
        "CONTOUR": ImageFilter.CONTOUR,
        "DETAIL": ImageFilter.DETAIL,
        "EDGE_ENHANCE": ImageFilter.EDGE_ENHANCE,
        "EDGE_ENHANCE_MORE": ImageFilter.EDGE_ENHANCE_MORE,
        "EMBOSS": ImageFilter.EMBOSS,
        "FIND_EDGES": ImageFilter.FIND_EDGES,
        "SHARPEN": ImageFilter.SHARPEN,
        "SMOOTH": ImageFilter.SMOOTH,
        "SMOOTH_MORE": ImageFilter.SMOOTH_MORE,
        "MIN_FILTER": ImageFilter.MinFilter,
        "MAX_FILTER": ImageFilter.MaxFilter,
        "MODE_FILTER": ImageFilter.ModeFilter,
        "MULTIBAND_FILTER": ImageFilter.MultibandFilter,
        "GAUSSIAN_BLUR": ImageFilter.GaussianBlur,
        "BOX_BLUR": ImageFilter.BoxBlur,
        "MEDIAN_FILTER": ImageFilter.MedianFilter,
    }
    # Check if the filter name is supported
    if filter_name not in supported_filters:
        raise ValueError(
            f"Unsupported filter: {filter_name}, should be one of: {[i.lower() for i in supported_filters.keys()]}"
        )

    # Apply the filter. Size-parameterized filters take a radius/size argument.
    if filter_name in [
        "BOX_BLUR",
        "GAUSSIAN_BLUR",
        "MEDIAN_FILTER",
        "MIN_FILTER",
        "MAX_FILTER",
        "MODE_FILTER",
    ]:
        radius = filter_value if filter_value is not None else 2
        return img.filter(supported_filters[filter_name](radius))
    elif filter_name in ["MULTIBAND_FILTER"]:
        bands = filter_value if filter_value is not None else None
        return img.filter(supported_filters[filter_name](bands))
    else:
        if filter_value is not None:
            print(f"{filter_name} doesn't require a value for {filter_value}, but it remains unaffected")
        return img.filter(supported_filters[filter_name])
|
2202
|
+
|
2203
|
+
|
2204
|
+
def imgsets(
    img,
    sets=None,
    show=True,
    show_axis=False,
    size=None,
    dpi=100,
    figsize=None,
    auto=False,
    filter_kws=None,
):
    """
    Apply various enhancements and filters to an image using PIL's ImageEnhance and ImageFilter modules.

    Args:
        img (PIL.Image or str): The input image, or a file path to load it from.
        sets (dict): A dictionary specifying the enhancements, filters, and their parameters.
        show (bool): Whether to display the enhanced image.
        show_axis (bool): Whether to display axes on the image plot.
        size (tuple): The size of the thumbnail, cover, contain, or fit operation.
        dpi (int): Dots per inch for the displayed image.
        figsize (tuple): The size of the figure for displaying the image.
        auto (bool): Whether to automatically enhance the image based on its characteristics.
        filter_kws (dict): Mapping of filter name -> filter value, forwarded to apply_filter.

    Returns:
        PIL.Image: The enhanced image.

    Supported enhancements and filters (keys are matched loosely by substring):
        - "sharpness" / "contrast" / "brightness" / "color": ImageEnhance factors
          (values > 1 strengthen the effect, values < 1 weaken it).
        - "rotate": Rotates the image by the specified angle.
        - "crop" or "cut": Crop box as (left, upper, right, lower).
        - "size": Resizes the image to the specified dimensions.
        - "thumbnail": Resizes in place while preserving aspect ratio.
        - "cover" / "contain" / "fit" / "pad": ImageOps resizing strategies.
        - "rm" / "remove" / "back...": Background removal via rembg
          (numeric/list values configure alpha matting; a tuple replaces the
          background color; a string selects the rembg model).
        - "bgcolor": Replace the background color with an RGB(A) tuple.
    """
    def confirm_rembg_models(model_name):
        # Whitelist of model names understood by rembg; fall back to a default.
        models_support = [
            "u2net",
            "u2netp",
            "u2net_human_seg",
            "u2net_cloth_seg",
            "silueta",
            "isnet-general-use",
            "isnet-anime",
            "sam",
        ]
        if model_name in models_support:
            print(f"model_name: {model_name}")
            return model_name
        else:
            print(f"{model_name} cannot be found, check the name:{models_support}, default('isnet-general-use') has been used")
            return "isnet-general-use"

    def auto_enhance(img):
        """
        Automatically enhances the image based on its characteristics.

        Args:
            img (PIL.Image): The input image.

        Returns:
            dict: A dictionary containing the optimal enhancement values.
        """
        # Determine the bit depth based on the image mode
        if img.mode in ["1", "L", "P", "RGB", "YCbCr", "LAB", "HSV"]:
            # 8-bit depth per channel
            bit_depth = 8
        elif img.mode in ["RGBA", "CMYK"]:
            # 8-bit depth per channel + alpha (RGBA) or additional channels (CMYK)
            bit_depth = 8
        elif img.mode in ["I", "F"]:
            # 16-bit depth per channel (integer or floating-point)
            bit_depth = 16
        else:
            raise ValueError("Unsupported image mode")

        # Calculate the brightness and contrast for each channel
        num_channels = len(img.getbands())
        brightness_factors = []
        contrast_factors = []
        for channel in range(num_channels):
            channel_histogram = img.split()[channel].histogram()
            # Histogram-weighted mean pixel value of this channel.
            brightness = sum(i * w for i, w in enumerate(channel_histogram)) / sum(
                channel_histogram
            )
            channel_min, channel_max = img.split()[channel].getextrema()
            contrast = channel_max - channel_min

            # Adjust calculations based on bit depth
            normalization_factor = 2**bit_depth - 1  # Max value for the given bit depth
            brightness_factor = (
                1.0 + (brightness - normalization_factor / 2) / normalization_factor
            )
            contrast_factor = (
                1.0 + (contrast - normalization_factor / 2) / normalization_factor
            )

            brightness_factors.append(brightness_factor)
            contrast_factors.append(contrast_factor)

        # Calculate the average brightness and contrast factors across channels
        avg_brightness_factor = sum(brightness_factors) / num_channels
        avg_contrast_factor = sum(contrast_factors) / num_channels

        return {"brightness": avg_brightness_factor, "contrast": avg_contrast_factor}

    # Load image if input is a file path
    if isinstance(img, str):
        img = load_img(img)

    img_update = img.copy()
    # BUG FIX: default `sets` BEFORE the auto-enhance step. Previously the
    # None-check came after it, so auto=True with sets=None raised
    # AttributeError on sets.update(...).
    if sets is None:
        sets = {}
    # Auto-enhance image if requested
    if auto:
        auto_params = auto_enhance(img_update)
        sets.update(auto_params)

    for k, value in sets.items():
        if "shar" in k.lower():
            enhancer = ImageEnhance.Sharpness(img_update)
            img_update = enhancer.enhance(value)
        elif "col" in k.lower() and 'bg' not in k.lower():
            enhancer = ImageEnhance.Color(img_update)
            img_update = enhancer.enhance(value)
        elif "contr" in k.lower():
            if value and isinstance(value, (float, int)):
                enhancer = ImageEnhance.Contrast(img_update)
                img_update = enhancer.enhance(value)
            else:
                # Non-numeric / falsy value means "just autocontrast".
                print('autocontrasted')
                img_update = ImageOps.autocontrast(img_update)
        elif "bri" in k.lower():
            enhancer = ImageEnhance.Brightness(img_update)
            img_update = enhancer.enhance(value)
        elif "cro" in k.lower() or "cut" in k.lower():
            img_update = img_update.crop(value)
        elif "rota" in k.lower():
            img_update = img_update.rotate(value)
        elif "si" in k.lower():
            img_update = img_update.resize(value)
        elif "thum" in k.lower():
            # thumbnail() modifies the image in place.
            img_update.thumbnail(value)
        elif "cover" in k.lower():
            img_update = ImageOps.cover(img_update, size=value)
        elif "contain" in k.lower():
            img_update = ImageOps.contain(img_update, size=value)
        elif "fit" in k.lower():
            img_update = ImageOps.fit(img_update, size=value)
        elif "pad" in k.lower():
            img_update = ImageOps.pad(img_update, size=value)
        elif 'rem' in k.lower() or 'rm' in k.lower() or 'back' in k.lower():
            if value and isinstance(value, (int, float, list)):
                print('example usage: {"rm":[alpha_matting_background_threshold(20),alpha_matting_foreground_threshold(270),alpha_matting_erode_sive(11)]}')
                print("https://github.com/danielgatis/rembg/blob/main/USAGE.md")
                # Parameter semantics documented in rembg's USAGE.md:
                # [background_threshold, foreground_threshold, erode_size].
                if isinstance(value, int):
                    value = [value]
                if len(value) < 2:
                    # BUG FIX: pass the scalar threshold, not the whole list.
                    img_update = remove(img_update, alpha_matting=True, alpha_matting_background_threshold=value[0])
                elif 2 <= len(value) < 3:
                    img_update = remove(img_update, alpha_matting=True, alpha_matting_background_threshold=value[0], alpha_matting_foreground_threshold=value[1])
                elif 3 <= len(value) < 4:
                    img_update = remove(img_update, alpha_matting=True, alpha_matting_background_threshold=value[0], alpha_matting_foreground_threshold=value[1], alpha_matting_erode_size=value[2])
            if isinstance(value, tuple):  # replace the background color
                if len(value) == 3:
                    value += (255,)  # add full opacity if alpha missing
                img_update = remove(img_update, bgcolor=value)
            if isinstance(value, str):
                # A string selects the rembg model.
                if confirm_rembg_models(value):
                    img_update = remove(img_update, session=new_session(value))
                else:
                    img_update = remove(img_update)
        elif 'bgcolor' in k.lower():
            if isinstance(value, list):
                value = tuple(value)
            if isinstance(value, tuple):  # replace the background color
                if len(value) == 3:
                    value += (255,)
                img_update = remove(img_update, bgcolor=value)

    if filter_kws:
        for filter_name, filter_value in filter_kws.items():
            img_update = apply_filter(img_update, filter_name, filter_value)

    # Display the image if requested
    if show:
        if figsize is None:
            plt.figure(dpi=dpi)
        else:
            plt.figure(figsize=figsize, dpi=dpi)
        plt.imshow(img_update)
        plt.axis("on") if show_axis else plt.axis("off")

    return img_update
|
2418
|
+
# # usage:
|
2419
|
+
# img = imgsets(
|
2420
|
+
# fpath,
|
2421
|
+
# sets={"rota": -5},
|
2422
|
+
# dpi=200,
|
2423
|
+
# filter_kws={"EMBOSS": 5, "sharpen": 5, "EDGE_ENHANCE_MORE": 10},
|
2424
|
+
# show_axis=True,
|
2425
|
+
# )
|
2426
|
+
|
2427
|
+
def figsets(*args):
    """Configure the current matplotlib figure/axes from loosely-matched settings.

    Accepts either dicts of {key: value} settings, or alternating key, value
    positional arguments; an Axes instance may also be passed positionally
    (otherwise the current axes is used). Keys are matched by substring
    heuristics (e.g. any key containing "tic" targets ticks, "la" labels).
    Relies on module-level plt, sns, matplotlib, tck, cycler, adjust_spines.
    """
    fig = plt.gcf()
    fontsize = 11
    fontname = "Arial"
    sns_themes = ["white", "whitegrid", "dark", "darkgrid", "ticks"]
    sns_contexts = ["notebook", "talk", "poster"]  # now available "paper"
    scienceplots_styles = ["science", "nature",
                           "scatter", "ieee", "no-latex", "std-colors", "high-vis", "bright", "dark_background", "science",
                           "high-vis", "vibrant", "muted", "retro", "grid", "high-contrast", "light", "cjk-tc-font", "cjk-kr-font",
                           ]

    def sets_priority(ax, key, value):
        # Settings that must be applied before the "small" ones (e.g. style
        # changes reset rcParams, so they go first).
        if ("fo" in key) and (("size" in key) or ("sz" in key)):
            # NOTE(review): this rebinds a local; the enclosing `fontsize`
            # used elsewhere is NOT updated -- only rcParams is.
            fontsize = value
            plt.rcParams.update({"font.size": value})
        # style
        if "st" in key.lower() or "th" in key.lower():
            if isinstance(value, str):
                if (value in plt.style.available) or (value in scienceplots_styles):
                    plt.style.use(value)
                elif value in sns_themes:
                    sns.set_style(value)
                elif value in sns_contexts:
                    sns.set_context(value)
                else:
                    print(
                        f"\nWarning\n'{value}' is not a plt.style,select on below:\n{plt.style.available+sns_themes+sns_contexts+scienceplots_styles}"
                    )
            if isinstance(value, list):
                for i in value:
                    if (i in plt.style.available) or (i in scienceplots_styles):
                        plt.style.use(i)
                    elif i in sns_themes:
                        sns.set_style(i)
                    elif i in sns_contexts:
                        sns.set_context(i)
                    else:
                        print(
                            f"\nWarning\n'{i}' is not a plt.style,select on below:\n{plt.style.available+sns_themes+sns_contexts+scienceplots_styles}"
                        )
        if "la" in key.lower():
            # label position: value is an iterable of direction hints (l/r/t/b)
            if "loc" in key.lower() or "po" in key.lower():
                for i in value:
                    if "l" in i.lower():
                        ax.yaxis.set_label_position("left")
                    if "r" in i.lower():
                        ax.yaxis.set_label_position("right")
                    if "t" in i.lower():
                        ax.xaxis.set_label_position("top")
                    if "b" in i.lower():
                        ax.xaxis.set_label_position("bottom")
            if ("x" in key.lower()) and (
                "tic" not in key.lower() and "tk" not in key.lower()
            ):
                ax.set_xlabel(value, fontname=fontname)
            if ("y" in key.lower()) and (
                "tic" not in key.lower() and "tk" not in key.lower()
            ):
                ax.set_ylabel(value, fontname=fontname)
            if ("z" in key.lower()) and (
                "tic" not in key.lower() and "tk" not in key.lower()
            ):
                ax.set_zlabel(value, fontname=fontname)
        # tick location
        if "tic" in key.lower() or "tk" in key.lower():
            if ("loc" in key.lower()) or ("po" in key.lower()):
                if isinstance(value, (str, list)):
                    loc = []
                    for i in value:
                        if ("l" in i.lower()) and ("a" not in i.lower()):
                            ax.yaxis.set_ticks_position("left")
                        if "r" in i.lower():
                            ax.yaxis.set_ticks_position("right")
                        if "t" in i.lower():
                            ax.xaxis.set_ticks_position("top")
                        if "b" in i.lower():
                            ax.xaxis.set_ticks_position("bottom")
                        if i.lower() in ["a", "both", "all", "al", ":"]:
                            ax.xaxis.set_ticks_position("both")
                            ax.yaxis.set_ticks_position("both")
                        if i.lower() in ["xnone", 'xoff', "none"]:
                            ax.xaxis.set_ticks_position("none")
                        if i.lower() in ["ynone", 'yoff', 'none']:
                            ax.yaxis.set_ticks_position("none")
            # ticks / labels
            elif "x" in key.lower():
                if "la" not in key.lower():
                    ax.set_xticks(value)
                if "la" in key.lower():
                    ax.set_xticklabels(value)
            elif "y" in key.lower():
                if "la" not in key.lower():
                    ax.set_yticks(value)
                if "la" in key.lower():
                    ax.set_yticklabels(value)
            elif "z" in key.lower():
                if "la" not in key.lower():
                    ax.set_zticks(value)
                if "la" in key.lower():
                    ax.set_zticklabels(value)
        # rotation
        if "angle" in key.lower() or ("rot" in key.lower()):
            if "x" in key.lower():
                ax.tick_params(axis="x", rotation=value)
            if "y" in key.lower():
                ax.tick_params(axis="y", rotation=value)

        # NOTE(review): `"bo" in key in key` is a chained comparison that
        # reduces to ("bo" in key) and (key in key) -- effectively "bo" in key.
        if "bo" in key in key:  # and ("p" in key or "l" in key):
            # print("'ticks' style is recommended")
            if isinstance(value, (str, list)):
                locations = []
                for i in value:
                    if "l" in i.lower():
                        locations.append("left")
                    if "r" in i.lower():
                        locations.append("right")
                    if "t" in i.lower():
                        locations.append("top")
                    if "b" in i.lower():
                        locations.append("bottom")
                    if i.lower() in ["a", "both", "all", "al", ":"]:
                        [
                            locations.append(x)
                            for x in ["left", "right", "top", "bottom"]
                        ]
                for i in value:
                    if i.lower() in "none":
                        locations = []
                # check spines: keep the requested ones, hide the rest
                for loc, spi in ax.spines.items():
                    if loc in locations:
                        spi.set_position(("outward", 0))
                    else:
                        spi.set_color("none")  # no spine
        if key == "tick" or key == "ticks" or key == "ticks_para":
            if isinstance(value, dict):
                for k, val in value.items():
                    if "wh" in k.lower():
                        ax.tick_params(
                            which=val
                        )  # {'major', 'minor', 'both'}, default: 'major'
                    elif "dir" in k.lower():
                        ax.tick_params(direction=val)  # {'in', 'out', 'inout'}
                    elif "len" in k.lower():
                        ax.tick_params(length=val)
                    elif ("wid" in k.lower()) or ("wd" in k.lower()):
                        ax.tick_params(width=val)
                    elif "ax" in k.lower():
                        ax.tick_params(axis=val)  # {'x', 'y', 'both'}, default: 'both'
                    elif ("c" in k.lower()) and ("ect" not in k.lower()):
                        ax.tick_params(colors=val)  # Tick color.
                    elif "pad" in k.lower():
                        ax.tick_params(
                            pad=val
                        )  # float, distance in points between tick and label
                    elif (
                        ("lab" in k.lower())
                        and ("s" in k.lower())
                        and ("z" in k.lower())
                    ):
                        ax.tick_params(
                            labelsize=val
                        )  # tick label font size

        if "mi" in key.lower() and "tic" in key.lower():
            # minor ticks
            if "x" in value.lower() or "x" in key.lower():
                ax.xaxis.set_minor_locator(tck.AutoMinorLocator())  # ax.minorticks_on()
            if "y" in value.lower() or "y" in key.lower():
                ax.yaxis.set_minor_locator(
                    tck.AutoMinorLocator()
                )  # ax.minorticks_off()
            if value.lower() in ["both", ":", "all", "a", "b", "on"]:
                ax.minorticks_on()
        if key == "colormap" or key == "cmap":
            plt.set_cmap(value)

    def sets_small(ax, key, value):
        # Remaining settings, applied after sets_priority.
        if key == "figsize":
            pass
        if key == "xlim":
            ax.set_xlim(value)
        if key == "ylim":
            ax.set_ylim(value)
        if key == "zlim":
            ax.set_zlim(value)
        if "sc" in key.lower():
            # axis scale, e.g. "log"
            if "x" in key.lower():
                ax.set_xscale(value)
            if "y" in key.lower():
                ax.set_yscale(value)
            if "z" in key.lower():
                ax.set_zscale(value)
        if key == "grid":
            if isinstance(value, dict):
                for k, val in value.items():
                    if "wh" in k.lower():
                        ax.grid(
                            which=val
                        )  # {'major', 'minor', 'both'}, default: 'major'
                    elif "ax" in k.lower():
                        ax.grid(axis=val)  # {'x', 'y', 'both'}, default: 'both'
                    elif ("c" in k.lower()) and ("ect" not in k.lower()):
                        ax.grid(color=val)  # grid line color
                    elif "l" in k.lower() and ("s" in k.lower()):
                        ax.grid(linestyle=val)
                    elif "l" in k.lower() and ("w" in k.lower()):
                        ax.grid(linewidth=val)
                    elif "al" in k.lower():
                        ax.grid(alpha=val)
            else:
                if value == "on" or value is True:
                    ax.grid(visible=True)
                elif value == "off" or value is False:
                    ax.grid(visible=False)
        if "tit" in key.lower():
            if "sup" in key.lower():
                plt.suptitle(value)
            else:
                ax.set_title(value)
        if key.lower() in ["spine", "adjust", "ad", "sp", "spi", "adj", "spines"]:
            if isinstance(value, bool) or (value in ["go", "do", "ja", "yes"]):
                if value:
                    adjust_spines(ax)  # default distance=2
            if isinstance(value, (float, int)):
                adjust_spines(ax=ax, distance=value)
        if "c" in key.lower() and ("sp" in key.lower() or "ax" in key.lower()):
            for loc, spi in ax.spines.items():
                spi.set_color(value)

    # Pull out an Axes argument if one was passed; otherwise use current axes.
    for arg in args:
        if isinstance(arg, matplotlib.axes._axes.Axes):
            ax = arg
            args = args[1:]
    if 'ax' not in locals():
        ax = plt.gca()

    for arg in args:
        if isinstance(arg, dict):
            # Two passes: priority settings first, then the rest.
            for k, val in arg.items():
                sets_priority(ax, k, val)
            for k, val in arg.items():
                sets_small(ax, k, val)
        else:
            # Positional form: alternating key, value pairs.
            Nargin = len(args) // 2
            ax.labelFontSizeMultiplier = 1
            ax.titleFontSizeMultiplier = 1
            ax.set_facecolor("w")

            for ip in range(Nargin):
                key = args[ip * 2].lower()
                value = args[ip * 2 + 1]
                sets_priority(ax, key, value)
            for ip in range(Nargin):
                key = args[ip * 2].lower()
                value = args[ip * 2 + 1]
                sets_small(ax, key, value)
    # Default color cycle applied on every call.
    colors = [
        "#474747",
        "#FF2C00",
        "#0C5DA5",
        "#845B97",
        "#58BBCC",
        "#FF9500",
        "#D57DBE",
    ]
    matplotlib.rcParams["axes.prop_cycle"] = cycler(color=colors)
    if len(fig.get_axes()) > 1:
        plt.tight_layout()
        plt.gcf().align_labels()
|
2695
|
+
|
2696
|
+
def read_mplstyle(style_file):
    """Activate a matplotlib style file, print its settings, and return them.

    Args:
        style_file (str): Path to a .mplstyle file.

    Returns:
        dict: The resulting rcParams, as a plain dictionary.
    """
    # Applying the style merges its settings into the active rcParams.
    plt.style.use(style_file)

    # Snapshot the (now updated) configuration as a regular dict.
    style_dict = dict(plt.rcParams)

    # Echo each setting for quick inspection.
    for name, setting in style_dict.items():
        print(f"\n{name}::::{setting}")
    return style_dict
|
2709
|
+
# #example usage:
|
2710
|
+
# style_file = "/ std-colors.mplstyle"
|
2711
|
+
# style_dict = read_mplstyle(style_file)
|
2712
|
+
|
2713
|
+
|
2714
|
+
# search and find the directory of the library installed locally
|
2715
|
+
def dir_lib(lib_oi):
    """Find site-packages directories whose path contains *lib_oi* (case-insensitive).

    Args:
        lib_oi (str): Substring of the library name to look for.

    Returns:
        list: Matching directory paths (empty if none found).
    """
    import site

    # Folders directly under the first site-packages directory.
    folders = listdir(site.getsitepackages()[0], "folder")

    # Keep every directory whose lowercased path mentions the library.
    matches = [d for d in folders.fpath if lib_oi in d.lower()]

    if matches != []:
        print(f"{lib_oi} directory:", matches)
    else:
        print(f"Cannot find the {lib_oi} in site-packages directory.")
    return matches
|
2732
|
+
# example usage:
|
2733
|
+
# dir_lib("seaborn")
|
2734
|
+
|
2735
|
+
# set up the colorlist, give the number, or the colormap's name
|
2736
|
+
def get_color(n=1, cmap="auto", how="start"):
    """Build a list of *n* colors from a palette or matplotlib colormap.

    Args:
        n (int): Number of colors wanted.
        cmap (str): "auto" for the built-in palette, otherwise a matplotlib
            colormap name (e.g. "viridis", "hot").
        how (str): "start"/"begin" cycles the palette from its start;
            "linspace"/"pick" samples evenly across the palette.

    Returns:
        list: Hex color strings of length *n*.
    """
    # Extract a colormap as a list of hex strings.
    def cmap2hex(cmap_name):
        cmap_ = matplotlib.pyplot.get_cmap(cmap_name)
        colors = [cmap_(i) for i in range(cmap_.N)]
        return [matplotlib.colors.rgb2hex(color) for color in colors]
    # usage: clist = cmap2hex("viridis")

    # Cycle the palette until n colors are collected (zip bounds the loop).
    def cycle2list(colorlist, n=10):
        cycler_ = cycler(tmp=colorlist)
        clist = []
        for i, c_ in zip(range(n), cycler_()):
            clist.append(c_["tmp"])
        return clist

    def hue2rgb(hex_colors):
        def hex_to_rgb(hex_color):
            """Converts a hexadecimal color code to RGB values."""
            # BUG FIX: previously tested `hex_colors` (the outer argument)
            # instead of `hex_color`, which raised AttributeError for list
            # inputs and mis-detected the "#" prefix.
            if hex_color.startswith("#"):
                hex_color = hex_color.lstrip("#")
            return tuple(int(hex_color[i : i + 2], 16) / 255.0 for i in (0, 2, 4))

        if isinstance(hex_colors, str):
            return hex_to_rgb(hex_colors)
        elif isinstance(hex_colors, list):
            # Convert a list of hexadecimal color codes to a list of RGB values.
            return [hex_to_rgb(hex_color) for hex_color in hex_colors]

    if "aut" in cmap:
        # Built-in default palette.
        colorlist = [
            "#474747",
            "#FF2C00",
            "#0C5DA5",
            "#845B97",
            "#58BBCC",
            "#FF9500",
            "#D57DBE",
        ]
    else:
        colorlist = cmap2hex(cmap)
    if "st" in how.lower() or "be" in how.lower():
        # cycle it
        clist = cycle2list(colorlist, n=n)
    if "l" in how.lower() or "p" in how.lower():
        # evenly spaced picks across the palette
        clist = [
            colorlist[i]
            for i in [int(i) for i in np.linspace(0, len(colorlist) - 1, n)]
        ]

    return clist  # a color list
|
2787
|
+
# example usage: clist = get_color(4,cmap="auto", how="start") # get_color(4, cmap="hot", how="linspace")
|
2788
|
+
|
2789
|
+
"""
|
2790
|
+
# n = 7
|
2791
|
+
# clist = get_color(n, cmap="auto", how="linspace") # get_color(100)
|
2792
|
+
# plt.figure(figsize=[8, 5], dpi=100)
|
2793
|
+
# x = np.linspace(0, 2 * np.pi, 50) * 100
|
2794
|
+
# y = np.sin(x)
|
2795
|
+
# for i in range(1, n + 1):
|
2796
|
+
# plt.plot(x, y + i, c=clist[i - 1], lw=5, label=str(i))
|
2797
|
+
# plt.legend()
|
2798
|
+
# plt.ylim(-2, 20)
|
2799
|
+
# figsets(plt.gca(), {"style": "whitegrid"}) """
|
2800
|
+
|
2801
|
+
|
2802
|
+
class FileInfo:
    """Metadata holder for a single file: size, timestamps, location and kind.

    Any extra key/value pairs supplied via `extra_info` become attributes too,
    and are included in the dict returned by show().
    """

    def __init__(self, size, creation_time, ctime, mod_time, mtime, parent_dir, fname, kind, extra_info=None):
        # NOTE(review): `size` is rendered as MB in __repr__/__str__ --
        # callers are presumed to pass megabytes; confirm at the call site.
        self.size = size
        self.creation_time = creation_time
        self.ctime = ctime
        self.mod_time = mod_time
        self.mtime = mtime
        self.parent_dir = parent_dir
        self.fname = fname
        self.kind = kind
        # Attach any additional metadata as plain attributes.
        if extra_info:
            for key, value in extra_info.items():
                setattr(self, key, value)
        print("to show the res: 'finfo(fpath).show()'")

    def __repr__(self):
        # Compact single-line form for debugging.
        return (f"FileInfo(size={self.size} MB, creation_time='{self.creation_time}', "
                f"ctime='{self.ctime}', mod_time='{self.mod_time}', mtime='{self.mtime}', "
                f"parent_dir='{self.parent_dir}', fname='{self.fname}', kind='{self.kind}')")

    def __str__(self):
        # Multi-line human-readable form.
        return (f"FileInfo:\n"
                f" Size: {self.size} MB\n"
                f" Creation Time: {self.creation_time}\n"
                f" CTime: {self.ctime}\n"
                f" Modification Time: {self.mod_time}\n"
                f" MTime: {self.mtime}\n"
                f" Parent Directory: {self.parent_dir}\n"
                f" File Name: {self.fname}\n"
                f" Kind: {self.kind}")

    def show(self):
        """Return all metadata (known fields plus any extras) as a dict."""
        # Convert the object to a dictionary
        return {
            "size": self.size,
            "creation_time": self.creation_time,
            "ctime": self.ctime,
            "mod_time": self.mod_time,
            "mtime": self.mtime,
            "parent_dir": self.parent_dir,
            "fname": self.fname,
            "kind": self.kind,
            # Fold in the extra_info attributes set in __init__.
            **{key: getattr(self, key) for key in vars(self) if key not in ["size", "creation_time", "ctime", "mod_time", "mtime", "parent_dir", "fname", "kind"]}
        }
|