py2ls 0.1.4.7__py3-none-any.whl → 0.1.4.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- py2ls/.git/COMMIT_EDITMSG +1 -1
- py2ls/.git/FETCH_HEAD +1 -1
- py2ls/.git/config +1 -0
- py2ls/.git/index +0 -0
- py2ls/.git/logs/HEAD +3 -0
- py2ls/.git/logs/refs/heads/main +3 -0
- py2ls/.git/logs/refs/remotes/origin/HEAD +5 -0
- py2ls/.git/logs/refs/remotes/origin/main +3 -0
- py2ls/.git/objects/01/d5bd8065e6860c0bd23ff9fa57161806a099e1 +0 -0
- py2ls/.git/objects/09/08da26de58c114225ad81f484b80bf5d351b34 +0 -0
- py2ls/.git/objects/1c/3f92adda34344bcbbbf9d409c79855ae2aaea8 +2 -0
- py2ls/.git/objects/32/fd627b62fad7cf3b2f9e34ab9777126a0987ad +0 -0
- py2ls/.git/objects/39/7ead045fbbcfb17c62019eb18fe21ed05dbee5 +0 -0
- py2ls/.git/objects/4f/7afb40dff2153d857fc85748c2eecb85125042 +0 -0
- py2ls/.git/objects/62/4488173ed2c8936fa5cea3cf5dd3f26a30b86e +0 -0
- py2ls/.git/objects/6d/ee29dbdcc84edeeacede105110446f3ccac963 +0 -0
- py2ls/.git/objects/b7/2c9e75ab7d0afe594664650aa8f6c772f5ac64 +0 -0
- py2ls/.git/objects/bb/81ccc0513f18fc160b54a82861e9a80d23f4f6 +0 -0
- py2ls/.git/objects/cd/822b3574a88ebdd1ed82fd6983f37e626d52b4 +0 -0
- py2ls/.git/objects/d8/4688b54c0040a30976b3a6540bc47adf7ce680 +0 -0
- py2ls/.git/objects/f1/e50757fddc28b445545dc7e2759b54cdd0f42e +0 -0
- py2ls/.git/refs/heads/main +1 -1
- py2ls/.git/refs/remotes/origin/main +1 -1
- py2ls/__init__.py +1 -1
- py2ls/data/.DS_Store +0 -0
- py2ls/data/db2ls_sql_chtsht.json +39 -0
- py2ls/data/lang_code_iso639.json +97 -0
- py2ls/db2ls.py +356 -0
- py2ls/ips.py +542 -226
- py2ls/netfinder.py +452 -128
- py2ls/translator.py +80 -122
- {py2ls-0.1.4.7.dist-info → py2ls-0.1.4.9.dist-info}/METADATA +1 -1
- {py2ls-0.1.4.7.dist-info → py2ls-0.1.4.9.dist-info}/RECORD +34 -18
- {py2ls-0.1.4.7.dist-info → py2ls-0.1.4.9.dist-info}/WHEEL +1 -1
- py2ls/dbhandler.py +0 -97
py2ls/ips.py
CHANGED
@@ -1,60 +1,294 @@
|
|
1
1
|
from scipy.ndimage import convolve1d
|
2
|
+
from scipy.signal import savgol_filter
|
3
|
+
import pingouin as pg
|
4
|
+
from scipy import stats
|
5
|
+
|
2
6
|
import numpy as np
|
3
7
|
import pandas as pd
|
8
|
+
|
4
9
|
import json
|
5
|
-
import matplotlib.pyplot as plt
|
6
|
-
import seaborn as sns
|
7
|
-
# import scienceplots
|
8
10
|
import matplotlib
|
9
|
-
import
|
10
|
-
import os
|
11
|
-
from scipy.signal import savgol_filter
|
12
|
-
import pingouin as pg
|
13
|
-
from scipy import stats
|
11
|
+
import matplotlib.pyplot as plt
|
14
12
|
import matplotlib.ticker as tck
|
13
|
+
from mpl_toolkits.mplot3d import Axes3D
|
14
|
+
import seaborn as sns
|
15
|
+
|
16
|
+
import sys, os,shutil,re, yaml,json
|
15
17
|
from cycler import cycler
|
16
|
-
import
|
18
|
+
import time
|
19
|
+
from dateutil import parser
|
20
|
+
from datetime import datetime
|
21
|
+
|
17
22
|
from PIL import Image,ImageEnhance, ImageOps,ImageFilter
|
18
23
|
from rembg import remove,new_session
|
19
|
-
|
24
|
+
|
20
25
|
import docx
|
21
|
-
import pandas as pd
|
22
26
|
from fpdf import FPDF
|
23
|
-
import yaml
|
24
27
|
from lxml import etree
|
25
28
|
from docx import Document
|
26
29
|
from PyPDF2 import PdfReader
|
27
30
|
from pdf2image import convert_from_path, pdfinfo_from_path
|
28
|
-
from nltk.tokenize import sent_tokenize,word_tokenize
|
31
|
+
from nltk.tokenize import sent_tokenize, word_tokenize
|
29
32
|
import nltk # nltk.download("punkt")
|
30
33
|
from docx2pdf import convert
|
31
34
|
import img2pdf as image2pdf
|
32
|
-
import
|
35
|
+
import nbformat
|
36
|
+
from nbconvert import MarkdownExporter
|
37
|
+
|
33
38
|
from itertools import pairwise
|
34
|
-
import time
|
35
39
|
from box import Box, BoxList
|
36
40
|
from numerizer import numerize
|
37
41
|
from tqdm import tqdm
|
38
42
|
import mimetypes
|
39
43
|
from pprint import pp
|
40
|
-
from
|
41
|
-
from
|
44
|
+
from collections import Counter
|
45
|
+
from fuzzywuzzy import fuzz,process
|
46
|
+
from langdetect import detect
|
47
|
+
from duckduckgo_search import DDGS
|
42
48
|
|
43
|
-
|
49
|
+
from py2ls import netfinder
|
50
|
+
|
51
|
+
dir_save='/Users/macjianfeng/Dropbox/Downloads/'
|
52
|
+
|
53
|
+
def rm_folder(folder_path, verbose=True):
    """
    Recursively delete a folder, reporting the outcome instead of raising.

    Parameters:
        folder_path (str): The folder to delete together with its contents.
        verbose (bool): If True, print a success or failure message. Default is True.
    """
    try:
        shutil.rmtree(folder_path)
        # Kept inside the try so a failing print is handled like a failing delete.
        if not verbose:
            return
        print(f'Successfully deleted {folder_path}')
    except Exception as e:
        if not verbose:
            return
        print(f'Failed to delete {folder_path}. Reason: {e}')
|
61
|
+
|
62
|
+
def fremove(path, verbose=True):
    """
    Remove a folder and all its contents, a single file, or a symbolic link.

    Parameters:
        path (str): The path to the folder, file or link to remove.
        verbose (bool): If True, print success or failure messages. Default is True.

    Notes:
        A symbolic link is removed itself and never followed: its target is left
        untouched. Failures are reported (when verbose) rather than raised.
    """
    try:
        if os.path.islink(path):
            # Must be tested first: isdir/isfile are both False for a dangling
            # link, and shutil.rmtree refuses to operate on a link to a directory.
            os.unlink(path)
            if verbose:
                print(f'Successfully deleted link {path}')
        elif os.path.isdir(path):
            shutil.rmtree(path)
            if verbose:
                print(f'Successfully deleted folder {path}')
        elif os.path.isfile(path):
            os.remove(path)
            if verbose:
                print(f'Successfully deleted file {path}')
        else:
            if verbose:
                print(f'Path {path} does not exist')
    except Exception as e:
        # Deliberately best-effort: any failure is reported, never propagated.
        if verbose:
            print(f'Failed to delete {path}. Reason: {e}')
|
84
|
+
|
85
|
+
|
86
|
+
def get_cwd(verbose: bool = True):
    """
    get_cwd: to get the current working directory

    The directory containing this module file is preferred; when ``__file__`` is
    not defined (e.g. in an interactive session) ``os.getcwd()`` is used instead.

    Args:
        verbose (bool, optional): to show which lookup was used. Defaults to True.

    Returns:
        str: the resolved directory.
    """
    try:
        script_dir = os.path.dirname(os.path.abspath(__file__))
        source = "os.path.dirname(os.path.abspath(__file__))"
    except NameError:
        # This works in an interactive environment (like a Jupyter notebook)
        script_dir = os.getcwd()
        source = "os.getcwd()"
    if verbose:
        print(f"{source}:", script_dir)
    # The directory was previously computed but never handed back to the caller.
    return script_dir
|
101
|
+
|
102
|
+
def search(query, limit=5, kind='text', output='df',verbose=False,download=True, dir_save=dir_save):
    """
    Run a DuckDuckGo search and return the hits as a DataFrame.

    Args:
        query (str): the search expression.
        limit (int): maximum number of results requested. Defaults to 5.
        kind (str): search type; only text search (any value containing 'te') is
            implemented.
        output (str): currently unused; kept for interface compatibility.
        verbose (bool): print the results and download failures. Defaults to False.
        download (bool): hand every result link to ``netfinder.downloader``.
            Defaults to True.
        dir_save (str): directory passed to the downloader.

    Returns:
        pandas.DataFrame: the results, with the ``href`` column renamed to
        ``links``; an empty DataFrame when ``kind`` is not supported.
    """
    from duckduckgo_search import DDGS
    if 'te' not in kind.lower():
        # Previously `res` was never bound on this path.
        print(f"search kind '{kind}' is not supported, only 'text' is available")
        return pd.DataFrame()
    results = DDGS().text(query, max_results=limit)
    res = pd.DataFrame(results)
    res.rename(columns={"href": "links"}, inplace=True)
    if verbose:
        print(f'searching "{query}": got the results below\n{res}')
    # Without any hit there is no "links" column to download from.
    if download and "links" in res.columns:
        try:
            netfinder.downloader(url=res.links.tolist(), dir_save=dir_save, verbose=verbose)
        except Exception:
            # Best-effort download: a failing link must not lose the results.
            if verbose:
                print(f"failed link")
    return res
|
117
|
+
|
118
|
+
def echo(*args, **kwargs):
    """
    query, model="gpt", verbose=True, log=True, dir_save=dir_save
    An AI chat tool: sends the query to ``DDGS().chat`` and optionally logs the exchange.

    Positional arguments are classified by type:
        str:  an existing directory becomes the log location; any other string of
              at most 5 characters is taken as the model name; a longer string is
              the query.
        dict: may carry "verbose" and "log" overrides.

    Args:
        query (str): the question to ask (keyword or positional).
        model (str, optional): a fragment of a supported model name. Defaults to "gpt".
        verbose (bool, optional): pretty-print the answer. Defaults to True.
        log (bool, optional): prepend the exchange to a markdown log. Defaults to True.
        dir_save (str, path, optional): log file, or a directory (path ending with a
            separator) in which "log_ai.md" is written. Defaults to the module-level dir_save.

    Returns:
        str: the answer from ai

    Raises:
        ValueError: if no query was supplied.
    """
    # `query` used to be reset to None here, discarding the keyword that
    # chat()/ai() pass in.
    query = kwargs.get('query')
    model = kwargs.get('model', 'gpt')
    verbose = kwargs.get('verbose', True)
    log = kwargs.get('log', True)
    # Only read the module default: a per-call override must not rebind the
    # module-level dir_save for every later call.
    log_target = kwargs.get('dir_save', dir_save)
    for arg in args:
        if isinstance(arg, str):
            if os.path.isdir(arg):
                log_target = arg
            elif len(arg) <= 5:
                model = arg
            else:
                query = arg
        elif isinstance(arg, dict):
            verbose = arg.get("verbose", verbose)
            log = arg.get("log", log)
    if query is None:
        raise ValueError("echo() needs a query: pass query=... or a string longer than 5 characters")

    def valid_mod_name(str_fly):
        # Resolve a (possibly abbreviated) model name to a supported identifier.
        supported = ("claude-3-haiku", "gpt-3.5", "llama-3-70b", "mixtral-8x7b")
        fragments = [str_fly] if isinstance(str_fly, str) else list(str_fly)
        for full_name in supported:
            # Lower-case the fragment as well so "GPT" matches "gpt-3.5".
            if any(fragment.lower() in full_name for fragment in fragments):
                return full_name
        print(f"not support your model{model}, supported models: 'claude','gpt(default)', 'llama','mixtral'")
        return "gpt-3.5"  # default model

    model_valid = valid_mod_name(model)
    res = DDGS().chat(query, model=model_valid)
    if verbose:
        pp(res)
    if log:
        dt_str = datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d_%H:%M:%S')
        res_ = f"\n\n####Q:{query}\n\n#####Ans:{dt_str}\n\n>{res}\n"
        if bool(os.path.basename(log_target)):
            # A path with a final component is used as the log file itself.
            fpath = log_target
        else:
            os.makedirs(log_target, exist_ok=True)
            fpath = os.path.join(log_target, f"log_ai.md")
        fupdate(fpath=fpath, content=res_)
        print(f"log file:{fpath}")
    return res
|
188
|
+
|
189
|
+
def chat(*args, **kwargs):
    """
    Alias of ``echo``: ask the AI chat tool a question.

    A single positional string is also passed as the ``query`` keyword. All
    positional arguments are forwarded too; previously they were dropped, so
    ``echo`` never received them.

    Returns:
        whatever ``echo`` returns (the answer from ai).
    """
    if len(args) == 1 and isinstance(args[0], str):
        kwargs.setdefault('query', args[0])
    return echo(*args, **kwargs)
|
193
|
+
|
194
|
+
def ai(*args, **kwargs):
    """
    Alias of ``echo``: ask the AI chat tool a question.

    A single positional string is also passed as the ``query`` keyword. All
    positional arguments are forwarded too; previously they were dropped, so
    ``echo`` never received them.

    Returns:
        whatever ``echo`` returns (the answer from ai).
    """
    if len(args) == 1 and isinstance(args[0], str):
        kwargs.setdefault('query', args[0])
    return echo(*args, **kwargs)
|
198
|
+
|
199
|
+
def detect_lang(text, output='lang',verbose=True):
    """
    Detect the language of ``text`` with ``langdetect.detect``.

    The detected code is fuzzy-matched (``strcmp``) against the values of the
    bundled ``data/lang_code_iso639.json`` mapping; the mapping's keys are
    returned as the language names.

    Args:
        text (str): the text to analyse.
        output (str): any value containing 'c' returns the code; otherwise the
            matching key of the mapping is returned. Defaults to 'lang'.
        verbose (bool): print the lookup paths and the match details.

    Returns:
        str: the language (or code), or 'no' when the text is rejected by
        ``is_text`` or detection fails.
    """
    dir_curr_script = os.path.dirname(os.path.abspath(__file__))
    dir_lang_code = dir_curr_script + "/data/lang_code_iso639.json"
    if verbose:
        # Debug output; it used to be printed even with verbose=False.
        print(dir_curr_script, os.getcwd(), dir_lang_code)
    lang_code_iso639 = fload(dir_lang_code)
    l_lang = list(lang_code_iso639.keys())
    l_code = list(lang_code_iso639.values())
    try:
        if not is_text(text):
            print(f"{text} is not supported")
            return 'no'
        code_detect = detect(text)
        idx = strcmp(code_detect, l_code, verbose=verbose)[1]
        if 'c' in output.lower():  # return code
            return l_code[idx]
        return l_lang[idx]
    except Exception:
        # Detection is best-effort; any failure maps to the 'no' sentinel.
        return 'no'
|
218
|
+
|
219
|
+
def is_text(s):
    """
    Tell whether ``s`` mixes alphabetic and non-alphabetic characters.

    Returns:
        bool: True only if ``s`` contains at least one alphabetic character and
        at least one character that is not alphabetic (space, digit, ...).
    """
    seen_alpha = False
    seen_other = False
    for ch in s:
        if ch.isalpha():
            seen_alpha = True
        else:
            seen_other = True
    return seen_alpha and seen_other
|
224
|
+
|
225
|
+
def strcmp(search_term, candidates, ignore_case=True, verbose=True, scorer='WR'):
    """
    Compares a search term with candidate strings and finds the best match based on similarity score.

    Parameters:
        search_term (str): The term to be searched for.
        candidates (str or sequence of str): Candidate strings to compare against
            the search term. A single string is treated as one candidate.
        ignore_case (bool): If True, the comparison ignores case differences.
        verbose (bool): If True, prints the similarity score and the best match.
        scorer (str): Case-insensitive selector: a value containing 'part' uses
            fuzz.partial_ratio, one containing 'w' uses fuzz.WRatio, one containing
            'ratio' uses fuzz.ratio; anything else falls back to fuzz.WRatio.

    Returns:
        tuple: A tuple containing the best match and its index in the candidates list.

    Raises:
        ValueError: if ``candidates`` is empty.
    """
    def to_lower(s):
        # Converts a string to lowercase if ignore_case is True.
        return s.lower() if ignore_case else s

    # The selector is compared in lower case; the former 'W' / 'Ratio' tests could
    # never match a lowercased string, and fuzz.Ratio does not exist.
    scorer_key = scorer.lower()
    if 'part' in scorer_key:
        score = fuzz.partial_ratio
    elif 'w' in scorer_key:
        score = fuzz.WRatio
    elif 'ratio' in scorer_key:
        score = fuzz.ratio
    else:
        score = fuzz.WRatio

    # Wrap a lone string so the result is that string, not its first character.
    pool = [candidates] if isinstance(candidates, str) else list(candidates)
    if not pool:
        raise ValueError("strcmp() needs at least one candidate")

    term = to_lower(search_term)
    similarity_scores = [score(term, to_lower(word)) for word in pool]
    best_match_score = max(similarity_scores)
    best_match_index = similarity_scores.index(best_match_score)
    if verbose:
        print(f"\nbest_match is: {pool[best_match_index],best_match_score}")
        best_match = process.extract(search_term, pool)
        print(f"建议: {best_match}")
    return pool[best_match_index], best_match_index
|
273
|
+
|
274
|
+
# Example usage
|
275
|
+
# str1 = "plos biology"
|
276
|
+
# str2 = ['PLoS Computational Biology', 'PLOS BIOLOGY']
|
277
|
+
# best_match, idx = strcmp(str1, str2, ignore_case=1)
|
278
|
+
|
279
|
+
def counter(list_, verbose=True):
    """
    Count how often each element occurs.

    Args:
        list_ (iterable): the hashable items to count.
        verbose (bool): print one "item: count" line per distinct element.

    Returns:
        collections.Counter: the occurrence counts.
    """
    tally = Counter(list_)
    if verbose:
        # Print the name counts
        for entry in tally:
            print(f"{entry}: {tally[entry]}")
    return tally
|
286
|
+
# usage:
|
287
|
+
# print(f"Return an iterator over elements repeating each as many times as its count:\n{sorted(c.elements())}")
|
288
|
+
# print(f"Return a list of the n most common elements:\n{c.most_common()}")
|
289
|
+
# print(f"Compute the sum of the counts:\n{c.total()}")
|
290
|
+
|
291
|
+
|
58
292
|
|
59
293
|
def str2time(time_str, fmt='24'):
|
60
294
|
"""
|
@@ -81,7 +315,8 @@ def str2time(time_str, fmt='24'):
|
|
81
315
|
elif len(time_str_split)==3:
|
82
316
|
H,M,S=time_str_split
|
83
317
|
time_str_full=H+":"+M+":"+S
|
84
|
-
|
318
|
+
else:
|
319
|
+
time_str_full=time_str_
|
85
320
|
if 'am' in time_str.lower():
|
86
321
|
time_str_full+=" AM"
|
87
322
|
elif "pm"in time_str.lower():
|
@@ -94,10 +329,10 @@ def str2time(time_str, fmt='24'):
|
|
94
329
|
|
95
330
|
try:
|
96
331
|
# Try to parse the time string assuming it could be in 24-hour or 12-hour format
|
97
|
-
time_obj = datetime.strptime(time_str, '%H:%M:%S')
|
332
|
+
time_obj = datetime.strptime(time_len_corr(time_str), '%H:%M:%S')
|
98
333
|
except ValueError:
|
99
334
|
try:
|
100
|
-
time_obj = datetime.strptime(time_str, '%I:%M:%S %p')
|
335
|
+
time_obj = datetime.strptime(time_len_corr(time_str), '%I:%M:%S %p')
|
101
336
|
except ValueError as e:
|
102
337
|
raise ValueError(f"Unable to parse time string: {time_str}. Error: {e}")
|
103
338
|
|
@@ -228,18 +463,33 @@ def num2str(num, *args):
|
|
228
463
|
# print(num2str(7000.125, 2),type(num2str(7000.125, 2))) # Output: "7000.13"
|
229
464
|
# print(num2str(12345.6789, ","),type(num2str(12345.6789, ","))) # Output: "12,345.6789"
|
230
465
|
# print(num2str(7000.00, ","),type(num2str(7000.00, ","))) # Output: "7,000.00"
|
231
|
-
def sreplace(
|
466
|
+
def sreplace(*args,**kwargs):
|
232
467
|
"""
|
468
|
+
sreplace(text, by=None, robust=True)
|
233
469
|
Replace specified substrings in the input text with provided replacements.
|
234
470
|
Args:
|
235
471
|
text (str): The input text where replacements will be made.
|
236
|
-
|
472
|
+
by (dict, optional): A dictionary containing substrings to be replaced as keys
|
237
473
|
and their corresponding replacements as values. Defaults to {".com": "..come", "\n": " ", "\t": " ", " ": " "}.
|
238
474
|
robust (bool, optional): If True, additional default replacements for newline and tab characters will be applied.
|
239
475
|
Default is False.
|
240
476
|
Returns:
|
241
477
|
str: The text after replacements have been made.
|
242
478
|
"""
|
479
|
+
text = None
|
480
|
+
by = kwargs.get('by', None)
|
481
|
+
robust = kwargs.get('robust', True)
|
482
|
+
|
483
|
+
for arg in args:
|
484
|
+
if isinstance(arg,str):
|
485
|
+
text=arg
|
486
|
+
elif isinstance(arg,dict):
|
487
|
+
by=arg
|
488
|
+
elif isinstance(arg,bool):
|
489
|
+
robust=arg
|
490
|
+
else:
|
491
|
+
Error(f"{type(arg)} is not supported")
|
492
|
+
|
243
493
|
# Default replacements for newline and tab characters
|
244
494
|
default_replacements = {
|
245
495
|
"\a": "",
|
@@ -260,19 +510,18 @@ def sreplace(text, dict_replace=None, robust=True):
|
|
260
510
|
}
|
261
511
|
|
262
512
|
# If dict_replace is None, use the default dictionary
|
263
|
-
if
|
264
|
-
|
265
|
-
|
513
|
+
if by is None:
|
514
|
+
by = {}
|
266
515
|
# If robust is True, update the dictionary with default replacements
|
267
516
|
if robust:
|
268
|
-
|
517
|
+
by.update(default_replacements)
|
269
518
|
|
270
519
|
# Iterate over each key-value pair in the dictionary and replace substrings accordingly
|
271
|
-
for k, v in
|
520
|
+
for k, v in by.items():
|
272
521
|
text = text.replace(k, v)
|
273
522
|
return text
|
274
523
|
# usage:
|
275
|
-
# sreplace(text,
|
524
|
+
# sreplace(text, by=dict(old_str='new_str'), robust=True)
|
276
525
|
|
277
526
|
def paper_size(paper_type_str='a4'):
|
278
527
|
df=pd.DataFrame({'a0':[841,1189],'a1':[594,841],'a2':[420,594],'a3':[297,420],'a4':[210,297],'a5':[148,210],'a6':[105,148],'a7':[74,105],
|
@@ -285,6 +534,7 @@ def paper_size(paper_type_str='a4'):
|
|
285
534
|
if not paper_type:
|
286
535
|
paper_type='a4' # default
|
287
536
|
return df[paper_type].tolist()
|
537
|
+
|
288
538
|
def docx2pdf(dir_docx, dir_pdf=None):
|
289
539
|
if dir_pdf:
|
290
540
|
convert(dir_docx,dir_pdf)
|
@@ -414,10 +664,10 @@ def ssplit(text, by="space", verbose=False, **kws):
|
|
414
664
|
if verbose:
|
415
665
|
print(f"split_by_word_length(text, length)")
|
416
666
|
return split_by_word_length(text, **kws) # split_by_word_length(text, length)
|
417
|
-
elif "," in by:
|
418
|
-
|
419
|
-
|
420
|
-
|
667
|
+
# elif "," in by:
|
668
|
+
# if verbose:
|
669
|
+
# print(f"splited by ','")
|
670
|
+
# return text.split(",")
|
421
671
|
elif isinstance(by, list):
|
422
672
|
if verbose:
|
423
673
|
print(f"split_by_multiple_delimiters: ['|','&']")
|
@@ -500,7 +750,7 @@ def pdf2img(dir_pdf, dir_save=None, page=None, kind="png",verbose=True, **kws):
|
|
500
750
|
df_dir_img_single_page = pd.DataFrame()
|
501
751
|
dir_single_page = []
|
502
752
|
if verbose:
|
503
|
-
|
753
|
+
pp(pdfinfo_from_path(dir_pdf))
|
504
754
|
if isinstance(page, tuple) and page:
|
505
755
|
page = list(page)
|
506
756
|
if isinstance(page,int):
|
@@ -573,7 +823,14 @@ def fload(fpath, kind=None, **kwargs):
|
|
573
823
|
def load_xlsx(fpath, **kwargs):
|
574
824
|
df = pd.read_excel(fpath, **kwargs)
|
575
825
|
return df
|
576
|
-
|
826
|
+
def load_ipynb(fpath,**kwargs):
    """
    Load a Jupyter notebook and return it rendered as Markdown.

    Args:
        fpath (str): path of the .ipynb file.
        **kwargs: ``as_version`` (int) selects the notebook format version the
            file is read as. Defaults to 4.

    Returns:
        str: the Markdown body produced by ``MarkdownExporter``.
    """
    as_version = kwargs.get("as_version", 4)
    # Explicit UTF-8 so the result does not depend on the locale's default
    # encoding (fsave writes its text files as UTF-8 as well).
    with open(fpath, "r", encoding="utf-8") as file:
        nb = nbformat.read(file, as_version=as_version)
    md_exporter = MarkdownExporter()
    md_body, _ = md_exporter.from_notebook_node(nb)
    return md_body
|
833
|
+
|
577
834
|
def load_pdf(fpath, page='all', verbose=False, **kwargs):
|
578
835
|
"""
|
579
836
|
Parameters:
|
@@ -635,7 +892,7 @@ def fload(fpath, kind=None, **kwargs):
|
|
635
892
|
|
636
893
|
kind = kind.lstrip('.').lower()
|
637
894
|
img_types=[ 'bmp','eps', 'gif', 'icns', 'ico', 'im', 'jpg','jpeg', 'jpeg2000','msp', 'pcx', 'png', 'ppm', 'sgi', 'spider', 'tga','tiff','webp',"json"]
|
638
|
-
doc_types = ["docx", "txt", "md", "html", "json", "yaml", "xml", "csv", "xlsx", "pdf"]
|
895
|
+
doc_types = ["docx", "txt", "md", "html", "json", "yaml", "xml", "csv", "xlsx", "pdf","ipynb"]
|
639
896
|
supported_types = [*doc_types, *img_types]
|
640
897
|
if kind not in supported_types:
|
641
898
|
raise ValueError(f"Error:\n{kind} is not in the supported list {supported_types}")
|
@@ -655,6 +912,8 @@ def fload(fpath, kind=None, **kwargs):
|
|
655
912
|
return load_csv(fpath, **kwargs)
|
656
913
|
elif kind == "xlsx":
|
657
914
|
return load_xlsx(fpath, **kwargs)
|
915
|
+
elif kind == "ipynb":
|
916
|
+
return load_ipynb(fpath, **kwargs)
|
658
917
|
elif kind == "pdf":
|
659
918
|
print('usage:load_pdf(fpath, page="all", verbose=False)')
|
660
919
|
return load_pdf(fpath, **kwargs)
|
@@ -675,6 +934,31 @@ def fload(fpath, kind=None, **kwargs):
|
|
675
934
|
# xlsx_content = fload('sample.xlsx')
|
676
935
|
# docx_content = fload('sample.docx')
|
677
936
|
|
937
|
+
def fupdate(fpath, content=None):
    """
    Update a file by adding new content at the top and moving the old content to the bottom.

    Parameters
    ----------
    fpath : str
        The file path where the content should be updated.
    content : str, optional
        The new content to add at the top of the file. If not provided, the function will not add any new content.

    Notes
    -----
    - If the file at `fpath` does not exist, it will be created.
    - The new content will be added at the top, followed by the old content of the file.
    - The file is read and written as UTF-8, matching what fsave writes.
    """
    content = content or ""
    if os.path.exists(fpath):
        with open(fpath, 'r', encoding='utf-8') as file:
            old_content = file.read()
    else:
        old_content = ''

    # One write of the assembled text; the old content was already read above,
    # so truncating the file here is safe.
    with open(fpath, 'w', encoding='utf-8') as file:
        file.write(content + old_content)
|
961
|
+
|
678
962
|
def fsave(
|
679
963
|
fpath,
|
680
964
|
content,
|
@@ -682,6 +966,7 @@ def fsave(
|
|
682
966
|
font_name="Times",
|
683
967
|
font_size=10,
|
684
968
|
spacing=6,
|
969
|
+
mode='w',
|
685
970
|
**kwargs,
|
686
971
|
):
|
687
972
|
"""
|
@@ -697,8 +982,8 @@ def fsave(
|
|
697
982
|
Returns:
|
698
983
|
None
|
699
984
|
"""
|
700
|
-
def save_content(fpath, content):
|
701
|
-
with open(fpath,
|
985
|
+
def save_content(fpath, content, mode=mode):
    """Write ``content`` to ``fpath`` as UTF-8 text.

    ``mode`` is passed to ``open``; its default is bound to the enclosing
    function's ``mode`` value at the time this helper is defined.
    """
    with open(fpath, mode, encoding='utf-8') as file:
        file.write(content)
|
703
988
|
|
704
989
|
|
@@ -717,19 +1002,19 @@ def fsave(
|
|
717
1002
|
doc.save(fpath)
|
718
1003
|
|
719
1004
|
|
720
|
-
def save_txt_md(fpath, content, sep="\n"):
|
1005
|
+
def save_txt_md(fpath, content, sep="\n",mode='w'):
    """
    Save text or markdown content.

    Args:
        fpath (str): destination file.
        content (str or iterable of str): a string is written as is; any other
            iterable is joined with ``sep`` first.
        sep (str): separator placed between the items of a non-string content.
        mode (str): file mode handed to ``save_content`` ('w' or 'a').
    """
    # Ensure content is a single string. It is joined exactly once: joining the
    # already-joined string again inserted `sep` between every character.
    if not isinstance(content, str):
        content = sep.join(content)
    save_content(fpath, content, mode)
|
725
1010
|
|
726
1011
|
|
727
|
-
def save_html(fpath, content, font_name, font_size):
|
1012
|
+
def save_html(fpath, content, font_name, font_size,mode='w'):
    """
    Save content as a minimal HTML page, one styled ``<p>`` per item of ``content``.

    Args:
        fpath (str): destination file.
        content (iterable): the paragraphs; each item becomes one ``<p>`` element.
        font_name (str): value of the CSS ``font-family`` property.
        font_size (int): font size in pixels.
        mode (str): file mode handed to ``save_content``.
    """
    pieces = ["<html><body>"]
    for paragraph_text in content:
        pieces.append(
            f'<p style="font-family:{font_name}; font-size:{font_size}px;">{paragraph_text}</p>'
        )
    pieces.append("</body></html>")
    save_content(fpath, "".join(pieces), mode)
|
733
1018
|
|
734
1019
|
|
735
1020
|
def save_pdf(fpath, content, font_name, font_size):
|
@@ -752,7 +1037,25 @@ def fsave(
|
|
752
1037
|
df = pd.DataFrame(data)
|
753
1038
|
df.to_excel(fpath, **kwargs)
|
754
1039
|
|
1040
|
+
def save_ipynb(fpath,data,**kwargs):
    """
    Save markdown text as a Jupyter notebook.

    The text is split on ``` code fences: text outside a fence becomes a markdown
    cell, text inside a fence becomes a code cell. Empty cells are skipped.

    Args:
        fpath (str): destination .ipynb file.
        data (str): markdown source containing optional fenced code blocks.
        **kwargs: accepted for interface compatibility; not used.
    """
    # Split the content by code fences to distinguish between code and markdown
    parts = data.split('```')
    cells = []

    for i, part in enumerate(parts):
        if i % 2 == 0:
            # Even index: markdown content
            text = part.strip()
            if text:
                cells.append(nbformat.v4.new_markdown_cell(text))
        else:
            # Odd index: code content. The remainder of the opening fence line
            # is the language tag (e.g. "python"), not code, so drop it.
            info, newline, code = part.partition("\n")
            if not newline:
                # Single-line fence: everything is code.
                code = info
            code = code.strip()
            if code:
                cells.append(nbformat.v4.new_code_cell(code))
    # Create a new notebook
    nb = nbformat.v4.new_notebook()
    nb['cells'] = cells
    # Write the notebook to a file. The module is imported as `nbformat` (the
    # former `nbf` alias was undefined) and write() takes the notebook first.
    with open(fpath, 'w', encoding='utf-8') as ipynb_file:
        nbformat.write(nb, ipynb_file)
|
1058
|
+
|
756
1059
|
# def save_json(fpath, data, **kwargs):
|
757
1060
|
# with open(fpath, "w") as file:
|
758
1061
|
# json.dump(data, file, **kwargs)
|
@@ -811,39 +1114,41 @@ def fsave(
|
|
811
1114
|
"json",
|
812
1115
|
"xml",
|
813
1116
|
"yaml",
|
1117
|
+
"ipynb"
|
814
1118
|
]:
|
815
|
-
|
816
|
-
f"
|
1119
|
+
print(
|
1120
|
+
f"Warning:\n{kind} is not in the supported list ['docx', 'txt', 'md', 'html', 'pdf', 'csv', 'xlsx', 'json', 'xml', 'yaml']"
|
817
1121
|
)
|
818
1122
|
|
819
1123
|
if kind == "docx" or kind=="doc":
|
820
1124
|
save_docx(fpath, content, font_name, font_size, spacing)
|
821
1125
|
elif kind == "txt":
|
822
|
-
save_txt_md(fpath, content, sep="")
|
1126
|
+
save_txt_md(fpath, content, sep="",mode=mode)
|
823
1127
|
elif kind == "md":
|
824
|
-
save_txt_md(fpath, content, sep="")
|
1128
|
+
save_txt_md(fpath, content, sep="",mode=mode)
|
825
1129
|
elif kind == "html":
|
826
1130
|
save_html(fpath, content, font_name, font_size)
|
827
1131
|
elif kind == "pdf":
|
828
1132
|
save_pdf(fpath, content, font_name, font_size)
|
829
1133
|
elif kind == "csv":
|
830
|
-
save_csv(
|
831
|
-
fpath, content, **kwargs
|
832
|
-
) # Assuming content is in tabular form (list of dicts or DataFrame)
|
1134
|
+
save_csv(fpath, content, **kwargs)
|
833
1135
|
elif kind == "xlsx":
|
834
|
-
save_xlsx(
|
835
|
-
fpath, content, **kwargs
|
836
|
-
) # Assuming content is in tabular form (list of dicts or DataFrame)
|
1136
|
+
save_xlsx(fpath, content, **kwargs)
|
837
1137
|
elif kind == "json":
|
838
|
-
save_json(fpath, content)
|
1138
|
+
save_json(fpath, content)
|
839
1139
|
elif kind == "xml":
|
840
|
-
save_xml(fpath, content)
|
1140
|
+
save_xml(fpath, content)
|
841
1141
|
elif kind == "yaml":
|
842
|
-
save_yaml(fpath, content, **kwargs)
|
1142
|
+
save_yaml(fpath, content, **kwargs)
|
1143
|
+
elif kind == "ipynb":
|
1144
|
+
save_ipynb(fpath, content, **kwargs)
|
843
1145
|
else:
|
844
|
-
|
845
|
-
|
846
|
-
|
1146
|
+
try:
|
1147
|
+
netfinder.downloader(url=content, dir_save=dirname(fpath), kind=kind)
|
1148
|
+
except:
|
1149
|
+
print(
|
1150
|
+
f"Error:\n{kind} is not in the supported list ['docx', 'txt', 'md', 'html', 'pdf', 'csv', 'xlsx', 'json', 'xml', 'yaml']"
|
1151
|
+
)
|
847
1152
|
|
848
1153
|
|
849
1154
|
# # Example usage
|
@@ -867,14 +1172,92 @@ def fsave(
|
|
867
1172
|
def addpath(fpath):
    """
    Put ``fpath`` at the front of the module search path.

    Args:
        fpath (str): directory to search first when importing.
    """
    # The builtin function `dir` used to be inserted here instead of the path.
    sys.path.insert(0, fpath)
|
869
1174
|
def dirname(fpath):
    """
    dirname: Extracting Directory Name from a File Path
    Args:
        fpath (str): the file or directory path
    Returns:
        str: directory, without filename, always ending with '/'. A path with
        no directory part (a bare file name) yields './'.
    """
    dirname_ = os.path.dirname(fpath)
    if not dirname_:
        # A bare file name lives in the current directory; appending '/' to the
        # empty string would wrongly point at the filesystem root.
        return "./"
    if not dirname_.endswith('/'):
        dirname_ = dirname_ + "/"
    return dirname_
|
874
|
-
|
1186
|
+
|
1187
|
+
def dir_name(fpath): # same as "dirname"
    """Alias of ``dirname``: return the result of ``dirname(fpath)`` unchanged."""
    return dirname(fpath)
|
876
1189
|
def basename(fpath):
    """
    basename: return the final component of a path via ``os.path.basename``.
    Args:
        fpath (str): the file or directory path
    Returns:
        str: the last path component, e.g. "file.txt" for "/a/b/file.txt"
    """
    return os.path.basename(fpath)
|
1198
|
+
def flist(fpath, contains="all"):
    """
    List the regular files directly inside ``fpath``, optionally filtered.

    Args:
        fpath (str): the directory to list (not recursive).
        contains (str or list of str): 'all' (any value containing it) returns
            every file; another string keeps the files for which
            ``isa(file, contains)`` is true; a list concatenates the result of
            each of its filters in order.

    Returns:
        list of str: the matching paths, each joined with ``fpath``.
    """
    all_files = []
    for name in os.listdir(fpath):
        candidate = os.path.join(fpath, name)
        if os.path.isfile(candidate):
            all_files.append(candidate)

    if isinstance(contains, list):
        collected = []
        for one_filter in contains:
            collected += flist(fpath, one_filter)
        return collected
    if 'all' in contains.lower():
        return all_files
    return [f for f in all_files if isa(f, contains)]
|
1211
|
+
def sort_kind(df, by="name", ascending=True):
    """
    Return ``df`` reordered by column ``by`` with a fresh 0..n-1 index.

    Columns of dtype ``object`` are compared case-insensitively (lower-cased);
    other columns are compared by value.

    Args:
        df (pandas.DataFrame): the table to sort.
        by (str): name of the column to sort on. Defaults to "name".
        ascending (bool): sort direction; descending is the reversed ascending order.

    Returns:
        pandas.DataFrame: a reordered copy of ``df``.
    """
    column = df[by]
    # Check if the column contains string values
    sort_key = column.str.lower() if column.dtype == 'object' else column
    sorted_index = sort_key.argsort()
    if not ascending:
        sorted_index = sorted_index[::-1]
    return df.iloc[sorted_index].reset_index(drop=True)
|
1224
|
+
|
1225
|
+
def isa(*args, **kwargs):
    """
    fpath, contains='img'
    Check whether a path (or string) is of the requested kind.

    Args:
        fpath (str): Path to the file, or the string to test. Positionally, a
            string containing '/' or '\\' is taken as the path; when no such
            string is given and two plain strings are passed, the first one is
            the path.
        contains (str): kind to test for. Default is 'img' for images. Other
            options include 'doc' for documents, 'zip' for ZIP archives,
            'dir'/'folder', 'file', 'num', 'text'/'txt' and 'color'.

    Returns:
        bool: True if the file matches the contains, False otherwise.

    Raises:
        ValueError: if no path/string to test was supplied.
    """
    # Keyword arguments were documented but never read; the names were left
    # unbound whenever the positional heuristics did not set them.
    fpath = kwargs.get('fpath')
    contains = kwargs.get('contains', 'img')
    plain = []
    for arg in args:
        if isinstance(arg, str):
            if '/' in arg or '\\' in arg:
                fpath = arg
            else:
                plain.append(arg)
    if plain:
        contains = plain[-1]
        if fpath is None and len(plain) > 1:
            # e.g. isa("photo.png", "img"): a bare file name has no separator.
            fpath = plain[0]
    if fpath is None:
        raise ValueError("isa() needs the path or string to test (fpath)")

    kind = contains.lower()
    if 'img' in kind or 'image' in kind:
        return is_image(fpath)
    elif 'doc' in kind:
        return is_document(fpath)
    elif 'zip' in kind:
        return is_zip(fpath)
    elif 'dir' in kind or ('f' in kind and 'd' in kind):  # dir / folder
        return os.path.isdir(fpath)
    elif 'fi' in kind:  # file
        return os.path.isfile(fpath)
    elif 'num' in kind:
        # This branch used to repeat the file check (copy-paste); test whether
        # the string parses as a number instead.
        try:
            float(fpath)
        except (TypeError, ValueError):
            return False
        return True
    elif 'text' in kind or 'txt' in kind:
        return is_text(fpath)
    elif 'color' in kind:
        return is_str_color(fpath)
    else:
        print(f"{contains} was not set up correctly")
        return False
|
878
1261
|
|
879
1262
|
def listdir(
|
880
1263
|
rootdir,
|
@@ -885,83 +1268,6 @@ def listdir(
|
|
885
1268
|
orient="list",
|
886
1269
|
output="df"
|
887
1270
|
):
|
888
|
-
def sort_kind(df, by="name", ascending=True):
|
889
|
-
if df[by].dtype == 'object': # Check if the column contains string values
|
890
|
-
if ascending:
|
891
|
-
sorted_index = df[by].str.lower().argsort()
|
892
|
-
else:
|
893
|
-
sorted_index = df[by].str.lower().argsort()[::-1]
|
894
|
-
else:
|
895
|
-
if ascending:
|
896
|
-
sorted_index = df[by].argsort()
|
897
|
-
else:
|
898
|
-
sorted_index = df[by].argsort()[::-1]
|
899
|
-
sorted_df = df.iloc[sorted_index].reset_index(drop=True)
|
900
|
-
return sorted_df
|
901
|
-
|
902
|
-
def flist(fpath, filter="all"):
|
903
|
-
all_files = [os.path.join(fpath, f) for f in os.listdir(fpath) if os.path.isfile(os.path.join(fpath, f))]
|
904
|
-
if isinstance(filter, list):
|
905
|
-
filt_files = []
|
906
|
-
for filter_ in filter:
|
907
|
-
filt_files.extend(flist(fpath, filter_))
|
908
|
-
return filt_files
|
909
|
-
else:
|
910
|
-
if 'all' in filter.lower():
|
911
|
-
return all_files
|
912
|
-
else:
|
913
|
-
filt_files = [f for f in all_files if istype(f, filter)]
|
914
|
-
return filt_files
|
915
|
-
|
916
|
-
def istype(fpath, filter='img'):
|
917
|
-
"""
|
918
|
-
Filters file paths based on the specified filter.
|
919
|
-
Args:
|
920
|
-
fpath (str): Path to the file.
|
921
|
-
filter (str): Filter of file to filter. Default is 'img' for images. Other options include 'doc' for documents,
|
922
|
-
'zip' for ZIP archives, and 'other' for other types of files.
|
923
|
-
Returns:
|
924
|
-
bool: True if the file matches the filter, False otherwise.
|
925
|
-
"""
|
926
|
-
if 'img' in filter.lower():
|
927
|
-
return is_image(fpath)
|
928
|
-
elif 'doc' in filter.lower():
|
929
|
-
return is_document(fpath)
|
930
|
-
elif 'zip' in filter.lower():
|
931
|
-
return is_zip(fpath)
|
932
|
-
else:
|
933
|
-
return False
|
934
|
-
|
935
|
-
def is_image(fpath):
|
936
|
-
mime_type, _ = mimetypes.guess_type(fpath)
|
937
|
-
if mime_type and mime_type.startswith('image'):
|
938
|
-
return True
|
939
|
-
else:
|
940
|
-
return False
|
941
|
-
|
942
|
-
def is_document(fpath):
|
943
|
-
mime_type, _ = mimetypes.guess_type(fpath)
|
944
|
-
if mime_type and (
|
945
|
-
mime_type.startswith('text/') or
|
946
|
-
mime_type == 'application/pdf' or
|
947
|
-
mime_type == 'application/msword' or
|
948
|
-
mime_type == 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' or
|
949
|
-
mime_type == 'application/vnd.ms-excel' or
|
950
|
-
mime_type == 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' or
|
951
|
-
mime_type == 'application/vnd.ms-powerpoint' or
|
952
|
-
mime_type == 'application/vnd.openxmlformats-officedocument.presentationml.presentation'
|
953
|
-
):
|
954
|
-
return True
|
955
|
-
else:
|
956
|
-
return False
|
957
|
-
|
958
|
-
def is_zip(fpath):
|
959
|
-
mime_type, _ = mimetypes.guess_type(fpath)
|
960
|
-
if mime_type == 'application/zip':
|
961
|
-
return True
|
962
|
-
else:
|
963
|
-
return False
|
964
|
-
|
965
1271
|
if not kind.startswith("."):
|
966
1272
|
kind = "." + kind
|
967
1273
|
|
@@ -990,8 +1296,10 @@ def listdir(
|
|
990
1296
|
os.path.isfile(item_path)
|
991
1297
|
)
|
992
1298
|
if kind in ['.doc','.img','.zip']: #选择大的类别
|
993
|
-
if kind != ".folder" and not
|
1299
|
+
if kind != ".folder" and not isa(item_path, kind):
|
994
1300
|
continue
|
1301
|
+
elif kind in ['.all']:
|
1302
|
+
return flist(fpath, contains=contains)
|
995
1303
|
else: #精确到文件的后缀
|
996
1304
|
if not is_folder and not is_file:
|
997
1305
|
continue
|
@@ -1058,32 +1366,45 @@ def list_func(lib_name, opt="call"):
|
|
1058
1366
|
funcs = dir(lib_name)
|
1059
1367
|
return funcs
|
1060
1368
|
def func_list(lib_name, opt="call"):
    """
    Alias of list_func().

    Args:
        lib_name: object (typically an imported module) whose attribute
            names should be listed.
        opt (str): forwarded unchanged to list_func() as its `opt` keyword.

    Returns:
        Whatever list_func() returns for the same arguments.
    """
    names = list_func(lib_name, opt=opt)
    return names
|
1066
1370
|
|
1067
|
-
def newfolder(
|
1068
|
-
|
1371
|
+
def newfolder(*args, **kwargs):
|
1372
|
+
"""
|
1373
|
+
newfolder(pardir, chdir)
|
1374
|
+
Args:
|
1375
|
+
pardir (dir): parent dir
|
1376
|
+
chdir (str): children dir
|
1377
|
+
overwrite (bool): overwrite?
|
1378
|
+
Returns:
|
1379
|
+
mkdir, giving a option if exists_ok or not
|
1380
|
+
"""
|
1381
|
+
overwrite=kwargs.get("overwrite",False)
|
1382
|
+
for arg in args:
|
1383
|
+
if isinstance(arg, str):
|
1384
|
+
if "/" in arg or "\\" in arg:
|
1385
|
+
pardir=arg
|
1386
|
+
print(f'pardir{pardir}')
|
1387
|
+
else:
|
1388
|
+
chdir = arg
|
1389
|
+
print(f'chdir{chdir}')
|
1390
|
+
elif isinstance(arg,bool):
|
1391
|
+
overwrite=arg
|
1392
|
+
print(overwrite)
|
1393
|
+
else:
|
1394
|
+
print(f"{arg}Error: not support a {type(arg)} type")
|
1069
1395
|
rootdir = []
|
1070
1396
|
# Convert string to list
|
1071
1397
|
if isinstance(chdir, str):
|
1072
1398
|
chdir = [chdir]
|
1073
|
-
|
1074
1399
|
# Subfoldername should be unique
|
1075
1400
|
chdir = list(set(chdir))
|
1076
|
-
|
1077
1401
|
if isinstance(pardir, str): # Dir_parents should be 'str' type
|
1078
1402
|
pardir = os.path.normpath(pardir)
|
1079
|
-
|
1080
1403
|
# Get the slash type: "/" or "\"
|
1081
1404
|
stype = '/' if '/' in pardir else '\\'
|
1082
|
-
|
1083
1405
|
# Check if the parent directory exists and is a directory path
|
1084
1406
|
if os.path.isdir(pardir):
|
1085
1407
|
os.chdir(pardir) # Set current path
|
1086
|
-
|
1087
1408
|
# Check if subdirectories are not empty
|
1088
1409
|
if chdir:
|
1089
1410
|
chdir.sort()
|
@@ -1095,51 +1416,46 @@ def newfolder(pardir, chdir):
|
|
1095
1416
|
os.mkdir('./' + folder)
|
1096
1417
|
print(f'\n {folder} was created successfully!\n')
|
1097
1418
|
else:
|
1098
|
-
|
1099
|
-
|
1419
|
+
if overwrite:
|
1420
|
+
shutil.rmtree(child_tmp)
|
1421
|
+
os.mkdir('./' + folder)
|
1422
|
+
print(f'\n {folder} overwrite! \n')
|
1423
|
+
else:
|
1424
|
+
print(f'\n {folder} already exists! \n')
|
1100
1425
|
rootdir.append(child_tmp + stype) # Note down
|
1101
|
-
|
1102
1426
|
else:
|
1103
1427
|
print('\nWarning: Dir_child doesn\'t exist\n')
|
1104
|
-
|
1105
1428
|
else:
|
1106
1429
|
print('\nWarning: Dir_parent is not a directory path\n')
|
1107
|
-
|
1108
1430
|
# Dir is the main output, if only one dir, then str type is inconvenient
|
1109
1431
|
if len(rootdir) == 1:
|
1110
1432
|
rootdir = rootdir[0]
|
1111
|
-
|
1112
1433
|
return rootdir
|
1113
|
-
|
1114
1434
|
|
1115
1435
|
def figsave(*args,dpi=300):
|
1116
|
-
|
1436
|
+
dir_save = None
|
1117
1437
|
fname = None
|
1118
|
-
|
1119
1438
|
for arg in args:
|
1120
1439
|
if isinstance(arg, str):
|
1121
1440
|
if '/' in arg or '\\' in arg:
|
1122
|
-
|
1441
|
+
dir_save = arg
|
1123
1442
|
elif '/' not in arg and '\\' not in arg:
|
1124
1443
|
fname = arg
|
1125
|
-
|
1126
1444
|
# Backup original values
|
1127
|
-
if '/' in
|
1128
|
-
if
|
1129
|
-
|
1130
|
-
elif '\\' in
|
1131
|
-
if
|
1132
|
-
|
1445
|
+
if '/' in dir_save:
|
1446
|
+
if dir_save[-1] != '/':
|
1447
|
+
dir_save = dir_save + '/'
|
1448
|
+
elif '\\' in dir_save:
|
1449
|
+
if dir_save[-1] != '\\':
|
1450
|
+
dir_save = dir_save + '\\'
|
1133
1451
|
else:
|
1134
|
-
raise ValueError('Check the Path of
|
1135
|
-
|
1452
|
+
raise ValueError('Check the Path of dir_save Directory')
|
1136
1453
|
ftype = fname.split('.')[-1]
|
1137
1454
|
if len(fname.split('.')) == 1:
|
1138
1455
|
ftype = 'nofmt'
|
1139
|
-
fname =
|
1456
|
+
fname = dir_save + fname + '.' + ftype
|
1140
1457
|
else:
|
1141
|
-
fname =
|
1142
|
-
|
1458
|
+
fname = dir_save + fname
|
1143
1459
|
# Save figure based on file type
|
1144
1460
|
if ftype.lower() == 'eps':
|
1145
1461
|
plt.savefig(fname, format='eps', bbox_inches='tight')
|
@@ -1164,7 +1480,6 @@ def figsave(*args,dpi=300):
|
|
1164
1480
|
plt.savefig(fname, format='emf', dpi=dpi, bbox_inches='tight')
|
1165
1481
|
elif ftype.lower() == 'fig':
|
1166
1482
|
plt.savefig(fname, format='pdf', bbox_inches='tight',dpi=dpi)
|
1167
|
-
|
1168
1483
|
print(f'\nSaved @: dpi={dpi}\n{fname}')
|
1169
1484
|
|
1170
1485
|
|
@@ -1198,8 +1513,6 @@ def FuncStars(ax,
|
|
1198
1513
|
report=None,
|
1199
1514
|
report_scale=-0.1,
|
1200
1515
|
report_loc=None):
|
1201
|
-
|
1202
|
-
|
1203
1516
|
if ax is None:
|
1204
1517
|
ax = plt.gca()
|
1205
1518
|
if Ylim is None:
|
@@ -1210,11 +1523,9 @@ def FuncStars(ax,
|
|
1210
1523
|
report_loc = np.min(Ylim) + report_scale*np.abs(np.diff(Ylim))
|
1211
1524
|
if report_scale > 0:
|
1212
1525
|
report_scale = -np.abs(report_scale)
|
1213
|
-
|
1214
1526
|
yscale = np.float64(yscale)
|
1215
1527
|
y_loc = np.min(Ylim) + yscale*(np.max(Ylim)-np.min(Ylim))
|
1216
1528
|
xcenter = np.mean([x1, x2])
|
1217
|
-
|
1218
1529
|
# ns / *
|
1219
1530
|
if alpha < pval:
|
1220
1531
|
if nsshow == 'on':
|
@@ -1238,7 +1549,6 @@ def FuncStars(ax,
|
|
1238
1549
|
plt.text(xcenter, y_loc, symbol * 3,
|
1239
1550
|
ha='center', va='center_baseline',
|
1240
1551
|
fontsize=fontsize, fontname=fontname, color=symbolcolor)
|
1241
|
-
|
1242
1552
|
# lines indicators
|
1243
1553
|
if linego: # and 0 < pval <= 0.05:
|
1244
1554
|
print(pval)
|
@@ -1278,12 +1588,10 @@ def FuncStars(ax,
|
|
1278
1588
|
[np.min(Ylim) + 0.95*(np.max(Ylim)-np.min(Ylim)) - np.abs(np.diff(Ylim)) * tailindicator[1],
|
1279
1589
|
np.min(Ylim) + 0.95*(np.max(Ylim)-np.min(Ylim)) - np.abs(np.diff(Ylim)) * 0.002],
|
1280
1590
|
linestyle=linestyle, color=linecolor, linewidth=linewidth)
|
1281
|
-
|
1282
1591
|
if values_below is not None:
|
1283
1592
|
plt.text(xcenter, y_loc * (-0.1), values_below,
|
1284
1593
|
ha='center', va='bottom', # 'center_baseline', rotation=rotation,
|
1285
1594
|
fontsize=fontsize_note, fontname=fontname, color='k')
|
1286
|
-
|
1287
1595
|
# report / comments
|
1288
1596
|
if report is not None:
|
1289
1597
|
plt.text(xcenter, report_loc, report,
|
@@ -1293,7 +1601,49 @@ def is_str_color(s):
|
|
1293
1601
|
# Regular expression pattern for hexadecimal color codes
|
1294
1602
|
color_code_pattern = r"^#([A-Fa-f0-9]{6}|[A-Fa-f0-9]{8})$"
|
1295
1603
|
return re.match(color_code_pattern, s) is not None
|
1296
|
-
|
1604
|
+
def is_num(s):
    """
    Check if a value can be converted to a number (int or float).

    Parameters:
    - s: The value to check, usually a string. Anything accepted by the
      built-in float() counts as a number (this includes strings such as
      "1e3", " 2.5 ", "nan" and "inf").

    Returns:
    - bool: True if float(s) succeeds, False otherwise. Values that float()
      rejects outright (None, lists, dicts, ...) yield False instead of
      raising.
    """
    try:
        float(s)
    except (TypeError, ValueError):
        # ValueError: text that is not a number ("abc", "").
        # TypeError: objects float() cannot handle at all (None, [1], {}).
        return False
    return True
|
1617
|
+
def isnum(s):
    """Alias of is_num(): report whether `s` can be converted to a number."""
    verdict = is_num(s)
    return verdict
|
1619
|
+
def is_image(fpath):
    """
    Check whether a path names an image file.

    The decision is based on the MIME type that mimetypes.guess_type()
    derives from the file name.

    Args:
        fpath (str): Path (or file name) to classify.

    Returns:
        bool: True if the guessed MIME type is in the "image/" family,
        False otherwise (including when no type can be guessed).
    """
    mime_type, _ = mimetypes.guess_type(fpath)
    # guess_type() yields None for unknown or missing extensions.
    return mime_type is not None and mime_type.startswith('image/')
|
1625
|
+
# Non-text MIME types that is_document() accepts: PDF plus the legacy and
# OpenXML formats of Word, Excel and PowerPoint.
_DOCUMENT_MIME_TYPES = frozenset({
    'application/pdf',
    'application/msword',
    'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    'application/vnd.ms-excel',
    'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
    'application/vnd.ms-powerpoint',
    'application/vnd.openxmlformats-officedocument.presentationml.presentation',
})


def is_document(fpath):
    """
    Check whether a path names a document file.

    The decision is based on the MIME type that mimetypes.guess_type()
    derives from the file name. Any "text/*" type counts as a document, as
    do the PDF / Word / Excel / PowerPoint types in _DOCUMENT_MIME_TYPES.

    Args:
        fpath (str): Path (or file name) to classify.

    Returns:
        bool: True if the guessed MIME type is a document type, False
        otherwise (including when no type can be guessed).
    """
    mime_type, _ = mimetypes.guess_type(fpath)
    if mime_type is None:
        # Unknown or missing extension: nothing to match against.
        return False
    return mime_type.startswith('text/') or mime_type in _DOCUMENT_MIME_TYPES
|
1640
|
+
def is_zip(fpath):
    """
    Check whether a path names a ZIP archive.

    The decision is based on the MIME type that mimetypes.guess_type()
    derives from the file name.

    Args:
        fpath (str): Path (or file name) to classify.

    Returns:
        bool: True if the guessed MIME type is exactly 'application/zip',
        False otherwise (including when no type can be guessed).
    """
    mime_type, _ = mimetypes.guess_type(fpath)
    return mime_type == 'application/zip'
|
1646
|
+
|
1297
1647
|
def stdshade(ax=None,*args, **kwargs):
|
1298
1648
|
if (
|
1299
1649
|
isinstance(ax, np.ndarray)
|
@@ -1315,7 +1665,6 @@ def stdshade(ax=None,*args, **kwargs):
|
|
1315
1665
|
l_style2 = ["--", "-."]
|
1316
1666
|
l_style1 = ["-", ":"]
|
1317
1667
|
l_mark = ["o", "+", "*", ".", "x", "_", "|", "s", "d", "^", "v", ">", "<", "p", "h"]
|
1318
|
-
|
1319
1668
|
# Check each argument
|
1320
1669
|
for iarg in range(len(args)):
|
1321
1670
|
if (
|
@@ -1428,8 +1777,6 @@ def stdshade(ax=None,*args, **kwargs):
|
|
1428
1777
|
else:
|
1429
1778
|
line = ax.plot(x, yMean, color=acolor, ls=plotStyle, marker=plotMarker, **line_kws)
|
1430
1779
|
return line[0], fill
|
1431
|
-
|
1432
|
-
|
1433
1780
|
# =============================================================================
|
1434
1781
|
# # for plot figures {Qiu et al.2023}
|
1435
1782
|
# =============================================================================
|
@@ -1610,7 +1957,6 @@ def FuncCmpt(X1, X2, pmc='auto', pair='unpaired'):
|
|
1610
1957
|
X1, X2, cfg_pmc=cfg_pmc, pair=cfg_pair)
|
1611
1958
|
return p, output
|
1612
1959
|
|
1613
|
-
|
1614
1960
|
# ======compare 2 group test===================================================
|
1615
1961
|
# # Example
|
1616
1962
|
# X1 = [19, 22, 16, 29, 24]
|
@@ -1932,7 +2278,6 @@ def FuncMultiCmpt(pmc='pmc', pair='unpair', data=None, dv=None, factor=None,
|
|
1932
2278
|
'both' in cfg_pair) else None
|
1933
2279
|
go_mix_within = factor if ('pa' in cfg_pair) or (
|
1934
2280
|
'np' not in cfg_pair) else None
|
1935
|
-
|
1936
2281
|
if res_tab['p-unc'][0] <= .05:
|
1937
2282
|
# Pairwise Comparisons
|
1938
2283
|
method_post_hoc = [
|
@@ -1946,7 +2291,6 @@ def FuncMultiCmpt(pmc='pmc', pair='unpair', data=None, dv=None, factor=None,
|
|
1946
2291
|
for met in method_post_hoc:
|
1947
2292
|
post_curr = pg.pairwise_tests(data=data, dv=dv, between=go_mix_between, within=go_mix_within, subject=go_subject, parametric=go_pmc, marginal=True, alpha=0.05, alternative='two-sided',
|
1948
2293
|
padjust=met)
|
1949
|
-
|
1950
2294
|
res_posthoc = pd.concat([res_posthoc, post_curr],
|
1951
2295
|
ignore_index=True)
|
1952
2296
|
else:
|
@@ -1966,7 +2310,6 @@ def FuncMultiCmpt(pmc='pmc', pair='unpair', data=None, dv=None, factor=None,
|
|
1966
2310
|
output['res_tab']['APA'] = output['APA'] # note APA in the table
|
1967
2311
|
# print(output['stat'])
|
1968
2312
|
# print(output['res_tab'])
|
1969
|
-
|
1970
2313
|
return output
|
1971
2314
|
|
1972
2315
|
|
@@ -2192,18 +2535,14 @@ def str2list(str_):
|
|
2192
2535
|
def load_img(fpath):
|
2193
2536
|
"""
|
2194
2537
|
Load an image from the specified file path.
|
2195
|
-
|
2196
2538
|
Args:
|
2197
2539
|
fpath (str): The file path to the image.
|
2198
|
-
|
2199
2540
|
Returns:
|
2200
2541
|
PIL.Image: The loaded image.
|
2201
|
-
|
2202
2542
|
Raises:
|
2203
2543
|
FileNotFoundError: If the specified file is not found.
|
2204
2544
|
OSError: If the specified file cannot be opened or is not a valid image file.
|
2205
2545
|
"""
|
2206
|
-
|
2207
2546
|
try:
|
2208
2547
|
img = Image.open(fpath)
|
2209
2548
|
return img
|
@@ -2216,12 +2555,10 @@ def apply_filter(img, *args):
|
|
2216
2555
|
# def apply_filter(img, filter_name, filter_value=None):
|
2217
2556
|
"""
|
2218
2557
|
Apply the specified filter to the image.
|
2219
|
-
|
2220
2558
|
Args:
|
2221
2559
|
img (PIL.Image): The input image.
|
2222
2560
|
filter_name (str): The name of the filter to apply.
|
2223
2561
|
**kwargs: Additional parameters specific to the filter.
|
2224
|
-
|
2225
2562
|
Returns:
|
2226
2563
|
PIL.Image: The filtered image.
|
2227
2564
|
"""
|
@@ -2425,10 +2762,8 @@ def imgsets(
|
|
2425
2762
|
def auto_enhance(img):
|
2426
2763
|
"""
|
2427
2764
|
Automatically enhances the image based on its characteristics.
|
2428
|
-
|
2429
2765
|
Args:
|
2430
2766
|
img (PIL.Image): The input image.
|
2431
|
-
|
2432
2767
|
Returns:
|
2433
2768
|
dict: A dictionary containing the optimal enhancement values.
|
2434
2769
|
"""
|
@@ -2444,47 +2779,33 @@ def imgsets(
|
|
2444
2779
|
bit_depth = 16
|
2445
2780
|
else:
|
2446
2781
|
raise ValueError("Unsupported image mode")
|
2447
|
-
|
2448
2782
|
# Calculate the brightness and contrast for each channel
|
2449
2783
|
num_channels = len(img.getbands())
|
2450
2784
|
brightness_factors = []
|
2451
2785
|
contrast_factors = []
|
2452
2786
|
for channel in range(num_channels):
|
2453
2787
|
channel_histogram = img.split()[channel].histogram()
|
2454
|
-
brightness = sum(i * w for i, w in enumerate(channel_histogram))
|
2455
|
-
channel_histogram
|
2456
|
-
)
|
2788
|
+
brightness = sum(i * w for i, w in enumerate(channel_histogram))/sum(channel_histogram)
|
2457
2789
|
channel_min, channel_max = img.split()[channel].getextrema()
|
2458
2790
|
contrast = channel_max - channel_min
|
2459
|
-
|
2460
2791
|
# Adjust calculations based on bit depth
|
2461
2792
|
normalization_factor = 2**bit_depth - 1 # Max value for the given bit depth
|
2462
|
-
brightness_factor = (
|
2463
|
-
|
2464
|
-
)
|
2465
|
-
contrast_factor = (
|
2466
|
-
1.0 + (contrast - normalization_factor / 2) / normalization_factor
|
2467
|
-
)
|
2468
|
-
|
2793
|
+
brightness_factor = (1.0 + (brightness - normalization_factor / 2) / normalization_factor)
|
2794
|
+
contrast_factor = (1.0 + (contrast - normalization_factor / 2) / normalization_factor)
|
2469
2795
|
brightness_factors.append(brightness_factor)
|
2470
2796
|
contrast_factors.append(contrast_factor)
|
2471
|
-
|
2472
2797
|
# Calculate the average brightness and contrast factors across channels
|
2473
2798
|
avg_brightness_factor = sum(brightness_factors) / num_channels
|
2474
2799
|
avg_contrast_factor = sum(contrast_factors) / num_channels
|
2475
|
-
|
2476
2800
|
return {"brightness": avg_brightness_factor, "contrast": avg_contrast_factor}
|
2477
|
-
|
2478
2801
|
# Load image if input is a file path
|
2479
2802
|
if isinstance(img, str):
|
2480
2803
|
img = load_img(img)
|
2481
|
-
|
2482
2804
|
img_update = img.copy()
|
2483
2805
|
# Auto-enhance image if requested
|
2484
2806
|
if auto:
|
2485
2807
|
auto_params = auto_enhance(img_update)
|
2486
2808
|
sets.update(auto_params)
|
2487
|
-
|
2488
2809
|
if sets is None:
|
2489
2810
|
sets = {}
|
2490
2811
|
for k, value in sets.items():
|
@@ -2559,12 +2880,9 @@ def imgsets(
|
|
2559
2880
|
if len(value)==3:
|
2560
2881
|
value+=(255,)
|
2561
2882
|
img_update = remove(img_update, bgcolor=value)
|
2562
|
-
|
2563
2883
|
if filter_kws:
|
2564
2884
|
for filter_name, filter_value in filter_kws.items():
|
2565
2885
|
img_update = apply_filter(img_update, filter_name, filter_value)
|
2566
|
-
|
2567
|
-
|
2568
2886
|
# Display the image if requested
|
2569
2887
|
if show:
|
2570
2888
|
if figsize is None:
|
@@ -2573,7 +2891,6 @@ def imgsets(
|
|
2573
2891
|
plt.figure(figsize=figsize, dpi=dpi)
|
2574
2892
|
plt.imshow(img_update)
|
2575
2893
|
plt.axis("on") if show_axis else plt.axis("off")
|
2576
|
-
|
2577
2894
|
return img_update
|
2578
2895
|
# # usage:
|
2579
2896
|
# img = imgsets(
|
@@ -2594,7 +2911,6 @@ def figsets(*args):
|
|
2594
2911
|
"scatter","ieee","no-latex","std-colors","high-vis","bright","dark_background","science",
|
2595
2912
|
"high-vis","vibrant","muted","retro","grid","high-contrast","light","cjk-tc-font","cjk-kr-font",
|
2596
2913
|
]
|
2597
|
-
|
2598
2914
|
def sets_priority(ax,key, value):
|
2599
2915
|
if ("fo" in key) and (("size" in key) or ("sz" in key)):
|
2600
2916
|
fontsize=value
|