py2ls 0.1.7.9__py3-none-any.whl → 0.1.8.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
py2ls/ips.py
CHANGED
@@ -4,23 +4,23 @@ import pandas as pd
|
|
4
4
|
import json
|
5
5
|
import matplotlib
|
6
6
|
import matplotlib.pyplot as plt
|
7
|
-
import matplotlib.ticker as tck
|
7
|
+
import matplotlib.ticker as tck
|
8
8
|
from cycler import cycler
|
9
9
|
from mpl_toolkits.mplot3d import Axes3D
|
10
10
|
import seaborn as sns
|
11
11
|
|
12
|
-
import sys, os,shutil,re, yaml,json,subprocess
|
12
|
+
import sys, os, shutil, re, yaml, json, subprocess
|
13
13
|
import importlib.util
|
14
14
|
import time
|
15
15
|
from dateutil import parser
|
16
16
|
from datetime import datetime
|
17
17
|
|
18
|
-
from PIL import Image,ImageEnhance, ImageOps,ImageFilter
|
19
|
-
from rembg import remove,new_session
|
18
|
+
from PIL import Image, ImageEnhance, ImageOps, ImageFilter
|
19
|
+
from rembg import remove, new_session
|
20
20
|
|
21
21
|
import docx
|
22
|
-
from fpdf import FPDF
|
23
|
-
from lxml import etree
|
22
|
+
from fpdf import FPDF
|
23
|
+
from lxml import etree
|
24
24
|
from docx import Document
|
25
25
|
from PyPDF2 import PdfReader
|
26
26
|
from pptx import Presentation
|
@@ -40,24 +40,26 @@ from tqdm import tqdm
|
|
40
40
|
import mimetypes
|
41
41
|
from pprint import pp
|
42
42
|
from collections import Counter
|
43
|
-
from fuzzywuzzy import fuzz,process
|
43
|
+
from fuzzywuzzy import fuzz, process
|
44
44
|
from langdetect import detect
|
45
45
|
from duckduckgo_search import DDGS
|
46
46
|
|
47
47
|
from py2ls import netfinder
|
48
48
|
|
49
49
|
try:
|
50
|
-
get_ipython().run_line_magic(
|
51
|
-
get_ipython().run_line_magic(
|
50
|
+
get_ipython().run_line_magic("load_ext", "autoreload")
|
51
|
+
get_ipython().run_line_magic("autoreload", "2")
|
52
52
|
except NameError:
|
53
53
|
pass
|
54
54
|
|
55
|
+
|
55
56
|
def is_package_installed(package_name):
|
56
57
|
"""Check if a package is installed."""
|
57
58
|
package_spec = importlib.util.find_spec(package_name)
|
58
59
|
return package_spec is not None
|
59
|
-
|
60
|
-
|
60
|
+
|
61
|
+
|
62
|
+
def upgrade(module="py2ls"):
|
61
63
|
# try:
|
62
64
|
# pkg_resources.get_distribution(module)
|
63
65
|
# except pkg_resources.DistributionNotFound:
|
@@ -68,37 +70,45 @@ def upgrade(module='py2ls'):
|
|
68
70
|
except subprocess.CalledProcessError as e:
|
69
71
|
print(f"An error occurred while installing {module}: {e}")
|
70
72
|
try:
|
71
|
-
subprocess.check_call(
|
73
|
+
subprocess.check_call(
|
74
|
+
[sys.executable, "-m", "pip", "install", "--upgrade", module]
|
75
|
+
)
|
72
76
|
except subprocess.CalledProcessError as e:
|
73
77
|
print(f"An error occurred while upgrading py2ls: {e}")
|
74
78
|
|
75
79
|
|
76
|
-
dir_save=
|
80
|
+
dir_save = "/Users/macjianfeng/Dropbox/Downloads/"
|
77
81
|
|
78
82
|
|
79
83
|
def get_version(pkg):
|
80
84
|
import importlib.metadata
|
85
|
+
|
81
86
|
def get_v(pkg_name):
|
82
87
|
try:
|
83
88
|
version = importlib.metadata.version(pkg_name)
|
84
89
|
print(f"version {pkg_name} == {version}")
|
85
90
|
except importlib.metadata.PackageNotFoundError:
|
86
91
|
print(f"Package '{pkg_name}' not found")
|
87
|
-
|
92
|
+
|
93
|
+
if isinstance(pkg, str):
|
88
94
|
get_v(pkg)
|
89
|
-
elif isinstance(pkg,list):
|
95
|
+
elif isinstance(pkg, list):
|
90
96
|
[get_v(pkg_) for pkg_ in pkg]
|
91
|
-
|
97
|
+
|
98
|
+
|
99
|
+
# usage:
|
92
100
|
# get_version(['pandas','numpy','py2ls'])
|
93
|
-
|
101
|
+
|
102
|
+
|
94
103
|
def rm_folder(folder_path, verbose=True):
|
95
104
|
try:
|
96
105
|
shutil.rmtree(folder_path)
|
97
106
|
if verbose:
|
98
|
-
print(f
|
107
|
+
print(f"Successfully deleted {folder_path}")
|
99
108
|
except Exception as e:
|
100
109
|
if verbose:
|
101
|
-
print(f
|
110
|
+
print(f"Failed to delete {folder_path}. Reason: {e}")
|
111
|
+
|
102
112
|
|
103
113
|
def fremove(path, verbose=True):
|
104
114
|
"""
|
@@ -111,20 +121,20 @@ def fremove(path, verbose=True):
|
|
111
121
|
if os.path.isdir(path):
|
112
122
|
shutil.rmtree(path)
|
113
123
|
if verbose:
|
114
|
-
print(f
|
124
|
+
print(f"Successfully deleted folder {path}")
|
115
125
|
elif os.path.isfile(path):
|
116
126
|
os.remove(path)
|
117
127
|
if verbose:
|
118
|
-
print(f
|
128
|
+
print(f"Successfully deleted file {path}")
|
119
129
|
else:
|
120
130
|
if verbose:
|
121
|
-
print(f
|
131
|
+
print(f"Path {path} does not exist")
|
122
132
|
except Exception as e:
|
123
133
|
if verbose:
|
124
|
-
print(f
|
134
|
+
print(f"Failed to delete {path}. Reason: {e}")
|
125
135
|
|
126
136
|
|
127
|
-
def get_cwd(verbose:bool = True):
|
137
|
+
def get_cwd(verbose: bool = True):
|
128
138
|
"""
|
129
139
|
get_cwd: to get the current working directory
|
130
140
|
Args:
|
@@ -138,26 +148,39 @@ def get_cwd(verbose:bool = True):
|
|
138
148
|
# This works in an interactive environment (like a Jupyter notebook)
|
139
149
|
script_dir = os.getcwd()
|
140
150
|
if verbose:
|
141
|
-
print("os.getcwd():", script_dir)
|
151
|
+
print("os.getcwd():", script_dir)
|
142
152
|
return script_dir
|
143
153
|
|
144
|
-
|
154
|
+
|
155
|
+
def search(
|
156
|
+
query,
|
157
|
+
limit=5,
|
158
|
+
kind="text",
|
159
|
+
output="df",
|
160
|
+
verbose=False,
|
161
|
+
download=True,
|
162
|
+
dir_save=dir_save,
|
163
|
+
):
|
145
164
|
from duckduckgo_search import DDGS
|
146
|
-
|
165
|
+
|
166
|
+
if "te" in kind.lower():
|
147
167
|
results = DDGS().text(query, max_results=limit)
|
148
|
-
res=pd.DataFrame(results)
|
149
|
-
res.rename(columns={"href":"links"},inplace=True)
|
168
|
+
res = pd.DataFrame(results)
|
169
|
+
res.rename(columns={"href": "links"}, inplace=True)
|
150
170
|
if verbose:
|
151
171
|
print(f'searching "{query}": got the results below\n{res}')
|
152
172
|
if download:
|
153
173
|
try:
|
154
|
-
netfinder.downloader(
|
174
|
+
netfinder.downloader(
|
175
|
+
url=res.links.tolist(), dir_save=dir_save, verbose=verbose
|
176
|
+
)
|
155
177
|
except:
|
156
178
|
if verbose:
|
157
179
|
print(f"failed link")
|
158
180
|
return res
|
159
181
|
|
160
|
-
|
182
|
+
|
183
|
+
def echo(*args, **kwargs):
|
161
184
|
"""
|
162
185
|
query, model="gpt", verbose=True, log=True, dir_save=dir_save
|
163
186
|
a ai chat tool
|
@@ -172,12 +195,12 @@ def echo(*args,**kwargs):
|
|
172
195
|
str: the answer from ai
|
173
196
|
"""
|
174
197
|
global dir_save
|
175
|
-
|
176
|
-
query=None
|
177
|
-
model=kwargs.get(
|
178
|
-
verbose=kwargs.get(
|
179
|
-
log=kwargs.get(
|
180
|
-
dir_save=kwargs.get(
|
198
|
+
|
199
|
+
query = None
|
200
|
+
model = kwargs.get("model", "gpt")
|
201
|
+
verbose = kwargs.get("verbose", True)
|
202
|
+
log = kwargs.get("log", True)
|
203
|
+
dir_save = kwargs.get("dir_save", dir_save)
|
181
204
|
for arg in args:
|
182
205
|
if isinstance(arg, str):
|
183
206
|
if os.path.isdir(arg):
|
@@ -191,15 +214,17 @@ def echo(*args,**kwargs):
|
|
191
214
|
elif isinstance(arg, dict):
|
192
215
|
verbose = arg.get("verbose", verbose)
|
193
216
|
log = arg.get("log", log)
|
217
|
+
|
194
218
|
def is_in_any(str_candi_short, str_full, ignore_case=True):
|
195
219
|
if isinstance(str_candi_short, str):
|
196
|
-
str_candi_short=[str_candi_short]
|
197
|
-
res_bool=[]
|
220
|
+
str_candi_short = [str_candi_short]
|
221
|
+
res_bool = []
|
198
222
|
if ignore_case:
|
199
|
-
[res_bool.append(i in str_full.lower())
|
223
|
+
[res_bool.append(i in str_full.lower()) for i in str_candi_short]
|
200
224
|
else:
|
201
|
-
[res_bool.append(i in str_full)
|
225
|
+
[res_bool.append(i in str_full) for i in str_candi_short]
|
202
226
|
return any(res_bool)
|
227
|
+
|
203
228
|
def valid_mod_name(str_fly):
|
204
229
|
if is_in_any(str_fly, "claude-3-haiku"):
|
205
230
|
return "claude-3-haiku"
|
@@ -210,49 +235,56 @@ def echo(*args,**kwargs):
|
|
210
235
|
elif is_in_any(str_fly, "mixtral-8x7b"):
|
211
236
|
return "mixtral-8x7b"
|
212
237
|
else:
|
213
|
-
print(
|
214
|
-
|
238
|
+
print(
|
239
|
+
f"not support your model{model}, supported models: 'claude','gpt(default)', 'llama','mixtral'"
|
240
|
+
)
|
241
|
+
return "gpt-3.5" # default model
|
242
|
+
|
215
243
|
model_valid = valid_mod_name(model)
|
216
|
-
res=DDGS().chat(query, model=model_valid)
|
244
|
+
res = DDGS().chat(query, model=model_valid)
|
217
245
|
if verbose:
|
218
246
|
pp(res)
|
219
247
|
if log:
|
220
|
-
dt_str=datetime.fromtimestamp(time.time()).strftime(
|
248
|
+
dt_str = datetime.fromtimestamp(time.time()).strftime("%Y-%m-%d_%H:%M:%S")
|
221
249
|
res_ = f"\n\n####Q:{query}\n\n#####Ans:{dt_str}\n\n>{res}\n"
|
222
250
|
if bool(os.path.basename(dir_save)):
|
223
251
|
fpath = dir_save
|
224
252
|
else:
|
225
253
|
os.makedirs(dir_save, exist_ok=True)
|
226
254
|
fpath = os.path.join(dir_save, f"log_ai.md")
|
227
|
-
fupdate(fpath=fpath,content=res_)
|
255
|
+
fupdate(fpath=fpath, content=res_)
|
228
256
|
print(f"log file:{fpath}")
|
229
257
|
return res
|
230
258
|
|
259
|
+
|
231
260
|
def chat(*args, **kwargs):
|
232
261
|
return echo(*args, **kwargs)
|
233
262
|
|
263
|
+
|
234
264
|
def ai(*args, **kwargs):
|
235
265
|
return echo(*args, **kwargs)
|
236
266
|
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
267
|
+
|
268
|
+
def detect_lang(text, output="lang", verbose=True):
|
269
|
+
dir_curr_script = os.path.dirname(os.path.abspath(__file__))
|
270
|
+
dir_lang_code = dir_curr_script + "/data/lang_code_iso639.json"
|
271
|
+
print(dir_curr_script, os.getcwd(), dir_lang_code)
|
272
|
+
lang_code_iso639 = fload(dir_lang_code)
|
273
|
+
l_lang, l_code = [], []
|
274
|
+
[[l_lang.append(v), l_code.append(k)] for v, k in lang_code_iso639.items()]
|
244
275
|
try:
|
245
276
|
if is_text(text):
|
246
|
-
code_detect=detect(text)
|
247
|
-
if
|
248
|
-
return l_code[strcmp(code_detect,l_code, verbose=verbose)[1]]
|
277
|
+
code_detect = detect(text)
|
278
|
+
if "c" in output.lower(): # return code
|
279
|
+
return l_code[strcmp(code_detect, l_code, verbose=verbose)[1]]
|
249
280
|
else:
|
250
|
-
return l_lang[strcmp(code_detect,l_code, verbose=verbose)[1]]
|
281
|
+
return l_lang[strcmp(code_detect, l_code, verbose=verbose)[1]]
|
251
282
|
else:
|
252
283
|
print(f"{text} is not supported")
|
253
|
-
return
|
284
|
+
return "no"
|
254
285
|
except:
|
255
|
-
return
|
286
|
+
return "no"
|
287
|
+
|
256
288
|
|
257
289
|
def is_text(s):
|
258
290
|
has_alpha = any(char.isalpha() for char in s)
|
@@ -260,7 +292,8 @@ def is_text(s):
|
|
260
292
|
# no_special = not re.search(r'[^A-Za-z0-9\s]', s)
|
261
293
|
return has_alpha and has_non_alpha
|
262
294
|
|
263
|
-
|
295
|
+
|
296
|
+
def strcmp(search_term, candidates, ignore_case=True, verbose=True, scorer="WR"):
|
264
297
|
"""
|
265
298
|
Compares a search term with a list of candidate strings and finds the best match based on similarity score.
|
266
299
|
|
@@ -273,21 +306,23 @@ def strcmp(search_term, candidates, ignore_case=True, verbose=True, scorer='WR')
|
|
273
306
|
Returns:
|
274
307
|
tuple: A tuple containing the best match and its index in the candidates list.
|
275
308
|
"""
|
309
|
+
|
276
310
|
def to_lower(s, ignore_case=True):
|
277
|
-
#Converts a string or list of strings to lowercase if ignore_case is True.
|
311
|
+
# Converts a string or list of strings to lowercase if ignore_case is True.
|
278
312
|
if ignore_case:
|
279
313
|
if isinstance(s, str):
|
280
314
|
return s.lower()
|
281
315
|
elif isinstance(s, list):
|
282
316
|
return [elem.lower() for elem in s]
|
283
317
|
return s
|
284
|
-
|
318
|
+
|
319
|
+
str1_, str2_ = to_lower(search_term, ignore_case), to_lower(candidates, ignore_case)
|
285
320
|
if isinstance(str2_, list):
|
286
|
-
if
|
321
|
+
if "part" in scorer.lower():
|
287
322
|
similarity_scores = [fuzz.partial_ratio(str1_, word) for word in str2_]
|
288
|
-
elif
|
323
|
+
elif "W" in scorer.lower():
|
289
324
|
similarity_scores = [fuzz.WRatio(str1_, word) for word in str2_]
|
290
|
-
elif
|
325
|
+
elif "Ratio" in scorer.lower():
|
291
326
|
similarity_scores = [fuzz.Ratio(str1_, word) for word in str2_]
|
292
327
|
else:
|
293
328
|
similarity_scores = [fuzz.WRatio(str1_, word) for word in str2_]
|
@@ -295,11 +330,11 @@ def strcmp(search_term, candidates, ignore_case=True, verbose=True, scorer='WR')
|
|
295
330
|
best_match_score = similarity_scores[best_match_index]
|
296
331
|
else:
|
297
332
|
best_match_index = 0
|
298
|
-
if
|
333
|
+
if "part" in scorer.lower():
|
299
334
|
best_match_score = fuzz.partial_ratio(str1_, str2_)
|
300
|
-
elif
|
335
|
+
elif "W" in scorer.lower():
|
301
336
|
best_match_score = fuzz.WRatio(str1_, str2_)
|
302
|
-
elif
|
337
|
+
elif "Ratio" in scorer.lower():
|
303
338
|
best_match_score = fuzz.Ratio(str1_, str2_)
|
304
339
|
else:
|
305
340
|
best_match_score = fuzz.WRatio(str1_, str2_)
|
@@ -309,11 +344,13 @@ def strcmp(search_term, candidates, ignore_case=True, verbose=True, scorer='WR')
|
|
309
344
|
print(f"建议: {best_match}")
|
310
345
|
return candidates[best_match_index], best_match_index
|
311
346
|
|
347
|
+
|
312
348
|
# Example usaged
|
313
349
|
# str1 = "plos biology"
|
314
350
|
# str2 = ['PLoS Computational Biology', 'PLOS BIOLOGY']
|
315
351
|
# best_match, idx = strcmp(str1, str2, ignore_case=1)
|
316
352
|
|
353
|
+
|
317
354
|
def counter(list_, verbose=True):
|
318
355
|
c = Counter(list_)
|
319
356
|
# Print the name counts
|
@@ -321,14 +358,15 @@ def counter(list_, verbose=True):
|
|
321
358
|
if verbose:
|
322
359
|
print(f"{item}: {count}")
|
323
360
|
return c
|
361
|
+
|
362
|
+
|
324
363
|
# usage:
|
325
364
|
# print(f"Return an iterator over elements repeating each as many times as its count:\n{sorted(c.elements())}")
|
326
365
|
# print(f"Return a list of the n most common elements:\n{c.most_common()}")
|
327
366
|
# print(f"Compute the sum of the counts:\n{c.total()}")
|
328
367
|
|
329
368
|
|
330
|
-
|
331
|
-
def str2time(time_str, fmt='24'):
|
369
|
+
def str2time(time_str, fmt="24"):
|
332
370
|
"""
|
333
371
|
Convert a time string into the specified format.
|
334
372
|
Parameters:
|
@@ -342,42 +380,47 @@ def str2time(time_str, fmt='24'):
|
|
342
380
|
%p represents AM or PM.
|
343
381
|
- str: The converted time string.
|
344
382
|
"""
|
383
|
+
|
345
384
|
def time_len_corr(time_str):
|
346
|
-
time_str_=
|
347
|
-
|
348
|
-
|
385
|
+
time_str_ = (
|
386
|
+
ssplit(time_str, by=[":", " ", "digital_num"]) if ":" in time_str else None
|
387
|
+
)
|
388
|
+
time_str_split = []
|
389
|
+
[time_str_split.append(i) for i in time_str_ if is_num(i)]
|
349
390
|
if time_str_split:
|
350
|
-
if len(time_str_split)==2:
|
351
|
-
H,M=time_str_split
|
352
|
-
time_str_full=H+":"+M+":00"
|
353
|
-
elif len(time_str_split)==3:
|
354
|
-
H,M,S=time_str_split
|
355
|
-
time_str_full=H+":"+M+":"+S
|
391
|
+
if len(time_str_split) == 2:
|
392
|
+
H, M = time_str_split
|
393
|
+
time_str_full = H + ":" + M + ":00"
|
394
|
+
elif len(time_str_split) == 3:
|
395
|
+
H, M, S = time_str_split
|
396
|
+
time_str_full = H + ":" + M + ":" + S
|
356
397
|
else:
|
357
|
-
time_str_full=time_str_
|
358
|
-
if
|
359
|
-
time_str_full+=" AM"
|
360
|
-
elif "pm"in time_str.lower():
|
361
|
-
time_str_full +=" PM"
|
398
|
+
time_str_full = time_str_
|
399
|
+
if "am" in time_str.lower():
|
400
|
+
time_str_full += " AM"
|
401
|
+
elif "pm" in time_str.lower():
|
402
|
+
time_str_full += " PM"
|
362
403
|
return time_str_full
|
363
|
-
|
404
|
+
|
405
|
+
if "12" in fmt:
|
364
406
|
fmt = "%I:%M:%S %p"
|
365
|
-
elif
|
407
|
+
elif "24" in fmt:
|
366
408
|
fmt = "%H:%M:%S"
|
367
409
|
|
368
410
|
try:
|
369
411
|
# Try to parse the time string assuming it could be in 24-hour or 12-hour format
|
370
|
-
time_obj = datetime.strptime(time_len_corr(time_str),
|
412
|
+
time_obj = datetime.strptime(time_len_corr(time_str), "%H:%M:%S")
|
371
413
|
except ValueError:
|
372
414
|
try:
|
373
|
-
time_obj = datetime.strptime(time_len_corr(time_str),
|
415
|
+
time_obj = datetime.strptime(time_len_corr(time_str), "%I:%M:%S %p")
|
374
416
|
except ValueError as e:
|
375
417
|
raise ValueError(f"Unable to parse time string: {time_str}. Error: {e}")
|
376
|
-
|
418
|
+
|
377
419
|
# Format the time object to the desired output format
|
378
420
|
formatted_time = time_obj.strftime(fmt)
|
379
421
|
return formatted_time
|
380
422
|
|
423
|
+
|
381
424
|
# # Example usage:
|
382
425
|
# time_str1 = "14:30:45"
|
383
426
|
# time_str2 = "02:30:45 PM"
|
@@ -388,7 +431,8 @@ def str2time(time_str, fmt='24'):
|
|
388
431
|
# print(formatted_time1) # Output: 02:30:45 PM
|
389
432
|
# print(formatted_time2) # Output: 14:30:45
|
390
433
|
|
391
|
-
|
434
|
+
|
435
|
+
def str2date(date_str, fmt="%Y-%m-%d_%H:%M:%S"):
|
392
436
|
"""
|
393
437
|
Convert a date string into the specified format.
|
394
438
|
Parameters:
|
@@ -404,11 +448,14 @@ def str2date(date_str, fmt='%Y-%m-%d_%H:%M:%S'):
|
|
404
448
|
# Format the date object to the desired output format
|
405
449
|
formatted_date = date_obj.strftime(fmt)
|
406
450
|
return formatted_date
|
451
|
+
|
452
|
+
|
407
453
|
# str1=str2date(num2str(20240625),fmt="%a %d-%B-%Y")
|
408
454
|
# print(str1)
|
409
455
|
# str2=str2num(str2date(str1,fmt='%a %Y%m%d'))
|
410
456
|
# print(str2)
|
411
457
|
|
458
|
+
|
412
459
|
def str2num(s, *args):
|
413
460
|
delimiter = None
|
414
461
|
round_digits = None
|
@@ -425,11 +472,11 @@ def str2num(s, *args):
|
|
425
472
|
except ValueError:
|
426
473
|
try:
|
427
474
|
numerized = numerize(s)
|
428
|
-
num = int(numerized) if
|
475
|
+
num = int(numerized) if "." not in numerized else float(numerized)
|
429
476
|
except Exception as e:
|
430
477
|
# Attempt to handle multiple number segments
|
431
478
|
try:
|
432
|
-
number_segments = ssplit(s,by=
|
479
|
+
number_segments = ssplit(s, by="number_strings")
|
433
480
|
nums = []
|
434
481
|
for segment in number_segments:
|
435
482
|
try:
|
@@ -439,7 +486,9 @@ def str2num(s, *args):
|
|
439
486
|
if len(nums) == 1:
|
440
487
|
num = nums[0]
|
441
488
|
else:
|
442
|
-
raise ValueError(
|
489
|
+
raise ValueError(
|
490
|
+
"Multiple number segments found, cannot determine single numeric value"
|
491
|
+
)
|
443
492
|
except Exception as e:
|
444
493
|
raise ValueError(f"Cannot convert {s} to a number: {e}")
|
445
494
|
|
@@ -454,6 +503,8 @@ def str2num(s, *args):
|
|
454
503
|
return num_str
|
455
504
|
|
456
505
|
return num
|
506
|
+
|
507
|
+
|
457
508
|
# Examples
|
458
509
|
# print(str2num("123")) # Output: 123
|
459
510
|
# print(str2num("123.456", 2)) # Output: 123.46
|
@@ -495,13 +546,15 @@ def num2str(num, *args):
|
|
495
546
|
num_str = "{:,}".format(int(num_str_parts[0]))
|
496
547
|
|
497
548
|
return num_str
|
549
|
+
|
550
|
+
|
498
551
|
# Examples
|
499
552
|
# print(num2str(123),type(num2str(123))) # Output: "123"
|
500
553
|
# print(num2str(123.456, 2),type(num2str(123.456, 2))) # Output: "123.46"
|
501
554
|
# print(num2str(7000.125, 2),type(num2str(7000.125, 2))) # Output: "7000.13"
|
502
555
|
# print(num2str(12345.6789, ","),type(num2str(12345.6789, ","))) # Output: "12,345.6789"
|
503
556
|
# print(num2str(7000.00, ","),type(num2str(7000.00, ","))) # Output: "7,000.00"
|
504
|
-
def sreplace(*args
|
557
|
+
def sreplace(*args, **kwargs):
|
505
558
|
"""
|
506
559
|
sreplace(text, by=None, robust=True)
|
507
560
|
Replace specified substrings in the input text with provided replacements.
|
@@ -515,19 +568,19 @@ def sreplace(*args,**kwargs):
|
|
515
568
|
str: The text after replacements have been made.
|
516
569
|
"""
|
517
570
|
text = None
|
518
|
-
by = kwargs.get(
|
519
|
-
robust = kwargs.get(
|
520
|
-
|
571
|
+
by = kwargs.get("by", None)
|
572
|
+
robust = kwargs.get("robust", True)
|
573
|
+
|
521
574
|
for arg in args:
|
522
|
-
if isinstance(arg,str):
|
523
|
-
text=arg
|
524
|
-
elif isinstance(arg,dict):
|
525
|
-
by=arg
|
526
|
-
elif isinstance(arg,bool):
|
527
|
-
robust=arg
|
575
|
+
if isinstance(arg, str):
|
576
|
+
text = arg
|
577
|
+
elif isinstance(arg, dict):
|
578
|
+
by = arg
|
579
|
+
elif isinstance(arg, bool):
|
580
|
+
robust = arg
|
528
581
|
else:
|
529
582
|
Error(f"{type(arg)} is not supported")
|
530
|
-
|
583
|
+
|
531
584
|
# Default replacements for newline and tab characters
|
532
585
|
default_replacements = {
|
533
586
|
"\a": "",
|
@@ -558,47 +611,76 @@ def sreplace(*args,**kwargs):
|
|
558
611
|
for k, v in by.items():
|
559
612
|
text = text.replace(k, v)
|
560
613
|
return text
|
614
|
+
|
615
|
+
|
561
616
|
# usage:
|
562
617
|
# sreplace(text, by=dict(old_str='new_str'), robust=True)
|
563
618
|
|
564
|
-
|
565
|
-
|
566
|
-
|
567
|
-
|
568
|
-
|
619
|
+
|
620
|
+
def paper_size(paper_type_str="a4"):
|
621
|
+
df = pd.DataFrame(
|
622
|
+
{
|
623
|
+
"a0": [841, 1189],
|
624
|
+
"a1": [594, 841],
|
625
|
+
"a2": [420, 594],
|
626
|
+
"a3": [297, 420],
|
627
|
+
"a4": [210, 297],
|
628
|
+
"a5": [148, 210],
|
629
|
+
"a6": [105, 148],
|
630
|
+
"a7": [74, 105],
|
631
|
+
"b0": [1028, 1456],
|
632
|
+
"b1": [707, 1000],
|
633
|
+
"b2": [514, 728],
|
634
|
+
"b3": [364, 514],
|
635
|
+
"b4": [257, 364],
|
636
|
+
"b5": [182, 257],
|
637
|
+
"b6": [128, 182],
|
638
|
+
"letter": [215.9, 279.4],
|
639
|
+
"legal": [215.9, 355.6],
|
640
|
+
"business card": [85.6, 53.98],
|
641
|
+
"photo china passport": [33, 48],
|
642
|
+
"passport single": [125, 88],
|
643
|
+
"visa": [105, 74],
|
644
|
+
"sim": [25, 15],
|
645
|
+
}
|
646
|
+
)
|
569
647
|
for name in df.columns:
|
570
648
|
if paper_type_str in name.lower():
|
571
|
-
paper_type=name
|
649
|
+
paper_type = name
|
572
650
|
if not paper_type:
|
573
|
-
paper_type=
|
651
|
+
paper_type = "a4" # default
|
574
652
|
return df[paper_type].tolist()
|
575
653
|
|
654
|
+
|
576
655
|
def docx2pdf(dir_docx, dir_pdf=None):
|
577
656
|
if dir_pdf:
|
578
|
-
convert(dir_docx,dir_pdf)
|
657
|
+
convert(dir_docx, dir_pdf)
|
579
658
|
else:
|
580
659
|
convert(dir_docx)
|
581
660
|
|
582
|
-
|
661
|
+
|
662
|
+
def img2pdf(dir_img, kind="jpeg", page=None, dir_save=None, page_size="a4", dpi=300):
|
583
663
|
def mm_to_point(size):
|
584
|
-
return (image2pdf.mm_to_pt(size[0]),image2pdf.mm_to_pt(size[1]))
|
664
|
+
return (image2pdf.mm_to_pt(size[0]), image2pdf.mm_to_pt(size[1]))
|
665
|
+
|
585
666
|
def set_dpi(x):
|
586
|
-
dpix=dpiy=x
|
667
|
+
dpix = dpiy = x
|
587
668
|
return image2pdf.get_fixed_dpi_layout_fun((dpix, dpiy))
|
669
|
+
|
588
670
|
if not kind.startswith("."):
|
589
|
-
kind="."+kind
|
671
|
+
kind = "." + kind
|
590
672
|
if dir_save is None:
|
591
|
-
dir_save = dir_img.replace(kind,
|
592
|
-
imgs = []
|
673
|
+
dir_save = dir_img.replace(kind, ".pdf")
|
674
|
+
imgs = []
|
593
675
|
if os.path.isdir(dir_img):
|
594
676
|
if not dir_save.endswith(".pdf"):
|
595
|
-
dir_save+="#merged_img2pdf.pdf"
|
677
|
+
dir_save += "#merged_img2pdf.pdf"
|
596
678
|
if page is None:
|
597
|
-
select_range = listdir(dir_img,kind=kind).fpath
|
679
|
+
select_range = listdir(dir_img, kind=kind).fpath
|
598
680
|
else:
|
599
|
-
if not isinstance(page, (np.ndarray,list,range)):
|
600
|
-
page=[page]
|
601
|
-
select_range = listdir(dir_img,kind=kind)[
|
681
|
+
if not isinstance(page, (np.ndarray, list, range)):
|
682
|
+
page = [page]
|
683
|
+
select_range = listdir(dir_img, kind=kind)["fpath"][page]
|
602
684
|
for fname in select_range:
|
603
685
|
if not fname.endswith(kind):
|
604
686
|
continue
|
@@ -607,24 +689,27 @@ def img2pdf(dir_img, kind="jpeg",page=None, dir_save=None, page_size="a4", dpi=3
|
|
607
689
|
continue
|
608
690
|
imgs.append(path)
|
609
691
|
else:
|
610
|
-
imgs=[os.path.isdir(dir_img),dir_img]
|
692
|
+
imgs = [os.path.isdir(dir_img), dir_img]
|
611
693
|
|
612
694
|
if page_size:
|
613
|
-
if isinstance(page_size,str):
|
614
|
-
pdf_in_mm=mm_to_point(paper_size(page_size))
|
695
|
+
if isinstance(page_size, str):
|
696
|
+
pdf_in_mm = mm_to_point(paper_size(page_size))
|
615
697
|
else:
|
616
698
|
print("default: page_size = (210,297)")
|
617
|
-
pdf_in_mm=mm_to_point(page_size)
|
699
|
+
pdf_in_mm = mm_to_point(page_size)
|
618
700
|
print(f"page size was set to {page_size}")
|
619
|
-
p_size= image2pdf.get_layout_fun(pdf_in_mm)
|
701
|
+
p_size = image2pdf.get_layout_fun(pdf_in_mm)
|
620
702
|
else:
|
621
703
|
p_size = set_dpi(dpi)
|
622
|
-
with open(dir_save,"wb") as f:
|
704
|
+
with open(dir_save, "wb") as f:
|
623
705
|
f.write(image2pdf.convert(imgs, layout_fun=p_size))
|
706
|
+
|
707
|
+
|
624
708
|
# usage:
|
625
709
|
# dir_img="/Users/macjianfeng/Dropbox/00-Personal/2015-History/2012-2015_兰州大学/120901-大学课件/生物统计学 陆卫/复习题/"
|
626
710
|
# img2pdf(dir_img,kind='tif', page=range(3,7,2))
|
627
711
|
|
712
|
+
|
628
713
|
def pdf2ppt(dir_pdf, dir_ppt):
|
629
714
|
prs = Presentation()
|
630
715
|
|
@@ -639,21 +724,26 @@ def pdf2ppt(dir_pdf, dir_ppt):
|
|
639
724
|
text = page.extract_text()
|
640
725
|
|
641
726
|
# Add a slide for each page's content
|
642
|
-
slide_layout = prs.slide_layouts[
|
727
|
+
slide_layout = prs.slide_layouts[
|
728
|
+
5
|
729
|
+
] # Use slide layout that suits your needs
|
643
730
|
slide = prs.slides.add_slide(slide_layout)
|
644
731
|
slide.shapes.title.text = f"Page {page_num + 1}"
|
645
|
-
slide.shapes.add_textbox(
|
732
|
+
slide.shapes.add_textbox(
|
733
|
+
Inches(1), Inches(1.5), Inches(8), Inches(5)
|
734
|
+
).text = text
|
646
735
|
|
647
736
|
# Save the PowerPoint presentation
|
648
737
|
prs.save(dir_ppt)
|
649
738
|
print(f"Conversion from {dir_pdf} to {dir_ppt} complete.")
|
650
739
|
|
651
740
|
|
652
|
-
def ssplit(text, by="space", verbose=False,strict=False, **kws):
|
741
|
+
def ssplit(text, by="space", verbose=False, strict=False, **kws):
|
653
742
|
if isinstance(text, list):
|
654
|
-
nested_list= [ssplit(i,by=by,verbose=verbose
|
743
|
+
nested_list = [ssplit(i, by=by, verbose=verbose, **kws) for i in text]
|
655
744
|
flat_list = [item for sublist in nested_list for item in sublist]
|
656
745
|
return flat_list
|
746
|
+
|
657
747
|
def split_by_word_length(text, length):
|
658
748
|
return [word for word in text.split() if len(word) == length]
|
659
749
|
|
@@ -677,10 +767,10 @@ def ssplit(text, by="space", verbose=False,strict=False, **kws):
|
|
677
767
|
return split_text
|
678
768
|
|
679
769
|
def split_by_regex_lookahead(text, pattern):
|
680
|
-
return re.split(f
|
681
|
-
|
770
|
+
return re.split(f"(?<={pattern})", text)
|
771
|
+
|
682
772
|
def split_by_regex_end(text, pattern):
|
683
|
-
return re.split(f
|
773
|
+
return re.split(f"(?={pattern})", text)
|
684
774
|
|
685
775
|
# def split_by_sentence_endings(text):
|
686
776
|
# return re.split(r"(?<=[.!?])", text)
|
@@ -688,24 +778,27 @@ def ssplit(text, by="space", verbose=False,strict=False, **kws):
|
|
688
778
|
# return re.split(r"([^\x00-\x7F\w\s,.!?:\"'()\-]+)", text)
|
689
779
|
# return re.split(r"[^\x00-\x7F]+", text)
|
690
780
|
return re.split(r"([^\x00-\x7F]+)", text)
|
781
|
+
|
691
782
|
def split_by_consecutive_non_alphanumeric(text):
|
692
783
|
return re.split(r"\W+", text)
|
693
784
|
|
694
785
|
def split_by_fixed_length_chunks(text, length):
|
695
786
|
return [text[i : i + length] for i in range(0, len(text), length)]
|
696
|
-
|
787
|
+
|
788
|
+
def split_by_sent_num(text, n=10):
|
697
789
|
# split text into sentences
|
698
|
-
text_split_by_sent=sent_tokenize(text)
|
699
|
-
cut_loc_array=np.arange(0,len(text_split_by_sent),n)
|
700
|
-
if cut_loc_array[-1]!=len(text_split_by_sent):
|
701
|
-
cut_loc=np.append(cut_loc_array,len(text_split_by_sent))
|
790
|
+
text_split_by_sent = sent_tokenize(text)
|
791
|
+
cut_loc_array = np.arange(0, len(text_split_by_sent), n)
|
792
|
+
if cut_loc_array[-1] != len(text_split_by_sent):
|
793
|
+
cut_loc = np.append(cut_loc_array, len(text_split_by_sent))
|
702
794
|
else:
|
703
795
|
cut_loc = cut_loc_array
|
704
796
|
# get text in section (e.g., every 10 sentences)
|
705
|
-
text_section=[]
|
706
|
-
for i,j in pairwise(cut_loc):
|
797
|
+
text_section = []
|
798
|
+
for i, j in pairwise(cut_loc):
|
707
799
|
text_section.append(text_split_by_sent[i:j])
|
708
800
|
return text_section
|
801
|
+
|
709
802
|
def split_general(text, by, verbose=False, ignore_case=False):
|
710
803
|
if ignore_case:
|
711
804
|
if verbose:
|
@@ -717,8 +810,10 @@ def ssplit(text, by="space", verbose=False,strict=False, **kws):
|
|
717
810
|
if verbose:
|
718
811
|
print(f"used {by} to split, ignore_case=False")
|
719
812
|
return text.split(by)
|
813
|
+
|
720
814
|
def reg_split(text, pattern):
|
721
815
|
return re.split(pattern, text)
|
816
|
+
|
722
817
|
if ("sp" in by or "white" in by) and not strict:
|
723
818
|
if verbose:
|
724
819
|
print(f"splited by space")
|
@@ -735,14 +830,20 @@ def ssplit(text, by="space", verbose=False,strict=False, **kws):
|
|
735
830
|
if verbose:
|
736
831
|
print(f"split_by_multiple_delimiters: ['|','&']")
|
737
832
|
return split_by_multiple_delimiters(text, by)
|
738
|
-
elif
|
833
|
+
elif (
|
834
|
+
all([("digi" in by or "num" in by), not "sent" in by, not "str" in by])
|
835
|
+
and not strict
|
836
|
+
):
|
739
837
|
if verbose:
|
740
838
|
print(f"splited by digital (numbers)")
|
741
839
|
return re.split(r"(\d+)", text)
|
742
|
-
elif all([("digi" in by or "num" in by),
|
840
|
+
elif all([("digi" in by or "num" in by), "str" in by]) and not strict:
|
743
841
|
if verbose:
|
744
842
|
print(f"Splitting by (number strings)")
|
745
|
-
pattern = re.compile(
|
843
|
+
pattern = re.compile(
|
844
|
+
r"\b((?:one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty|thirty|forty|fifty|sixty|seventy|eighty|ninety|hundred|thousand|million|billion|trillion|and|[\d,]+(?:\.\d+)?)(?:[-\s]?(?:one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty|thirty|forty|fifty|sixty|seventy|eighty|ninety|hundred|thousand|million|billion|trillion|and|[\d,]+(?:\.\d+)?))*)\b",
|
845
|
+
re.IGNORECASE,
|
846
|
+
)
|
746
847
|
return re.split(pattern, text)
|
747
848
|
elif ("pun" in by) and not strict:
|
748
849
|
if verbose:
|
@@ -760,12 +861,12 @@ def ssplit(text, by="space", verbose=False,strict=False, **kws):
|
|
760
861
|
if verbose:
|
761
862
|
print(f"splited by word")
|
762
863
|
return word_tokenize(text)
|
763
|
-
elif ("sen" in by and not
|
864
|
+
elif ("sen" in by and not "num" in by) and not strict:
|
764
865
|
if verbose:
|
765
866
|
print(f"splited by sentence")
|
766
867
|
return sent_tokenize(text)
|
767
|
-
elif (
|
768
|
-
return split_by_sent_num(text
|
868
|
+
elif ("sen" in by and "num" in by) and not strict:
|
869
|
+
return split_by_sent_num(text, **kws)
|
769
870
|
elif ("cha" in by) and not strict:
|
770
871
|
if verbose:
|
771
872
|
print(f"splited by chracters")
|
@@ -803,32 +904,32 @@ def ssplit(text, by="space", verbose=False,strict=False, **kws):
|
|
803
904
|
if verbose:
|
804
905
|
print(f"splited by customed, re; => {by}")
|
805
906
|
return reg_split(text, **kws)
|
806
|
-
elif (
|
907
|
+
elif ("lang" in by or "eng" in by) and not strict:
|
807
908
|
return split_non_ascii(text)
|
808
909
|
else:
|
809
910
|
return split_general(text, by, verbose=verbose, **kws)
|
810
911
|
|
811
912
|
|
812
|
-
def pdf2img(dir_pdf, dir_save=None, page=None, kind="png",verbose=True, **kws):
|
913
|
+
def pdf2img(dir_pdf, dir_save=None, page=None, kind="png", verbose=True, **kws):
|
813
914
|
df_dir_img_single_page = pd.DataFrame()
|
814
915
|
dir_single_page = []
|
815
916
|
if verbose:
|
816
917
|
pp(pdfinfo_from_path(dir_pdf))
|
817
918
|
if isinstance(page, tuple) and page:
|
818
919
|
page = list(page)
|
819
|
-
if isinstance(page,int):
|
820
|
-
page=[page]
|
920
|
+
if isinstance(page, int):
|
921
|
+
page = [page]
|
821
922
|
if page is None:
|
822
923
|
page = [pdfinfo_from_path(dir_pdf)["Pages"]]
|
823
|
-
if len(page)==1 and page != pdfinfo_from_path(dir_pdf)["Pages"]:
|
824
|
-
page=[page[0], page[0]]
|
924
|
+
if len(page) == 1 and page != pdfinfo_from_path(dir_pdf)["Pages"]:
|
925
|
+
page = [page[0], page[0]]
|
825
926
|
else:
|
826
|
-
page=[1, page[0]]
|
927
|
+
page = [1, page[0]]
|
827
928
|
pages = convert_from_path(dir_pdf, first_page=page[0], last_page=page[1], **kws)
|
828
929
|
if dir_save is None:
|
829
930
|
dir_save = newfolder(dirname(dir_pdf), basename(dir_pdf).split(".")[0] + "_img")
|
830
931
|
for i, page in enumerate(pages):
|
831
|
-
if verbose:
|
932
|
+
if verbose:
|
832
933
|
print(f"processing page: {i+1}")
|
833
934
|
if i < 9:
|
834
935
|
dir_img_each_page = dir_save + f"page_0{i+1}.png"
|
@@ -839,6 +940,7 @@ def pdf2img(dir_pdf, dir_save=None, page=None, kind="png",verbose=True, **kws):
|
|
839
940
|
df_dir_img_single_page["fpath"] = dir_single_page
|
840
941
|
return df_dir_img_single_page
|
841
942
|
|
943
|
+
|
842
944
|
# dir_pdf = "/Users/macjianfeng/Dropbox/github/python/240308_Python Data Science Handbook.pdf"
|
843
945
|
# df_page = pdf2img(dir_pdf, page=[1, 5],dpi=300)
|
844
946
|
def get_encoding(fpath, alternative_encodings=None, verbose=False):
|
@@ -855,14 +957,37 @@ def get_encoding(fpath, alternative_encodings=None, verbose=False):
|
|
855
957
|
"""
|
856
958
|
if alternative_encodings is None:
|
857
959
|
alternative_encodings = [
|
858
|
-
|
859
|
-
|
860
|
-
|
861
|
-
|
862
|
-
|
863
|
-
|
864
|
-
|
865
|
-
|
960
|
+
"utf-8",
|
961
|
+
"latin1",
|
962
|
+
"windows-1252",
|
963
|
+
"iso-8859-1",
|
964
|
+
"iso-8859-2",
|
965
|
+
"iso-8859-3",
|
966
|
+
"iso-8859-4",
|
967
|
+
"iso-8859-5",
|
968
|
+
"iso-8859-6",
|
969
|
+
"iso-8859-7",
|
970
|
+
"iso-8859-8",
|
971
|
+
"iso-8859-9",
|
972
|
+
"windows-1250",
|
973
|
+
"windows-1251",
|
974
|
+
"windows-1253",
|
975
|
+
"windows-1254",
|
976
|
+
"windows-1255",
|
977
|
+
"windows-1256",
|
978
|
+
"windows-1257",
|
979
|
+
"windows-1258",
|
980
|
+
"big5",
|
981
|
+
"gb18030",
|
982
|
+
"shift_jis",
|
983
|
+
"euc_jp",
|
984
|
+
"koi8_r",
|
985
|
+
"mac_roman",
|
986
|
+
"mac_central_europe",
|
987
|
+
"mac_greek",
|
988
|
+
"mac_cyrillic",
|
989
|
+
"mac_arabic",
|
990
|
+
"mac_hebrew",
|
866
991
|
]
|
867
992
|
|
868
993
|
if not os.path.isfile(fpath):
|
@@ -870,7 +995,7 @@ def get_encoding(fpath, alternative_encodings=None, verbose=False):
|
|
870
995
|
|
871
996
|
for enc in alternative_encodings:
|
872
997
|
try:
|
873
|
-
with open(fpath, mode=
|
998
|
+
with open(fpath, mode="r", encoding=enc) as file:
|
874
999
|
file.read() # Try to read the file
|
875
1000
|
if verbose:
|
876
1001
|
print(f"Successfully detected encoding: {enc}")
|
@@ -895,6 +1020,7 @@ def fload(fpath, kind=None, **kwargs):
|
|
895
1020
|
Returns:
|
896
1021
|
content: The content loaded from the file.
|
897
1022
|
"""
|
1023
|
+
|
898
1024
|
def load_txt_md(fpath):
|
899
1025
|
with open(fpath, "r") as file:
|
900
1026
|
content = file.read()
|
@@ -920,7 +1046,7 @@ def fload(fpath, kind=None, **kwargs):
|
|
920
1046
|
root = tree.getroot()
|
921
1047
|
return etree.tostring(root, pretty_print=True).decode()
|
922
1048
|
|
923
|
-
def load_csv(fpath, engine=
|
1049
|
+
def load_csv(fpath, engine="pyarrow", **kwargs):
|
924
1050
|
print(f"engine={engine}")
|
925
1051
|
df = pd.read_csv(fpath, engine=engine, **kwargs)
|
926
1052
|
return df
|
@@ -928,35 +1054,36 @@ def fload(fpath, kind=None, **kwargs):
|
|
928
1054
|
def load_xlsx(fpath, **kwargs):
|
929
1055
|
df = pd.read_excel(fpath, **kwargs)
|
930
1056
|
return df
|
931
|
-
|
932
|
-
|
1057
|
+
|
1058
|
+
def load_ipynb(fpath, **kwargs):
|
1059
|
+
as_version = kwargs.get("as_version", 4)
|
933
1060
|
with open(fpath, "r") as file:
|
934
1061
|
nb = nbformat.read(file, as_version=as_version)
|
935
1062
|
md_exporter = MarkdownExporter()
|
936
1063
|
md_body, _ = md_exporter.from_notebook_node(nb)
|
937
1064
|
return md_body
|
938
|
-
|
939
|
-
def load_pdf(fpath, page=
|
1065
|
+
|
1066
|
+
def load_pdf(fpath, page="all", verbose=False, **kwargs):
|
940
1067
|
"""
|
941
|
-
|
942
|
-
|
943
|
-
|
944
|
-
|
945
|
-
|
946
|
-
|
947
|
-
|
948
|
-
|
949
|
-
|
950
|
-
|
951
|
-
|
952
|
-
|
953
|
-
|
954
|
-
|
955
|
-
|
956
|
-
|
957
|
-
|
958
|
-
|
959
|
-
|
1068
|
+
Parameters:
|
1069
|
+
fpath: The path to the PDF file to be loaded.
|
1070
|
+
page (optional):
|
1071
|
+
Specifies which page or pages to extract text from. By default, it's set to "all", which means text from all
|
1072
|
+
pages will be returned. It can also be an integer to specify a single page number or a list of integers to
|
1073
|
+
specify multiple pages.
|
1074
|
+
verbose (optional):
|
1075
|
+
If True, prints the total number of pages processed.
|
1076
|
+
Functionality:
|
1077
|
+
It initializes an empty dictionary text_dict to store page numbers as keys and their corresponding text as values.
|
1078
|
+
It iterates through each page of the PDF file using a for loop.
|
1079
|
+
For each page, it extracts the text using PyPDF2's extract_text() method and stores it in text_dict with the page number incremented by 1 as the key.
|
1080
|
+
If the page parameter is an integer, it converts it into a list containing that single page number to ensure consistency in handling.
|
1081
|
+
If the page parameter is a NumPy array, it converts it to a list using the tolist() method to ensure compatibility with list operations.
|
1082
|
+
If verbose is True, it prints the total number of pages processed.
|
1083
|
+
If page is a list, it combines the text of the specified pages into a single string combined_text and returns it.
|
1084
|
+
If page is set to "all", it returns the entire text_dict containing text of all pages.
|
1085
|
+
If page is an integer, it returns the text of the specified page number.
|
1086
|
+
If the specified page is not found, it returns the string "Page is not found".
|
960
1087
|
"""
|
961
1088
|
text_dict = {}
|
962
1089
|
with open(fpath, "rb") as file:
|
@@ -989,18 +1116,52 @@ def fload(fpath, kind=None, **kwargs):
|
|
989
1116
|
def load_docx(fpath):
|
990
1117
|
doc = Document(fpath)
|
991
1118
|
content = [para.text for para in doc.paragraphs]
|
992
|
-
return content
|
1119
|
+
return content
|
993
1120
|
|
994
1121
|
if kind is None:
|
995
1122
|
_, kind = os.path.splitext(fpath)
|
996
1123
|
kind = kind.lower()
|
997
1124
|
|
998
|
-
kind = kind.lstrip(
|
999
|
-
img_types=[
|
1000
|
-
|
1125
|
+
kind = kind.lstrip(".").lower()
|
1126
|
+
img_types = [
|
1127
|
+
"bmp",
|
1128
|
+
"eps",
|
1129
|
+
"gif",
|
1130
|
+
"icns",
|
1131
|
+
"ico",
|
1132
|
+
"im",
|
1133
|
+
"jpg",
|
1134
|
+
"jpeg",
|
1135
|
+
"jpeg2000",
|
1136
|
+
"msp",
|
1137
|
+
"pcx",
|
1138
|
+
"png",
|
1139
|
+
"ppm",
|
1140
|
+
"sgi",
|
1141
|
+
"spider",
|
1142
|
+
"tga",
|
1143
|
+
"tiff",
|
1144
|
+
"webp",
|
1145
|
+
"json",
|
1146
|
+
]
|
1147
|
+
doc_types = [
|
1148
|
+
"docx",
|
1149
|
+
"txt",
|
1150
|
+
"md",
|
1151
|
+
"html",
|
1152
|
+
"json",
|
1153
|
+
"yaml",
|
1154
|
+
"xml",
|
1155
|
+
"csv",
|
1156
|
+
"xlsx",
|
1157
|
+
"pdf",
|
1158
|
+
"ipynb",
|
1159
|
+
]
|
1001
1160
|
supported_types = [*doc_types, *img_types]
|
1002
1161
|
if kind not in supported_types:
|
1003
|
-
raise ValueError(
|
1162
|
+
raise ValueError(
|
1163
|
+
f"Error:\n{kind} is not in the supported list {supported_types}"
|
1164
|
+
)
|
1004
1165
|
if kind == "docx":
|
1005
1166
|
return load_docx(fpath)
|
1006
1167
|
elif kind == "txt" or kind == "md":
|
@@ -1024,9 +1185,12 @@ def fload(fpath, kind=None, **kwargs):
|
|
1024
1185
|
return load_pdf(fpath, **kwargs)
|
1025
1186
|
elif kind.lower() in img_types:
|
1026
1187
|
print(f'Image ".{kind}" is loaded.')
|
1027
|
-
return load_img(fpath)
|
1188
|
+
return load_img(fpath)
|
1028
1189
|
else:
|
1029
|
-
raise ValueError(
|
1190
|
+
raise ValueError(
|
1191
|
+
f"Error:\n{kind} is not in the supported list {supported_types}"
|
1192
|
+
)
|
1193
|
+
|
1030
1194
|
|
1031
1195
|
# Example usage
|
1032
1196
|
# txt_content = fload('sample.txt')
|
@@ -1039,6 +1203,7 @@ def fload(fpath, kind=None, **kwargs):
|
|
1039
1203
|
# xlsx_content = fload('sample.xlsx')
|
1040
1204
|
# docx_content = fload('sample.docx')
|
1041
1205
|
|
1206
|
+
|
1042
1207
|
def fupdate(fpath, content=None):
|
1043
1208
|
"""
|
1044
1209
|
Update a file by adding new content at the top and moving the old content to the bottom.
|
@@ -1055,34 +1220,37 @@ def fupdate(fpath, content=None):
|
|
1055
1220
|
"""
|
1056
1221
|
content = content or ""
|
1057
1222
|
if os.path.exists(fpath):
|
1058
|
-
with open(fpath,
|
1223
|
+
with open(fpath, "r") as file:
|
1059
1224
|
old_content = file.read()
|
1060
1225
|
else:
|
1061
|
-
old_content =
|
1062
|
-
|
1063
|
-
with open(fpath,
|
1226
|
+
old_content = ""
|
1227
|
+
|
1228
|
+
with open(fpath, "w") as file:
|
1064
1229
|
file.write(content)
|
1065
1230
|
file.write(old_content)
|
1231
|
+
|
1232
|
+
|
1066
1233
|
def fappend(fpath, content=None):
|
1067
1234
|
"""
|
1068
1235
|
append new content at the end.
|
1069
1236
|
"""
|
1070
1237
|
content = content or ""
|
1071
1238
|
if os.path.exists(fpath):
|
1072
|
-
with open(fpath,
|
1239
|
+
with open(fpath, "r") as file:
|
1073
1240
|
old_content = file.read()
|
1074
1241
|
else:
|
1075
|
-
old_content =
|
1076
|
-
|
1077
|
-
with open(fpath,
|
1242
|
+
old_content = ""
|
1243
|
+
|
1244
|
+
with open(fpath, "w") as file:
|
1078
1245
|
file.write(old_content)
|
1079
1246
|
file.write(content)
|
1080
|
-
|
1247
|
+
|
1248
|
+
|
1081
1249
|
def fsave(
|
1082
1250
|
fpath,
|
1083
1251
|
content,
|
1084
|
-
mode=
|
1085
|
-
how
|
1252
|
+
mode="w",
|
1253
|
+
how="overwrite",
|
1086
1254
|
kind=None,
|
1087
1255
|
font_name="Times",
|
1088
1256
|
font_size=10,
|
@@ -1102,16 +1270,16 @@ def fsave(
|
|
1102
1270
|
Returns:
|
1103
1271
|
None
|
1104
1272
|
"""
|
1105
|
-
|
1106
|
-
|
1107
|
-
|
1273
|
+
|
1274
|
+
def save_content(fpath, content, mode=mode, how="overwrite"):
|
1275
|
+
if "wri" in how.lower():
|
1276
|
+
with open(fpath, mode, encoding="utf-8") as file:
|
1108
1277
|
file.write(content)
|
1109
|
-
elif
|
1278
|
+
elif "upd" in how.lower():
|
1110
1279
|
fupdate(fpath, content=content)
|
1111
|
-
elif
|
1280
|
+
elif "app" in how.lower():
|
1112
1281
|
fappend(fpath, content=content)
|
1113
1282
|
|
1114
|
-
|
1115
1283
|
def save_docx(fpath, content, font_name, font_size, spacing):
|
1116
1284
|
if isinstance(content, str):
|
1117
1285
|
content = content.split(". ")
|
@@ -1126,45 +1294,40 @@ def fsave(
|
|
1126
1294
|
paragraph.space_after = docx.shared.Pt(spacing)
|
1127
1295
|
doc.save(fpath)
|
1128
1296
|
|
1129
|
-
|
1130
|
-
|
1131
|
-
# Ensure content is a single string
|
1297
|
+
def save_txt_md(fpath, content, sep="\n", mode="w"):
|
1298
|
+
# Ensure content is a single string
|
1132
1299
|
if isinstance(content, list):
|
1133
1300
|
content = sep.join(content)
|
1134
|
-
save_content(fpath, sep.join(content),mode)
|
1301
|
+
save_content(fpath, sep.join(content), mode)
|
1135
1302
|
|
1136
|
-
|
1137
|
-
def save_html(fpath, content, font_name, font_size,mode='w'):
|
1303
|
+
def save_html(fpath, content, font_name, font_size, mode="w"):
|
1138
1304
|
html_content = "<html><body>"
|
1139
1305
|
for paragraph_text in content:
|
1140
1306
|
html_content += f'<p style="font-family:{font_name}; font-size:{font_size}px;">{paragraph_text}</p>'
|
1141
1307
|
html_content += "</body></html>"
|
1142
|
-
save_content(fpath, html_content,mode)
|
1143
|
-
|
1308
|
+
save_content(fpath, html_content, mode)
|
1144
1309
|
|
1145
1310
|
def save_pdf(fpath, content, font_name, font_size):
|
1146
1311
|
pdf = FPDF()
|
1147
1312
|
pdf.add_page()
|
1148
1313
|
# pdf.add_font('Arial','',r'/System/Library/Fonts/Supplemental/Arial.ttf',uni=True)
|
1149
|
-
pdf.set_font(font_name,
|
1314
|
+
pdf.set_font(font_name, "", font_size)
|
1150
1315
|
for paragraph_text in content:
|
1151
1316
|
pdf.multi_cell(0, 10, paragraph_text)
|
1152
|
-
pdf.ln(h
|
1153
|
-
pdf.output(fpath,
|
1154
|
-
|
1317
|
+
pdf.ln(h="")
|
1318
|
+
pdf.output(fpath, "F")
|
1155
1319
|
|
1156
1320
|
def save_csv(fpath, data, **kwargs):
|
1157
1321
|
df = pd.DataFrame(data)
|
1158
1322
|
df.to_csv(fpath, **kwargs)
|
1159
1323
|
|
1160
|
-
|
1161
1324
|
def save_xlsx(fpath, data, **kwargs):
|
1162
1325
|
df = pd.DataFrame(data)
|
1163
1326
|
df.to_excel(fpath, **kwargs)
|
1164
1327
|
|
1165
|
-
def save_ipynb(fpath,data
|
1328
|
+
def save_ipynb(fpath, data, **kwargs):
|
1166
1329
|
# Split the content by code fences to distinguish between code and markdown
|
1167
|
-
parts = data.split(
|
1330
|
+
parts = data.split("```")
|
1168
1331
|
cells = []
|
1169
1332
|
|
1170
1333
|
for i, part in enumerate(parts):
|
@@ -1176,31 +1339,32 @@ def fsave(
|
|
1176
1339
|
cells.append(nbformat.v4.new_code_cell(part.strip()))
|
1177
1340
|
# Create a new notebook
|
1178
1341
|
nb = nbformat.v4.new_notebook()
|
1179
|
-
nb[
|
1342
|
+
nb["cells"] = cells
|
1180
1343
|
# Write the notebook to a file
|
1181
|
-
with open(fpath,
|
1344
|
+
with open(fpath, "w", encoding="utf-8") as ipynb_file:
|
1182
1345
|
nbformat.write(nb, ipynb_file)
|
1183
|
-
|
1346
|
+
|
1184
1347
|
# def save_json(fpath, data, **kwargs):
|
1185
1348
|
# with open(fpath, "w") as file:
|
1186
1349
|
# json.dump(data, file, **kwargs)
|
1187
1350
|
|
1188
|
-
def save_json(fpath_fname, var_dict_or_df):
|
1351
|
+
def save_json(fpath_fname, var_dict_or_df):
|
1189
1352
|
with open(fpath_fname, "w") as f_json:
|
1190
1353
|
# Check if var_dict_or_df is a DataFrame
|
1191
1354
|
if isinstance(var_dict_or_df, pd.DataFrame):
|
1192
1355
|
# Convert DataFrame to a list of dictionaries
|
1193
1356
|
var_dict_or_df = var_dict_or_df.to_dict(orient="dict")
|
1194
|
-
|
1357
|
+
|
1195
1358
|
# Check if var_dict_or_df is a dictionary
|
1196
1359
|
if isinstance(var_dict_or_df, dict):
|
1197
1360
|
# Convert NumPy arrays to lists
|
1198
1361
|
for key, value in var_dict_or_df.items():
|
1199
1362
|
if isinstance(value, np.ndarray):
|
1200
1363
|
var_dict_or_df[key] = value.tolist()
|
1201
|
-
|
1364
|
+
|
1202
1365
|
# Save the dictionary or list of dictionaries to a JSON file
|
1203
1366
|
json.dump(var_dict_or_df, f_json, indent=4)
|
1367
|
+
|
1204
1368
|
# # Example usage:
|
1205
1369
|
# sets = {"title": "mse_path_ MSE"}
|
1206
1370
|
# jsonsave("/.json", sets)
|
@@ -1210,7 +1374,6 @@ def fsave(
|
|
1210
1374
|
with open(fpath, "w") as file:
|
1211
1375
|
yaml.dump(data, file, **kwargs)
|
1212
1376
|
|
1213
|
-
|
1214
1377
|
def save_xml(fpath, data):
|
1215
1378
|
root = etree.Element("root")
|
1216
1379
|
if isinstance(data, dict):
|
@@ -1239,18 +1402,18 @@ def fsave(
|
|
1239
1402
|
"json",
|
1240
1403
|
"xml",
|
1241
1404
|
"yaml",
|
1242
|
-
"ipynb"
|
1405
|
+
"ipynb",
|
1243
1406
|
]:
|
1244
1407
|
print(
|
1245
1408
|
f"Warning:\n{kind} is not in the supported list ['docx', 'txt', 'md', 'html', 'pdf', 'csv', 'xlsx', 'json', 'xml', 'yaml']"
|
1246
1409
|
)
|
1247
1410
|
|
1248
|
-
if kind == "docx" or kind=="doc":
|
1411
|
+
if kind == "docx" or kind == "doc":
|
1249
1412
|
save_docx(fpath, content, font_name, font_size, spacing)
|
1250
1413
|
elif kind == "txt":
|
1251
|
-
save_txt_md(fpath, content, sep="",mode=mode)
|
1414
|
+
save_txt_md(fpath, content, sep="", mode=mode)
|
1252
1415
|
elif kind == "md":
|
1253
|
-
save_txt_md(fpath, content, sep="",mode=mode)
|
1416
|
+
save_txt_md(fpath, content, sep="", mode=mode)
|
1254
1417
|
elif kind == "html":
|
1255
1418
|
save_html(fpath, content, font_name, font_size)
|
1256
1419
|
elif kind == "pdf":
|
@@ -1260,20 +1423,20 @@ def fsave(
|
|
1260
1423
|
elif kind == "xlsx":
|
1261
1424
|
save_xlsx(fpath, content, **kwargs)
|
1262
1425
|
elif kind == "json":
|
1263
|
-
save_json(fpath, content)
|
1426
|
+
save_json(fpath, content)
|
1264
1427
|
elif kind == "xml":
|
1265
|
-
save_xml(fpath, content)
|
1428
|
+
save_xml(fpath, content)
|
1266
1429
|
elif kind == "yaml":
|
1267
1430
|
save_yaml(fpath, content, **kwargs)
|
1268
1431
|
elif kind == "ipynb":
|
1269
|
-
save_ipynb(fpath, content, **kwargs)
|
1432
|
+
save_ipynb(fpath, content, **kwargs)
|
1270
1433
|
else:
|
1271
|
-
try:
|
1434
|
+
try:
|
1272
1435
|
netfinder.downloader(url=content, dir_save=dirname(fpath), kind=kind)
|
1273
1436
|
except:
|
1274
1437
|
print(
|
1275
1438
|
f"Error:\n{kind} is not in the supported list ['docx', 'txt', 'md', 'html', 'pdf', 'csv', 'xlsx', 'json', 'xml', 'yaml']"
|
1276
|
-
|
1439
|
+
)
|
1277
1440
|
|
1278
1441
|
|
1279
1442
|
# # Example usage
|
@@ -1294,47 +1457,61 @@ def fsave(
|
|
1294
1457
|
# fsave(dir_save + "sample.yaml", yaml_content)
|
1295
1458
|
# fsave(dir_save + "sample.xml", xml_content)
|
1296
1459
|
|
1460
|
+
|
1297
1461
|
def addpath(fpath):
|
1298
|
-
sys.path.insert(0,dir)
|
1462
|
+
sys.path.insert(0, dir)
|
1463
|
+
|
1464
|
+
|
1299
1465
|
def dirname(fpath):
|
1300
1466
|
"""
|
1301
1467
|
dirname: Extracting Directory Name from a File Path
|
1302
1468
|
Args:
|
1303
|
-
fpath (str): the file or directory path
|
1469
|
+
fpath (str): the file or directory path
|
1304
1470
|
Returns:
|
1305
1471
|
str: directory, without filename
|
1306
1472
|
"""
|
1307
|
-
dirname_=os.path.dirname(fpath)
|
1308
|
-
if not dirname_.endswith(
|
1309
|
-
dirname_=dirname_+"/"
|
1473
|
+
dirname_ = os.path.dirname(fpath)
|
1474
|
+
if not dirname_.endswith("/"):
|
1475
|
+
dirname_ = dirname_ + "/"
|
1310
1476
|
return dirname_
|
1311
1477
|
|
1312
|
-
|
1478
|
+
|
1479
|
+
def dir_name(fpath): # same as "dirname"
|
1313
1480
|
return dirname(fpath)
|
1481
|
+
|
1482
|
+
|
1314
1483
|
def basename(fpath):
|
1315
1484
|
"""
|
1316
1485
|
basename: # Output: file.txt
|
1317
1486
|
Args:
|
1318
|
-
fpath (str): the file or directory path
|
1487
|
+
fpath (str): the file or directory path
|
1319
1488
|
Returns:
|
1320
1489
|
str: # Output: file.txt
|
1321
1490
|
"""
|
1322
1491
|
return os.path.basename(fpath)
|
1492
|
+
|
1493
|
+
|
1323
1494
|
def flist(fpath, contains="all"):
|
1324
|
-
all_files = [
|
1495
|
+
all_files = [
|
1496
|
+
os.path.join(fpath, f)
|
1497
|
+
for f in os.listdir(fpath)
|
1498
|
+
if os.path.isfile(os.path.join(fpath, f))
|
1499
|
+
]
|
1325
1500
|
if isinstance(contains, list):
|
1326
1501
|
filt_files = []
|
1327
1502
|
for filter_ in contains:
|
1328
1503
|
filt_files.extend(flist(fpath, filter_))
|
1329
1504
|
return filt_files
|
1330
1505
|
else:
|
1331
|
-
if
|
1506
|
+
if "all" in contains.lower():
|
1332
1507
|
return all_files
|
1333
1508
|
else:
|
1334
1509
|
filt_files = [f for f in all_files if isa(f, contains)]
|
1335
1510
|
return filt_files
|
1511
|
+
|
1512
|
+
|
1336
1513
|
def sort_kind(df, by="name", ascending=True):
|
1337
|
-
if df[by].dtype ==
|
1514
|
+
if df[by].dtype == "object": # Check if the column contains string values
|
1338
1515
|
if ascending:
|
1339
1516
|
sorted_index = df[by].str.lower().argsort()
|
1340
1517
|
else:
|
@@ -1347,7 +1524,8 @@ def sort_kind(df, by="name", ascending=True):
|
|
1347
1524
|
sorted_df = df.iloc[sorted_index].reset_index(drop=True)
|
1348
1525
|
return sorted_df
|
1349
1526
|
|
1350
|
-
|
1527
|
+
|
1528
|
+
def isa(*args, **kwargs):
|
1351
1529
|
"""
|
1352
1530
|
fpath, contains='img'
|
1353
1531
|
containss file paths based on the specified contains.
|
@@ -1360,30 +1538,33 @@ def isa(*args,**kwargs):
|
|
1360
1538
|
"""
|
1361
1539
|
for arg in args:
|
1362
1540
|
if isinstance(arg, str):
|
1363
|
-
if
|
1541
|
+
if "/" in arg or "\\" in arg:
|
1364
1542
|
fpath = arg
|
1365
1543
|
else:
|
1366
|
-
contains=arg
|
1367
|
-
if
|
1544
|
+
contains = arg
|
1545
|
+
if "img" in contains.lower() or "image" in contains.lower():
|
1368
1546
|
return is_image(fpath)
|
1369
|
-
elif
|
1547
|
+
elif "doc" in contains.lower():
|
1370
1548
|
return is_document(fpath)
|
1371
|
-
elif
|
1549
|
+
elif "zip" in contains.lower():
|
1372
1550
|
return is_zip(fpath)
|
1373
|
-
elif
|
1551
|
+
elif "dir" in contains.lower() or (
|
1552
|
+
"f" in contains.lower() and "d" in contains.lower()
|
1553
|
+
):
|
1374
1554
|
return os.path.isdir(fpath)
|
1375
|
-
elif
|
1555
|
+
elif "fi" in contains.lower(): # file
|
1376
1556
|
return os.path.isfile(fpath)
|
1377
|
-
elif
|
1557
|
+
elif "num" in contains.lower(): # file
|
1378
1558
|
return os.path.isfile(fpath)
|
1379
|
-
elif
|
1559
|
+
elif "text" in contains.lower() or "txt" in contains.lower(): # file
|
1380
1560
|
return is_text(fpath)
|
1381
|
-
elif
|
1561
|
+
elif "color" in contains.lower(): # file
|
1382
1562
|
return is_str_color(fpath)
|
1383
1563
|
else:
|
1384
1564
|
print(f"{contains} was not set up correctly")
|
1385
1565
|
return False
|
1386
1566
|
|
1567
|
+
|
1387
1568
|
def listdir(
|
1388
1569
|
rootdir,
|
1389
1570
|
kind="folder",
|
@@ -1391,7 +1572,7 @@ def listdir(
|
|
1391
1572
|
ascending=True,
|
1392
1573
|
contains=None,
|
1393
1574
|
orient="list",
|
1394
|
-
output="df"
|
1575
|
+
output="df", # 'list','dict','records','index','series'
|
1395
1576
|
):
|
1396
1577
|
if not kind.startswith("."):
|
1397
1578
|
kind = "." + kind
|
@@ -1420,12 +1601,12 @@ def listdir(
|
|
1420
1601
|
is_file = kind.lower() in file_extension.lower() and (
|
1421
1602
|
os.path.isfile(item_path)
|
1422
1603
|
)
|
1423
|
-
if kind in [
|
1604
|
+
if kind in [".doc", ".img", ".zip"]: # 选择大的类别
|
1424
1605
|
if kind != ".folder" and not isa(item_path, kind):
|
1425
1606
|
continue
|
1426
|
-
elif kind in [
|
1607
|
+
elif kind in [".all"]:
|
1427
1608
|
return flist(fpath, contains=contains)
|
1428
|
-
else:
|
1609
|
+
else: # 精确到文件的后缀
|
1429
1610
|
if not is_folder and not is_file:
|
1430
1611
|
continue
|
1431
1612
|
f["name"].append(filename)
|
@@ -1433,9 +1614,15 @@ def listdir(
|
|
1433
1614
|
f["path"].append(os.path.join(os.path.dirname(item_path), item))
|
1434
1615
|
fpath = os.path.join(os.path.dirname(item_path), item)
|
1435
1616
|
f["size"].append(round(os.path.getsize(fpath) / 1024 / 1024, 3))
|
1436
|
-
f["created_time"].append(
|
1437
|
-
|
1438
|
-
|
1617
|
+
f["created_time"].append(
|
1618
|
+
pd.to_datetime(os.path.getctime(item_path), unit="s")
|
1619
|
+
)
|
1620
|
+
f["modified_time"].append(
|
1621
|
+
pd.to_datetime(os.path.getmtime(item_path), unit="s")
|
1622
|
+
)
|
1623
|
+
f["last_open_time"].append(
|
1624
|
+
pd.to_datetime(os.path.getatime(item_path), unit="s")
|
1625
|
+
)
|
1439
1626
|
f["fname"].append(filename) # will be removed
|
1440
1627
|
f["fpath"].append(fpath) # will be removed
|
1441
1628
|
i += 1
|
@@ -1464,35 +1651,39 @@ def listdir(
|
|
1464
1651
|
elif "s" in sort_by.lower() and "z" in sort_by.lower():
|
1465
1652
|
f = sort_kind(f, by="size", ascending=ascending)
|
1466
1653
|
|
1467
|
-
if
|
1654
|
+
if "df" in output:
|
1468
1655
|
return f
|
1469
1656
|
else:
|
1470
|
-
if
|
1657
|
+
if "l" in orient.lower(): # list # default
|
1471
1658
|
res_output = Box(f.to_dict(orient="list"))
|
1472
1659
|
return res_output
|
1473
|
-
if
|
1660
|
+
if "d" in orient.lower(): # dict
|
1474
1661
|
return Box(f.to_dict(orient="dict"))
|
1475
|
-
if
|
1662
|
+
if "r" in orient.lower(): # records
|
1476
1663
|
return Box(f.to_dict(orient="records"))
|
1477
|
-
if
|
1664
|
+
if "in" in orient.lower(): # records
|
1478
1665
|
return Box(f.to_dict(orient="index"))
|
1479
|
-
if
|
1666
|
+
if "se" in orient.lower(): # records
|
1480
1667
|
return Box(f.to_dict(orient="series"))
|
1481
1668
|
|
1669
|
+
|
1482
1670
|
# Example usage:
|
1483
1671
|
# result = listdir('your_root_directory')
|
1484
1672
|
# print(result)
|
1485
1673
|
# df=listdir("/", contains='sss',sort_by='name',ascending=False)
|
1486
|
-
# print(df.fname.to_list(),"\n",df.fpath.to_list())
|
1674
|
+
# print(df.fname.to_list(),"\n",df.fpath.to_list())
|
1487
1675
|
def list_func(lib_name, opt="call"):
|
1488
1676
|
if opt == "call":
|
1489
1677
|
funcs = [func for func in dir(lib_name) if callable(getattr(lib_name, func))]
|
1490
1678
|
else:
|
1491
1679
|
funcs = dir(lib_name)
|
1492
1680
|
return funcs
|
1681
|
+
|
1682
|
+
|
1493
1683
|
def func_list(lib_name, opt="call"):
|
1494
1684
|
return list_func(lib_name, opt=opt)
|
1495
1685
|
|
1686
|
+
|
1496
1687
|
def mkdir(*args, **kwargs):
|
1497
1688
|
"""
|
1498
1689
|
newfolder(pardir, chdir)
|
@@ -1503,17 +1694,17 @@ def mkdir(*args, **kwargs):
|
|
1503
1694
|
Returns:
|
1504
1695
|
mkdir, giving a option if exists_ok or not
|
1505
1696
|
"""
|
1506
|
-
overwrite=kwargs.get("overwrite",False)
|
1697
|
+
overwrite = kwargs.get("overwrite", False)
|
1507
1698
|
for arg in args:
|
1508
|
-
if isinstance(arg, (str,list)):
|
1699
|
+
if isinstance(arg, (str, list)):
|
1509
1700
|
if "/" in arg or "\\" in arg:
|
1510
|
-
pardir=arg
|
1511
|
-
print(f
|
1701
|
+
pardir = arg
|
1702
|
+
print(f"pardir{pardir}")
|
1512
1703
|
else:
|
1513
1704
|
chdir = arg
|
1514
|
-
print(f
|
1515
|
-
elif isinstance(arg,bool):
|
1516
|
-
overwrite=arg
|
1705
|
+
print(f"chdir{chdir}")
|
1706
|
+
elif isinstance(arg, bool):
|
1707
|
+
overwrite = arg
|
1517
1708
|
print(overwrite)
|
1518
1709
|
else:
|
1519
1710
|
print(f"{arg}Error: not support a {type(arg)} type")
|
@@ -1526,7 +1717,7 @@ def mkdir(*args, **kwargs):
|
|
1526
1717
|
if isinstance(pardir, str): # Dir_parents should be 'str' type
|
1527
1718
|
pardir = os.path.normpath(pardir)
|
1528
1719
|
# Get the slash type: "/" or "\"
|
1529
|
-
stype =
|
1720
|
+
stype = "/" if "/" in pardir else "\\"
|
1530
1721
|
# Check if the parent directory exists and is a directory path
|
1531
1722
|
if os.path.isdir(pardir):
|
1532
1723
|
os.chdir(pardir) # Set current path
|
@@ -1538,80 +1729,83 @@ def mkdir(*args, **kwargs):
|
|
1538
1729
|
# Check if the subfolder already exists
|
1539
1730
|
child_tmp = os.path.join(pardir, folder)
|
1540
1731
|
if not os.path.isdir(child_tmp):
|
1541
|
-
os.mkdir(
|
1542
|
-
print(f
|
1732
|
+
os.mkdir("./" + folder)
|
1733
|
+
print(f"\n {folder} was created successfully!\n")
|
1543
1734
|
else:
|
1544
1735
|
if overwrite:
|
1545
1736
|
shutil.rmtree(child_tmp)
|
1546
|
-
os.mkdir(
|
1547
|
-
print(f
|
1737
|
+
os.mkdir("./" + folder)
|
1738
|
+
print(f"\n {folder} overwrite! \n")
|
1548
1739
|
else:
|
1549
|
-
print(f
|
1740
|
+
print(f"\n {folder} already exists! \n")
|
1550
1741
|
rootdir.append(child_tmp + stype) # Note down
|
1551
1742
|
else:
|
1552
|
-
print(
|
1743
|
+
print("\nWarning: Dir_child doesn't exist\n")
|
1553
1744
|
else:
|
1554
|
-
print(
|
1745
|
+
print("\nWarning: Dir_parent is not a directory path\n")
|
1555
1746
|
# Dir is the main output, if only one dir, then str type is inconvenient
|
1556
1747
|
if len(rootdir) == 1:
|
1557
1748
|
rootdir = rootdir[0]
|
1558
1749
|
return rootdir
|
1559
1750
|
|
1560
|
-
|
1751
|
+
|
1752
|
+
def figsave(*args, dpi=300):
|
1561
1753
|
dir_save = None
|
1562
|
-
fname = None
|
1754
|
+
fname = None
|
1563
1755
|
for arg in args:
|
1564
1756
|
if isinstance(arg, str):
|
1565
|
-
if
|
1757
|
+
if "/" in arg or "\\" in arg:
|
1566
1758
|
dir_save = arg
|
1567
|
-
elif
|
1759
|
+
elif "/" not in arg and "\\" not in arg:
|
1568
1760
|
fname = arg
|
1569
1761
|
# Backup original values
|
1570
|
-
if
|
1571
|
-
if dir_save[-1] !=
|
1572
|
-
dir_save = dir_save +
|
1573
|
-
elif
|
1574
|
-
if dir_save[-1] !=
|
1575
|
-
dir_save = dir_save +
|
1762
|
+
if "/" in dir_save:
|
1763
|
+
if dir_save[-1] != "/":
|
1764
|
+
dir_save = dir_save + "/"
|
1765
|
+
elif "\\" in dir_save:
|
1766
|
+
if dir_save[-1] != "\\":
|
1767
|
+
dir_save = dir_save + "\\"
|
1576
1768
|
else:
|
1577
|
-
raise ValueError(
|
1578
|
-
ftype = fname.split(
|
1579
|
-
if len(fname.split(
|
1580
|
-
ftype =
|
1581
|
-
fname = dir_save + fname +
|
1769
|
+
raise ValueError("Check the Path of dir_save Directory")
|
1770
|
+
ftype = fname.split(".")[-1]
|
1771
|
+
if len(fname.split(".")) == 1:
|
1772
|
+
ftype = "nofmt"
|
1773
|
+
fname = dir_save + fname + "." + ftype
|
1582
1774
|
else:
|
1583
1775
|
fname = dir_save + fname
|
1584
1776
|
# Save figure based on file type
|
1585
|
-
if ftype.lower() ==
|
1586
|
-
plt.savefig(fname, format=
|
1587
|
-
plt.savefig(
|
1588
|
-
|
1589
|
-
|
1590
|
-
|
1591
|
-
|
1592
|
-
|
1593
|
-
|
1777
|
+
if ftype.lower() == "eps":
|
1778
|
+
plt.savefig(fname, format="eps", bbox_inches="tight")
|
1779
|
+
plt.savefig(
|
1780
|
+
fname.replace(".eps", ".pdf"), format="pdf", bbox_inches="tight", dpi=dpi
|
1781
|
+
)
|
1782
|
+
elif ftype.lower() == "nofmt": # default: both "tif" and "pdf"
|
1783
|
+
fname_corr = fname.replace("nofmt", "pdf")
|
1784
|
+
plt.savefig(fname_corr, format="pdf", bbox_inches="tight", dpi=dpi)
|
1785
|
+
fname = fname.replace("nofmt", "tif")
|
1786
|
+
plt.savefig(fname, format="tiff", dpi=dpi, bbox_inches="tight")
|
1594
1787
|
print(f"default saving filetype: both 'tif' and 'pdf")
|
1595
|
-
elif ftype.lower() ==
|
1596
|
-
plt.savefig(fname, format=
|
1597
|
-
elif ftype.lower() in [
|
1598
|
-
plt.savefig(fname, format=
|
1599
|
-
elif ftype.lower() ==
|
1600
|
-
plt.savefig(fname, format=
|
1601
|
-
|
1602
|
-
|
1603
|
-
|
1604
|
-
|
1605
|
-
|
1606
|
-
|
1607
|
-
|
1608
|
-
print(f'\nSaved @: dpi={dpi}\n{fname}')
|
1788
|
+
elif ftype.lower() == "pdf":
|
1789
|
+
plt.savefig(fname, format="pdf", bbox_inches="tight", dpi=dpi)
|
1790
|
+
elif ftype.lower() in ["jpg", "jpeg"]:
|
1791
|
+
plt.savefig(fname, format="jpeg", dpi=dpi, bbox_inches="tight")
|
1792
|
+
elif ftype.lower() == "png":
|
1793
|
+
plt.savefig(fname, format="png", dpi=dpi, bbox_inches="tight", transparent=True)
|
1794
|
+
elif ftype.lower() in ["tiff", "tif"]:
|
1795
|
+
plt.savefig(fname, format="tiff", dpi=dpi, bbox_inches="tight")
|
1796
|
+
elif ftype.lower() == "emf":
|
1797
|
+
plt.savefig(fname, format="emf", dpi=dpi, bbox_inches="tight")
|
1798
|
+
elif ftype.lower() == "fig":
|
1799
|
+
plt.savefig(fname, format="pdf", bbox_inches="tight", dpi=dpi)
|
1800
|
+
print(f"\nSaved @: dpi={dpi}\n{fname}")
|
1609
1801
|
|
1610
1802
|
|
1611
1803
|
def is_str_color(s):
|
1612
1804
|
# Regular expression pattern for hexadecimal color codes
|
1613
1805
|
color_code_pattern = r"^#([A-Fa-f0-9]{6}|[A-Fa-f0-9]{8})$"
|
1614
1806
|
return re.match(color_code_pattern, s) is not None
|
1807
|
+
|
1808
|
+
|
1615
1809
|
def is_num(s):
|
1616
1810
|
"""
|
1617
1811
|
Check if a string can be converted to a number (int or float).
|
@@ -1625,58 +1819,72 @@ def is_num(s):
|
|
1625
1819
|
return True
|
1626
1820
|
except ValueError:
|
1627
1821
|
return False
|
1822
|
+
|
1823
|
+
|
1628
1824
|
def isnum(s):
|
1629
1825
|
return is_num(s)
|
1826
|
+
|
1827
|
+
|
1630
1828
|
def is_image(fpath):
|
1631
1829
|
mime_type, _ = mimetypes.guess_type(fpath)
|
1632
|
-
if mime_type and mime_type.startswith(
|
1830
|
+
if mime_type and mime_type.startswith("image"):
|
1633
1831
|
return True
|
1634
1832
|
else:
|
1635
1833
|
return False
|
1834
|
+
|
1835
|
+
|
1636
1836
|
def is_document(fpath):
|
1637
1837
|
mime_type, _ = mimetypes.guess_type(fpath)
|
1638
1838
|
if mime_type and (
|
1639
|
-
mime_type.startswith(
|
1640
|
-
mime_type ==
|
1641
|
-
mime_type ==
|
1642
|
-
mime_type
|
1643
|
-
|
1644
|
-
mime_type ==
|
1645
|
-
mime_type
|
1646
|
-
|
1839
|
+
mime_type.startswith("text/")
|
1840
|
+
or mime_type == "application/pdf"
|
1841
|
+
or mime_type == "application/msword"
|
1842
|
+
or mime_type
|
1843
|
+
== "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
1844
|
+
or mime_type == "application/vnd.ms-excel"
|
1845
|
+
or mime_type
|
1846
|
+
== "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
1847
|
+
or mime_type == "application/vnd.ms-powerpoint"
|
1848
|
+
or mime_type
|
1849
|
+
== "application/vnd.openxmlformats-officedocument.presentationml.presentation"
|
1647
1850
|
):
|
1648
1851
|
return True
|
1649
1852
|
else:
|
1650
1853
|
return False
|
1854
|
+
|
1855
|
+
|
1651
1856
|
def is_zip(fpath):
    """
    Tell whether a path is guessed to be a ZIP archive.

    Args:
        fpath: File name or path handed to mimetypes.guess_type.

    Returns:
        bool: True when the guessed MIME type is exactly "application/zip".
    """
    guessed, _encoding = mimetypes.guess_type(fpath)
    return guessed == "application/zip"
|
1657
1862
|
|
1658
1863
|
|
1659
|
-
def adjust_spines(ax=None, spines=("left", "bottom"), distance=2):
    """
    Keep only the requested spines of an Axes and move them outward.

    Args:
        ax: Axes to modify; when None the current Axes (plt.gca()) is used.
        spines: Names of the spines to keep, e.g. "left", "bottom". Every
            other spine is hidden. The default is an immutable tuple so it
            cannot be altered between calls.
        distance: Offset handed to set_position(("outward", distance)).

    Returns:
        None. The Axes is modified in place.
    """
    if ax is None:
        ax = plt.gca()
    for loc, spine in ax.spines.items():
        if loc in spines:
            spine.set_position(("outward", distance))  # push the kept spine outward
        else:
            spine.set_color("none")  # don't draw spine
    # turn off ticks where there is no spine
    if "left" in spines:
        ax.yaxis.set_ticks_position("left")
    else:
        ax.yaxis.set_ticks([])
    if "bottom" in spines:
        ax.xaxis.set_ticks_position("bottom")
    else:
        # no xaxis ticks
        ax.xaxis.set_ticks([])
|
1883
|
+
|
1884
|
+
|
1678
1885
|
# And then plot the data:
|
1679
1886
|
|
1887
|
+
|
1680
1888
|
def add_colorbar(im, width=None, pad=None, **kwargs):
|
1681
1889
|
# usage: add_colorbar(im, width=0.01, pad=0.005, label="PSD (dB)", shrink=0.8)
|
1682
1890
|
l, b, w, h = im.axes.get_position().bounds # get boundaries
|
@@ -1685,6 +1893,8 @@ def add_colorbar(im, width=None, pad=None, **kwargs):
|
|
1685
1893
|
fig = im.axes.figure # get figure of image
|
1686
1894
|
cax = fig.add_axes([l + w + pad, b, width, h]) # define cbar Axes
|
1687
1895
|
return fig.colorbar(im, cax=cax, **kwargs) # draw cbar
|
1896
|
+
|
1897
|
+
|
1688
1898
|
# =============================================================================
|
1689
1899
|
# # for plot figures: setting rcParams
|
1690
1900
|
# usage: set_pub()
|
@@ -1697,13 +1907,16 @@ def add_colorbar(im, width=None, pad=None, **kwargs):
|
|
1697
1907
|
|
1698
1908
|
|
1699
1909
|
def list2str(x_str):
    """Concatenate the str() form of every item in x_str, with no separator."""
    return "".join(map(str, x_str))
|
1912
|
+
|
1913
|
+
|
1702
1914
|
def str2list(str_):
    """
    Split a string (or any iterable) into a list of its items.

    Args:
        str_: The string to split; each character becomes one list element.

    Returns:
        list: A new list holding the items of str_ in order.
    """
    # list() replaces a comprehension that was executed only for its
    # append side effect and built a throwaway list of None values.
    return list(str_)
|
1706
1918
|
|
1919
|
+
|
1707
1920
|
def load_img(fpath):
|
1708
1921
|
"""
|
1709
1922
|
Load an image from the specified file path.
|
@@ -1723,6 +1936,7 @@ def load_img(fpath):
|
|
1723
1936
|
except OSError:
|
1724
1937
|
raise OSError(f"Unable to open file '{fpath}' or it is not a valid image file.")
|
1725
1938
|
|
1939
|
+
|
1726
1940
|
def apply_filter(img, *args):
|
1727
1941
|
# def apply_filter(img, filter_name, filter_value=None):
|
1728
1942
|
"""
|
@@ -1734,42 +1948,47 @@ def apply_filter(img, *args):
|
|
1734
1948
|
Returns:
|
1735
1949
|
PIL.Image: The filtered image.
|
1736
1950
|
"""
|
1951
|
+
|
1737
1952
|
def correct_filter_name(filter_name):
|
1738
|
-
if
|
1739
|
-
return
|
1740
|
-
elif
|
1741
|
-
return
|
1742
|
-
elif
|
1743
|
-
return
|
1744
|
-
elif
|
1745
|
-
|
1746
|
-
|
1747
|
-
|
1748
|
-
|
1749
|
-
return
|
1750
|
-
elif
|
1751
|
-
return
|
1752
|
-
elif
|
1753
|
-
return
|
1754
|
-
elif
|
1755
|
-
return
|
1756
|
-
elif
|
1757
|
-
return
|
1758
|
-
elif
|
1759
|
-
return
|
1760
|
-
elif
|
1761
|
-
return
|
1762
|
-
elif
|
1763
|
-
return
|
1764
|
-
elif
|
1765
|
-
return
|
1766
|
-
elif
|
1767
|
-
return
|
1768
|
-
elif
|
1769
|
-
return
|
1770
|
-
elif
|
1771
|
-
return
|
1772
|
-
|
1953
|
+
if "bl" in filter_name.lower() and "box" not in filter_name.lower():
|
1954
|
+
return "BLUR"
|
1955
|
+
elif "cont" in filter_name.lower():
|
1956
|
+
return "Contour"
|
1957
|
+
elif "det" in filter_name.lower():
|
1958
|
+
return "Detail"
|
1959
|
+
elif (
|
1960
|
+
"edg" in filter_name.lower()
|
1961
|
+
and "mo" not in filter_name.lower()
|
1962
|
+
and "f" not in filter_name.lower()
|
1963
|
+
):
|
1964
|
+
return "EDGE_ENHANCE"
|
1965
|
+
elif "edg" in filter_name.lower() and "mo" in filter_name.lower():
|
1966
|
+
return "EDGE_ENHANCE_MORE"
|
1967
|
+
elif "emb" in filter_name.lower():
|
1968
|
+
return "EMBOSS"
|
1969
|
+
elif "edg" in filter_name.lower() and "f" in filter_name.lower():
|
1970
|
+
return "FIND_EDGES"
|
1971
|
+
elif "sh" in filter_name.lower() and "mo" not in filter_name.lower():
|
1972
|
+
return "SHARPEN"
|
1973
|
+
elif "sm" in filter_name.lower() and "mo" not in filter_name.lower():
|
1974
|
+
return "SMOOTH"
|
1975
|
+
elif "sm" in filter_name.lower() and "mo" in filter_name.lower():
|
1976
|
+
return "SMOOTH_MORE"
|
1977
|
+
elif "min" in filter_name.lower():
|
1978
|
+
return "MIN_FILTER"
|
1979
|
+
elif "max" in filter_name.lower():
|
1980
|
+
return "MAX_FILTER"
|
1981
|
+
elif "mod" in filter_name.lower():
|
1982
|
+
return "MODE_FILTER"
|
1983
|
+
elif "mul" in filter_name.lower():
|
1984
|
+
return "MULTIBAND_FILTER"
|
1985
|
+
elif "gau" in filter_name.lower():
|
1986
|
+
return "GAUSSIAN_BLUR"
|
1987
|
+
elif "box" in filter_name.lower():
|
1988
|
+
return "BOX_BLUR"
|
1989
|
+
elif "med" in filter_name.lower():
|
1990
|
+
return "MEDIAN_FILTER"
|
1991
|
+
else:
|
1773
1992
|
supported_filters = [
|
1774
1993
|
"BLUR",
|
1775
1994
|
"CONTOUR",
|
@@ -1843,21 +2062,232 @@ def apply_filter(img, *args):
|
|
1843
2062
|
return img.filter(supported_filters[filter_name](bands))
|
1844
2063
|
else:
|
1845
2064
|
if filter_value is not None:
|
1846
|
-
print(
|
2065
|
+
print(
|
2066
|
+
f"{filter_name} doesn't require a value for {filter_value}, but it remains unaffected"
|
2067
|
+
)
|
1847
2068
|
return img.filter(supported_filters[filter_name])
|
1848
2069
|
|
1849
2070
|
|
1850
|
-
def
|
1851
|
-
|
1852
|
-
|
1853
|
-
|
1854
|
-
|
1855
|
-
|
1856
|
-
|
1857
|
-
|
1858
|
-
|
1859
|
-
|
1860
|
-
):
|
2071
|
+
# def imgsetss(
|
2072
|
+
# img,
|
2073
|
+
# sets=None,
|
2074
|
+
# show=True,
|
2075
|
+
# show_axis=False,
|
2076
|
+
# size=None,
|
2077
|
+
# dpi=100,
|
2078
|
+
# figsize=None,
|
2079
|
+
# auto=False,
|
2080
|
+
# filter_kws=None,
|
2081
|
+
# ):
|
2082
|
+
# """
|
2083
|
+
# Apply various enhancements and filters to an image using PIL's ImageEnhance and ImageFilter modules.
|
2084
|
+
|
2085
|
+
# Args:
|
2086
|
+
# img (PIL.Image): The input image.
|
2087
|
+
# sets (dict): A dictionary specifying the enhancements, filters, and their parameters.
|
2088
|
+
# show (bool): Whether to display the enhanced image.
|
2089
|
+
# show_axis (bool): Whether to display axes on the image plot.
|
2090
|
+
# size (tuple): The size of the thumbnail, cover, contain, or fit operation.
|
2091
|
+
# dpi (int): Dots per inch for the displayed image.
|
2092
|
+
# figsize (tuple): The size of the figure for displaying the image.
|
2093
|
+
# auto (bool): Whether to automatically enhance the image based on its characteristics.
|
2094
|
+
|
2095
|
+
# Returns:
|
2096
|
+
# PIL.Image: The enhanced image.
|
2097
|
+
|
2098
|
+
# Supported enhancements and filters:
|
2099
|
+
# - "sharpness": Adjusts the sharpness of the image. Values > 1 increase sharpness, while values < 1 decrease sharpness.
|
2100
|
+
# - "contrast": Adjusts the contrast of the image. Values > 1 increase contrast, while values < 1 decrease contrast.
|
2101
|
+
# - "brightness": Adjusts the brightness of the image. Values > 1 increase brightness, while values < 1 decrease brightness.
|
2102
|
+
# - "color": Adjusts the color saturation of the image. Values > 1 increase saturation, while values < 1 decrease saturation.
|
2103
|
+
# - "rotate": Rotates the image by the specified angle.
|
2104
|
+
# - "crop" or "cut": Crops the image. The value should be a tuple specifying the crop box as (left, upper, right, lower).
|
2105
|
+
# - "size": Resizes the image to the specified dimensions.
|
2106
|
+
# - "thumbnail": Resizes the image to fit within the given size while preserving aspect ratio.
|
2107
|
+
# - "cover": Resizes and crops the image to fill the specified size.
|
2108
|
+
# - "contain": Resizes the image to fit within the specified size, adding borders if necessary.
|
2109
|
+
# - "fit": Resizes and pads the image to fit within the specified size.
|
2110
|
+
# - "filter": Applies various filters to the image (e.g., BLUR, CONTOUR, EDGE_ENHANCE).
|
2111
|
+
|
2112
|
+
# Note:
|
2113
|
+
# The "color" and "enhance" enhancements are not implemented in this function.
|
2114
|
+
# """
|
2115
|
+
# supported_filters = [
|
2116
|
+
# "BLUR",
|
2117
|
+
# "CONTOUR",
|
2118
|
+
# "DETAIL",
|
2119
|
+
# "EDGE_ENHANCE",
|
2120
|
+
# "EDGE_ENHANCE_MORE",
|
2121
|
+
# "EMBOSS",
|
2122
|
+
# "FIND_EDGES",
|
2123
|
+
# "SHARPEN",
|
2124
|
+
# "SMOOTH",
|
2125
|
+
# "SMOOTH_MORE",
|
2126
|
+
# "MIN_FILTER",
|
2127
|
+
# "MAX_FILTER",
|
2128
|
+
# "MODE_FILTER",
|
2129
|
+
# "MULTIBAND_FILTER",
|
2130
|
+
# "GAUSSIAN_BLUR",
|
2131
|
+
# "BOX_BLUR",
|
2132
|
+
# "MEDIAN_FILTER",
|
2133
|
+
# ]
|
2134
|
+
# print("sets: a dict,'sharp:1.2','color','contrast:'auto' or 1.2','bright', 'crop: x_upperleft,y_upperleft, x_lowerright, y_lowerright','rotation','resize','rem or background'")
|
2135
|
+
# print(f"usage: filter_kws 'dict' below:")
|
2136
|
+
# pp([str(i).lower() for i in supported_filters])
|
2137
|
+
# print("\nlog:\n")
|
2138
|
+
# def confirm_rembg_models(model_name):
|
2139
|
+
# models_support = [
|
2140
|
+
# "u2net",
|
2141
|
+
# "u2netp",
|
2142
|
+
# "u2net_human_seg",
|
2143
|
+
# "u2net_cloth_seg",
|
2144
|
+
# "silueta",
|
2145
|
+
# "isnet-general-use",
|
2146
|
+
# "isnet-anime",
|
2147
|
+
# "sam",
|
2148
|
+
# ]
|
2149
|
+
# if model_name in models_support:
|
2150
|
+
# print(f"model_name: {model_name}")
|
2151
|
+
# return model_name
|
2152
|
+
# else:
|
2153
|
+
# print(f"{model_name} cannot be found, check the name:{models_support}, default('isnet-general-use') has been used")
|
2154
|
+
# return "isnet-general-use"
|
2155
|
+
# def auto_enhance(img):
|
2156
|
+
# """
|
2157
|
+
# Automatically enhances the image based on its characteristics.
|
2158
|
+
# Args:
|
2159
|
+
# img (PIL.Image): The input image.
|
2160
|
+
# Returns:
|
2161
|
+
# dict: A dictionary containing the optimal enhancement values.
|
2162
|
+
# """
|
2163
|
+
# # Determine the bit depth based on the image mode
|
2164
|
+
# if img.mode in ["1", "L", "P", "RGB", "YCbCr", "LAB", "HSV"]:
|
2165
|
+
# # 8-bit depth per channel
|
2166
|
+
# bit_depth = 8
|
2167
|
+
# elif img.mode in ["RGBA", "CMYK"]:
|
2168
|
+
# # 8-bit depth per channel + alpha (RGBA) or additional channels (CMYK)
|
2169
|
+
# bit_depth = 8
|
2170
|
+
# elif img.mode in ["I", "F"]:
|
2171
|
+
# # 16-bit depth per channel (integer or floating-point)
|
2172
|
+
# bit_depth = 16
|
2173
|
+
# else:
|
2174
|
+
# raise ValueError("Unsupported image mode")
|
2175
|
+
# # Calculate the brightness and contrast for each channel
|
2176
|
+
# num_channels = len(img.getbands())
|
2177
|
+
# brightness_factors = []
|
2178
|
+
# contrast_factors = []
|
2179
|
+
# for channel in range(num_channels):
|
2180
|
+
# channel_histogram = img.split()[channel].histogram()
|
2181
|
+
# brightness = sum(i * w for i, w in enumerate(channel_histogram))/sum(channel_histogram)
|
2182
|
+
# channel_min, channel_max = img.split()[channel].getextrema()
|
2183
|
+
# contrast = channel_max - channel_min
|
2184
|
+
# # Adjust calculations based on bit depth
|
2185
|
+
# normalization_factor = 2**bit_depth - 1 # Max value for the given bit depth
|
2186
|
+
# brightness_factor = (1.0 + (brightness - normalization_factor / 2) / normalization_factor)
|
2187
|
+
# contrast_factor = (1.0 + (contrast - normalization_factor / 2) / normalization_factor)
|
2188
|
+
# brightness_factors.append(brightness_factor)
|
2189
|
+
# contrast_factors.append(contrast_factor)
|
2190
|
+
# # Calculate the average brightness and contrast factors across channels
|
2191
|
+
# avg_brightness_factor = sum(brightness_factors) / num_channels
|
2192
|
+
# avg_contrast_factor = sum(contrast_factors) / num_channels
|
2193
|
+
# return {"brightness": avg_brightness_factor, "contrast": avg_contrast_factor}
|
2194
|
+
# # Load image if input is a file path
|
2195
|
+
# if isinstance(img, str):
|
2196
|
+
# img = load_img(img)
|
2197
|
+
# img_update = img.copy()
|
2198
|
+
# # Auto-enhance image if requested
|
2199
|
+
# if auto:
|
2200
|
+
# auto_params = auto_enhance(img_update)
|
2201
|
+
# sets.update(auto_params)
|
2202
|
+
# if sets is None:
|
2203
|
+
# sets = {}
|
2204
|
+
# for k, value in sets.items():
|
2205
|
+
# if "shar" in k.lower():
|
2206
|
+
# enhancer = ImageEnhance.Sharpness(img_update)
|
2207
|
+
# img_update = enhancer.enhance(value)
|
2208
|
+
# elif "col" in k.lower() and 'bg' not in k.lower():
|
2209
|
+
# enhancer = ImageEnhance.Color(img_update)
|
2210
|
+
# img_update = enhancer.enhance(value)
|
2211
|
+
# elif "contr" in k.lower():
|
2212
|
+
# if value and isinstance(value,(float,int)):
|
2213
|
+
# enhancer = ImageEnhance.Contrast(img_update)
|
2214
|
+
# img_update = enhancer.enhance(value)
|
2215
|
+
# else:
|
2216
|
+
# print('autocontrasted')
|
2217
|
+
# img_update = ImageOps.autocontrast(img_update)
|
2218
|
+
# elif "bri" in k.lower():
|
2219
|
+
# enhancer = ImageEnhance.Brightness(img_update)
|
2220
|
+
# img_update = enhancer.enhance(value)
|
2221
|
+
# elif "cro" in k.lower() or "cut" in k.lower():
|
2222
|
+
# img_update=img_update.crop(value)
|
2223
|
+
# elif "rota" in k.lower():
|
2224
|
+
# img_update = img_update.rotate(value)
|
2225
|
+
# elif "si" in k.lower():
|
2226
|
+
# img_update = img_update.resize(value)
|
2227
|
+
# elif "thum" in k.lower():
|
2228
|
+
# img_update.thumbnail(value)
|
2229
|
+
# elif "cover" in k.lower():
|
2230
|
+
# img_update = ImageOps.cover(img_update, size=value)
|
2231
|
+
# elif "contain" in k.lower():
|
2232
|
+
# img_update = ImageOps.contain(img_update, size=value)
|
2233
|
+
# elif "fit" in k.lower():
|
2234
|
+
# img_update = ImageOps.fit(img_update, size=value)
|
2235
|
+
# elif "pad" in k.lower():
|
2236
|
+
# img_update = ImageOps.pad(img_update, size=value)
|
2237
|
+
# elif 'rem' in k.lower() or 'rm' in k.lower() or 'back' in k.lower():
|
2238
|
+
# if value and isinstance(value,(int,float,list)):
|
2239
|
+
# print('example usage: {"rm":[alpha_matting_background_threshold(20),alpha_matting_foreground_threshold(270),alpha_matting_erode_sive(11)]}')
|
2240
|
+
# print("https://github.com/danielgatis/rembg/blob/main/USAGE.md")
|
2241
|
+
# # ### Parameters:
|
2242
|
+
# # data (Union[bytes, PILImage, np.ndarray]): The input image data.
|
2243
|
+
# # alpha_matting (bool, optional): Flag indicating whether to use alpha matting. Defaults to False.
|
2244
|
+
# # alpha_matting_foreground_threshold (int, optional): Foreground threshold for alpha matting. Defaults to 240.
|
2245
|
+
# # alpha_matting_background_threshold (int, optional): Background threshold for alpha matting. Defaults to 10.
|
2246
|
+
# # alpha_matting_erode_size (int, optional): Erosion size for alpha matting. Defaults to 10.
|
2247
|
+
# # session (Optional[BaseSession], optional): A session object for the 'u2net' model. Defaults to None.
|
2248
|
+
# # only_mask (bool, optional): Flag indicating whether to return only the binary masks. Defaults to False.
|
2249
|
+
# # post_process_mask (bool, optional): Flag indicating whether to post-process the masks. Defaults to False.
|
2250
|
+
# # bgcolor (Optional[Tuple[int, int, int, int]], optional): Background color for the cutout image. Defaults to None.
|
2251
|
+
# # ###
|
2252
|
+
# if isinstance(value,int):
|
2253
|
+
# value=[value]
|
2254
|
+
# if len(value) <2:
|
2255
|
+
# img_update = remove(img_update,alpha_matting=True,alpha_matting_background_threshold=value)
|
2256
|
+
# elif 2<=len(value)<3:
|
2257
|
+
# img_update = remove(img_update,alpha_matting=True,alpha_matting_background_threshold=value[0],alpha_matting_foreground_threshold=value[1])
|
2258
|
+
# elif 3<=len(value)<4:
|
2259
|
+
# img_update = remove(img_update,alpha_matting=True,alpha_matting_background_threshold=value[0],alpha_matting_foreground_threshold=value[1],alpha_matting_erode_size=value[2])
|
2260
|
+
# if isinstance(value,tuple): # replace the background color
|
2261
|
+
# if len(value)==3:
|
2262
|
+
# value+=(255,)
|
2263
|
+
# img_update = remove(img_update, bgcolor=value)
|
2264
|
+
# if isinstance(value,str):
|
2265
|
+
# if confirm_rembg_models(value):
|
2266
|
+
# img_update=remove(img_update,session=new_session(value))
|
2267
|
+
# else:
|
2268
|
+
# img_update=remove(img_update)
|
2269
|
+
# elif 'bgcolor' in k.lower():
|
2270
|
+
# if isinstance(value,list):
|
2271
|
+
# value=tuple(value)
|
2272
|
+
# if isinstance(value,tuple): # replace the background color
|
2273
|
+
# if len(value)==3:
|
2274
|
+
# value+=(255,)
|
2275
|
+
# img_update = remove(img_update, bgcolor=value)
|
2276
|
+
# if filter_kws:
|
2277
|
+
# for filter_name, filter_value in filter_kws.items():
|
2278
|
+
# img_update = apply_filter(img_update, filter_name, filter_value)
|
2279
|
+
# # Display the image if requested
|
2280
|
+
# if show:
|
2281
|
+
# if figsize is None:
|
2282
|
+
# plt.figure(dpi=dpi)
|
2283
|
+
# else:
|
2284
|
+
# plt.figure(figsize=figsize, dpi=dpi)
|
2285
|
+
# plt.imshow(img_update)
|
2286
|
+
# plt.axis("on") if show_axis else plt.axis("off")
|
2287
|
+
# return img_update
|
2288
|
+
|
2289
|
+
|
2290
|
+
def imgsets(img, **kwargs):
|
1861
2291
|
"""
|
1862
2292
|
Apply various enhancements and filters to an image using PIL's ImageEnhance and ImageFilter modules.
|
1863
2293
|
|
@@ -1892,28 +2322,31 @@ def imgsets(
|
|
1892
2322
|
The "color" and "enhance" enhancements are not implemented in this function.
|
1893
2323
|
"""
|
1894
2324
|
supported_filters = [
|
1895
|
-
|
1896
|
-
|
1897
|
-
|
1898
|
-
|
1899
|
-
|
1900
|
-
|
1901
|
-
|
1902
|
-
|
1903
|
-
|
1904
|
-
|
1905
|
-
|
1906
|
-
|
1907
|
-
|
1908
|
-
|
1909
|
-
|
1910
|
-
|
1911
|
-
|
1912
|
-
|
1913
|
-
print(
|
2325
|
+
"BLUR",
|
2326
|
+
"CONTOUR",
|
2327
|
+
"DETAIL",
|
2328
|
+
"EDGE_ENHANCE",
|
2329
|
+
"EDGE_ENHANCE_MORE",
|
2330
|
+
"EMBOSS",
|
2331
|
+
"FIND_EDGES",
|
2332
|
+
"SHARPEN",
|
2333
|
+
"SMOOTH",
|
2334
|
+
"SMOOTH_MORE",
|
2335
|
+
"MIN_FILTER",
|
2336
|
+
"MAX_FILTER",
|
2337
|
+
"MODE_FILTER",
|
2338
|
+
"MULTIBAND_FILTER",
|
2339
|
+
"GAUSSIAN_BLUR",
|
2340
|
+
"BOX_BLUR",
|
2341
|
+
"MEDIAN_FILTER",
|
2342
|
+
]
|
2343
|
+
print(
|
2344
|
+
"sets: a dict,'sharp:1.2','color','contrast:'auto' or 1.2','bright', 'crop: x_upperleft,y_upperleft, x_lowerright, y_lowerright','rotation','resize','rem or background'"
|
2345
|
+
)
|
1914
2346
|
print(f"usage: filter_kws 'dict' below:")
|
1915
2347
|
pp([str(i).lower() for i in supported_filters])
|
1916
2348
|
print("\nlog:\n")
|
2349
|
+
|
1917
2350
|
def confirm_rembg_models(model_name):
|
1918
2351
|
models_support = [
|
1919
2352
|
"u2net",
|
@@ -1929,8 +2362,11 @@ def imgsets(
|
|
1929
2362
|
print(f"model_name: {model_name}")
|
1930
2363
|
return model_name
|
1931
2364
|
else:
|
1932
|
-
print(
|
2365
|
+
print(
|
2366
|
+
f"{model_name} cannot be found, check the name:{models_support}, default('isnet-general-use') has been used"
|
2367
|
+
)
|
1933
2368
|
return "isnet-general-use"
|
2369
|
+
|
1934
2370
|
def auto_enhance(img):
|
1935
2371
|
"""
|
1936
2372
|
Automatically enhances the image based on its characteristics.
|
@@ -1957,48 +2393,61 @@ def imgsets(
|
|
1957
2393
|
contrast_factors = []
|
1958
2394
|
for channel in range(num_channels):
|
1959
2395
|
channel_histogram = img.split()[channel].histogram()
|
1960
|
-
brightness = sum(i * w for i, w in enumerate(channel_histogram))/sum(
|
2396
|
+
brightness = sum(i * w for i, w in enumerate(channel_histogram)) / sum(
|
2397
|
+
channel_histogram
|
2398
|
+
)
|
1961
2399
|
channel_min, channel_max = img.split()[channel].getextrema()
|
1962
2400
|
contrast = channel_max - channel_min
|
1963
2401
|
# Adjust calculations based on bit depth
|
1964
2402
|
normalization_factor = 2**bit_depth - 1 # Max value for the given bit depth
|
1965
|
-
brightness_factor = (
|
1966
|
-
|
2403
|
+
brightness_factor = (
|
2404
|
+
1.0 + (brightness - normalization_factor / 2) / normalization_factor
|
2405
|
+
)
|
2406
|
+
contrast_factor = (
|
2407
|
+
1.0 + (contrast - normalization_factor / 2) / normalization_factor
|
2408
|
+
)
|
1967
2409
|
brightness_factors.append(brightness_factor)
|
1968
2410
|
contrast_factors.append(contrast_factor)
|
1969
2411
|
# Calculate the average brightness and contrast factors across channels
|
1970
2412
|
avg_brightness_factor = sum(brightness_factors) / num_channels
|
1971
2413
|
avg_contrast_factor = sum(contrast_factors) / num_channels
|
1972
2414
|
return {"brightness": avg_brightness_factor, "contrast": avg_contrast_factor}
|
2415
|
+
|
1973
2416
|
# Load image if input is a file path
|
1974
2417
|
if isinstance(img, str):
|
1975
2418
|
img = load_img(img)
|
1976
2419
|
img_update = img.copy()
|
1977
2420
|
# Auto-enhance image if requested
|
2421
|
+
|
2422
|
+
auto = kwargs.get("auto", False)
|
2423
|
+
show = kwargs.get("show", True)
|
2424
|
+
show_axis = kwargs.get("show_axis", False)
|
2425
|
+
size = kwargs.get("size", None)
|
2426
|
+
figsize = kwargs.get("figsize", None)
|
2427
|
+
dpi = kwargs.get("dpi", 100)
|
2428
|
+
|
1978
2429
|
if auto:
|
1979
|
-
|
1980
|
-
|
1981
|
-
|
1982
|
-
sets = {}
|
1983
|
-
for k, value in sets.items():
|
2430
|
+
kwargs = {**auto_enhance(img_update), **kwargs}
|
2431
|
+
|
2432
|
+
for k, value in kwargs.items():
|
1984
2433
|
if "shar" in k.lower():
|
1985
2434
|
enhancer = ImageEnhance.Sharpness(img_update)
|
1986
2435
|
img_update = enhancer.enhance(value)
|
1987
|
-
elif "col" in k.lower() and
|
2436
|
+
elif "col" in k.lower() and "bg" not in k.lower():
|
1988
2437
|
enhancer = ImageEnhance.Color(img_update)
|
1989
2438
|
img_update = enhancer.enhance(value)
|
1990
2439
|
elif "contr" in k.lower():
|
1991
|
-
if value and isinstance(value,(float,int)):
|
2440
|
+
if value and isinstance(value, (float, int)):
|
1992
2441
|
enhancer = ImageEnhance.Contrast(img_update)
|
1993
2442
|
img_update = enhancer.enhance(value)
|
1994
2443
|
else:
|
1995
|
-
print(
|
2444
|
+
print("autocontrasted")
|
1996
2445
|
img_update = ImageOps.autocontrast(img_update)
|
1997
2446
|
elif "bri" in k.lower():
|
1998
2447
|
enhancer = ImageEnhance.Brightness(img_update)
|
1999
2448
|
img_update = enhancer.enhance(value)
|
2000
2449
|
elif "cro" in k.lower() or "cut" in k.lower():
|
2001
|
-
img_update=img_update.crop(value)
|
2450
|
+
img_update = img_update.crop(value)
|
2002
2451
|
elif "rota" in k.lower():
|
2003
2452
|
img_update = img_update.rotate(value)
|
2004
2453
|
elif "si" in k.lower():
|
@@ -2010,12 +2459,22 @@ def imgsets(
|
|
2010
2459
|
elif "contain" in k.lower():
|
2011
2460
|
img_update = ImageOps.contain(img_update, size=value)
|
2012
2461
|
elif "fit" in k.lower():
|
2013
|
-
|
2462
|
+
if isinstance(value, dict):
|
2463
|
+
if filter_kws:
|
2464
|
+
for filter_name, filter_value in filter_kws.items():
|
2465
|
+
img_update = apply_filter(img_update, filter_name, filter_value)
|
2466
|
+
else:
|
2467
|
+
img_update = ImageOps.fit(img_update, size=value)
|
2014
2468
|
elif "pad" in k.lower():
|
2015
2469
|
img_update = ImageOps.pad(img_update, size=value)
|
2016
|
-
elif
|
2017
|
-
if
|
2018
|
-
|
2470
|
+
elif "rem" in k.lower() or "rm" in k.lower() or "back" in k.lower():
|
2471
|
+
if isinstance(value, bool):
|
2472
|
+
session = new_session("isnet-general-use")
|
2473
|
+
img_update = remove(img_update, session=session)
|
2474
|
+
elif value and isinstance(value, (int, float, list)):
|
2475
|
+
print(
|
2476
|
+
'example usage: {"rm":[alpha_matting_background_threshold(20),alpha_matting_foreground_threshold(270),alpha_matting_erode_sive(11)]}'
|
2477
|
+
)
|
2019
2478
|
print("https://github.com/danielgatis/rembg/blob/main/USAGE.md")
|
2020
2479
|
# ### Parameters:
|
2021
2480
|
# data (Union[bytes, PILImage, np.ndarray]): The input image data.
|
@@ -2028,33 +2487,45 @@ def imgsets(
|
|
2028
2487
|
# post_process_mask (bool, optional): Flag indicating whether to post-process the masks. Defaults to False.
|
2029
2488
|
# bgcolor (Optional[Tuple[int, int, int, int]], optional): Background color for the cutout image. Defaults to None.
|
2030
2489
|
# ###
|
2031
|
-
if isinstance(value,int):
|
2032
|
-
value=[value]
|
2033
|
-
if len(value) <2:
|
2034
|
-
img_update = remove(
|
2035
|
-
|
2036
|
-
|
2037
|
-
|
2038
|
-
|
2039
|
-
|
2040
|
-
|
2041
|
-
|
2490
|
+
if isinstance(value, int):
|
2491
|
+
value = [value]
|
2492
|
+
if len(value) < 2:
|
2493
|
+
img_update = remove(
|
2494
|
+
img_update,
|
2495
|
+
alpha_matting=True,
|
2496
|
+
alpha_matting_background_threshold=value,
|
2497
|
+
)
|
2498
|
+
elif 2 <= len(value) < 3:
|
2499
|
+
img_update = remove(
|
2500
|
+
img_update,
|
2501
|
+
alpha_matting=True,
|
2502
|
+
alpha_matting_background_threshold=value[0],
|
2503
|
+
alpha_matting_foreground_threshold=value[1],
|
2504
|
+
)
|
2505
|
+
elif 3 <= len(value) < 4:
|
2506
|
+
img_update = remove(
|
2507
|
+
img_update,
|
2508
|
+
alpha_matting=True,
|
2509
|
+
alpha_matting_background_threshold=value[0],
|
2510
|
+
alpha_matting_foreground_threshold=value[1],
|
2511
|
+
alpha_matting_erode_size=value[2],
|
2512
|
+
)
|
2513
|
+
elif isinstance(value, tuple): # replace the background color
|
2514
|
+
if len(value) == 3:
|
2515
|
+
value += (255,)
|
2042
2516
|
img_update = remove(img_update, bgcolor=value)
|
2043
|
-
|
2517
|
+
elif isinstance(value, str):
|
2044
2518
|
if confirm_rembg_models(value):
|
2045
|
-
img_update=remove(img_update,session=new_session(value))
|
2519
|
+
img_update = remove(img_update, session=new_session(value))
|
2046
2520
|
else:
|
2047
|
-
img_update=remove(img_update)
|
2048
|
-
elif
|
2049
|
-
if isinstance(value,list):
|
2050
|
-
value=tuple(value)
|
2051
|
-
if isinstance(value,tuple):
|
2052
|
-
if len(value)==3:
|
2053
|
-
value+=(255,)
|
2521
|
+
img_update = remove(img_update)
|
2522
|
+
elif "bg" in k.lower() and "color" in k.lower():
|
2523
|
+
if isinstance(value, list):
|
2524
|
+
value = tuple(value)
|
2525
|
+
if isinstance(value, tuple): # replace the background color
|
2526
|
+
if len(value) == 3:
|
2527
|
+
value += (255,)
|
2054
2528
|
img_update = remove(img_update, bgcolor=value)
|
2055
|
-
if filter_kws:
|
2056
|
-
for filter_name, filter_value in filter_kws.items():
|
2057
|
-
img_update = apply_filter(img_update, filter_name, filter_value)
|
2058
2529
|
# Display the image if requested
|
2059
2530
|
if show:
|
2060
2531
|
if figsize is None:
|
@@ -2064,6 +2535,8 @@ def imgsets(
|
|
2064
2535
|
plt.imshow(img_update)
|
2065
2536
|
plt.axis("on") if show_axis else plt.axis("off")
|
2066
2537
|
return img_update
|
2538
|
+
|
2539
|
+
|
2067
2540
|
# # usage:
|
2068
2541
|
# img = imgsets(
|
2069
2542
|
# fpath,
|
@@ -2074,26 +2547,26 @@ def imgsets(
|
|
2074
2547
|
# )
|
2075
2548
|
|
2076
2549
|
|
2077
|
-
def thumbnail(dir_img_list,figsize=(10,10),dpi=100, dir_save=None, kind=
|
2550
|
+
def thumbnail(dir_img_list, figsize=(10, 10), dpi=100, dir_save=None, kind=".png"):
|
2078
2551
|
"""
|
2079
2552
|
Display a thumbnail figure of all images in the specified directory.
|
2080
2553
|
Args:
|
2081
2554
|
dir_img_list (list): List of the Directory containing the images.
|
2082
2555
|
"""
|
2083
2556
|
num_images = len(dir_img_list)
|
2084
|
-
if not kind.startswith(
|
2085
|
-
kind=
|
2557
|
+
if not kind.startswith("."):
|
2558
|
+
kind = "." + kind
|
2086
2559
|
|
2087
2560
|
if num_images == 0:
|
2088
2561
|
print("No images found to display.")
|
2089
2562
|
return
|
2090
|
-
grid_size = int(num_images
|
2091
|
-
fig, axs = plt.subplots(grid_size, grid_size, figsize=figsize,dpi=dpi)
|
2563
|
+
grid_size = int(num_images**0.5) + 1 # Determine grid size
|
2564
|
+
fig, axs = plt.subplots(grid_size, grid_size, figsize=figsize, dpi=dpi)
|
2092
2565
|
for ax, image_file in zip(axs.flatten(), dir_img_list):
|
2093
2566
|
try:
|
2094
2567
|
img = Image.open(image_file)
|
2095
2568
|
ax.imshow(img)
|
2096
|
-
ax.axis(
|
2569
|
+
ax.axis("off")
|
2097
2570
|
except:
|
2098
2571
|
continue
|
2099
2572
|
# for ax in axs.flatten():
|
@@ -2104,13 +2577,15 @@ def thumbnail(dir_img_list,figsize=(10,10),dpi=100, dir_save=None, kind='.png'):
|
|
2104
2577
|
plt.show()
|
2105
2578
|
else:
|
2106
2579
|
if basename(dir_save):
|
2107
|
-
fname= basename(dir_save) +kind
|
2580
|
+
fname = basename(dir_save) + kind
|
2108
2581
|
else:
|
2109
|
-
fname= "_thumbnail_"+basename(dirname(dir_save)[:-1])+
|
2582
|
+
fname = "_thumbnail_" + basename(dirname(dir_save)[:-1]) + ".png"
|
2110
2583
|
if dirname(dir_img_list[0]) == dirname(dir_save):
|
2111
|
-
figsave(dirname(dir_save[:-1]),fname)
|
2584
|
+
figsave(dirname(dir_save[:-1]), fname)
|
2112
2585
|
else:
|
2113
|
-
figsave(dirname(dir_save),fname)
|
2586
|
+
figsave(dirname(dir_save), fname)
|
2587
|
+
|
2588
|
+
|
2114
2589
|
# usage:
|
2115
2590
|
# fpath = "/Users/macjianfeng/Dropbox/github/python/py2ls/tests/xample_netfinder/images/"
|
2116
2591
|
# thumbnail(listdir(fpath,'png').fpath.to_list(),dir_save=dirname(fpath))
|
@@ -2127,6 +2602,8 @@ def read_mplstyle(style_file):
|
|
2127
2602
|
for i, j in style_dict.items():
|
2128
2603
|
print(f"\n{i}::::{j}")
|
2129
2604
|
return style_dict
|
2605
|
+
|
2606
|
+
|
2130
2607
|
# #example usage:
|
2131
2608
|
# style_file = "/ std-colors.mplstyle"
|
2132
2609
|
# style_dict = read_mplstyle(style_file)
|
@@ -2150,8 +2627,10 @@ def dir_lib(lib_oi):
|
|
2150
2627
|
else:
|
2151
2628
|
print(f"Cannot find the {lib_oi} in site-packages directory.")
|
2152
2629
|
return dir_list
|
2630
|
+
|
2631
|
+
|
2153
2632
|
# example usage:
|
2154
|
-
# dir_lib("seaborn")
|
2633
|
+
# dir_lib("seaborn")
|
2155
2634
|
|
2156
2635
|
|
2157
2636
|
# set up the colorlist, give the number, or the colormap's name
|
@@ -2162,6 +2641,7 @@ def get_color(n=1, cmap="auto", by="start"):
|
|
2162
2641
|
colors = [cmap_(i) for i in range(cmap_.N)]
|
2163
2642
|
return [matplotlib.colors.rgb2hex(color) for color in colors]
|
2164
2643
|
# usage: clist = cmap2hex("viridis")
|
2644
|
+
|
2165
2645
|
# cycle times, total number is n (default n=10)
|
2166
2646
|
def cycle2list(colorlist, n=10):
|
2167
2647
|
cycler_ = cycler(tmp=colorlist)
|
@@ -2171,18 +2651,21 @@ def get_color(n=1, cmap="auto", by="start"):
|
|
2171
2651
|
if i > n:
|
2172
2652
|
break
|
2173
2653
|
return clist
|
2654
|
+
|
2174
2655
|
def hue2rgb(hex_colors):
    """
    Convert hexadecimal color code(s) to RGB tuples with float components.

    Args:
        hex_colors (str | list): One hex code such as "#ff0000" (the leading
            "#" is optional), or a list of such codes.

    Returns:
        tuple | list | None: An (r, g, b) tuple, each component divided by
        255.0, for a string input; a list of such tuples for a list input;
        None for any other input type.
    """

    def hex_to_rgb(hex_color):
        """Converts a hexadecimal color code to RGB values."""
        # Strip the "#" from the argument itself. The previous check read the
        # enclosing `hex_colors`, which raised AttributeError for list input.
        hex_color = hex_color.lstrip("#")
        return tuple(int(hex_color[i : i + 2], 16) / 255.0 for i in (0, 2, 4))

    if isinstance(hex_colors, str):
        return hex_to_rgb(hex_colors)
    elif isinstance(hex_colors, list):
        # Converts a list of hexadecimal color codes to a list of RGB values.
        return [hex_to_rgb(hex_color) for hex_color in hex_colors]
    return None
|
2668
|
+
|
2186
2669
|
if "aut" in cmap:
|
2187
2670
|
colorlist = [
|
2188
2671
|
"#474747",
|
@@ -2195,7 +2678,7 @@ def get_color(n=1, cmap="auto", by="start"):
|
|
2195
2678
|
]
|
2196
2679
|
else:
|
2197
2680
|
colorlist = cmap2hex(cmap)
|
2198
|
-
if "st" in by.lower() or "be" in by.lower():
|
2681
|
+
if "st" in by.lower() or "be" in by.lower():
|
2199
2682
|
# cycle it
|
2200
2683
|
clist = cycle2list(colorlist, n=n)
|
2201
2684
|
if "l" in by.lower() or "p" in by.lower():
|
@@ -2208,6 +2691,7 @@ def get_color(n=1, cmap="auto", by="start"):
|
|
2208
2691
|
return clist # a color list
|
2209
2692
|
# example usage: clist = get_color(4,cmap="auto", by="start") # get_color(4, cmap="hot", by="linspace")
|
2210
2693
|
|
2694
|
+
|
2211
2695
|
"""
|
2212
2696
|
# n = 7
|
2213
2697
|
# clist = get_color(n, cmap="auto", how="linspace") # get_color(100)
|
@@ -2222,7 +2706,18 @@ def get_color(n=1, cmap="auto", by="start"):
|
|
2222
2706
|
|
2223
2707
|
|
2224
2708
|
class FileInfo:
|
2225
|
-
def __init__(
|
2709
|
+
def __init__(
|
2710
|
+
self,
|
2711
|
+
size,
|
2712
|
+
creation_time,
|
2713
|
+
ctime,
|
2714
|
+
mod_time,
|
2715
|
+
mtime,
|
2716
|
+
parent_dir,
|
2717
|
+
fname,
|
2718
|
+
kind,
|
2719
|
+
extra_info=None,
|
2720
|
+
):
|
2226
2721
|
self.size = size
|
2227
2722
|
self.creation_time = creation_time
|
2228
2723
|
self.ctime = ctime
|
@@ -2237,20 +2732,25 @@ class FileInfo:
|
|
2237
2732
|
print("to show the res: 'finfo(fpath).show()'")
|
2238
2733
|
|
2239
2734
|
def __repr__(self):
|
2240
|
-
return (
|
2241
|
-
|
2242
|
-
|
2735
|
+
return (
|
2736
|
+
f"FileInfo(size={self.size} MB, creation_time='{self.creation_time}', "
|
2737
|
+
f"ctime='{self.ctime}', mod_time='{self.mod_time}', mtime='{self.mtime}', "
|
2738
|
+
f"parent_dir='{self.parent_dir}', fname='{self.fname}', kind='{self.kind}')"
|
2739
|
+
)
|
2243
2740
|
|
2244
2741
|
def __str__(self):
|
2245
|
-
return (
|
2246
|
-
|
2247
|
-
|
2248
|
-
|
2249
|
-
|
2250
|
-
|
2251
|
-
|
2252
|
-
|
2253
|
-
|
2742
|
+
return (
|
2743
|
+
f"FileInfo:\n"
|
2744
|
+
f" Size: {self.size} MB\n"
|
2745
|
+
f" Creation Time: {self.creation_time}\n"
|
2746
|
+
f" CTime: {self.ctime}\n"
|
2747
|
+
f" Modification Time: {self.mod_time}\n"
|
2748
|
+
f" MTime: {self.mtime}\n"
|
2749
|
+
f" Parent Directory: {self.parent_dir}\n"
|
2750
|
+
f" File Name: {self.fname}\n"
|
2751
|
+
f" Kind: {self.kind}"
|
2752
|
+
)
|
2753
|
+
|
2254
2754
|
def show(self):
|
2255
2755
|
# Convert the object to a dictionary
|
2256
2756
|
return {
|
@@ -2262,12 +2762,27 @@ class FileInfo:
|
|
2262
2762
|
"parent_dir": self.parent_dir,
|
2263
2763
|
"fname": self.fname,
|
2264
2764
|
"kind": self.kind,
|
2265
|
-
**{
|
2765
|
+
**{
|
2766
|
+
key: getattr(self, key)
|
2767
|
+
for key in vars(self)
|
2768
|
+
if key
|
2769
|
+
not in [
|
2770
|
+
"size",
|
2771
|
+
"creation_time",
|
2772
|
+
"ctime",
|
2773
|
+
"mod_time",
|
2774
|
+
"mtime",
|
2775
|
+
"parent_dir",
|
2776
|
+
"fname",
|
2777
|
+
"kind",
|
2778
|
+
]
|
2779
|
+
},
|
2266
2780
|
}
|
2267
2781
|
|
2782
|
+
|
2268
2783
|
def finfo(fpath):
|
2269
2784
|
fname, fmt = os.path.splitext(fpath)
|
2270
|
-
dir_par = os.path.dirname(fpath) +
|
2785
|
+
dir_par = os.path.dirname(fpath) + "/"
|
2271
2786
|
data = {
|
2272
2787
|
"size": round(os.path.getsize(fpath) / 1024 / 1024, 3),
|
2273
2788
|
"creation_time": time.ctime(os.path.getctime(fpath)),
|
@@ -2276,12 +2791,12 @@ def finfo(fpath):
|
|
2276
2791
|
"mtime": time.ctime(os.path.getmtime(fpath)),
|
2277
2792
|
"parent_dir": dir_par,
|
2278
2793
|
"fname": fname.replace(dir_par, ""),
|
2279
|
-
"kind": fmt
|
2794
|
+
"kind": fmt,
|
2280
2795
|
}
|
2281
2796
|
extra_info = {}
|
2282
2797
|
if data["kind"] == ".pdf":
|
2283
2798
|
extra_info = pdfinfo_from_path(fpath)
|
2284
|
-
|
2799
|
+
|
2285
2800
|
return FileInfo(
|
2286
2801
|
size=data["size"],
|
2287
2802
|
creation_time=data["creation_time"],
|
@@ -2291,5 +2806,5 @@ def finfo(fpath):
|
|
2291
2806
|
parent_dir=data["parent_dir"],
|
2292
2807
|
fname=data["fname"],
|
2293
2808
|
kind=data["kind"],
|
2294
|
-
extra_info=extra_info
|
2295
|
-
)
|
2809
|
+
extra_info=extra_info,
|
2810
|
+
)
|