py2ls 0.1.8.0__py3-none-any.whl → 0.1.8.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
py2ls/ips.py
CHANGED
@@ -4,23 +4,23 @@ import pandas as pd
 import json
 import matplotlib
 import matplotlib.pyplot as plt
-import matplotlib.ticker as tck
+import matplotlib.ticker as tck
 from cycler import cycler
 from mpl_toolkits.mplot3d import Axes3D
 import seaborn as sns
 
-import sys, os,shutil,re, yaml,json,subprocess
+import sys, os, shutil, re, yaml, json, subprocess
 import importlib.util
 import time
 from dateutil import parser
 from datetime import datetime
 
-from PIL import Image,ImageEnhance, ImageOps,ImageFilter
-from rembg import remove,new_session
+from PIL import Image, ImageEnhance, ImageOps, ImageFilter
+from rembg import remove, new_session
 
 import docx
-from fpdf import FPDF
-from lxml import etree
+from fpdf import FPDF
+from lxml import etree
 from docx import Document
 from PyPDF2 import PdfReader
 from pptx import Presentation
@@ -40,24 +40,27 @@ from tqdm import tqdm
 import mimetypes
 from pprint import pp
 from collections import Counter
-from fuzzywuzzy import fuzz,process
+from fuzzywuzzy import fuzz, process
 from langdetect import detect
 from duckduckgo_search import DDGS
 
-from
+from . import netfinder
+from .plot import get_color
 
 try:
-    get_ipython().run_line_magic(
-    get_ipython().run_line_magic(
+    get_ipython().run_line_magic("load_ext", "autoreload")
+    get_ipython().run_line_magic("autoreload", "2")
 except NameError:
     pass
 
+
 def is_package_installed(package_name):
     """Check if a package is installed."""
     package_spec = importlib.util.find_spec(package_name)
     return package_spec is not None
-
-
+
+
+def upgrade(module="py2ls"):
    # try:
    #     pkg_resources.get_distribution(module)
    #     except pkg_resources.DistributionNotFound:
@@ -68,37 +71,45 @@ def upgrade(module='py2ls'):
     except subprocess.CalledProcessError as e:
         print(f"An error occurred while installing {module}: {e}")
         try:
-            subprocess.check_call(
+            subprocess.check_call(
+                [sys.executable, "-m", "pip", "install", "--upgrade", module]
+            )
         except subprocess.CalledProcessError as e:
             print(f"An error occurred while upgrading py2ls: {e}")
 
 
-dir_save=
+dir_save = "/Users/macjianfeng/Dropbox/Downloads/"
 
 
 def get_version(pkg):
     import importlib.metadata
+
     def get_v(pkg_name):
         try:
             version = importlib.metadata.version(pkg_name)
             print(f"version {pkg_name} == {version}")
         except importlib.metadata.PackageNotFoundError:
             print(f"Package '{pkg_name}' not found")
-
+
+    if isinstance(pkg, str):
         get_v(pkg)
-    elif isinstance(pkg,list):
+    elif isinstance(pkg, list):
         [get_v(pkg_) for pkg_ in pkg]
-
+
+
+# usage:
 # get_version(['pandas','numpy','py2ls'])
-
+
+
 def rm_folder(folder_path, verbose=True):
     try:
         shutil.rmtree(folder_path)
         if verbose:
-            print(f
+            print(f"Successfully deleted {folder_path}")
     except Exception as e:
         if verbose:
-            print(f
+            print(f"Failed to delete {folder_path}. Reason: {e}")
+
 
 def fremove(path, verbose=True):
     """
@@ -111,20 +122,20 @@ def fremove(path, verbose=True):
         if os.path.isdir(path):
             shutil.rmtree(path)
             if verbose:
-                print(f
+                print(f"Successfully deleted folder {path}")
         elif os.path.isfile(path):
             os.remove(path)
             if verbose:
-                print(f
+                print(f"Successfully deleted file {path}")
         else:
             if verbose:
-                print(f
+                print(f"Path {path} does not exist")
     except Exception as e:
         if verbose:
-            print(f
+            print(f"Failed to delete {path}. Reason: {e}")
 
 
-def get_cwd(verbose:bool = True):
+def get_cwd(verbose: bool = True):
     """
     get_cwd: to get the current working directory
     Args:
@@ -138,26 +149,39 @@ def get_cwd(verbose:bool = True):
         # This works in an interactive environment (like a Jupyter notebook)
         script_dir = os.getcwd()
         if verbose:
-            print("os.getcwd():", script_dir)
+            print("os.getcwd():", script_dir)
         return script_dir
 
-
+
+def search(
+    query,
+    limit=5,
+    kind="text",
+    output="df",
+    verbose=False,
+    download=True,
+    dir_save=dir_save,
+):
     from duckduckgo_search import DDGS
-
+
+    if "te" in kind.lower():
         results = DDGS().text(query, max_results=limit)
-        res=pd.DataFrame(results)
-        res.rename(columns={"href":"links"},inplace=True)
+        res = pd.DataFrame(results)
+        res.rename(columns={"href": "links"}, inplace=True)
     if verbose:
         print(f'searching "{query}": got the results below\n{res}')
     if download:
         try:
-            netfinder.downloader(
+            netfinder.downloader(
+                url=res.links.tolist(), dir_save=dir_save, verbose=verbose
+            )
         except:
             if verbose:
                 print(f"failed link")
     return res
 
-
+
+
+def echo(*args, **kwargs):
     """
     query, model="gpt", verbose=True, log=True, dir_save=dir_save
     a ai chat tool
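The hunk above introduces a small DuckDuckGo-backed search() helper. A minimal usage sketch, assuming the package layout shown in this diff (py2ls/ips.py); the query string and options are illustrative only:

# Hypothetical usage sketch, not part of the diff
from py2ls.ips import search

# text search; the result DataFrame's "href" column is renamed to "links"
res = search("duckduckgo python api", limit=3, kind="text", download=False, verbose=True)
print(res.links.tolist())

Passing download=False avoids the netfinder.downloader() call and the hard-coded default dir_save.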
@@ -172,12 +196,12 @@ def echo(*args,**kwargs):
         str: the answer from ai
     """
     global dir_save
-
-    query=None
-    model=kwargs.get(
-    verbose=kwargs.get(
-    log=kwargs.get(
-    dir_save=kwargs.get(
+
+    query = None
+    model = kwargs.get("model", "gpt")
+    verbose = kwargs.get("verbose", True)
+    log = kwargs.get("log", True)
+    dir_save = kwargs.get("dir_save", dir_save)
     for arg in args:
         if isinstance(arg, str):
             if os.path.isdir(arg):
@@ -191,15 +215,17 @@ def echo(*args,**kwargs):
         elif isinstance(arg, dict):
             verbose = arg.get("verbose", verbose)
             log = arg.get("log", log)
+
     def is_in_any(str_candi_short, str_full, ignore_case=True):
         if isinstance(str_candi_short, str):
-            str_candi_short=[str_candi_short]
-        res_bool=[]
+            str_candi_short = [str_candi_short]
+        res_bool = []
         if ignore_case:
-            [res_bool.append(i in str_full.lower())
+            [res_bool.append(i in str_full.lower()) for i in str_candi_short]
         else:
-            [res_bool.append(i in str_full)
+            [res_bool.append(i in str_full) for i in str_candi_short]
         return any(res_bool)
+
     def valid_mod_name(str_fly):
         if is_in_any(str_fly, "claude-3-haiku"):
             return "claude-3-haiku"
@@ -210,49 +236,56 @@ def echo(*args,**kwargs):
         elif is_in_any(str_fly, "mixtral-8x7b"):
             return "mixtral-8x7b"
         else:
-            print(
-
+            print(
+                f"not support your model{model}, supported models: 'claude','gpt(default)', 'llama','mixtral'"
+            )
+            return "gpt-3.5"  # default model
+
     model_valid = valid_mod_name(model)
-    res=DDGS().chat(query, model=model_valid)
+    res = DDGS().chat(query, model=model_valid)
     if verbose:
         pp(res)
     if log:
-        dt_str=datetime.fromtimestamp(time.time()).strftime(
+        dt_str = datetime.fromtimestamp(time.time()).strftime("%Y-%m-%d_%H:%M:%S")
         res_ = f"\n\n####Q:{query}\n\n#####Ans:{dt_str}\n\n>{res}\n"
         if bool(os.path.basename(dir_save)):
             fpath = dir_save
         else:
             os.makedirs(dir_save, exist_ok=True)
             fpath = os.path.join(dir_save, f"log_ai.md")
-        fupdate(fpath=fpath,content=res_)
+        fupdate(fpath=fpath, content=res_)
         print(f"log file:{fpath}")
     return res
 
+
 def chat(*args, **kwargs):
     return echo(*args, **kwargs)
 
+
 def ai(*args, **kwargs):
     return echo(*args, **kwargs)
 
-
-
-
-
-
-
-
+
+def detect_lang(text, output="lang", verbose=True):
+    dir_curr_script = os.path.dirname(os.path.abspath(__file__))
+    dir_lang_code = dir_curr_script + "/data/lang_code_iso639.json"
+    print(dir_curr_script, os.getcwd(), dir_lang_code)
+    lang_code_iso639 = fload(dir_lang_code)
+    l_lang, l_code = [], []
+    [[l_lang.append(v), l_code.append(k)] for v, k in lang_code_iso639.items()]
     try:
         if is_text(text):
-            code_detect=detect(text)
-            if
-                return l_code[strcmp(code_detect,l_code, verbose=verbose)[1]]
+            code_detect = detect(text)
+            if "c" in output.lower():  # return code
+                return l_code[strcmp(code_detect, l_code, verbose=verbose)[1]]
             else:
-                return l_lang[strcmp(code_detect,l_code, verbose=verbose)[1]]
+                return l_lang[strcmp(code_detect, l_code, verbose=verbose)[1]]
         else:
             print(f"{text} is not supported")
-            return
+            return "no"
     except:
-        return
+        return "no"
+
 
 def is_text(s):
     has_alpha = any(char.isalpha() for char in s)
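echo() (also reachable as chat() and ai()) reads its options from kwargs and forwards the query to DDGS().chat(). A hedged sketch using only the keywords introduced above; it assumes DuckDuckGo chat is reachable at call time:

# Hypothetical usage sketch, not part of the diff
from py2ls.ips import echo

answer = echo("What does fuzzy string matching do?", model="gpt", verbose=True, log=False)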
@@ -260,7 +293,8 @@ def is_text(s):
     # no_special = not re.search(r'[^A-Za-z0-9\s]', s)
     return has_alpha and has_non_alpha
 
-
+
+def strcmp(search_term, candidates, ignore_case=True, verbose=True, scorer="WR"):
     """
     Compares a search term with a list of candidate strings and finds the best match based on similarity score.
 
@@ -273,21 +307,23 @@ def strcmp(search_term, candidates, ignore_case=True, verbose=True, scorer='WR')
     Returns:
     tuple: A tuple containing the best match and its index in the candidates list.
     """
+
     def to_lower(s, ignore_case=True):
-        #Converts a string or list of strings to lowercase if ignore_case is True.
+        # Converts a string or list of strings to lowercase if ignore_case is True.
         if ignore_case:
             if isinstance(s, str):
                 return s.lower()
             elif isinstance(s, list):
                 return [elem.lower() for elem in s]
         return s
-
+
+    str1_, str2_ = to_lower(search_term, ignore_case), to_lower(candidates, ignore_case)
     if isinstance(str2_, list):
-        if
+        if "part" in scorer.lower():
             similarity_scores = [fuzz.partial_ratio(str1_, word) for word in str2_]
-        elif
+        elif "W" in scorer.lower():
             similarity_scores = [fuzz.WRatio(str1_, word) for word in str2_]
-        elif
+        elif "Ratio" in scorer.lower():
             similarity_scores = [fuzz.Ratio(str1_, word) for word in str2_]
         else:
             similarity_scores = [fuzz.WRatio(str1_, word) for word in str2_]
@@ -295,11 +331,11 @@ def strcmp(search_term, candidates, ignore_case=True, verbose=True, scorer='WR')
         best_match_score = similarity_scores[best_match_index]
     else:
         best_match_index = 0
-        if
+        if "part" in scorer.lower():
             best_match_score = fuzz.partial_ratio(str1_, str2_)
-        elif
+        elif "W" in scorer.lower():
             best_match_score = fuzz.WRatio(str1_, str2_)
-        elif
+        elif "Ratio" in scorer.lower():
             best_match_score = fuzz.Ratio(str1_, str2_)
         else:
             best_match_score = fuzz.WRatio(str1_, str2_)
@@ -309,11 +345,13 @@ def strcmp(search_term, candidates, ignore_case=True, verbose=True, scorer='WR')
         print(f"建议: {best_match}")
     return candidates[best_match_index], best_match_index
 
+
 # Example usaged
 # str1 = "plos biology"
 # str2 = ['PLoS Computational Biology', 'PLOS BIOLOGY']
 # best_match, idx = strcmp(str1, str2, ignore_case=1)
 
+
 def counter(list_, verbose=True):
     c = Counter(list_)
     # Print the name counts
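strcmp() returns the best candidate and its index; the scorer keyword selects the fuzzywuzzy scorer ("part" → partial_ratio, "W" → WRatio, "Ratio" → Ratio). A short sketch built from the example already given in the comments above:

# Usage sketch based on the commented example above, not part of the diff
from py2ls.ips import strcmp

best_match, idx = strcmp("plos biology", ["PLoS Computational Biology", "PLOS BIOLOGY"], ignore_case=True, scorer="part")
print(best_match, idx)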
@@ -321,14 +359,15 @@ def counter(list_, verbose=True):
         if verbose:
             print(f"{item}: {count}")
     return c
+
+
 # usage:
 # print(f"Return an iterator over elements repeating each as many times as its count:\n{sorted(c.elements())}")
 # print(f"Return a list of the n most common elements:\n{c.most_common()}")
 # print(f"Compute the sum of the counts:\n{c.total()}")
 
 
-
-def str2time(time_str, fmt='24'):
+def str2time(time_str, fmt="24"):
     """
     Convert a time string into the specified format.
     Parameters:
@@ -342,42 +381,47 @@ def str2time(time_str, fmt='24'):
         %p represents AM or PM.
     - str: The converted time string.
     """
+
     def time_len_corr(time_str):
-        time_str_=
-
-
+        time_str_ = (
+            ssplit(time_str, by=[":", " ", "digital_num"]) if ":" in time_str else None
+        )
+        time_str_split = []
+        [time_str_split.append(i) for i in time_str_ if is_num(i)]
         if time_str_split:
-            if len(time_str_split)==2:
-                H,M=time_str_split
-                time_str_full=H+":"+M+":00"
-            elif len(time_str_split)==3:
-                H,M,S=time_str_split
-                time_str_full=H+":"+M+":"+S
+            if len(time_str_split) == 2:
+                H, M = time_str_split
+                time_str_full = H + ":" + M + ":00"
+            elif len(time_str_split) == 3:
+                H, M, S = time_str_split
+                time_str_full = H + ":" + M + ":" + S
         else:
-            time_str_full=time_str_
-        if
-            time_str_full+=" AM"
-        elif "pm"in time_str.lower():
-            time_str_full +=" PM"
+            time_str_full = time_str_
+        if "am" in time_str.lower():
+            time_str_full += " AM"
+        elif "pm" in time_str.lower():
+            time_str_full += " PM"
         return time_str_full
-
+
+    if "12" in fmt:
         fmt = "%I:%M:%S %p"
-    elif
+    elif "24" in fmt:
         fmt = "%H:%M:%S"
 
     try:
         # Try to parse the time string assuming it could be in 24-hour or 12-hour format
-        time_obj = datetime.strptime(time_len_corr(time_str),
+        time_obj = datetime.strptime(time_len_corr(time_str), "%H:%M:%S")
     except ValueError:
         try:
-            time_obj = datetime.strptime(time_len_corr(time_str),
+            time_obj = datetime.strptime(time_len_corr(time_str), "%I:%M:%S %p")
         except ValueError as e:
             raise ValueError(f"Unable to parse time string: {time_str}. Error: {e}")
-
+
     # Format the time object to the desired output format
     formatted_time = time_obj.strftime(fmt)
     return formatted_time
 
+
 # # Example usage:
 # time_str1 = "14:30:45"
 # time_str2 = "02:30:45 PM"
@@ -388,7 +432,8 @@ def str2time(time_str, fmt='24'):
 # print(formatted_time1) # Output: 02:30:45 PM
 # print(formatted_time2) # Output: 14:30:45
 
-
+
+def str2date(date_str, fmt="%Y-%m-%d_%H:%M:%S"):
     """
     Convert a date string into the specified format.
     Parameters:
@@ -404,11 +449,14 @@ def str2date(date_str, fmt='%Y-%m-%d_%H:%M:%S'):
     # Format the date object to the desired output format
     formatted_date = date_obj.strftime(fmt)
     return formatted_date
+
+
 # str1=str2date(num2str(20240625),fmt="%a %d-%B-%Y")
 # print(str1)
 # str2=str2num(str2date(str1,fmt='%a %Y%m%d'))
 # print(str2)
 
+
 def str2num(s, *args):
     delimiter = None
     round_digits = None
@@ -425,11 +473,11 @@ def str2num(s, *args):
     except ValueError:
         try:
             numerized = numerize(s)
-            num = int(numerized) if
+            num = int(numerized) if "." not in numerized else float(numerized)
         except Exception as e:
             # Attempt to handle multiple number segments
             try:
-                number_segments = ssplit(s,by=
+                number_segments = ssplit(s, by="number_strings")
                 nums = []
                 for segment in number_segments:
                     try:
@@ -439,7 +487,9 @@ def str2num(s, *args):
                 if len(nums) == 1:
                     num = nums[0]
                 else:
-                    raise ValueError(
+                    raise ValueError(
+                        "Multiple number segments found, cannot determine single numeric value"
+                    )
             except Exception as e:
                 raise ValueError(f"Cannot convert {s} to a number: {e}")
 
@@ -454,6 +504,8 @@ def str2num(s, *args):
         return num_str
 
     return num
+
+
 # Examples
 # print(str2num("123")) # Output: 123
 # print(str2num("123.456", 2)) # Output: 123.46
@@ -495,13 +547,15 @@ def num2str(num, *args):
             num_str = "{:,}".format(int(num_str_parts[0]))
 
     return num_str
+
+
 # Examples
 # print(num2str(123),type(num2str(123))) # Output: "123"
 # print(num2str(123.456, 2),type(num2str(123.456, 2))) # Output: "123.46"
 # print(num2str(7000.125, 2),type(num2str(7000.125, 2))) # Output: "7000.13"
 # print(num2str(12345.6789, ","),type(num2str(12345.6789, ","))) # Output: "12,345.6789"
 # print(num2str(7000.00, ","),type(num2str(7000.00, ","))) # Output: "7,000.00"
-def sreplace(*args
+def sreplace(*args, **kwargs):
     """
     sreplace(text, by=None, robust=True)
     Replace specified substrings in the input text with provided replacements.
@@ -515,19 +569,19 @@ def sreplace(*args,**kwargs):
     str: The text after replacements have been made.
     """
     text = None
-    by = kwargs.get(
-    robust = kwargs.get(
-
+    by = kwargs.get("by", None)
+    robust = kwargs.get("robust", True)
+
     for arg in args:
-        if isinstance(arg,str):
-            text=arg
-        elif isinstance(arg,dict):
-            by=arg
-        elif isinstance(arg,bool):
-            robust=arg
+        if isinstance(arg, str):
+            text = arg
+        elif isinstance(arg, dict):
+            by = arg
+        elif isinstance(arg, bool):
+            robust = arg
         else:
             Error(f"{type(arg)} is not supported")
-
+
     # Default replacements for newline and tab characters
     default_replacements = {
         "\a": "",
@@ -558,47 +612,76 @@ def sreplace(*args,**kwargs):
     for k, v in by.items():
         text = text.replace(k, v)
     return text
+
+
 # usage:
 # sreplace(text, by=dict(old_str='new_str'), robust=True)
 
-
-
-
-
-
+
+def paper_size(paper_type_str="a4"):
+    df = pd.DataFrame(
+        {
+            "a0": [841, 1189],
+            "a1": [594, 841],
+            "a2": [420, 594],
+            "a3": [297, 420],
+            "a4": [210, 297],
+            "a5": [148, 210],
+            "a6": [105, 148],
+            "a7": [74, 105],
+            "b0": [1028, 1456],
+            "b1": [707, 1000],
+            "b2": [514, 728],
+            "b3": [364, 514],
+            "b4": [257, 364],
+            "b5": [182, 257],
+            "b6": [128, 182],
+            "letter": [215.9, 279.4],
+            "legal": [215.9, 355.6],
+            "business card": [85.6, 53.98],
+            "photo china passport": [33, 48],
+            "passport single": [125, 88],
+            "visa": [105, 74],
+            "sim": [25, 15],
+        }
+    )
     for name in df.columns:
         if paper_type_str in name.lower():
-            paper_type=name
+            paper_type = name
     if not paper_type:
-        paper_type=
+        paper_type = "a4"  # default
     return df[paper_type].tolist()
 
+
 def docx2pdf(dir_docx, dir_pdf=None):
     if dir_pdf:
-        convert(dir_docx,dir_pdf)
+        convert(dir_docx, dir_pdf)
     else:
         convert(dir_docx)
 
-
+
+def img2pdf(dir_img, kind="jpeg", page=None, dir_save=None, page_size="a4", dpi=300):
     def mm_to_point(size):
-        return (image2pdf.mm_to_pt(size[0]),image2pdf.mm_to_pt(size[1]))
+        return (image2pdf.mm_to_pt(size[0]), image2pdf.mm_to_pt(size[1]))
+
     def set_dpi(x):
-        dpix=dpiy=x
+        dpix = dpiy = x
         return image2pdf.get_fixed_dpi_layout_fun((dpix, dpiy))
+
     if not kind.startswith("."):
-        kind="."+kind
+        kind = "." + kind
     if dir_save is None:
-        dir_save = dir_img.replace(kind,
-    imgs = []
+        dir_save = dir_img.replace(kind, ".pdf")
+    imgs = []
     if os.path.isdir(dir_img):
         if not dir_save.endswith(".pdf"):
-            dir_save+="#merged_img2pdf.pdf"
+            dir_save += "#merged_img2pdf.pdf"
         if page is None:
-            select_range = listdir(dir_img,kind=kind).fpath
+            select_range = listdir(dir_img, kind=kind).fpath
         else:
-            if not isinstance(page, (np.ndarray,list,range)):
-                page=[page]
-            select_range = listdir(dir_img,kind=kind)[
+            if not isinstance(page, (np.ndarray, list, range)):
+                page = [page]
+            select_range = listdir(dir_img, kind=kind)["fpath"][page]
         for fname in select_range:
             if not fname.endswith(kind):
                 continue
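paper_size() looks up page dimensions in millimetres from the new table, and img2pdf() converts a folder of images (or a single image) into a PDF sized accordingly. A hedged sketch with hypothetical paths:

# Hypothetical usage sketch, not part of the diff
from py2ls.ips import paper_size, img2pdf

print(paper_size("a4"))  # [210, 297] per the table above
img2pdf("scans/", kind="tif", page=range(3, 7, 2), page_size="a4", dpi=300)  # hypothetical folder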
@@ -607,24 +690,27 @@ def img2pdf(dir_img, kind="jpeg",page=None, dir_save=None, page_size="a4", dpi=3
                 continue
             imgs.append(path)
     else:
-        imgs=[os.path.isdir(dir_img),dir_img]
+        imgs = [os.path.isdir(dir_img), dir_img]
 
     if page_size:
-        if isinstance(page_size,str):
-            pdf_in_mm=mm_to_point(paper_size(page_size))
+        if isinstance(page_size, str):
+            pdf_in_mm = mm_to_point(paper_size(page_size))
         else:
             print("default: page_size = (210,297)")
-            pdf_in_mm=mm_to_point(page_size)
+            pdf_in_mm = mm_to_point(page_size)
             print(f"page size was set to {page_size}")
-        p_size= image2pdf.get_layout_fun(pdf_in_mm)
+        p_size = image2pdf.get_layout_fun(pdf_in_mm)
     else:
         p_size = set_dpi(dpi)
-    with open(dir_save,"wb") as f:
+    with open(dir_save, "wb") as f:
         f.write(image2pdf.convert(imgs, layout_fun=p_size))
+
+
 # usage:
 # dir_img="/Users/macjianfeng/Dropbox/00-Personal/2015-History/2012-2015_兰州大学/120901-大学课件/生物统计学 陆卫/复习题/"
 # img2pdf(dir_img,kind='tif', page=range(3,7,2))
 
+
 def pdf2ppt(dir_pdf, dir_ppt):
     prs = Presentation()
 
@@ -639,21 +725,26 @@ def pdf2ppt(dir_pdf, dir_ppt):
         text = page.extract_text()
 
         # Add a slide for each page's content
-        slide_layout = prs.slide_layouts[
+        slide_layout = prs.slide_layouts[
+            5
+        ]  # Use slide layout that suits your needs
         slide = prs.slides.add_slide(slide_layout)
         slide.shapes.title.text = f"Page {page_num + 1}"
-        slide.shapes.add_textbox(
+        slide.shapes.add_textbox(
+            Inches(1), Inches(1.5), Inches(8), Inches(5)
+        ).text = text
 
     # Save the PowerPoint presentation
     prs.save(dir_ppt)
     print(f"Conversion from {dir_pdf} to {dir_ppt} complete.")
 
 
-def ssplit(text, by="space", verbose=False,strict=False, **kws):
+def ssplit(text, by="space", verbose=False, strict=False, **kws):
     if isinstance(text, list):
-        nested_list= [ssplit(i,by=by,verbose=verbose
+        nested_list = [ssplit(i, by=by, verbose=verbose, **kws) for i in text]
         flat_list = [item for sublist in nested_list for item in sublist]
         return flat_list
+
     def split_by_word_length(text, length):
         return [word for word in text.split() if len(word) == length]
 
@@ -677,10 +768,10 @@ def ssplit(text, by="space", verbose=False,strict=False, **kws):
         return split_text
 
     def split_by_regex_lookahead(text, pattern):
-        return re.split(f
-
+        return re.split(f"(?<={pattern})", text)
+
     def split_by_regex_end(text, pattern):
-        return re.split(f
+        return re.split(f"(?={pattern})", text)
 
     # def split_by_sentence_endings(text):
     #     return re.split(r"(?<=[.!?])", text)
@@ -688,24 +779,27 @@ def ssplit(text, by="space", verbose=False,strict=False, **kws):
        # return re.split(r"([^\x00-\x7F\w\s,.!?:\"'()\-]+)", text)
        # return re.split(r"[^\x00-\x7F]+", text)
        return re.split(r"([^\x00-\x7F]+)", text)
+
    def split_by_consecutive_non_alphanumeric(text):
        return re.split(r"\W+", text)

    def split_by_fixed_length_chunks(text, length):
        return [text[i : i + length] for i in range(0, len(text), length)]
-
+
+    def split_by_sent_num(text, n=10):
        # split text into sentences
-        text_split_by_sent=sent_tokenize(text)
-        cut_loc_array=np.arange(0,len(text_split_by_sent),n)
-        if cut_loc_array[-1]!=len(text_split_by_sent):
-            cut_loc=np.append(cut_loc_array,len(text_split_by_sent))
+        text_split_by_sent = sent_tokenize(text)
+        cut_loc_array = np.arange(0, len(text_split_by_sent), n)
+        if cut_loc_array[-1] != len(text_split_by_sent):
+            cut_loc = np.append(cut_loc_array, len(text_split_by_sent))
        else:
            cut_loc = cut_loc_array
        # get text in section (e.g., every 10 sentences)
-        text_section=[]
-        for i,j in pairwise(cut_loc):
+        text_section = []
+        for i, j in pairwise(cut_loc):
            text_section.append(text_split_by_sent[i:j])
        return text_section
+
    def split_general(text, by, verbose=False, ignore_case=False):
        if ignore_case:
            if verbose:
@@ -717,8 +811,10 @@ def ssplit(text, by="space", verbose=False,strict=False, **kws):
            if verbose:
                print(f"used {by} to split, ignore_case=False")
            return text.split(by)
+
    def reg_split(text, pattern):
        return re.split(pattern, text)
+
    if ("sp" in by or "white" in by) and not strict:
        if verbose:
            print(f"splited by space")
@@ -735,14 +831,20 @@ def ssplit(text, by="space", verbose=False,strict=False, **kws):
        if verbose:
            print(f"split_by_multiple_delimiters: ['|','&']")
        return split_by_multiple_delimiters(text, by)
-    elif
+    elif (
+        all([("digi" in by or "num" in by), not "sent" in by, not "str" in by])
+        and not strict
+    ):
        if verbose:
            print(f"splited by digital (numbers)")
        return re.split(r"(\d+)", text)
-    elif all([("digi" in by or "num" in by),
+    elif all([("digi" in by or "num" in by), "str" in by]) and not strict:
        if verbose:
            print(f"Splitting by (number strings)")
-        pattern = re.compile(
+        pattern = re.compile(
+            r"\b((?:one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty|thirty|forty|fifty|sixty|seventy|eighty|ninety|hundred|thousand|million|billion|trillion|and|[\d,]+(?:\.\d+)?)(?:[-\s]?(?:one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty|thirty|forty|fifty|sixty|seventy|eighty|ninety|hundred|thousand|million|billion|trillion|and|[\d,]+(?:\.\d+)?))*)\b",
+            re.IGNORECASE,
+        )
        return re.split(pattern, text)
    elif ("pun" in by) and not strict:
        if verbose:
@@ -760,12 +862,12 @@ def ssplit(text, by="space", verbose=False,strict=False, **kws):
        if verbose:
            print(f"splited by word")
        return word_tokenize(text)
-    elif ("sen" in by and not
+    elif ("sen" in by and not "num" in by) and not strict:
        if verbose:
            print(f"splited by sentence")
        return sent_tokenize(text)
-    elif (
-        return split_by_sent_num(text
+    elif ("sen" in by and "num" in by) and not strict:
+        return split_by_sent_num(text, **kws)
    elif ("cha" in by) and not strict:
        if verbose:
            print(f"splited by chracters")
@@ -803,32 +905,32 @@ def ssplit(text, by="space", verbose=False,strict=False, **kws):
        if verbose:
            print(f"splited by customed, re; => {by}")
        return reg_split(text, **kws)
-    elif (
+    elif ("lang" in by or "eng" in by) and not strict:
        return split_non_ascii(text)
    else:
        return split_general(text, by, verbose=verbose, **kws)


-def pdf2img(dir_pdf, dir_save=None, page=None, kind="png",verbose=True, **kws):
+def pdf2img(dir_pdf, dir_save=None, page=None, kind="png", verbose=True, **kws):
    df_dir_img_single_page = pd.DataFrame()
    dir_single_page = []
    if verbose:
        pp(pdfinfo_from_path(dir_pdf))
    if isinstance(page, tuple) and page:
        page = list(page)
-    if isinstance(page,int):
-        page=[page]
+    if isinstance(page, int):
+        page = [page]
    if page is None:
        page = [pdfinfo_from_path(dir_pdf)["Pages"]]
-    if len(page)==1 and page != pdfinfo_from_path(dir_pdf)["Pages"]:
-        page=[page[0], page[0]]
+    if len(page) == 1 and page != pdfinfo_from_path(dir_pdf)["Pages"]:
+        page = [page[0], page[0]]
    else:
-        page=[1, page[0]]
+        page = [1, page[0]]
    pages = convert_from_path(dir_pdf, first_page=page[0], last_page=page[1], **kws)
    if dir_save is None:
        dir_save = newfolder(dirname(dir_pdf), basename(dir_pdf).split(".")[0] + "_img")
    for i, page in enumerate(pages):
-        if verbose:
+        if verbose:
            print(f"processing page: {i+1}")
        if i < 9:
            dir_img_each_page = dir_save + f"page_0{i+1}.png"
@@ -839,6 +941,7 @@ def pdf2img(dir_pdf, dir_save=None, page=None, kind="png",verbose=True, **kws):
    df_dir_img_single_page["fpath"] = dir_single_page
    return df_dir_img_single_page

+
# dir_pdf = "/Users/macjianfeng/Dropbox/github/python/240308_Python Data Science Handbook.pdf"
# df_page = pdf2img(dir_pdf, page=[1, 5],dpi=300)
def get_encoding(fpath, alternative_encodings=None, verbose=False):
@@ -855,14 +958,37 @@ def get_encoding(fpath, alternative_encodings=None, verbose=False):
    """
    if alternative_encodings is None:
        alternative_encodings = [
-
-
-
-
-
-
-
-
+            "utf-8",
+            "latin1",
+            "windows-1252",
+            "iso-8859-1",
+            "iso-8859-2",
+            "iso-8859-3",
+            "iso-8859-4",
+            "iso-8859-5",
+            "iso-8859-6",
+            "iso-8859-7",
+            "iso-8859-8",
+            "iso-8859-9",
+            "windows-1250",
+            "windows-1251",
+            "windows-1253",
+            "windows-1254",
+            "windows-1255",
+            "windows-1256",
+            "windows-1257",
+            "windows-1258",
+            "big5",
+            "gb18030",
+            "shift_jis",
+            "euc_jp",
+            "koi8_r",
+            "mac_roman",
+            "mac_central_europe",
+            "mac_greek",
+            "mac_cyrillic",
+            "mac_arabic",
+            "mac_hebrew",
        ]

    if not os.path.isfile(fpath):
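get_encoding() simply tries each candidate codec until open()/read() succeeds. A sketch with a hypothetical file; treating the return value as the detected codec name is an assumption, since the return statement is outside this hunk:

# Hypothetical usage sketch, not part of the diff
from py2ls.ips import get_encoding

enc = get_encoding("legacy_notes.txt", verbose=True)  # hypothetical file path
print(enc)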
@@ -870,7 +996,7 @@ def get_encoding(fpath, alternative_encodings=None, verbose=False):
 
     for enc in alternative_encodings:
         try:
-            with open(fpath, mode=
+            with open(fpath, mode="r", encoding=enc) as file:
                 file.read()  # Try to read the file
                 if verbose:
                     print(f"Successfully detected encoding: {enc}")
@@ -895,6 +1021,7 @@ def fload(fpath, kind=None, **kwargs):
     Returns:
         content: The content loaded from the file.
     """
+
     def load_txt_md(fpath):
         with open(fpath, "r") as file:
             content = file.read()
@@ -920,7 +1047,7 @@ def fload(fpath, kind=None, **kwargs):
         root = tree.getroot()
         return etree.tostring(root, pretty_print=True).decode()
 
-    def load_csv(fpath, engine=
+    def load_csv(fpath, engine="pyarrow", **kwargs):
         print(f"engine={engine}")
         df = pd.read_csv(fpath, engine=engine, **kwargs)
         return df
@@ -928,35 +1055,36 @@ def fload(fpath, kind=None, **kwargs):
     def load_xlsx(fpath, **kwargs):
         df = pd.read_excel(fpath, **kwargs)
         return df
-
-
+
+    def load_ipynb(fpath, **kwargs):
+        as_version = kwargs.get("as_version", 4)
         with open(fpath, "r") as file:
             nb = nbformat.read(file, as_version=as_version)
         md_exporter = MarkdownExporter()
         md_body, _ = md_exporter.from_notebook_node(nb)
         return md_body
-
-    def load_pdf(fpath, page=
+
+    def load_pdf(fpath, page="all", verbose=False, **kwargs):
         """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        Parameters:
+        fpath: The path to the PDF file to be loaded.
+        page (optional):
+            Specifies which page or pages to extract text from. By default, it's set to "all", which means text from all
+            pages will be returned. It can also be an integer to specify a single page number or a list of integers to
+            specify multiple pages.
+        verbose (optional):
+            If True, prints the total number of pages processed.
+        Functionality:
+        It initializes an empty dictionary text_dict to store page numbers as keys and their corresponding text as values.
+        It iterates through each page of the PDF file using a for loop.
+        For each page, it extracts the text using PyPDF2's extract_text() method and stores it in text_dict with the page number incremented by 1 as the key.
+        If the page parameter is an integer, it converts it into a list containing that single page number to ensure consistency in handling.
+        If the page parameter is a NumPy array, it converts it to a list using the tolist() method to ensure compatibility with list operations.
+        If verbose is True, it prints the total number of pages processed.
+        If page is a list, it combines the text of the specified pages into a single string combined_text and returns it.
+        If page is set to "all", it returns the entire text_dict containing text of all pages.
+        If page is an integer, it returns the text of the specified page number.
+        If the specified page is not found, it returns the string "Page is not found".
         """
         text_dict = {}
         with open(fpath, "rb") as file:
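Per the load_pdf docstring above, page may be "all" (default), a single integer, or a list of page numbers, and fload() dispatches on the file extension. A hedged sketch with hypothetical paths:

# Hypothetical usage sketch, not part of the diff
from py2ls.ips import fload

pages = fload("report.pdf")                   # dict of {page_number: text} when page="all"
first_two = fload("report.pdf", page=[1, 2])  # combined text of the listed pages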
@@ -989,18 +1117,52 @@ def fload(fpath, kind=None, **kwargs):
     def load_docx(fpath):
         doc = Document(fpath)
         content = [para.text for para in doc.paragraphs]
-        return content
+        return content
 
     if kind is None:
         _, kind = os.path.splitext(fpath)
         kind = kind.lower()
 
-    kind = kind.lstrip(
-    img_types=[
-
+    kind = kind.lstrip(".").lower()
+    img_types = [
+        "bmp",
+        "eps",
+        "gif",
+        "icns",
+        "ico",
+        "im",
+        "jpg",
+        "jpeg",
+        "jpeg2000",
+        "msp",
+        "pcx",
+        "png",
+        "ppm",
+        "sgi",
+        "spider",
+        "tga",
+        "tiff",
+        "webp",
+        "json",
+    ]
+    doc_types = [
+        "docx",
+        "txt",
+        "md",
+        "html",
+        "json",
+        "yaml",
+        "xml",
+        "csv",
+        "xlsx",
+        "pdf",
+        "ipynb",
+    ]
     supported_types = [*doc_types, *img_types]
     if kind not in supported_types:
-        raise ValueError(
+        raise ValueError(
+            f"Error:\n{kind} is not in the supported list {supported_types}"
+        )
     if kind == "docx":
         return load_docx(fpath)
     elif kind == "txt" or kind == "md":
@@ -1024,9 +1186,12 @@ def fload(fpath, kind=None, **kwargs):
         return load_pdf(fpath, **kwargs)
     elif kind.lower() in img_types:
         print(f'Image ".{kind}" is loaded.')
-        return load_img(fpath)
+        return load_img(fpath)
     else:
-        raise ValueError(
+        raise ValueError(
+            f"Error:\n{kind} is not in the supported list {supported_types}"
+        )
+
 
 # Example usage
 # txt_content = fload('sample.txt')
@@ -1039,6 +1204,7 @@ def fload(fpath, kind=None, **kwargs):
 # xlsx_content = fload('sample.xlsx')
 # docx_content = fload('sample.docx')
 
+
 def fupdate(fpath, content=None):
     """
     Update a file by adding new content at the top and moving the old content to the bottom.
@@ -1055,34 +1221,37 @@ def fupdate(fpath, content=None):
     """
     content = content or ""
     if os.path.exists(fpath):
-        with open(fpath,
+        with open(fpath, "r") as file:
             old_content = file.read()
     else:
-        old_content =
-
-    with open(fpath,
+        old_content = ""
+
+    with open(fpath, "w") as file:
         file.write(content)
         file.write(old_content)
+
+
 def fappend(fpath, content=None):
     """
     append new content at the end.
     """
     content = content or ""
     if os.path.exists(fpath):
-        with open(fpath,
+        with open(fpath, "r") as file:
             old_content = file.read()
     else:
-        old_content =
-
-    with open(fpath,
+        old_content = ""
+
+    with open(fpath, "w") as file:
         file.write(old_content)
         file.write(content)
-
+
+
 def fsave(
     fpath,
     content,
-    mode=
-    how
+    mode="w",
+    how="overwrite",
     kind=None,
     font_name="Times",
     font_size=10,
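fupdate() writes the new content above whatever the file already contains, while fappend() adds it at the end; fsave() reaches them through how="update" / how="append". A sketch with a hypothetical file:

# Hypothetical usage sketch, not part of the diff
from py2ls.ips import fupdate, fappend

fupdate("log_ai.md", content="#### newest entry\n")  # new text ends up at the top
fappend("log_ai.md", content="trailing note\n")      # appended at the bottom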
@@ -1102,16 +1271,16 @@ def fsave(
     Returns:
         None
     """
-
-
-
+
+    def save_content(fpath, content, mode=mode, how="overwrite"):
+        if "wri" in how.lower():
+            with open(fpath, mode, encoding="utf-8") as file:
                 file.write(content)
-        elif
+        elif "upd" in how.lower():
             fupdate(fpath, content=content)
-        elif
+        elif "app" in how.lower():
             fappend(fpath, content=content)
 
-
     def save_docx(fpath, content, font_name, font_size, spacing):
         if isinstance(content, str):
             content = content.split(". ")
@@ -1126,45 +1295,40 @@ def fsave(
             paragraph.space_after = docx.shared.Pt(spacing)
         doc.save(fpath)
 
-
-
-        # Ensure content is a single string
+    def save_txt_md(fpath, content, sep="\n", mode="w"):
+        # Ensure content is a single string
         if isinstance(content, list):
             content = sep.join(content)
-        save_content(fpath, sep.join(content),mode)
-
+        save_content(fpath, sep.join(content), mode)
 
-    def save_html(fpath, content, font_name, font_size,mode=
+    def save_html(fpath, content, font_name, font_size, mode="w"):
         html_content = "<html><body>"
         for paragraph_text in content:
             html_content += f'<p style="font-family:{font_name}; font-size:{font_size}px;">{paragraph_text}</p>'
         html_content += "</body></html>"
-        save_content(fpath, html_content,mode)
-
+        save_content(fpath, html_content, mode)
 
     def save_pdf(fpath, content, font_name, font_size):
         pdf = FPDF()
         pdf.add_page()
         # pdf.add_font('Arial','',r'/System/Library/Fonts/Supplemental/Arial.ttf',uni=True)
-        pdf.set_font(font_name,
+        pdf.set_font(font_name, "", font_size)
         for paragraph_text in content:
             pdf.multi_cell(0, 10, paragraph_text)
-            pdf.ln(h
-        pdf.output(fpath,
-
+            pdf.ln(h="")
+        pdf.output(fpath, "F")
 
     def save_csv(fpath, data, **kwargs):
         df = pd.DataFrame(data)
         df.to_csv(fpath, **kwargs)
 
-
     def save_xlsx(fpath, data, **kwargs):
         df = pd.DataFrame(data)
         df.to_excel(fpath, **kwargs)
 
-    def save_ipynb(fpath,data
+    def save_ipynb(fpath, data, **kwargs):
         # Split the content by code fences to distinguish between code and markdown
-        parts = data.split(
+        parts = data.split("```")
         cells = []
 
         for i, part in enumerate(parts):
@@ -1176,31 +1340,32 @@ def fsave(
                 cells.append(nbformat.v4.new_code_cell(part.strip()))
         # Create a new notebook
         nb = nbformat.v4.new_notebook()
-        nb[
+        nb["cells"] = cells
         # Write the notebook to a file
-        with open(fpath,
+        with open(fpath, "w", encoding="utf-8") as ipynb_file:
             nbformat.write(nb, ipynb_file)
-
+
     # def save_json(fpath, data, **kwargs):
     #     with open(fpath, "w") as file:
     #         json.dump(data, file, **kwargs)
 
-    def save_json(fpath_fname, var_dict_or_df):
+    def save_json(fpath_fname, var_dict_or_df):
         with open(fpath_fname, "w") as f_json:
             # Check if var_dict_or_df is a DataFrame
             if isinstance(var_dict_or_df, pd.DataFrame):
                 # Convert DataFrame to a list of dictionaries
                 var_dict_or_df = var_dict_or_df.to_dict(orient="dict")
-
+
             # Check if var_dict_or_df is a dictionary
             if isinstance(var_dict_or_df, dict):
                 # Convert NumPy arrays to lists
                 for key, value in var_dict_or_df.items():
                     if isinstance(value, np.ndarray):
                         var_dict_or_df[key] = value.tolist()
-
+
             # Save the dictionary or list of dictionaries to a JSON file
             json.dump(var_dict_or_df, f_json, indent=4)
+
     # # Example usage:
     # sets = {"title": "mse_path_ MSE"}
     # jsonsave("/.json", sets)
@@ -1210,7 +1375,6 @@ def fsave(
         with open(fpath, "w") as file:
             yaml.dump(data, file, **kwargs)
 
-
     def save_xml(fpath, data):
         root = etree.Element("root")
         if isinstance(data, dict):
@@ -1239,18 +1403,18 @@ def fsave(
         "json",
         "xml",
         "yaml",
-        "ipynb"
+        "ipynb",
     ]:
         print(
             f"Warning:\n{kind} is not in the supported list ['docx', 'txt', 'md', 'html', 'pdf', 'csv', 'xlsx', 'json', 'xml', 'yaml']"
         )
 
-    if kind == "docx" or kind=="doc":
+    if kind == "docx" or kind == "doc":
         save_docx(fpath, content, font_name, font_size, spacing)
     elif kind == "txt":
-        save_txt_md(fpath, content, sep="",mode=mode)
+        save_txt_md(fpath, content, sep="", mode=mode)
     elif kind == "md":
-        save_txt_md(fpath, content, sep="",mode=mode)
+        save_txt_md(fpath, content, sep="", mode=mode)
     elif kind == "html":
         save_html(fpath, content, font_name, font_size)
     elif kind == "pdf":
@@ -1260,20 +1424,20 @@ def fsave(
     elif kind == "xlsx":
         save_xlsx(fpath, content, **kwargs)
     elif kind == "json":
-        save_json(fpath, content)
+        save_json(fpath, content)
     elif kind == "xml":
-        save_xml(fpath, content)
+        save_xml(fpath, content)
     elif kind == "yaml":
         save_yaml(fpath, content, **kwargs)
     elif kind == "ipynb":
-        save_ipynb(fpath, content, **kwargs)
+        save_ipynb(fpath, content, **kwargs)
     else:
-        try:
+        try:
             netfinder.downloader(url=content, dir_save=dirname(fpath), kind=kind)
         except:
             print(
                 f"Error:\n{kind} is not in the supported list ['docx', 'txt', 'md', 'html', 'pdf', 'csv', 'xlsx', 'json', 'xml', 'yaml']"
-
+            )
 
 
 # # Example usage
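fsave() picks a writer from kind (or the file extension) and passes extra keyword arguments through to the underlying call. A hedged sketch restricted to kinds listed in the dispatch above:

# Hypothetical usage sketch, not part of the diff
from py2ls.ips import fsave

fsave("notes.md", ["# Title", "First paragraph."], kind="md")
fsave("table.csv", {"a": [1, 2], "b": [3, 4]}, kind="csv", index=False)  # index=False is forwarded to DataFrame.to_csv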
@@ -1294,47 +1458,61 @@ def fsave(
 # fsave(dir_save + "sample.yaml", yaml_content)
 # fsave(dir_save + "sample.xml", xml_content)
 
+
 def addpath(fpath):
-    sys.path.insert(0,dir)
+    sys.path.insert(0, dir)
+
+
 def dirname(fpath):
     """
     dirname: Extracting Directory Name from a File Path
     Args:
-        fpath (str): the file or directory path
+        fpath (str): the file or directory path
     Returns:
         str: directory, without filename
     """
-    dirname_=os.path.dirname(fpath)
-    if not dirname_.endswith(
-        dirname_=dirname_+"/"
+    dirname_ = os.path.dirname(fpath)
+    if not dirname_.endswith("/"):
+        dirname_ = dirname_ + "/"
     return dirname_
 
-
+
+def dir_name(fpath):  # same as "dirname"
     return dirname(fpath)
+
+
 def basename(fpath):
     """
     basename: # Output: file.txt
     Args:
-        fpath (str): the file or directory path
+        fpath (str): the file or directory path
     Returns:
         str: # Output: file.txt
     """
     return os.path.basename(fpath)
+
+
 def flist(fpath, contains="all"):
-    all_files = [
+    all_files = [
+        os.path.join(fpath, f)
+        for f in os.listdir(fpath)
+        if os.path.isfile(os.path.join(fpath, f))
+    ]
     if isinstance(contains, list):
         filt_files = []
         for filter_ in contains:
             filt_files.extend(flist(fpath, filter_))
         return filt_files
     else:
-        if
+        if "all" in contains.lower():
             return all_files
         else:
             filt_files = [f for f in all_files if isa(f, contains)]
             return filt_files
+
+
 def sort_kind(df, by="name", ascending=True):
-    if df[by].dtype ==
+    if df[by].dtype == "object":  # Check if the column contains string values
         if ascending:
             sorted_index = df[by].str.lower().argsort()
         else:
@@ -1347,7 +1525,8 @@ def sort_kind(df, by="name", ascending=True):
     sorted_df = df.iloc[sorted_index].reset_index(drop=True)
     return sorted_df
 
-
+
+def isa(*args, **kwargs):
     """
     fpath, contains='img'
     containss file paths based on the specified contains.
@@ -1360,30 +1539,33 @@ def isa(*args,**kwargs):
     """
     for arg in args:
         if isinstance(arg, str):
-            if
+            if "/" in arg or "\\" in arg:
                 fpath = arg
             else:
-                contains=arg
-    if
+                contains = arg
+    if "img" in contains.lower() or "image" in contains.lower():
         return is_image(fpath)
-    elif
+    elif "doc" in contains.lower():
         return is_document(fpath)
-    elif
+    elif "zip" in contains.lower():
         return is_zip(fpath)
-    elif
+    elif "dir" in contains.lower() or (
+        "f" in contains.lower() and "d" in contains.lower()
+    ):
         return os.path.isdir(fpath)
-    elif
+    elif "fi" in contains.lower():  # file
         return os.path.isfile(fpath)
-    elif
+    elif "num" in contains.lower():  # file
         return os.path.isfile(fpath)
-    elif
+    elif "text" in contains.lower() or "txt" in contains.lower():  # file
         return is_text(fpath)
-    elif
+    elif "color" in contains.lower():  # file
         return is_str_color(fpath)
     else:
         print(f"{contains} was not set up correctly")
         return False
 
+
 def listdir(
     rootdir,
     kind="folder",
@@ -1391,7 +1573,7 @@ def listdir(
     ascending=True,
     contains=None,
     orient="list",
-    output="df"
+    output="df",  # 'list','dict','records','index','series'
 ):
     if not kind.startswith("."):
         kind = "." + kind
@@ -1420,12 +1602,12 @@ def listdir(
             is_file = kind.lower() in file_extension.lower() and (
                 os.path.isfile(item_path)
             )
-            if kind in [
+            if kind in [".doc", ".img", ".zip"]:  # 选择大的类别
                 if kind != ".folder" and not isa(item_path, kind):
                     continue
-            elif kind in [
+            elif kind in [".all"]:
                 return flist(fpath, contains=contains)
-            else:
+            else:  # 精确到文件的后缀
                 if not is_folder and not is_file:
                     continue
             f["name"].append(filename)
@@ -1433,9 +1615,15 @@ def listdir(
             f["path"].append(os.path.join(os.path.dirname(item_path), item))
             fpath = os.path.join(os.path.dirname(item_path), item)
             f["size"].append(round(os.path.getsize(fpath) / 1024 / 1024, 3))
-            f["created_time"].append(
-
-
+            f["created_time"].append(
+                pd.to_datetime(os.path.getctime(item_path), unit="s")
+            )
+            f["modified_time"].append(
+                pd.to_datetime(os.path.getmtime(item_path), unit="s")
+            )
+            f["last_open_time"].append(
+                pd.to_datetime(os.path.getatime(item_path), unit="s")
+            )
             f["fname"].append(filename)  # will be removed
             f["fpath"].append(fpath)  # will be removed
             i += 1
@@ -1464,35 +1652,39 @@ def listdir(
         elif "s" in sort_by.lower() and "z" in sort_by.lower():
             f = sort_kind(f, by="size", ascending=ascending)
 
-    if
+    if "df" in output:
         return f
     else:
-        if
+        if "l" in orient.lower():  # list # default
             res_output = Box(f.to_dict(orient="list"))
             return res_output
-        if
+        if "d" in orient.lower():  # dict
             return Box(f.to_dict(orient="dict"))
-        if
+        if "r" in orient.lower():  # records
             return Box(f.to_dict(orient="records"))
-        if
+        if "in" in orient.lower():  # records
             return Box(f.to_dict(orient="index"))
-        if
+        if "se" in orient.lower():  # records
             return Box(f.to_dict(orient="series"))
 
+
 # Example usage:
 # result = listdir('your_root_directory')
 # print(result)
 # df=listdir("/", contains='sss',sort_by='name',ascending=False)
-# print(df.fname.to_list(),"\n",df.fpath.to_list())
+# print(df.fname.to_list(),"\n",df.fpath.to_list())
 def list_func(lib_name, opt="call"):
     if opt == "call":
         funcs = [func for func in dir(lib_name) if callable(getattr(lib_name, func))]
     else:
         funcs = dir(lib_name)
     return funcs
+
+
 def func_list(lib_name, opt="call"):
     return list_func(lib_name, opt=opt)
 
+
 def mkdir(*args, **kwargs):
     """
     newfolder(pardir, chdir)
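listdir() returns a DataFrame by default; any other output value converts the result to a Box according to the orient options noted in the new inline comment ('list', 'dict', 'records', 'index', 'series'). A sketch with a hypothetical directory:

# Hypothetical usage sketch, not part of the diff
from py2ls.ips import listdir

df = listdir("/tmp", kind=".pdf")                                 # DataFrame (default output="df")
box = listdir("/tmp", kind=".pdf", output="dict", orient="list")  # Box of lists
print(box.fpath)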
@@ -1503,17 +1695,17 @@ def mkdir(*args, **kwargs):
     Returns:
         mkdir, giving a option if exists_ok or not
     """
-    overwrite=kwargs.get("overwrite",False)
+    overwrite = kwargs.get("overwrite", False)
     for arg in args:
-        if isinstance(arg, (str,list)):
+        if isinstance(arg, (str, list)):
            if "/" in arg or "\\" in arg:
-               pardir=arg
+               pardir = arg
+               print(f"pardir{pardir}")
            else:
                chdir = arg
+               print(f"chdir{chdir}")
-       elif isinstance(arg,bool):
-           overwrite=arg
+       elif isinstance(arg, bool):
+           overwrite = arg
            print(overwrite)
        else:
            print(f"{arg}Error: not support a {type(arg)} type")
@@ -1526,7 +1718,7 @@ def mkdir(*args, **kwargs):
     if isinstance(pardir, str):  # Dir_parents should be 'str' type
         pardir = os.path.normpath(pardir)
         # Get the slash type: "/" or "\"
+        stype = "/" if "/" in pardir else "\\"
         # Check if the parent directory exists and is a directory path
         if os.path.isdir(pardir):
             os.chdir(pardir)  # Set current path
@@ -1538,80 +1730,83 @@ def mkdir(*args, **kwargs):
             # Check if the subfolder already exists
             child_tmp = os.path.join(pardir, folder)
             if not os.path.isdir(child_tmp):
+                os.mkdir("./" + folder)
+                print(f"\n {folder} was created successfully!\n")
             else:
                 if overwrite:
                     shutil.rmtree(child_tmp)
+                    os.mkdir("./" + folder)
+                    print(f"\n {folder} overwrite! \n")
                 else:
+                    print(f"\n {folder} already exists! \n")
             rootdir.append(child_tmp + stype)  # Note down
         else:
+            print("\nWarning: Dir_child doesn't exist\n")
     else:
+        print("\nWarning: Dir_parent is not a directory path\n")
     # Dir is the main output, if only one dir, then str type is inconvenient
     if len(rootdir) == 1:
         rootdir = rootdir[0]
     return rootdir
 
+
+
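A hedged usage sketch for the reworked mkdir(): positional arguments are classified by type, so a string containing a path separator becomes the parent directory, a bare string the new folder name, and a bool toggles overwrite. Paths are illustrative and the parent directory is assumed to exist already:

    # create /tmp/demo/results/ inside an existing /tmp/demo, replacing it if present
    new_dir = mkdir("/tmp/demo/", "results", True)
    print(new_dir)                                   # e.g. '/tmp/demo/results/'

    # the overwrite flag can also be passed as a keyword
    mkdir("/tmp/demo/", "results", overwrite=False)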
+def figsave(*args, dpi=300):
     dir_save = None
+    fname = None
     for arg in args:
         if isinstance(arg, str):
+            if "/" in arg or "\\" in arg:
                 dir_save = arg
+            elif "/" not in arg and "\\" not in arg:
                 fname = arg
     # Backup original values
+    if "/" in dir_save:
+        if dir_save[-1] != "/":
+            dir_save = dir_save + "/"
+    elif "\\" in dir_save:
+        if dir_save[-1] != "\\":
+            dir_save = dir_save + "\\"
     else:
+        raise ValueError("Check the Path of dir_save Directory")
+    ftype = fname.split(".")[-1]
+    if len(fname.split(".")) == 1:
+        ftype = "nofmt"
+        fname = dir_save + fname + "." + ftype
     else:
         fname = dir_save + fname
     # Save figure based on file type
+    if ftype.lower() == "eps":
+        plt.savefig(fname, format="eps", bbox_inches="tight")
+        plt.savefig(
+            fname.replace(".eps", ".pdf"), format="pdf", bbox_inches="tight", dpi=dpi
+        )
+    elif ftype.lower() == "nofmt":  # default: both "tif" and "pdf"
+        fname_corr = fname.replace("nofmt", "pdf")
+        plt.savefig(fname_corr, format="pdf", bbox_inches="tight", dpi=dpi)
+        fname = fname.replace("nofmt", "tif")
+        plt.savefig(fname, format="tiff", dpi=dpi, bbox_inches="tight")
         print(f"default saving filetype: both 'tif' and 'pdf")
+    elif ftype.lower() == "pdf":
+        plt.savefig(fname, format="pdf", bbox_inches="tight", dpi=dpi)
+    elif ftype.lower() in ["jpg", "jpeg"]:
+        plt.savefig(fname, format="jpeg", dpi=dpi, bbox_inches="tight")
+    elif ftype.lower() == "png":
+        plt.savefig(fname, format="png", dpi=dpi, bbox_inches="tight", transparent=True)
+    elif ftype.lower() in ["tiff", "tif"]:
+        plt.savefig(fname, format="tiff", dpi=dpi, bbox_inches="tight")
+    elif ftype.lower() == "emf":
+        plt.savefig(fname, format="emf", dpi=dpi, bbox_inches="tight")
+    elif ftype.lower() == "fig":
+        plt.savefig(fname, format="pdf", bbox_inches="tight", dpi=dpi)
+    print(f"\nSaved @: dpi={dpi}\n{fname}")
 
 
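A short sketch of calling figsave(): the directory argument must contain a path separator and is assumed to exist, and the bare file name picks the format from its extension (no extension saves both tif and pdf). The output path is illustrative:

    import matplotlib.pyplot as plt

    plt.plot([0, 1, 2], [0, 1, 4])
    figsave("/tmp/plots/", "decay_curve.pdf", dpi=300)   # pdf branch
    figsave("/tmp/plots/", "decay_curve")                # no extension: writes both .pdf and .tif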
 def is_str_color(s):
     # Regular expression pattern for hexadecimal color codes
     color_code_pattern = r"^#([A-Fa-f0-9]{6}|[A-Fa-f0-9]{8})$"
     return re.match(color_code_pattern, s) is not None
+
+
 def is_num(s):
     """
     Check if a string can be converted to a number (int or float).
@@ -1625,58 +1820,72 @@ def is_num(s):
         return True
     except ValueError:
         return False
+
+
 def isnum(s):
     return is_num(s)
+
+
 def is_image(fpath):
     mime_type, _ = mimetypes.guess_type(fpath)
+    if mime_type and mime_type.startswith("image"):
         return True
     else:
         return False
+
+
 def is_document(fpath):
     mime_type, _ = mimetypes.guess_type(fpath)
     if mime_type and (
+        mime_type.startswith("text/")
+        or mime_type == "application/pdf"
+        or mime_type == "application/msword"
+        or mime_type
+        == "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
+        or mime_type == "application/vnd.ms-excel"
+        or mime_type
+        == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
+        or mime_type == "application/vnd.ms-powerpoint"
+        or mime_type
+        == "application/vnd.openxmlformats-officedocument.presentationml.presentation"
     ):
         return True
     else:
         return False
+
+
 def is_zip(fpath):
     mime_type, _ = mimetypes.guess_type(fpath)
+    if mime_type == "application/zip":
         return True
     else:
         return False
 
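These checkers rely purely on mimetypes.guess_type, i.e. on the file extension rather than on file contents. A quick illustration (file names are made up):

    import mimetypes

    print(mimetypes.guess_type("report.pdf")[0])   # 'application/pdf'  -> is_document() is True
    print(mimetypes.guess_type("archive.zip")[0])  # 'application/zip'  -> is_zip() is True
    print(mimetypes.guess_type("photo.jpeg")[0])   # 'image/jpeg'       -> is_image() is True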
+def adjust_spines(ax=None, spines=["left", "bottom"], distance=2):
     if ax is None:
         ax = plt.gca()
     for loc, spine in ax.spines.items():
         if loc in spines:
+            spine.set_position(("outward", distance))  # outward by 2 points
             # spine.set_smart_bounds(True)
         else:
+            spine.set_color("none")  # don't draw spine
     # turn off ticks where there is no spine
+    if "left" in spines:
+        ax.yaxis.set_ticks_position("left")
     else:
         ax.yaxis.set_ticks([])
+    if "bottom" in spines:
+        ax.xaxis.set_ticks_position("bottom")
     else:
         # no xaxis ticks
         ax.xaxis.set_ticks([])
+
+
 # And then plot the data:
 
 def add_colorbar(im, width=None, pad=None, **kwargs):
     # usage: add_colorbar(im, width=0.01, pad=0.005, label="PSD (dB)", shrink=0.8)
     l, b, w, h = im.axes.get_position().bounds  # get boundaries
@@ -1685,6 +1894,8 @@ def add_colorbar(im, width=None, pad=None, **kwargs):
     fig = im.axes.figure  # get figure of image
     cax = fig.add_axes([l + w + pad, b, width, h])  # define cbar Axes
     return fig.colorbar(im, cax=cax, **kwargs)  # draw cbar
+
+
 # =============================================================================
 # # for plot figures: setting rcParams
 # usage: set_pub()
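A combined sketch of the two plotting helpers above; the data is random and the label text is arbitrary:

    import numpy as np
    import matplotlib.pyplot as plt

    fig, ax = plt.subplots()
    im = ax.imshow(np.random.rand(20, 20))
    adjust_spines(ax, spines=["left", "bottom"], distance=2)    # keep only left/bottom spines, moved outward
    add_colorbar(im, width=0.01, pad=0.005, label="intensity")  # colorbar in its own Axes right of the image
    plt.show()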
@@ -1697,13 +1908,16 @@ def add_colorbar(im, width=None, pad=None, **kwargs):
 
 
 def list2str(x_str):
+    s = "".join(str(x) for x in x_str)
     return s
+
+
 def str2list(str_):
     l = []
     [l.append(x) for x in str_]
     return l
 
+
 def load_img(fpath):
     """
     Load an image from the specified file path.
@@ -1723,6 +1937,7 @@ def load_img(fpath):
     except OSError:
         raise OSError(f"Unable to open file '{fpath}' or it is not a valid image file.")
 
+
 def apply_filter(img, *args):
     # def apply_filter(img, filter_name, filter_value=None):
     """
@@ -1734,42 +1949,47 @@ def apply_filter(img, *args):
     Returns:
         PIL.Image: The filtered image.
     """
+
     def correct_filter_name(filter_name):
+        if "bl" in filter_name.lower() and "box" not in filter_name.lower():
+            return "BLUR"
+        elif "cont" in filter_name.lower():
+            return "Contour"
+        elif "det" in filter_name.lower():
+            return "Detail"
+        elif (
+            "edg" in filter_name.lower()
+            and "mo" not in filter_name.lower()
+            and "f" not in filter_name.lower()
+        ):
+            return "EDGE_ENHANCE"
+        elif "edg" in filter_name.lower() and "mo" in filter_name.lower():
+            return "EDGE_ENHANCE_MORE"
+        elif "emb" in filter_name.lower():
+            return "EMBOSS"
+        elif "edg" in filter_name.lower() and "f" in filter_name.lower():
+            return "FIND_EDGES"
+        elif "sh" in filter_name.lower() and "mo" not in filter_name.lower():
+            return "SHARPEN"
+        elif "sm" in filter_name.lower() and "mo" not in filter_name.lower():
+            return "SMOOTH"
+        elif "sm" in filter_name.lower() and "mo" in filter_name.lower():
+            return "SMOOTH_MORE"
+        elif "min" in filter_name.lower():
+            return "MIN_FILTER"
+        elif "max" in filter_name.lower():
+            return "MAX_FILTER"
+        elif "mod" in filter_name.lower():
+            return "MODE_FILTER"
+        elif "mul" in filter_name.lower():
+            return "MULTIBAND_FILTER"
+        elif "gau" in filter_name.lower():
+            return "GAUSSIAN_BLUR"
+        elif "box" in filter_name.lower():
+            return "BOX_BLUR"
+        elif "med" in filter_name.lower():
+            return "MEDIAN_FILTER"
         else:
             supported_filters = [
                 "BLUR",
                 "CONTOUR",
@@ -1843,21 +2063,232 @@ def apply_filter(img, *args):
         return img.filter(supported_filters[filter_name](bands))
     else:
         if filter_value is not None:
+            print(
+                f"{filter_name} doesn't require a value for {filter_value}, but it remains unaffected"
+            )
         return img.filter(supported_filters[filter_name])
 
def
|
1851
|
-
|
1852
|
-
|
1853
|
-
|
1854
|
-
|
1855
|
-
|
1856
|
-
|
1857
|
-
|
1858
|
-
|
1859
|
-
|
1860
|
-
):
|
2072
|
+
# def imgsetss(
|
2073
|
+
# img,
|
2074
|
+
# sets=None,
|
2075
|
+
# show=True,
|
2076
|
+
# show_axis=False,
|
2077
|
+
# size=None,
|
2078
|
+
# dpi=100,
|
2079
|
+
# figsize=None,
|
2080
|
+
# auto=False,
|
2081
|
+
# filter_kws=None,
|
2082
|
+
# ):
|
2083
|
+
# """
|
2084
|
+
# Apply various enhancements and filters to an image using PIL's ImageEnhance and ImageFilter modules.
|
2085
|
+
|
2086
|
+
# Args:
|
2087
|
+
# img (PIL.Image): The input image.
|
2088
|
+
# sets (dict): A dictionary specifying the enhancements, filters, and their parameters.
|
2089
|
+
# show (bool): Whether to display the enhanced image.
|
2090
|
+
# show_axis (bool): Whether to display axes on the image plot.
|
2091
|
+
# size (tuple): The size of the thumbnail, cover, contain, or fit operation.
|
2092
|
+
# dpi (int): Dots per inch for the displayed image.
|
2093
|
+
# figsize (tuple): The size of the figure for displaying the image.
|
2094
|
+
# auto (bool): Whether to automatically enhance the image based on its characteristics.
|
2095
|
+
|
2096
|
+
# Returns:
|
2097
|
+
# PIL.Image: The enhanced image.
|
2098
|
+
|
2099
|
+
# Supported enhancements and filters:
|
2100
|
+
# - "sharpness": Adjusts the sharpness of the image. Values > 1 increase sharpness, while values < 1 decrease sharpness.
|
2101
|
+
# - "contrast": Adjusts the contrast of the image. Values > 1 increase contrast, while values < 1 decrease contrast.
|
2102
|
+
# - "brightness": Adjusts the brightness of the image. Values > 1 increase brightness, while values < 1 decrease brightness.
|
2103
|
+
# - "color": Adjusts the color saturation of the image. Values > 1 increase saturation, while values < 1 decrease saturation.
|
2104
|
+
# - "rotate": Rotates the image by the specified angle.
|
2105
|
+
# - "crop" or "cut": Crops the image. The value should be a tuple specifying the crop box as (left, upper, right, lower).
|
2106
|
+
# - "size": Resizes the image to the specified dimensions.
|
2107
|
+
# - "thumbnail": Resizes the image to fit within the given size while preserving aspect ratio.
|
2108
|
+
# - "cover": Resizes and crops the image to fill the specified size.
|
2109
|
+
# - "contain": Resizes the image to fit within the specified size, adding borders if necessary.
|
2110
|
+
# - "fit": Resizes and pads the image to fit within the specified size.
|
2111
|
+
# - "filter": Applies various filters to the image (e.g., BLUR, CONTOUR, EDGE_ENHANCE).
|
2112
|
+
|
2113
|
+
# Note:
|
2114
|
+
# The "color" and "enhance" enhancements are not implemented in this function.
|
2115
|
+
# """
|
2116
|
+
# supported_filters = [
|
2117
|
+
# "BLUR",
|
2118
|
+
# "CONTOUR",
|
2119
|
+
# "DETAIL",
|
2120
|
+
# "EDGE_ENHANCE",
|
2121
|
+
# "EDGE_ENHANCE_MORE",
|
2122
|
+
# "EMBOSS",
|
2123
|
+
# "FIND_EDGES",
|
2124
|
+
# "SHARPEN",
|
2125
|
+
# "SMOOTH",
|
2126
|
+
# "SMOOTH_MORE",
|
2127
|
+
# "MIN_FILTER",
|
2128
|
+
# "MAX_FILTER",
|
2129
|
+
# "MODE_FILTER",
|
2130
|
+
# "MULTIBAND_FILTER",
|
2131
|
+
# "GAUSSIAN_BLUR",
|
2132
|
+
# "BOX_BLUR",
|
2133
|
+
# "MEDIAN_FILTER",
|
2134
|
+
# ]
|
2135
|
+
# print("sets: a dict,'sharp:1.2','color','contrast:'auto' or 1.2','bright', 'crop: x_upperleft,y_upperleft, x_lowerright, y_lowerright','rotation','resize','rem or background'")
|
2136
|
+
# print(f"usage: filter_kws 'dict' below:")
|
2137
|
+
# pp([str(i).lower() for i in supported_filters])
|
2138
|
+
# print("\nlog:\n")
|
2139
|
+
# def confirm_rembg_models(model_name):
|
2140
|
+
# models_support = [
|
2141
|
+
# "u2net",
|
2142
|
+
# "u2netp",
|
2143
|
+
# "u2net_human_seg",
|
2144
|
+
# "u2net_cloth_seg",
|
2145
|
+
# "silueta",
|
2146
|
+
# "isnet-general-use",
|
2147
|
+
# "isnet-anime",
|
2148
|
+
# "sam",
|
2149
|
+
# ]
|
2150
|
+
# if model_name in models_support:
|
2151
|
+
# print(f"model_name: {model_name}")
|
2152
|
+
# return model_name
|
2153
|
+
# else:
|
2154
|
+
# print(f"{model_name} cannot be found, check the name:{models_support}, default('isnet-general-use') has been used")
|
2155
|
+
# return "isnet-general-use"
|
2156
|
+
# def auto_enhance(img):
|
2157
|
+
# """
|
2158
|
+
# Automatically enhances the image based on its characteristics.
|
2159
|
+
# Args:
|
2160
|
+
# img (PIL.Image): The input image.
|
2161
|
+
# Returns:
|
2162
|
+
# dict: A dictionary containing the optimal enhancement values.
|
2163
|
+
# """
|
2164
|
+
# # Determine the bit depth based on the image mode
|
2165
|
+
# if img.mode in ["1", "L", "P", "RGB", "YCbCr", "LAB", "HSV"]:
|
2166
|
+
# # 8-bit depth per channel
|
2167
|
+
# bit_depth = 8
|
2168
|
+
# elif img.mode in ["RGBA", "CMYK"]:
|
2169
|
+
# # 8-bit depth per channel + alpha (RGBA) or additional channels (CMYK)
|
2170
|
+
# bit_depth = 8
|
2171
|
+
# elif img.mode in ["I", "F"]:
|
2172
|
+
# # 16-bit depth per channel (integer or floating-point)
|
2173
|
+
# bit_depth = 16
|
2174
|
+
# else:
|
2175
|
+
# raise ValueError("Unsupported image mode")
|
2176
|
+
# # Calculate the brightness and contrast for each channel
|
2177
|
+
# num_channels = len(img.getbands())
|
2178
|
+
# brightness_factors = []
|
2179
|
+
# contrast_factors = []
|
2180
|
+
# for channel in range(num_channels):
|
2181
|
+
# channel_histogram = img.split()[channel].histogram()
|
2182
|
+
# brightness = sum(i * w for i, w in enumerate(channel_histogram))/sum(channel_histogram)
|
2183
|
+
# channel_min, channel_max = img.split()[channel].getextrema()
|
2184
|
+
# contrast = channel_max - channel_min
|
2185
|
+
# # Adjust calculations based on bit depth
|
2186
|
+
# normalization_factor = 2**bit_depth - 1 # Max value for the given bit depth
|
2187
|
+
# brightness_factor = (1.0 + (brightness - normalization_factor / 2) / normalization_factor)
|
2188
|
+
# contrast_factor = (1.0 + (contrast - normalization_factor / 2) / normalization_factor)
|
2189
|
+
# brightness_factors.append(brightness_factor)
|
2190
|
+
# contrast_factors.append(contrast_factor)
|
2191
|
+
# # Calculate the average brightness and contrast factors across channels
|
2192
|
+
# avg_brightness_factor = sum(brightness_factors) / num_channels
|
2193
|
+
# avg_contrast_factor = sum(contrast_factors) / num_channels
|
2194
|
+
# return {"brightness": avg_brightness_factor, "contrast": avg_contrast_factor}
|
2195
|
+
# # Load image if input is a file path
|
2196
|
+
# if isinstance(img, str):
|
2197
|
+
# img = load_img(img)
|
2198
|
+
# img_update = img.copy()
|
2199
|
+
# # Auto-enhance image if requested
|
2200
|
+
# if auto:
|
2201
|
+
# auto_params = auto_enhance(img_update)
|
2202
|
+
# sets.update(auto_params)
|
2203
|
+
# if sets is None:
|
2204
|
+
# sets = {}
|
2205
|
+
# for k, value in sets.items():
|
2206
|
+
# if "shar" in k.lower():
|
2207
|
+
# enhancer = ImageEnhance.Sharpness(img_update)
|
2208
|
+
# img_update = enhancer.enhance(value)
|
2209
|
+
# elif "col" in k.lower() and 'bg' not in k.lower():
|
2210
|
+
# enhancer = ImageEnhance.Color(img_update)
|
2211
|
+
# img_update = enhancer.enhance(value)
|
2212
|
+
# elif "contr" in k.lower():
|
2213
|
+
# if value and isinstance(value,(float,int)):
|
2214
|
+
# enhancer = ImageEnhance.Contrast(img_update)
|
2215
|
+
# img_update = enhancer.enhance(value)
|
2216
|
+
# else:
|
2217
|
+
# print('autocontrasted')
|
2218
|
+
# img_update = ImageOps.autocontrast(img_update)
|
2219
|
+
# elif "bri" in k.lower():
|
2220
|
+
# enhancer = ImageEnhance.Brightness(img_update)
|
2221
|
+
# img_update = enhancer.enhance(value)
|
2222
|
+
# elif "cro" in k.lower() or "cut" in k.lower():
|
2223
|
+
# img_update=img_update.crop(value)
|
2224
|
+
# elif "rota" in k.lower():
|
2225
|
+
# img_update = img_update.rotate(value)
|
2226
|
+
# elif "si" in k.lower():
|
2227
|
+
# img_update = img_update.resize(value)
|
2228
|
+
# elif "thum" in k.lower():
|
2229
|
+
# img_update.thumbnail(value)
|
2230
|
+
# elif "cover" in k.lower():
|
2231
|
+
# img_update = ImageOps.cover(img_update, size=value)
|
2232
|
+
# elif "contain" in k.lower():
|
2233
|
+
# img_update = ImageOps.contain(img_update, size=value)
|
2234
|
+
# elif "fit" in k.lower():
|
2235
|
+
# img_update = ImageOps.fit(img_update, size=value)
|
2236
|
+
# elif "pad" in k.lower():
|
2237
|
+
# img_update = ImageOps.pad(img_update, size=value)
|
2238
|
+
# elif 'rem' in k.lower() or 'rm' in k.lower() or 'back' in k.lower():
|
2239
|
+
# if value and isinstance(value,(int,float,list)):
|
2240
|
+
# print('example usage: {"rm":[alpha_matting_background_threshold(20),alpha_matting_foreground_threshold(270),alpha_matting_erode_sive(11)]}')
|
2241
|
+
# print("https://github.com/danielgatis/rembg/blob/main/USAGE.md")
|
2242
|
+
# # ### Parameters:
|
2243
|
+
# # data (Union[bytes, PILImage, np.ndarray]): The input image data.
|
2244
|
+
# # alpha_matting (bool, optional): Flag indicating whether to use alpha matting. Defaults to False.
|
2245
|
+
# # alpha_matting_foreground_threshold (int, optional): Foreground threshold for alpha matting. Defaults to 240.
|
2246
|
+
# # alpha_matting_background_threshold (int, optional): Background threshold for alpha matting. Defaults to 10.
|
2247
|
+
# # alpha_matting_erode_size (int, optional): Erosion size for alpha matting. Defaults to 10.
|
2248
|
+
# # session (Optional[BaseSession], optional): A session object for the 'u2net' model. Defaults to None.
|
2249
|
+
# # only_mask (bool, optional): Flag indicating whether to return only the binary masks. Defaults to False.
|
2250
|
+
# # post_process_mask (bool, optional): Flag indicating whether to post-process the masks. Defaults to False.
|
2251
|
+
# # bgcolor (Optional[Tuple[int, int, int, int]], optional): Background color for the cutout image. Defaults to None.
|
2252
|
+
# # ###
|
2253
|
+
# if isinstance(value,int):
|
2254
|
+
# value=[value]
|
2255
|
+
# if len(value) <2:
|
2256
|
+
# img_update = remove(img_update,alpha_matting=True,alpha_matting_background_threshold=value)
|
2257
|
+
# elif 2<=len(value)<3:
|
2258
|
+
# img_update = remove(img_update,alpha_matting=True,alpha_matting_background_threshold=value[0],alpha_matting_foreground_threshold=value[1])
|
2259
|
+
# elif 3<=len(value)<4:
|
2260
|
+
# img_update = remove(img_update,alpha_matting=True,alpha_matting_background_threshold=value[0],alpha_matting_foreground_threshold=value[1],alpha_matting_erode_size=value[2])
|
2261
|
+
# if isinstance(value,tuple): # replace the background color
|
2262
|
+
# if len(value)==3:
|
2263
|
+
# value+=(255,)
|
2264
|
+
# img_update = remove(img_update, bgcolor=value)
|
2265
|
+
# if isinstance(value,str):
|
2266
|
+
# if confirm_rembg_models(value):
|
2267
|
+
# img_update=remove(img_update,session=new_session(value))
|
2268
|
+
# else:
|
2269
|
+
# img_update=remove(img_update)
|
2270
|
+
# elif 'bgcolor' in k.lower():
|
2271
|
+
# if isinstance(value,list):
|
2272
|
+
# value=tuple(value)
|
2273
|
+
# if isinstance(value,tuple): # replace the background color
|
2274
|
+
# if len(value)==3:
|
2275
|
+
# value+=(255,)
|
2276
|
+
# img_update = remove(img_update, bgcolor=value)
|
2277
|
+
# if filter_kws:
|
2278
|
+
# for filter_name, filter_value in filter_kws.items():
|
2279
|
+
# img_update = apply_filter(img_update, filter_name, filter_value)
|
2280
|
+
# # Display the image if requested
|
2281
|
+
# if show:
|
2282
|
+
# if figsize is None:
|
2283
|
+
# plt.figure(dpi=dpi)
|
2284
|
+
# else:
|
2285
|
+
# plt.figure(figsize=figsize, dpi=dpi)
|
2286
|
+
# plt.imshow(img_update)
|
2287
|
+
# plt.axis("on") if show_axis else plt.axis("off")
|
2288
|
+
# return img_update
|
2289
|
+
|
2290
|
+
|
2291
|
+
+def imgsets(img, **kwargs):
     """
     Apply various enhancements and filters to an image using PIL's ImageEnhance and ImageFilter modules.
 
@@ -1892,28 +2323,27 @@ def imgsets(
     The "color" and "enhance" enhancements are not implemented in this function.
     """
     supported_filters = [
+        "BLUR",
+        "CONTOUR",
+        "DETAIL",
+        "EDGE_ENHANCE",
+        "EDGE_ENHANCE_MORE",
+        "EMBOSS",
+        "FIND_EDGES",
+        "SHARPEN",
+        "SMOOTH",
+        "SMOOTH_MORE",
+        "MIN_FILTER",
+        "MAX_FILTER",
+        "MODE_FILTER",
+        "MULTIBAND_FILTER",
+        "GAUSSIAN_BLUR",
+        "BOX_BLUR",
+        "MEDIAN_FILTER",
+    ]
+    print('usage: imgsets(dir_img, contrast="auto", rm=True, color=2.2)')
     print("\nlog:\n")
+
     def confirm_rembg_models(model_name):
         models_support = [
             "u2net",
@@ -1929,8 +2359,11 @@ def imgsets(
             print(f"model_name: {model_name}")
             return model_name
         else:
+            print(
+                f"{model_name} cannot be found, check the name:{models_support}, default('isnet-general-use') has been used"
+            )
             return "isnet-general-use"
+
     def auto_enhance(img):
         """
         Automatically enhances the image based on its characteristics.
@@ -1957,48 +2390,61 @@ def imgsets(
         contrast_factors = []
         for channel in range(num_channels):
             channel_histogram = img.split()[channel].histogram()
+            brightness = sum(i * w for i, w in enumerate(channel_histogram)) / sum(
+                channel_histogram
+            )
             channel_min, channel_max = img.split()[channel].getextrema()
             contrast = channel_max - channel_min
             # Adjust calculations based on bit depth
             normalization_factor = 2**bit_depth - 1  # Max value for the given bit depth
+            brightness_factor = (
+                1.0 + (brightness - normalization_factor / 2) / normalization_factor
+            )
+            contrast_factor = (
+                1.0 + (contrast - normalization_factor / 2) / normalization_factor
+            )
             brightness_factors.append(brightness_factor)
             contrast_factors.append(contrast_factor)
         # Calculate the average brightness and contrast factors across channels
         avg_brightness_factor = sum(brightness_factors) / num_channels
         avg_contrast_factor = sum(contrast_factors) / num_channels
         return {"brightness": avg_brightness_factor, "contrast": avg_contrast_factor}
+
     # Load image if input is a file path
     if isinstance(img, str):
         img = load_img(img)
     img_update = img.copy()
     # Auto-enhance image if requested
+
+    auto = kwargs.get("auto", False)
+    show = kwargs.get("show", True)
+    show_axis = kwargs.get("show_axis", False)
+    size = kwargs.get("size", None)
+    figsize = kwargs.get("figsize", None)
+    dpi = kwargs.get("dpi", 100)
+
     if auto:
-        sets = {}
-    for k, value in sets.items():
+        kwargs = {**auto_enhance(img_update), **kwargs}
+
+    for k, value in kwargs.items():
         if "shar" in k.lower():
             enhancer = ImageEnhance.Sharpness(img_update)
             img_update = enhancer.enhance(value)
+        elif "col" in k.lower() and "bg" not in k.lower():
            enhancer = ImageEnhance.Color(img_update)
            img_update = enhancer.enhance(value)
         elif "contr" in k.lower():
-            if value and isinstance(value,(float,int)):
+            if value and isinstance(value, (float, int)):
                 enhancer = ImageEnhance.Contrast(img_update)
                 img_update = enhancer.enhance(value)
             else:
+                print("autocontrasted")
                 img_update = ImageOps.autocontrast(img_update)
         elif "bri" in k.lower():
             enhancer = ImageEnhance.Brightness(img_update)
             img_update = enhancer.enhance(value)
         elif "cro" in k.lower() or "cut" in k.lower():
-            img_update=img_update.crop(value)
+            img_update = img_update.crop(value)
         elif "rota" in k.lower():
             img_update = img_update.rotate(value)
         elif "si" in k.lower():
@@ -2010,12 +2456,21 @@ def imgsets(
         elif "contain" in k.lower():
             img_update = ImageOps.contain(img_update, size=value)
         elif "fit" in k.lower():
+            if isinstance(value, dict):
+                for filter_name, filter_value in value.items():
+                    img_update = apply_filter(img_update, filter_name, filter_value)
+            else:
+                img_update = ImageOps.fit(img_update, size=value)
         elif "pad" in k.lower():
             img_update = ImageOps.pad(img_update, size=value)
+        elif "rem" in k.lower() or "rm" in k.lower() or "back" in k.lower():
+            if isinstance(value, bool):
+                session = new_session("isnet-general-use")
+                img_update = remove(img_update, session=session)
+            elif value and isinstance(value, (int, float, list)):
+                print(
+                    'example usage: {"rm":[alpha_matting_background_threshold(20),alpha_matting_foreground_threshold(270),alpha_matting_erode_sive(11)]}'
+                )
                 print("https://github.com/danielgatis/rembg/blob/main/USAGE.md")
                 # ### Parameters:
                 #     data (Union[bytes, PILImage, np.ndarray]): The input image data.
@@ -2028,33 +2483,45 @@ def imgsets(
                 #     post_process_mask (bool, optional): Flag indicating whether to post-process the masks. Defaults to False.
                 #     bgcolor (Optional[Tuple[int, int, int, int]], optional): Background color for the cutout image. Defaults to None.
                 # ###
-            if isinstance(value,int):
-                value=[value]
-            if len(value) <2:
+                if isinstance(value, int):
+                    value = [value]
+                if len(value) < 2:
+                    img_update = remove(
+                        img_update,
+                        alpha_matting=True,
+                        alpha_matting_background_threshold=value,
+                    )
+                elif 2 <= len(value) < 3:
+                    img_update = remove(
+                        img_update,
+                        alpha_matting=True,
+                        alpha_matting_background_threshold=value[0],
+                        alpha_matting_foreground_threshold=value[1],
+                    )
+                elif 3 <= len(value) < 4:
+                    img_update = remove(
+                        img_update,
+                        alpha_matting=True,
+                        alpha_matting_background_threshold=value[0],
+                        alpha_matting_foreground_threshold=value[1],
+                        alpha_matting_erode_size=value[2],
+                    )
+            elif isinstance(value, tuple):  # replace the background color
+                if len(value) == 3:
+                    value += (255,)
                 img_update = remove(img_update, bgcolor=value)
+            elif isinstance(value, str):
                 if confirm_rembg_models(value):
-                    img_update=remove(img_update,session=new_session(value))
+                    img_update = remove(img_update, session=new_session(value))
                 else:
-                    img_update=remove(img_update)
-            if isinstance(value,list):
-                value=tuple(value)
-            if isinstance(value,tuple):
-                if len(value)==3:
-                    value+=(255,)
+                    img_update = remove(img_update)
+        elif "bg" in k.lower() and "color" in k.lower():
+            if isinstance(value, list):
+                value = tuple(value)
+            if isinstance(value, tuple):  # replace the background color
+                if len(value) == 3:
+                    value += (255,)
             img_update = remove(img_update, bgcolor=value)
-        if filter_kws:
-            for filter_name, filter_value in filter_kws.items():
-                img_update = apply_filter(img_update, filter_name, filter_value)
     # Display the image if requested
     if show:
         if figsize is None:
@@ -2062,8 +2529,18 @@ def imgsets(
         else:
             plt.figure(figsize=figsize, dpi=dpi)
         plt.imshow(img_update)
+        if show_axis:
+            plt.axis("on")  # Turn on axis
+            plt.minorticks_on()
+            plt.grid(
+                which="both", linestyle="--", linewidth=0.5, color="gray", alpha=0.7
+            )
+
+        else:
+            plt.axis("off")  # Turn off axis
     return img_update
+
+
 # # usage:
 # img = imgsets(
 #     fpath,
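The keyword-driven interface that replaces the old sets= dictionary is easiest to see from the usage string imgsets() prints. A hedged sketch based on that string (file names are illustrative, rm=True needs the rembg model files available, and branches not shown in this diff may consume additional keywords):

    img = imgsets(
        "photo.png",        # a str is loaded via load_img()
        contrast="auto",    # non-numeric value falls back to ImageOps.autocontrast
        color=2.2,          # ImageEnhance.Color saturation boost
        rm=True,            # background removal with the default "isnet-general-use" session
        show=False,
    )
    img.save("photo_clean.png")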
@@ -2074,26 +2551,26 @@ def imgsets(
 # )
 
 
+def thumbnail(dir_img_list, figsize=(10, 10), dpi=100, dir_save=None, kind=".png"):
     """
     Display a thumbnail figure of all images in the specified directory.
     Args:
         dir_img_list (list): List of the Directory containing the images.
     """
     num_images = len(dir_img_list)
+    if not kind.startswith("."):
+        kind = "." + kind
 
     if num_images == 0:
         print("No images found to display.")
         return
-    fig, axs = plt.subplots(grid_size, grid_size, figsize=figsize,dpi=dpi)
+    grid_size = int(num_images**0.5) + 1  # Determine grid size
+    fig, axs = plt.subplots(grid_size, grid_size, figsize=figsize, dpi=dpi)
     for ax, image_file in zip(axs.flatten(), dir_img_list):
         try:
             img = Image.open(image_file)
             ax.imshow(img)
+            ax.axis("off")
         except:
             continue
     # for ax in axs.flatten():
@@ -2104,13 +2581,15 @@ def thumbnail(dir_img_list,figsize=(10,10),dpi=100, dir_save=None, kind='.png'):
         plt.show()
     else:
         if basename(dir_save):
-            fname= basename(dir_save) +kind
+            fname = basename(dir_save) + kind
         else:
+            fname = "_thumbnail_" + basename(dirname(dir_save)[:-1]) + ".png"
         if dirname(dir_img_list[0]) == dirname(dir_save):
-            figsave(dirname(dir_save[:-1]),fname)
+            figsave(dirname(dir_save[:-1]), fname)
         else:
-            figsave(dirname(dir_save),fname)
+            figsave(dirname(dir_save), fname)
+
+
 # usage:
 # fpath = "/Users/macjianfeng/Dropbox/github/python/py2ls/tests/xample_netfinder/images/"
 # thumbnail(listdir(fpath,'png').fpath.to_list(),dir_save=dirname(fpath))
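Expanding the usage comment above into a runnable sketch (the folder path is illustrative, and it assumes listdir() returns its default DataFrame so .fpath.to_list() is available):

    from os.path import dirname

    fpath = "./images/"
    png_files = listdir(fpath, "png").fpath.to_list()
    thumbnail(png_files, figsize=(8, 8), dpi=120, dir_save=dirname(fpath))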
@@ -2127,6 +2606,8 @@ def read_mplstyle(style_file):
     for i, j in style_dict.items():
         print(f"\n{i}::::{j}")
     return style_dict
+
+
 # #example usage:
 # style_file = "/ std-colors.mplstyle"
 # style_dict = read_mplstyle(style_file)
@@ -2150,63 +2631,10 @@ def dir_lib(lib_oi):
     else:
         print(f"Cannot find the {lib_oi} in site-packages directory.")
     return dir_list
-# example usage:
-# dir_lib("seaborn")
-
-
-# set up the colorlist, give the number, or the colormap's name
-def get_color(n=1, cmap="auto", by="start"):
-    # Extract the colormap as a list
-    def cmap2hex(cmap_name):
-        cmap_ = matplotlib.pyplot.get_cmap(cmap_name)
-        colors = [cmap_(i) for i in range(cmap_.N)]
-        return [matplotlib.colors.rgb2hex(color) for color in colors]
-        # usage: clist = cmap2hex("viridis")
-    # cycle times, total number is n (defaultn=10)
-    def cycle2list(colorlist, n=10):
-        cycler_ = cycler(tmp=colorlist)
-        clist = []
-        for i, c_ in zip(range(n), cycler_()):
-            clist.append(c_["tmp"])
-            if i > n:
-                break
-        return clist
-    def hue2rgb(hex_colors):
-        def hex_to_rgb(hex_color):
-            """Converts a hexadecimal color code to RGB values."""
-            if hex_colors.startswith("#"):
-                hex_color = hex_color.lstrip("#")
-            return tuple(int(hex_color[i : i + 2], 16) / 255.0 for i in (0, 2, 4))
-        if isinstance(hex_colors, str):
-            return hex_to_rgb(hex_colors)
-        elif isinstance(hex_colors, (list)):
-            """Converts a list of hexadecimal color codes to a list of RGB values."""
-            rgb_values = [hex_to_rgb(hex_color) for hex_color in hex_colors]
-            return rgb_values
-    if "aut" in cmap:
-        colorlist = [
-            "#474747",
-            "#FF2C00",
-            "#0C5DA5",
-            "#845B97",
-            "#58BBCC",
-            "#FF9500",
-            "#D57DBE",
-        ]
-    else:
-        colorlist = cmap2hex(cmap)
-    if "st" in by.lower() or "be" in by.lower():
-        # cycle it
-        clist = cycle2list(colorlist, n=n)
-    if "l" in by.lower() or "p" in by.lower():
-        clist = []
-        [
-            clist.append(colorlist[i])
-            for i in [int(i) for i in np.linspace(0, len(colorlist) - 1, n)]
-        ]
 
+
+# example usage:
+# dir_lib("seaborn")
 
     """
     # n = 7
@@ -2222,7 +2650,18 @@ def get_color(n=1, cmap="auto", by="start"):
 
 
 class FileInfo:
-    def __init__(
+    def __init__(
+        self,
+        size,
+        creation_time,
+        ctime,
+        mod_time,
+        mtime,
+        parent_dir,
+        fname,
+        kind,
+        extra_info=None,
+    ):
         self.size = size
         self.creation_time = creation_time
         self.ctime = ctime
@@ -2237,20 +2676,25 @@ class FileInfo:
         print("to show the res: 'finfo(fpath).show()'")
 
     def __repr__(self):
-        return (
+        return (
+            f"FileInfo(size={self.size} MB, creation_time='{self.creation_time}', "
+            f"ctime='{self.ctime}', mod_time='{self.mod_time}', mtime='{self.mtime}', "
+            f"parent_dir='{self.parent_dir}', fname='{self.fname}', kind='{self.kind}')"
+        )
 
     def __str__(self):
-        return (
+        return (
+            f"FileInfo:\n"
+            f"  Size: {self.size} MB\n"
+            f"  Creation Time: {self.creation_time}\n"
+            f"  CTime: {self.ctime}\n"
+            f"  Modification Time: {self.mod_time}\n"
+            f"  MTime: {self.mtime}\n"
+            f"  Parent Directory: {self.parent_dir}\n"
+            f"  File Name: {self.fname}\n"
+            f"  Kind: {self.kind}"
+        )
+
     def show(self):
         # Convert the object to a dictionary
         return {
@@ -2262,12 +2706,27 @@ class FileInfo:
             "parent_dir": self.parent_dir,
             "fname": self.fname,
             "kind": self.kind,
-            **{
+            **{
+                key: getattr(self, key)
+                for key in vars(self)
+                if key
+                not in [
+                    "size",
+                    "creation_time",
+                    "ctime",
+                    "mod_time",
+                    "mtime",
+                    "parent_dir",
+                    "fname",
+                    "kind",
+                ]
+            },
         }
 
+
 def finfo(fpath):
     fname, fmt = os.path.splitext(fpath)
-    dir_par = os.path.dirname(fpath) +
+    dir_par = os.path.dirname(fpath) + "/"
     data = {
         "size": round(os.path.getsize(fpath) / 1024 / 1024, 3),
         "creation_time": time.ctime(os.path.getctime(fpath)),
@@ -2276,12 +2735,12 @@ def finfo(fpath):
         "mtime": time.ctime(os.path.getmtime(fpath)),
         "parent_dir": dir_par,
         "fname": fname.replace(dir_par, ""),
-        "kind": fmt
+        "kind": fmt,
     }
     extra_info = {}
     if data["kind"] == ".pdf":
         extra_info = pdfinfo_from_path(fpath)
 
     return FileInfo(
         size=data["size"],
         creation_time=data["creation_time"],
@@ -2291,5 +2750,5 @@ def finfo(fpath):
         parent_dir=data["parent_dir"],
         fname=data["fname"],
         kind=data["kind"],
-        extra_info=extra_info
-    )
+        extra_info=extra_info,
+    )
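Finally, a small sketch of finfo()/FileInfo: for a PDF the extra_info dict comes from pdfinfo_from_path (which needs poppler installed), for other files it stays empty. The file name is illustrative:

    info = finfo("draft_notes.txt")
    print(info)            # multi-line __str__ summary
    print(info.size)       # size in MB
    print(info.show())     # plain dict, including any extra_info fields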