py2ls 0.1.8.0__py3-none-any.whl → 0.1.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
py2ls/ips.py CHANGED
@@ -4,23 +4,23 @@ import pandas as pd
4
4
  import json
5
5
  import matplotlib
6
6
  import matplotlib.pyplot as plt
7
- import matplotlib.ticker as tck
7
+ import matplotlib.ticker as tck
8
8
  from cycler import cycler
9
9
  from mpl_toolkits.mplot3d import Axes3D
10
10
  import seaborn as sns
11
11
 
12
- import sys, os,shutil,re, yaml,json,subprocess
12
+ import sys, os, shutil, re, yaml, json, subprocess
13
13
  import importlib.util
14
14
  import time
15
15
  from dateutil import parser
16
16
  from datetime import datetime
17
17
 
18
- from PIL import Image,ImageEnhance, ImageOps,ImageFilter
19
- from rembg import remove,new_session
18
+ from PIL import Image, ImageEnhance, ImageOps, ImageFilter
19
+ from rembg import remove, new_session
20
20
 
21
21
  import docx
22
- from fpdf import FPDF
23
- from lxml import etree
22
+ from fpdf import FPDF
23
+ from lxml import etree
24
24
  from docx import Document
25
25
  from PyPDF2 import PdfReader
26
26
  from pptx import Presentation
@@ -40,24 +40,26 @@ from tqdm import tqdm
40
40
  import mimetypes
41
41
  from pprint import pp
42
42
  from collections import Counter
43
- from fuzzywuzzy import fuzz,process
43
+ from fuzzywuzzy import fuzz, process
44
44
  from langdetect import detect
45
45
  from duckduckgo_search import DDGS
46
46
 
47
47
  from py2ls import netfinder
48
48
 
# Best-effort notebook setup: load IPython's autoreload extension and switch it
# to mode "2". get_ipython is only defined when running inside IPython/Jupyter.
try:
    get_ipython().run_line_magic("load_ext", "autoreload")
    get_ipython().run_line_magic("autoreload", "2")
except NameError:
    # Plain Python interpreter: get_ipython does not exist, nothing to configure.
    pass
54
54
 
55
+
def is_package_installed(package_name):
    """Check if a package is installed.

    Args:
        package_name: Importable module name, e.g. ``"numpy"`` or ``"os.path"``.

    Returns:
        bool: True when an import spec can be located for ``package_name``,
        False otherwise.
    """
    try:
        package_spec = importlib.util.find_spec(package_name)
    except ImportError:
        # For dotted names find_spec imports the parent package first; a missing
        # parent raises ModuleNotFoundError instead of returning None.
        return False
    except ValueError:
        # Raised when the module is already in sys.modules but its __spec__ is
        # None/unset; such a module is loaded, hence importable.
        return sys.modules.get(package_name) is not None
    return package_spec is not None
59
-
60
- def upgrade(module='py2ls'):
60
+
61
+
62
+ def upgrade(module="py2ls"):
61
63
  # try:
62
64
  # pkg_resources.get_distribution(module)
63
65
  # except pkg_resources.DistributionNotFound:
@@ -68,37 +70,45 @@ def upgrade(module='py2ls'):
68
70
  except subprocess.CalledProcessError as e:
69
71
  print(f"An error occurred while installing {module}: {e}")
70
72
  try:
71
- subprocess.check_call([sys.executable, "-m", "pip", "install", "--upgrade", module])
73
+ subprocess.check_call(
74
+ [sys.executable, "-m", "pip", "install", "--upgrade", module]
75
+ )
72
76
  except subprocess.CalledProcessError as e:
73
77
  print(f"An error occurred while upgrading py2ls: {e}")
74
78
 
75
79
 
# Default output directory: used as the default of search(dir_save=...) and read
# by echo() through the module-level global.
# NOTE(review): hard-coded to one user's home directory - confirm it exists on
# the machine this module is imported on.
dir_save = "/Users/macjianfeng/Dropbox/Downloads/"
77
81
 
78
82
 
def get_version(pkg):
    """Print the installed version of one or more distributions.

    Args:
        pkg: A distribution name, or a list/tuple/set of distribution names.
            Any other type is ignored.

    Returns:
        None. One line is printed per name: either ``version <name> == <version>``
        or ``Package '<name>' not found``.
    """
    import importlib.metadata

    def get_v(pkg_name):
        # Report a single distribution; a missing one is reported, not raised.
        try:
            version = importlib.metadata.version(pkg_name)
            print(f"version {pkg_name} == {version}")
        except importlib.metadata.PackageNotFoundError:
            print(f"Package '{pkg_name}' not found")

    if isinstance(pkg, str):
        get_v(pkg)
    elif isinstance(pkg, (list, tuple, set, frozenset)):
        # Plain loop: the calls are made for their printing side effect only.
        for pkg_ in pkg:
            get_v(pkg_)
91
- # usage:
97
+
98
+
99
+ # usage:
92
100
  # get_version(['pandas','numpy','py2ls'])
93
-
101
+
102
+
def rm_folder(folder_path, verbose=True):
    """Recursively delete ``folder_path``, never raising on failure.

    Args:
        folder_path: Directory tree to remove.
        verbose: When true, print whether the deletion succeeded or why it failed.

    Returns:
        None.
    """
    try:
        shutil.rmtree(folder_path)
    except Exception as err:
        # Best-effort removal: report the reason instead of propagating it.
        if verbose:
            print(f"Failed to delete {folder_path}. Reason: {err}")
        return
    if verbose:
        print(f"Successfully deleted {folder_path}")
111
+
102
112
 
103
113
  def fremove(path, verbose=True):
104
114
  """
@@ -111,20 +121,20 @@ def fremove(path, verbose=True):
111
121
  if os.path.isdir(path):
112
122
  shutil.rmtree(path)
113
123
  if verbose:
114
- print(f'Successfully deleted folder {path}')
124
+ print(f"Successfully deleted folder {path}")
115
125
  elif os.path.isfile(path):
116
126
  os.remove(path)
117
127
  if verbose:
118
- print(f'Successfully deleted file {path}')
128
+ print(f"Successfully deleted file {path}")
119
129
  else:
120
130
  if verbose:
121
- print(f'Path {path} does not exist')
131
+ print(f"Path {path} does not exist")
122
132
  except Exception as e:
123
133
  if verbose:
124
- print(f'Failed to delete {path}. Reason: {e}')
134
+ print(f"Failed to delete {path}. Reason: {e}")
125
135
 
126
136
 
127
- def get_cwd(verbose:bool = True):
137
+ def get_cwd(verbose: bool = True):
128
138
  """
129
139
  get_cwd: to get the current working directory
130
140
  Args:
@@ -138,26 +148,39 @@ def get_cwd(verbose:bool = True):
138
148
  # This works in an interactive environment (like a Jupyter notebook)
139
149
  script_dir = os.getcwd()
140
150
  if verbose:
141
- print("os.getcwd():", script_dir)
151
+ print("os.getcwd():", script_dir)
142
152
  return script_dir
143
153
 
def search(
    query,
    limit=5,
    kind="text",
    output="df",
    verbose=False,
    download=True,
    dir_save=dir_save,
):
    """Run a DuckDuckGo text search and optionally download the result links.

    Args:
        query: Search string passed to ``DDGS().text``.
        limit: Forwarded as ``max_results``.
        kind: Search type; only values containing "te" (text) are implemented.
        output: Currently unused; the result is always a DataFrame.
        verbose: Print the result table and download failures.
        download: When true, pass all result links to ``netfinder.downloader``.
        dir_save: Target directory forwarded to ``netfinder.downloader``.

    Returns:
        pandas.DataFrame built from the search results, with the "href" column
        renamed to "links" (presumably one row per hit - confirm against DDGS).

    Raises:
        ValueError: If ``kind`` is not a text search. Previously this path failed
            with an UnboundLocalError on ``res``.
    """
    from duckduckgo_search import DDGS

    if "te" not in kind.lower():
        raise ValueError(
            f"unsupported kind {kind!r}: only text search is implemented"
        )

    results = DDGS().text(query, max_results=limit)
    res = pd.DataFrame(results)
    res.rename(columns={"href": "links"}, inplace=True)
    if verbose:
        print(f'searching "{query}": got the results below\n{res}')
    if download:
        try:
            netfinder.downloader(
                url=res.links.tolist(), dir_save=dir_save, verbose=verbose
            )
        except Exception:
            # Downloading is best-effort; the search result is still returned.
            # (Exception, not a bare except, so Ctrl-C is not swallowed.)
            if verbose:
                print("failed link")
    return res
159
181
 
160
- def echo(*args,**kwargs):
182
+
183
+ def echo(*args, **kwargs):
161
184
  """
162
185
  query, model="gpt", verbose=True, log=True, dir_save=dir_save
163
186
  a ai chat tool
@@ -172,12 +195,12 @@ def echo(*args,**kwargs):
172
195
  str: the answer from ai
173
196
  """
174
197
  global dir_save
175
-
176
- query=None
177
- model=kwargs.get('model', 'gpt')
178
- verbose=kwargs.get('verbose', True)
179
- log=kwargs.get('log', True)
180
- dir_save=kwargs.get('dir_save', dir_save)
198
+
199
+ query = None
200
+ model = kwargs.get("model", "gpt")
201
+ verbose = kwargs.get("verbose", True)
202
+ log = kwargs.get("log", True)
203
+ dir_save = kwargs.get("dir_save", dir_save)
181
204
  for arg in args:
182
205
  if isinstance(arg, str):
183
206
  if os.path.isdir(arg):
@@ -191,15 +214,17 @@ def echo(*args,**kwargs):
191
214
  elif isinstance(arg, dict):
192
215
  verbose = arg.get("verbose", verbose)
193
216
  log = arg.get("log", log)
217
+
194
218
  def is_in_any(str_candi_short, str_full, ignore_case=True):
195
219
  if isinstance(str_candi_short, str):
196
- str_candi_short=[str_candi_short]
197
- res_bool=[]
220
+ str_candi_short = [str_candi_short]
221
+ res_bool = []
198
222
  if ignore_case:
199
- [res_bool.append(i in str_full.lower()) for i in str_candi_short ]
223
+ [res_bool.append(i in str_full.lower()) for i in str_candi_short]
200
224
  else:
201
- [res_bool.append(i in str_full) for i in str_candi_short ]
225
+ [res_bool.append(i in str_full) for i in str_candi_short]
202
226
  return any(res_bool)
227
+
203
228
  def valid_mod_name(str_fly):
204
229
  if is_in_any(str_fly, "claude-3-haiku"):
205
230
  return "claude-3-haiku"
@@ -210,49 +235,56 @@ def echo(*args,**kwargs):
210
235
  elif is_in_any(str_fly, "mixtral-8x7b"):
211
236
  return "mixtral-8x7b"
212
237
  else:
213
- print(f"not support your model{model}, supported models: 'claude','gpt(default)', 'llama','mixtral'")
214
- return "gpt-3.5" # default model
238
+ print(
239
+ f"not support your model{model}, supported models: 'claude','gpt(default)', 'llama','mixtral'"
240
+ )
241
+ return "gpt-3.5" # default model
242
+
215
243
  model_valid = valid_mod_name(model)
216
- res=DDGS().chat(query, model=model_valid)
244
+ res = DDGS().chat(query, model=model_valid)
217
245
  if verbose:
218
246
  pp(res)
219
247
  if log:
220
- dt_str=datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d_%H:%M:%S')
248
+ dt_str = datetime.fromtimestamp(time.time()).strftime("%Y-%m-%d_%H:%M:%S")
221
249
  res_ = f"\n\n####Q:{query}\n\n#####Ans:{dt_str}\n\n>{res}\n"
222
250
  if bool(os.path.basename(dir_save)):
223
251
  fpath = dir_save
224
252
  else:
225
253
  os.makedirs(dir_save, exist_ok=True)
226
254
  fpath = os.path.join(dir_save, f"log_ai.md")
227
- fupdate(fpath=fpath,content=res_)
255
+ fupdate(fpath=fpath, content=res_)
228
256
  print(f"log file:{fpath}")
229
257
  return res
230
258
 
259
+
def chat(*args, **kwargs):
    """Alias of echo(): forward every positional and keyword argument unchanged."""
    reply = echo(*args, **kwargs)
    return reply
233
262
 
263
+
def ai(*args, **kwargs):
    """Alias of echo(): forward every positional and keyword argument unchanged."""
    reply = echo(*args, **kwargs)
    return reply
236
266
 
def detect_lang(text, output="lang", verbose=True):
    """Detect the language of ``text`` using langdetect and the bundled ISO-639 table.

    Args:
        text: String to analyse; it must pass ``is_text``.
        output: If it contains "c" (case-insensitive) the matching entry of the
            table's values is returned, otherwise the matching entry of its keys.
            NOTE(review): presumably keys are language names and values are
            codes - confirm against data/lang_code_iso639.json.
        verbose: Forwarded to ``strcmp``; also gates the diagnostic path print.

    Returns:
        str: The matched table entry, or "no" when ``text`` is rejected or
        detection fails.
    """
    dir_curr_script = os.path.dirname(os.path.abspath(__file__))
    dir_lang_code = os.path.join(dir_curr_script, "data", "lang_code_iso639.json")
    if verbose:
        # Diagnostic output only; previously printed even with verbose=False.
        print(dir_curr_script, os.getcwd(), dir_lang_code)
    lang_code_iso639 = fload(dir_lang_code)
    # Parallel lists: the i-th key belongs to the i-th value.
    l_lang = list(lang_code_iso639.keys())
    l_code = list(lang_code_iso639.values())
    try:
        if not is_text(text):
            print(f"{text} is not supported")
            return "no"
        code_detect = detect(text)
        # strcmp returns (best_match, index); the index selects from either list.
        idx = strcmp(code_detect, l_code, verbose=verbose)[1]
        if "c" in output.lower():  # return code
            return l_code[idx]
        return l_lang[idx]
    except Exception:
        # Any detection/matching failure maps to "no"; Exception (not a bare
        # except) lets KeyboardInterrupt/SystemExit propagate.
        return "no"
287
+
256
288
 
257
289
  def is_text(s):
258
290
  has_alpha = any(char.isalpha() for char in s)
@@ -260,7 +292,8 @@ def is_text(s):
260
292
  # no_special = not re.search(r'[^A-Za-z0-9\s]', s)
261
293
  return has_alpha and has_non_alpha
262
294
 
263
- def strcmp(search_term, candidates, ignore_case=True, verbose=True, scorer='WR'):
295
+
296
+ def strcmp(search_term, candidates, ignore_case=True, verbose=True, scorer="WR"):
264
297
  """
265
298
  Compares a search term with a list of candidate strings and finds the best match based on similarity score.
266
299
 
@@ -273,21 +306,23 @@ def strcmp(search_term, candidates, ignore_case=True, verbose=True, scorer='WR')
273
306
  Returns:
274
307
  tuple: A tuple containing the best match and its index in the candidates list.
275
308
  """
309
+
276
310
  def to_lower(s, ignore_case=True):
277
- #Converts a string or list of strings to lowercase if ignore_case is True.
311
+ # Converts a string or list of strings to lowercase if ignore_case is True.
278
312
  if ignore_case:
279
313
  if isinstance(s, str):
280
314
  return s.lower()
281
315
  elif isinstance(s, list):
282
316
  return [elem.lower() for elem in s]
283
317
  return s
284
- str1_,str2_ = to_lower(search_term, ignore_case),to_lower(candidates, ignore_case)
318
+
319
+ str1_, str2_ = to_lower(search_term, ignore_case), to_lower(candidates, ignore_case)
285
320
  if isinstance(str2_, list):
286
- if 'part' in scorer.lower():
321
+ if "part" in scorer.lower():
287
322
  similarity_scores = [fuzz.partial_ratio(str1_, word) for word in str2_]
288
- elif 'W' in scorer.lower():
323
+ elif "W" in scorer.lower():
289
324
  similarity_scores = [fuzz.WRatio(str1_, word) for word in str2_]
290
- elif 'Ratio' in scorer.lower():
325
+ elif "Ratio" in scorer.lower():
291
326
  similarity_scores = [fuzz.Ratio(str1_, word) for word in str2_]
292
327
  else:
293
328
  similarity_scores = [fuzz.WRatio(str1_, word) for word in str2_]
@@ -295,11 +330,11 @@ def strcmp(search_term, candidates, ignore_case=True, verbose=True, scorer='WR')
295
330
  best_match_score = similarity_scores[best_match_index]
296
331
  else:
297
332
  best_match_index = 0
298
- if 'part' in scorer.lower():
333
+ if "part" in scorer.lower():
299
334
  best_match_score = fuzz.partial_ratio(str1_, str2_)
300
- elif 'W' in scorer.lower():
335
+ elif "W" in scorer.lower():
301
336
  best_match_score = fuzz.WRatio(str1_, str2_)
302
- elif 'Ratio' in scorer.lower():
337
+ elif "Ratio" in scorer.lower():
303
338
  best_match_score = fuzz.Ratio(str1_, str2_)
304
339
  else:
305
340
  best_match_score = fuzz.WRatio(str1_, str2_)
@@ -309,11 +344,13 @@ def strcmp(search_term, candidates, ignore_case=True, verbose=True, scorer='WR')
309
344
  print(f"建议: {best_match}")
310
345
  return candidates[best_match_index], best_match_index
311
346
 
347
+
312
348
  # Example usage
313
349
  # str1 = "plos biology"
314
350
  # str2 = ['PLoS Computational Biology', 'PLOS BIOLOGY']
315
351
  # best_match, idx = strcmp(str1, str2, ignore_case=1)
316
352
 
353
+
317
354
  def counter(list_, verbose=True):
318
355
  c = Counter(list_)
319
356
  # Print the name counts
@@ -321,14 +358,15 @@ def counter(list_, verbose=True):
321
358
  if verbose:
322
359
  print(f"{item}: {count}")
323
360
  return c
361
+
362
+
324
363
  # usage:
325
364
  # print(f"Return an iterator over elements repeating each as many times as its count:\n{sorted(c.elements())}")
326
365
  # print(f"Return a list of the n most common elements:\n{c.most_common()}")
327
366
  # print(f"Compute the sum of the counts:\n{c.total()}")
328
367
 
329
368
 
330
-
331
- def str2time(time_str, fmt='24'):
369
+ def str2time(time_str, fmt="24"):
332
370
  """
333
371
  Convert a time string into the specified format.
334
372
  Parameters:
@@ -342,42 +380,47 @@ def str2time(time_str, fmt='24'):
342
380
  %p represents AM or PM.
343
381
  - str: The converted time string.
344
382
  """
383
+
345
384
  def time_len_corr(time_str):
346
- time_str_= ssplit(time_str,by=[':'," ","digital_num"]) if ':' in time_str else None
347
- time_str_split=[]
348
- [time_str_split.append(i) for i in time_str_ if is_num(i)]
385
+ time_str_ = (
386
+ ssplit(time_str, by=[":", " ", "digital_num"]) if ":" in time_str else None
387
+ )
388
+ time_str_split = []
389
+ [time_str_split.append(i) for i in time_str_ if is_num(i)]
349
390
  if time_str_split:
350
- if len(time_str_split)==2:
351
- H,M=time_str_split
352
- time_str_full=H+":"+M+":00"
353
- elif len(time_str_split)==3:
354
- H,M,S=time_str_split
355
- time_str_full=H+":"+M+":"+S
391
+ if len(time_str_split) == 2:
392
+ H, M = time_str_split
393
+ time_str_full = H + ":" + M + ":00"
394
+ elif len(time_str_split) == 3:
395
+ H, M, S = time_str_split
396
+ time_str_full = H + ":" + M + ":" + S
356
397
  else:
357
- time_str_full=time_str_
358
- if 'am' in time_str.lower():
359
- time_str_full+=" AM"
360
- elif "pm"in time_str.lower():
361
- time_str_full +=" PM"
398
+ time_str_full = time_str_
399
+ if "am" in time_str.lower():
400
+ time_str_full += " AM"
401
+ elif "pm" in time_str.lower():
402
+ time_str_full += " PM"
362
403
  return time_str_full
363
- if '12' in fmt:
404
+
405
+ if "12" in fmt:
364
406
  fmt = "%I:%M:%S %p"
365
- elif '24' in fmt:
407
+ elif "24" in fmt:
366
408
  fmt = "%H:%M:%S"
367
409
 
368
410
  try:
369
411
  # Try to parse the time string assuming it could be in 24-hour or 12-hour format
370
- time_obj = datetime.strptime(time_len_corr(time_str), '%H:%M:%S')
412
+ time_obj = datetime.strptime(time_len_corr(time_str), "%H:%M:%S")
371
413
  except ValueError:
372
414
  try:
373
- time_obj = datetime.strptime(time_len_corr(time_str), '%I:%M:%S %p')
415
+ time_obj = datetime.strptime(time_len_corr(time_str), "%I:%M:%S %p")
374
416
  except ValueError as e:
375
417
  raise ValueError(f"Unable to parse time string: {time_str}. Error: {e}")
376
-
418
+
377
419
  # Format the time object to the desired output format
378
420
  formatted_time = time_obj.strftime(fmt)
379
421
  return formatted_time
380
422
 
423
+
381
424
  # # Example usage:
382
425
  # time_str1 = "14:30:45"
383
426
  # time_str2 = "02:30:45 PM"
@@ -388,7 +431,8 @@ def str2time(time_str, fmt='24'):
388
431
  # print(formatted_time1) # Output: 02:30:45 PM
389
432
  # print(formatted_time2) # Output: 14:30:45
390
433
 
391
- def str2date(date_str, fmt='%Y-%m-%d_%H:%M:%S'):
434
+
435
+ def str2date(date_str, fmt="%Y-%m-%d_%H:%M:%S"):
392
436
  """
393
437
  Convert a date string into the specified format.
394
438
  Parameters:
@@ -404,11 +448,14 @@ def str2date(date_str, fmt='%Y-%m-%d_%H:%M:%S'):
404
448
  # Format the date object to the desired output format
405
449
  formatted_date = date_obj.strftime(fmt)
406
450
  return formatted_date
451
+
452
+
407
453
  # str1=str2date(num2str(20240625),fmt="%a %d-%B-%Y")
408
454
  # print(str1)
409
455
  # str2=str2num(str2date(str1,fmt='%a %Y%m%d'))
410
456
  # print(str2)
411
457
 
458
+
412
459
  def str2num(s, *args):
413
460
  delimiter = None
414
461
  round_digits = None
@@ -425,11 +472,11 @@ def str2num(s, *args):
425
472
  except ValueError:
426
473
  try:
427
474
  numerized = numerize(s)
428
- num = int(numerized) if '.' not in numerized else float(numerized)
475
+ num = int(numerized) if "." not in numerized else float(numerized)
429
476
  except Exception as e:
430
477
  # Attempt to handle multiple number segments
431
478
  try:
432
- number_segments = ssplit(s,by='number_strings')
479
+ number_segments = ssplit(s, by="number_strings")
433
480
  nums = []
434
481
  for segment in number_segments:
435
482
  try:
@@ -439,7 +486,9 @@ def str2num(s, *args):
439
486
  if len(nums) == 1:
440
487
  num = nums[0]
441
488
  else:
442
- raise ValueError("Multiple number segments found, cannot determine single numeric value")
489
+ raise ValueError(
490
+ "Multiple number segments found, cannot determine single numeric value"
491
+ )
443
492
  except Exception as e:
444
493
  raise ValueError(f"Cannot convert {s} to a number: {e}")
445
494
 
@@ -454,6 +503,8 @@ def str2num(s, *args):
454
503
  return num_str
455
504
 
456
505
  return num
506
+
507
+
457
508
  # Examples
458
509
  # print(str2num("123")) # Output: 123
459
510
  # print(str2num("123.456", 2)) # Output: 123.46
@@ -495,13 +546,15 @@ def num2str(num, *args):
495
546
  num_str = "{:,}".format(int(num_str_parts[0]))
496
547
 
497
548
  return num_str
549
+
550
+
498
551
  # Examples
499
552
  # print(num2str(123),type(num2str(123))) # Output: "123"
500
553
  # print(num2str(123.456, 2),type(num2str(123.456, 2))) # Output: "123.46"
501
554
  # print(num2str(7000.125, 2),type(num2str(7000.125, 2))) # Output: "7000.13"
502
555
  # print(num2str(12345.6789, ","),type(num2str(12345.6789, ","))) # Output: "12,345.6789"
503
556
  # print(num2str(7000.00, ","),type(num2str(7000.00, ","))) # Output: "7,000.00"
504
- def sreplace(*args,**kwargs):
557
+ def sreplace(*args, **kwargs):
505
558
  """
506
559
  sreplace(text, by=None, robust=True)
507
560
  Replace specified substrings in the input text with provided replacements.
@@ -515,19 +568,19 @@ def sreplace(*args,**kwargs):
515
568
  str: The text after replacements have been made.
516
569
  """
517
570
  text = None
518
- by = kwargs.get('by', None)
519
- robust = kwargs.get('robust', True)
520
-
571
+ by = kwargs.get("by", None)
572
+ robust = kwargs.get("robust", True)
573
+
521
574
  for arg in args:
522
- if isinstance(arg,str):
523
- text=arg
524
- elif isinstance(arg,dict):
525
- by=arg
526
- elif isinstance(arg,bool):
527
- robust=arg
575
+ if isinstance(arg, str):
576
+ text = arg
577
+ elif isinstance(arg, dict):
578
+ by = arg
579
+ elif isinstance(arg, bool):
580
+ robust = arg
528
581
  else:
529
582
  Error(f"{type(arg)} is not supported")
530
-
583
+
531
584
  # Default replacements for newline and tab characters
532
585
  default_replacements = {
533
586
  "\a": "",
@@ -558,47 +611,76 @@ def sreplace(*args,**kwargs):
558
611
  for k, v in by.items():
559
612
  text = text.replace(k, v)
560
613
  return text
614
+
615
+
561
616
  # usage:
562
617
  # sreplace(text, by=dict(old_str='new_str'), robust=True)
563
618
 
def paper_size(paper_type_str="a4"):
    """Return the ``[first, second]`` dimensions stored for a named paper format.

    Matching is case-insensitive and by substring: the last table entry whose
    name contains ``paper_type_str`` wins (e.g. "passport" -> "passport single").
    An empty or unknown name falls back to "a4".

    Args:
        paper_type_str: Format name such as "a4", "letter" or "business card".

    Returns:
        list: A new two-element list of numbers. Presumably millimetres
        (callers pass it through an mm-to-point conversion) - confirm.
    """
    sizes = {
        "a0": [841, 1189],
        "a1": [594, 841],
        "a2": [420, 594],
        "a3": [297, 420],
        "a4": [210, 297],
        "a5": [148, 210],
        "a6": [105, 148],
        "a7": [74, 105],
        "b0": [1028, 1456],
        "b1": [707, 1000],
        "b2": [514, 728],
        "b3": [364, 514],
        "b4": [257, 364],
        "b5": [182, 257],
        "b6": [128, 182],
        "letter": [215.9, 279.4],
        "legal": [215.9, 355.6],
        "business card": [85.6, 53.98],
        "photo china passport": [33, 48],
        "passport single": [125, 88],
        "visa": [105, 74],
        "sim": [25, 15],
    }
    wanted = str(paper_type_str).strip().lower()
    # Initialise before the loop: without this an unknown name raised
    # UnboundLocalError and the default below was unreachable.
    paper_type = None
    if wanted:  # "" is a substring of every name, so treat it as "no request"
        for name in sizes:
            if wanted in name:
                paper_type = name
    if paper_type is None:
        paper_type = "a4"  # default
    # Copy so callers cannot mutate the table entry.
    return list(sizes[paper_type])
575
653
 
654
+
def docx2pdf(dir_docx, dir_pdf=None):
    """Convert a .docx file to PDF by delegating to ``convert``.

    Args:
        dir_docx: Source path handed to ``convert``.
        dir_pdf: Optional destination; when falsy, ``convert`` is called with the
            source only. NOTE(review): ``convert`` is imported elsewhere in the
            file - presumably docx2pdf.convert; confirm.

    Returns:
        None.
    """
    # Only pass the destination when the caller supplied one.
    convert_args = (dir_docx, dir_pdf) if dir_pdf else (dir_docx,)
    convert(*convert_args)
581
660
 
582
- def img2pdf(dir_img, kind="jpeg",page=None, dir_save=None, page_size="a4", dpi=300):
661
+
662
+ def img2pdf(dir_img, kind="jpeg", page=None, dir_save=None, page_size="a4", dpi=300):
583
663
  def mm_to_point(size):
584
- return (image2pdf.mm_to_pt(size[0]),image2pdf.mm_to_pt(size[1]))
664
+ return (image2pdf.mm_to_pt(size[0]), image2pdf.mm_to_pt(size[1]))
665
+
585
666
  def set_dpi(x):
586
- dpix=dpiy=x
667
+ dpix = dpiy = x
587
668
  return image2pdf.get_fixed_dpi_layout_fun((dpix, dpiy))
669
+
588
670
  if not kind.startswith("."):
589
- kind="."+kind
671
+ kind = "." + kind
590
672
  if dir_save is None:
591
- dir_save = dir_img.replace(kind,'.pdf')
592
- imgs = []
673
+ dir_save = dir_img.replace(kind, ".pdf")
674
+ imgs = []
593
675
  if os.path.isdir(dir_img):
594
676
  if not dir_save.endswith(".pdf"):
595
- dir_save+="#merged_img2pdf.pdf"
677
+ dir_save += "#merged_img2pdf.pdf"
596
678
  if page is None:
597
- select_range = listdir(dir_img,kind=kind).fpath
679
+ select_range = listdir(dir_img, kind=kind).fpath
598
680
  else:
599
- if not isinstance(page, (np.ndarray,list,range)):
600
- page=[page]
601
- select_range = listdir(dir_img,kind=kind)['fpath'][page]
681
+ if not isinstance(page, (np.ndarray, list, range)):
682
+ page = [page]
683
+ select_range = listdir(dir_img, kind=kind)["fpath"][page]
602
684
  for fname in select_range:
603
685
  if not fname.endswith(kind):
604
686
  continue
@@ -607,24 +689,27 @@ def img2pdf(dir_img, kind="jpeg",page=None, dir_save=None, page_size="a4", dpi=3
607
689
  continue
608
690
  imgs.append(path)
609
691
  else:
610
- imgs=[os.path.isdir(dir_img),dir_img]
692
+ imgs = [os.path.isdir(dir_img), dir_img]
611
693
 
612
694
  if page_size:
613
- if isinstance(page_size,str):
614
- pdf_in_mm=mm_to_point(paper_size(page_size))
695
+ if isinstance(page_size, str):
696
+ pdf_in_mm = mm_to_point(paper_size(page_size))
615
697
  else:
616
698
  print("default: page_size = (210,297)")
617
- pdf_in_mm=mm_to_point(page_size)
699
+ pdf_in_mm = mm_to_point(page_size)
618
700
  print(f"page size was set to {page_size}")
619
- p_size= image2pdf.get_layout_fun(pdf_in_mm)
701
+ p_size = image2pdf.get_layout_fun(pdf_in_mm)
620
702
  else:
621
703
  p_size = set_dpi(dpi)
622
- with open(dir_save,"wb") as f:
704
+ with open(dir_save, "wb") as f:
623
705
  f.write(image2pdf.convert(imgs, layout_fun=p_size))
706
+
707
+
624
708
  # usage:
625
709
  # dir_img="/Users/macjianfeng/Dropbox/00-Personal/2015-History/2012-2015_兰州大学/120901-大学课件/生物统计学 陆卫/复习题/"
626
710
  # img2pdf(dir_img,kind='tif', page=range(3,7,2))
627
711
 
712
+
628
713
  def pdf2ppt(dir_pdf, dir_ppt):
629
714
  prs = Presentation()
630
715
 
@@ -639,21 +724,26 @@ def pdf2ppt(dir_pdf, dir_ppt):
639
724
  text = page.extract_text()
640
725
 
641
726
  # Add a slide for each page's content
642
- slide_layout = prs.slide_layouts[5] # Use slide layout that suits your needs
727
+ slide_layout = prs.slide_layouts[
728
+ 5
729
+ ] # Use slide layout that suits your needs
643
730
  slide = prs.slides.add_slide(slide_layout)
644
731
  slide.shapes.title.text = f"Page {page_num + 1}"
645
- slide.shapes.add_textbox(Inches(1), Inches(1.5), Inches(8), Inches(5)).text = text
732
+ slide.shapes.add_textbox(
733
+ Inches(1), Inches(1.5), Inches(8), Inches(5)
734
+ ).text = text
646
735
 
647
736
  # Save the PowerPoint presentation
648
737
  prs.save(dir_ppt)
649
738
  print(f"Conversion from {dir_pdf} to {dir_ppt} complete.")
650
739
 
651
740
 
652
- def ssplit(text, by="space", verbose=False,strict=False, **kws):
741
+ def ssplit(text, by="space", verbose=False, strict=False, **kws):
653
742
  if isinstance(text, list):
654
- nested_list= [ssplit(i,by=by,verbose=verbose,**kws) for i in text]
743
+ nested_list = [ssplit(i, by=by, verbose=verbose, **kws) for i in text]
655
744
  flat_list = [item for sublist in nested_list for item in sublist]
656
745
  return flat_list
746
+
657
747
  def split_by_word_length(text, length):
658
748
  return [word for word in text.split() if len(word) == length]
659
749
 
@@ -677,10 +767,10 @@ def ssplit(text, by="space", verbose=False,strict=False, **kws):
677
767
  return split_text
678
768
 
679
769
  def split_by_regex_lookahead(text, pattern):
680
- return re.split(f'(?<={pattern})', text)
681
-
770
+ return re.split(f"(?<={pattern})", text)
771
+
682
772
  def split_by_regex_end(text, pattern):
683
- return re.split(f'(?={pattern})', text)
773
+ return re.split(f"(?={pattern})", text)
684
774
 
685
775
  # def split_by_sentence_endings(text):
686
776
  # return re.split(r"(?<=[.!?])", text)
@@ -688,24 +778,27 @@ def ssplit(text, by="space", verbose=False,strict=False, **kws):
688
778
  # return re.split(r"([^\x00-\x7F\w\s,.!?:\"'()\-]+)", text)
689
779
  # return re.split(r"[^\x00-\x7F]+", text)
690
780
  return re.split(r"([^\x00-\x7F]+)", text)
781
+
691
782
  def split_by_consecutive_non_alphanumeric(text):
692
783
  return re.split(r"\W+", text)
693
784
 
694
785
  def split_by_fixed_length_chunks(text, length):
695
786
  return [text[i : i + length] for i in range(0, len(text), length)]
696
- def split_by_sent_num(text,n=10):
787
+
788
+ def split_by_sent_num(text, n=10):
697
789
  # split text into sentences
698
- text_split_by_sent=sent_tokenize(text)
699
- cut_loc_array=np.arange(0,len(text_split_by_sent),n)
700
- if cut_loc_array[-1]!=len(text_split_by_sent):
701
- cut_loc=np.append(cut_loc_array,len(text_split_by_sent))
790
+ text_split_by_sent = sent_tokenize(text)
791
+ cut_loc_array = np.arange(0, len(text_split_by_sent), n)
792
+ if cut_loc_array[-1] != len(text_split_by_sent):
793
+ cut_loc = np.append(cut_loc_array, len(text_split_by_sent))
702
794
  else:
703
795
  cut_loc = cut_loc_array
704
796
  # get text in section (e.g., every 10 sentences)
705
- text_section=[]
706
- for i,j in pairwise(cut_loc):
797
+ text_section = []
798
+ for i, j in pairwise(cut_loc):
707
799
  text_section.append(text_split_by_sent[i:j])
708
800
  return text_section
801
+
709
802
  def split_general(text, by, verbose=False, ignore_case=False):
710
803
  if ignore_case:
711
804
  if verbose:
@@ -717,8 +810,10 @@ def ssplit(text, by="space", verbose=False,strict=False, **kws):
717
810
  if verbose:
718
811
  print(f"used {by} to split, ignore_case=False")
719
812
  return text.split(by)
813
+
720
814
  def reg_split(text, pattern):
721
815
  return re.split(pattern, text)
816
+
722
817
  if ("sp" in by or "white" in by) and not strict:
723
818
  if verbose:
724
819
  print(f"splited by space")
@@ -735,14 +830,20 @@ def ssplit(text, by="space", verbose=False,strict=False, **kws):
735
830
  if verbose:
736
831
  print(f"split_by_multiple_delimiters: ['|','&']")
737
832
  return split_by_multiple_delimiters(text, by)
738
- elif all([("digi" in by or "num" in by),not 'sent' in by, not 'str' in by]) and not strict:
833
+ elif (
834
+ all([("digi" in by or "num" in by), not "sent" in by, not "str" in by])
835
+ and not strict
836
+ ):
739
837
  if verbose:
740
838
  print(f"splited by digital (numbers)")
741
839
  return re.split(r"(\d+)", text)
742
- elif all([("digi" in by or "num" in by), 'str' in by]) and not strict:
840
+ elif all([("digi" in by or "num" in by), "str" in by]) and not strict:
743
841
  if verbose:
744
842
  print(f"Splitting by (number strings)")
745
- pattern = re.compile(r'\b((?:one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty|thirty|forty|fifty|sixty|seventy|eighty|ninety|hundred|thousand|million|billion|trillion|and|[\d,]+(?:\.\d+)?)(?:[-\s]?(?:one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty|thirty|forty|fifty|sixty|seventy|eighty|ninety|hundred|thousand|million|billion|trillion|and|[\d,]+(?:\.\d+)?))*)\b', re.IGNORECASE)
843
+ pattern = re.compile(
844
+ r"\b((?:one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty|thirty|forty|fifty|sixty|seventy|eighty|ninety|hundred|thousand|million|billion|trillion|and|[\d,]+(?:\.\d+)?)(?:[-\s]?(?:one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty|thirty|forty|fifty|sixty|seventy|eighty|ninety|hundred|thousand|million|billion|trillion|and|[\d,]+(?:\.\d+)?))*)\b",
845
+ re.IGNORECASE,
846
+ )
746
847
  return re.split(pattern, text)
747
848
  elif ("pun" in by) and not strict:
748
849
  if verbose:
@@ -760,12 +861,12 @@ def ssplit(text, by="space", verbose=False,strict=False, **kws):
760
861
  if verbose:
761
862
  print(f"splited by word")
762
863
  return word_tokenize(text)
763
- elif ("sen" in by and not 'num' in by) and not strict:
864
+ elif ("sen" in by and not "num" in by) and not strict:
764
865
  if verbose:
765
866
  print(f"splited by sentence")
766
867
  return sent_tokenize(text)
767
- elif ('sen' in by and 'num' in by) and not strict:
768
- return split_by_sent_num(text,**kws)
868
+ elif ("sen" in by and "num" in by) and not strict:
869
+ return split_by_sent_num(text, **kws)
769
870
  elif ("cha" in by) and not strict:
770
871
  if verbose:
771
872
  print(f"splited by chracters")
@@ -803,32 +904,32 @@ def ssplit(text, by="space", verbose=False,strict=False, **kws):
803
904
  if verbose:
804
905
  print(f"splited by customed, re; => {by}")
805
906
  return reg_split(text, **kws)
806
- elif ('lang' in by or 'eng' in by) and not strict:
907
+ elif ("lang" in by or "eng" in by) and not strict:
807
908
  return split_non_ascii(text)
808
909
  else:
809
910
  return split_general(text, by, verbose=verbose, **kws)
810
911
 
811
912
 
812
- def pdf2img(dir_pdf, dir_save=None, page=None, kind="png",verbose=True, **kws):
913
+ def pdf2img(dir_pdf, dir_save=None, page=None, kind="png", verbose=True, **kws):
813
914
  df_dir_img_single_page = pd.DataFrame()
814
915
  dir_single_page = []
815
916
  if verbose:
816
917
  pp(pdfinfo_from_path(dir_pdf))
817
918
  if isinstance(page, tuple) and page:
818
919
  page = list(page)
819
- if isinstance(page,int):
820
- page=[page]
920
+ if isinstance(page, int):
921
+ page = [page]
821
922
  if page is None:
822
923
  page = [pdfinfo_from_path(dir_pdf)["Pages"]]
823
- if len(page)==1 and page != pdfinfo_from_path(dir_pdf)["Pages"]:
824
- page=[page[0], page[0]]
924
+ if len(page) == 1 and page != pdfinfo_from_path(dir_pdf)["Pages"]:
925
+ page = [page[0], page[0]]
825
926
  else:
826
- page=[1, page[0]]
927
+ page = [1, page[0]]
827
928
  pages = convert_from_path(dir_pdf, first_page=page[0], last_page=page[1], **kws)
828
929
  if dir_save is None:
829
930
  dir_save = newfolder(dirname(dir_pdf), basename(dir_pdf).split(".")[0] + "_img")
830
931
  for i, page in enumerate(pages):
831
- if verbose:
932
+ if verbose:
832
933
  print(f"processing page: {i+1}")
833
934
  if i < 9:
834
935
  dir_img_each_page = dir_save + f"page_0{i+1}.png"
@@ -839,6 +940,7 @@ def pdf2img(dir_pdf, dir_save=None, page=None, kind="png",verbose=True, **kws):
839
940
  df_dir_img_single_page["fpath"] = dir_single_page
840
941
  return df_dir_img_single_page
841
942
 
943
+
842
944
  # dir_pdf = "/Users/macjianfeng/Dropbox/github/python/240308_Python Data Science Handbook.pdf"
843
945
  # df_page = pdf2img(dir_pdf, page=[1, 5],dpi=300)
844
946
  def get_encoding(fpath, alternative_encodings=None, verbose=False):
@@ -855,14 +957,37 @@ def get_encoding(fpath, alternative_encodings=None, verbose=False):
855
957
  """
856
958
  if alternative_encodings is None:
857
959
  alternative_encodings = [
858
- 'utf-8', 'latin1', 'windows-1252', 'iso-8859-1',
859
- 'iso-8859-2', 'iso-8859-3', 'iso-8859-4', 'iso-8859-5',
860
- 'iso-8859-6', 'iso-8859-7', 'iso-8859-8', 'iso-8859-9',
861
- 'windows-1250', 'windows-1251', 'windows-1253', 'windows-1254',
862
- 'windows-1255', 'windows-1256', 'windows-1257', 'windows-1258',
863
- 'big5', 'gb18030', 'shift_jis', 'euc_jp', 'koi8_r',
864
- 'mac_roman', 'mac_central_europe', 'mac_greek', 'mac_cyrillic',
865
- 'mac_arabic', 'mac_hebrew'
960
+ "utf-8",
961
+ "latin1",
962
+ "windows-1252",
963
+ "iso-8859-1",
964
+ "iso-8859-2",
965
+ "iso-8859-3",
966
+ "iso-8859-4",
967
+ "iso-8859-5",
968
+ "iso-8859-6",
969
+ "iso-8859-7",
970
+ "iso-8859-8",
971
+ "iso-8859-9",
972
+ "windows-1250",
973
+ "windows-1251",
974
+ "windows-1253",
975
+ "windows-1254",
976
+ "windows-1255",
977
+ "windows-1256",
978
+ "windows-1257",
979
+ "windows-1258",
980
+ "big5",
981
+ "gb18030",
982
+ "shift_jis",
983
+ "euc_jp",
984
+ "koi8_r",
985
+ "mac_roman",
986
+ "mac_central_europe",
987
+ "mac_greek",
988
+ "mac_cyrillic",
989
+ "mac_arabic",
990
+ "mac_hebrew",
866
991
  ]
867
992
 
868
993
  if not os.path.isfile(fpath):
@@ -870,7 +995,7 @@ def get_encoding(fpath, alternative_encodings=None, verbose=False):
870
995
 
871
996
  for enc in alternative_encodings:
872
997
  try:
873
- with open(fpath, mode='r', encoding=enc) as file:
998
+ with open(fpath, mode="r", encoding=enc) as file:
874
999
  file.read() # Try to read the file
875
1000
  if verbose:
876
1001
  print(f"Successfully detected encoding: {enc}")
@@ -895,6 +1020,7 @@ def fload(fpath, kind=None, **kwargs):
895
1020
  Returns:
896
1021
  content: The content loaded from the file.
897
1022
  """
1023
+
898
1024
  def load_txt_md(fpath):
899
1025
  with open(fpath, "r") as file:
900
1026
  content = file.read()
@@ -920,7 +1046,7 @@ def fload(fpath, kind=None, **kwargs):
920
1046
  root = tree.getroot()
921
1047
  return etree.tostring(root, pretty_print=True).decode()
922
1048
 
923
- def load_csv(fpath, engine='pyarrow',**kwargs):
1049
+ def load_csv(fpath, engine="pyarrow", **kwargs):
924
1050
  print(f"engine={engine}")
925
1051
  df = pd.read_csv(fpath, engine=engine, **kwargs)
926
1052
  return df
@@ -928,35 +1054,36 @@ def fload(fpath, kind=None, **kwargs):
928
1054
  def load_xlsx(fpath, **kwargs):
929
1055
  df = pd.read_excel(fpath, **kwargs)
930
1056
  return df
931
- def load_ipynb(fpath,**kwargs):
932
- as_version=kwargs.get("as_version",4)
1057
+
1058
+ def load_ipynb(fpath, **kwargs):
1059
+ as_version = kwargs.get("as_version", 4)
933
1060
  with open(fpath, "r") as file:
934
1061
  nb = nbformat.read(file, as_version=as_version)
935
1062
  md_exporter = MarkdownExporter()
936
1063
  md_body, _ = md_exporter.from_notebook_node(nb)
937
1064
  return md_body
938
-
939
- def load_pdf(fpath, page='all', verbose=False, **kwargs):
1065
+
1066
+ def load_pdf(fpath, page="all", verbose=False, **kwargs):
940
1067
  """
941
- Parameters:
942
- fpath: The path to the PDF file to be loaded.
943
- page (optional):
944
- Specifies which page or pages to extract text from. By default, it's set to "all", which means text from all
945
- pages will be returned. It can also be an integer to specify a single page number or a list of integers to
946
- specify multiple pages.
947
- verbose (optional):
948
- If True, prints the total number of pages processed.
949
- Functionality:
950
- It initializes an empty dictionary text_dict to store page numbers as keys and their corresponding text as values.
951
- It iterates through each page of the PDF file using a for loop.
952
- For each page, it extracts the text using PyPDF2's extract_text() method and stores it in text_dict with the page number incremented by 1 as the key.
953
- If the page parameter is an integer, it converts it into a list containing that single page number to ensure consistency in handling.
954
- If the page parameter is a NumPy array, it converts it to a list using the tolist() method to ensure compatibility with list operations.
955
- If verbose is True, it prints the total number of pages processed.
956
- If page is a list, it combines the text of the specified pages into a single string combined_text and returns it.
957
- If page is set to "all", it returns the entire text_dict containing text of all pages.
958
- If page is an integer, it returns the text of the specified page number.
959
- If the specified page is not found, it returns the string "Page is not found".
1068
+ Parameters:
1069
+ fpath: The path to the PDF file to be loaded.
1070
+ page (optional):
1071
+ Specifies which page or pages to extract text from. By default, it's set to "all", which means text from all
1072
+ pages will be returned. It can also be an integer to specify a single page number or a list of integers to
1073
+ specify multiple pages.
1074
+ verbose (optional):
1075
+ If True, prints the total number of pages processed.
1076
+ Functionality:
1077
+ It initializes an empty dictionary text_dict to store page numbers as keys and their corresponding text as values.
1078
+ It iterates through each page of the PDF file using a for loop.
1079
+ For each page, it extracts the text using PyPDF2's extract_text() method and stores it in text_dict with the page number incremented by 1 as the key.
1080
+ If the page parameter is an integer, it converts it into a list containing that single page number to ensure consistency in handling.
1081
+ If the page parameter is a NumPy array, it converts it to a list using the tolist() method to ensure compatibility with list operations.
1082
+ If verbose is True, it prints the total number of pages processed.
1083
+ If page is a list, it combines the text of the specified pages into a single string combined_text and returns it.
1084
+ If page is set to "all", it returns the entire text_dict containing text of all pages.
1085
+ If page is an integer, it returns the text of the specified page number.
1086
+ If the specified page is not found, it returns the string "Page is not found".
960
1087
  """
961
1088
  text_dict = {}
962
1089
  with open(fpath, "rb") as file:
@@ -989,18 +1116,52 @@ def fload(fpath, kind=None, **kwargs):
989
1116
  def load_docx(fpath):
990
1117
  doc = Document(fpath)
991
1118
  content = [para.text for para in doc.paragraphs]
992
- return content
1119
+ return content
993
1120
 
994
1121
  if kind is None:
995
1122
  _, kind = os.path.splitext(fpath)
996
1123
  kind = kind.lower()
997
1124
 
998
- kind = kind.lstrip('.').lower()
999
- img_types=[ 'bmp','eps', 'gif', 'icns', 'ico', 'im', 'jpg','jpeg', 'jpeg2000','msp', 'pcx', 'png', 'ppm', 'sgi', 'spider', 'tga','tiff','webp',"json"]
1000
- doc_types = ["docx", "txt", "md", "html", "json", "yaml", "xml", "csv", "xlsx", "pdf","ipynb"]
1125
+ kind = kind.lstrip(".").lower()
1126
+ img_types = [
1127
+ "bmp",
1128
+ "eps",
1129
+ "gif",
1130
+ "icns",
1131
+ "ico",
1132
+ "im",
1133
+ "jpg",
1134
+ "jpeg",
1135
+ "jpeg2000",
1136
+ "msp",
1137
+ "pcx",
1138
+ "png",
1139
+ "ppm",
1140
+ "sgi",
1141
+ "spider",
1142
+ "tga",
1143
+ "tiff",
1144
+ "webp",
1145
+ "json",
1146
+ ]
1147
+ doc_types = [
1148
+ "docx",
1149
+ "txt",
1150
+ "md",
1151
+ "html",
1152
+ "json",
1153
+ "yaml",
1154
+ "xml",
1155
+ "csv",
1156
+ "xlsx",
1157
+ "pdf",
1158
+ "ipynb",
1159
+ ]
1001
1160
  supported_types = [*doc_types, *img_types]
1002
1161
  if kind not in supported_types:
1003
- raise ValueError(f"Error:\n{kind} is not in the supported list {supported_types}")
1162
+ raise ValueError(
1163
+ f"Error:\n{kind} is not in the supported list {supported_types}"
1164
+ )
1004
1165
  if kind == "docx":
1005
1166
  return load_docx(fpath)
1006
1167
  elif kind == "txt" or kind == "md":
@@ -1024,9 +1185,12 @@ def fload(fpath, kind=None, **kwargs):
1024
1185
  return load_pdf(fpath, **kwargs)
1025
1186
  elif kind.lower() in img_types:
1026
1187
  print(f'Image ".{kind}" is loaded.')
1027
- return load_img(fpath)
1188
+ return load_img(fpath)
1028
1189
  else:
1029
- raise ValueError(f"Error:\n{kind} is not in the supported list {supported_types}")
1190
+ raise ValueError(
1191
+ f"Error:\n{kind} is not in the supported list {supported_types}"
1192
+ )
1193
+
1030
1194
 
1031
1195
  # Example usage
1032
1196
  # txt_content = fload('sample.txt')
@@ -1039,6 +1203,7 @@ def fload(fpath, kind=None, **kwargs):
1039
1203
  # xlsx_content = fload('sample.xlsx')
1040
1204
  # docx_content = fload('sample.docx')
1041
1205
 
1206
+
1042
1207
  def fupdate(fpath, content=None):
1043
1208
  """
1044
1209
  Update a file by adding new content at the top and moving the old content to the bottom.
@@ -1055,34 +1220,37 @@ def fupdate(fpath, content=None):
1055
1220
  """
1056
1221
  content = content or ""
1057
1222
  if os.path.exists(fpath):
1058
- with open(fpath, 'r') as file:
1223
+ with open(fpath, "r") as file:
1059
1224
  old_content = file.read()
1060
1225
  else:
1061
- old_content = ''
1062
-
1063
- with open(fpath, 'w') as file:
1226
+ old_content = ""
1227
+
1228
+ with open(fpath, "w") as file:
1064
1229
  file.write(content)
1065
1230
  file.write(old_content)
1231
+
1232
+
1066
1233
  def fappend(fpath, content=None):
1067
1234
  """
1068
1235
  append new content at the end.
1069
1236
  """
1070
1237
  content = content or ""
1071
1238
  if os.path.exists(fpath):
1072
- with open(fpath, 'r') as file:
1239
+ with open(fpath, "r") as file:
1073
1240
  old_content = file.read()
1074
1241
  else:
1075
- old_content = ''
1076
-
1077
- with open(fpath, 'w') as file:
1242
+ old_content = ""
1243
+
1244
+ with open(fpath, "w") as file:
1078
1245
  file.write(old_content)
1079
1246
  file.write(content)
1080
-
1247
+
1248
+
1081
1249
  def fsave(
1082
1250
  fpath,
1083
1251
  content,
1084
- mode='w',
1085
- how ='overwrite',
1252
+ mode="w",
1253
+ how="overwrite",
1086
1254
  kind=None,
1087
1255
  font_name="Times",
1088
1256
  font_size=10,
@@ -1102,16 +1270,16 @@ def fsave(
1102
1270
  Returns:
1103
1271
  None
1104
1272
  """
1105
- def save_content(fpath, content, mode=mode, how='overwrite'):
1106
- if 'wri' in how.lower():
1107
- with open(fpath, mode, encoding='utf-8') as file:
1273
+
1274
+ def save_content(fpath, content, mode=mode, how="overwrite"):
1275
+ if "wri" in how.lower():
1276
+ with open(fpath, mode, encoding="utf-8") as file:
1108
1277
  file.write(content)
1109
- elif 'upd' in how.lower():
1278
+ elif "upd" in how.lower():
1110
1279
  fupdate(fpath, content=content)
1111
- elif 'app' in how.lower():
1280
+ elif "app" in how.lower():
1112
1281
  fappend(fpath, content=content)
1113
1282
 
1114
-
1115
1283
  def save_docx(fpath, content, font_name, font_size, spacing):
1116
1284
  if isinstance(content, str):
1117
1285
  content = content.split(". ")
@@ -1126,45 +1294,40 @@ def fsave(
1126
1294
  paragraph.space_after = docx.shared.Pt(spacing)
1127
1295
  doc.save(fpath)
1128
1296
 
1129
-
1130
- def save_txt_md(fpath, content, sep="\n",mode='w'):
1131
- # Ensure content is a single string
1297
+ def save_txt_md(fpath, content, sep="\n", mode="w"):
1298
+ # Ensure content is a single string
1132
1299
  if isinstance(content, list):
1133
1300
  content = sep.join(content)
1134
- save_content(fpath, sep.join(content),mode)
1301
+ save_content(fpath, sep.join(content), mode)
1135
1302
 
1136
-
1137
- def save_html(fpath, content, font_name, font_size,mode='w'):
1303
+ def save_html(fpath, content, font_name, font_size, mode="w"):
1138
1304
  html_content = "<html><body>"
1139
1305
  for paragraph_text in content:
1140
1306
  html_content += f'<p style="font-family:{font_name}; font-size:{font_size}px;">{paragraph_text}</p>'
1141
1307
  html_content += "</body></html>"
1142
- save_content(fpath, html_content,mode)
1143
-
1308
+ save_content(fpath, html_content, mode)
1144
1309
 
1145
1310
  def save_pdf(fpath, content, font_name, font_size):
1146
1311
  pdf = FPDF()
1147
1312
  pdf.add_page()
1148
1313
  # pdf.add_font('Arial','',r'/System/Library/Fonts/Supplemental/Arial.ttf',uni=True)
1149
- pdf.set_font(font_name, '',font_size)
1314
+ pdf.set_font(font_name, "", font_size)
1150
1315
  for paragraph_text in content:
1151
1316
  pdf.multi_cell(0, 10, paragraph_text)
1152
- pdf.ln(h = '')
1153
- pdf.output(fpath,'F')
1154
-
1317
+ pdf.ln(h="")
1318
+ pdf.output(fpath, "F")
1155
1319
 
1156
1320
  def save_csv(fpath, data, **kwargs):
1157
1321
  df = pd.DataFrame(data)
1158
1322
  df.to_csv(fpath, **kwargs)
1159
1323
 
1160
-
1161
1324
  def save_xlsx(fpath, data, **kwargs):
1162
1325
  df = pd.DataFrame(data)
1163
1326
  df.to_excel(fpath, **kwargs)
1164
1327
 
1165
- def save_ipynb(fpath,data,**kwargs):
1328
+ def save_ipynb(fpath, data, **kwargs):
1166
1329
  # Split the content by code fences to distinguish between code and markdown
1167
- parts = data.split('```')
1330
+ parts = data.split("```")
1168
1331
  cells = []
1169
1332
 
1170
1333
  for i, part in enumerate(parts):
@@ -1176,31 +1339,32 @@ def fsave(
1176
1339
  cells.append(nbformat.v4.new_code_cell(part.strip()))
1177
1340
  # Create a new notebook
1178
1341
  nb = nbformat.v4.new_notebook()
1179
- nb['cells'] = cells
1342
+ nb["cells"] = cells
1180
1343
  # Write the notebook to a file
1181
- with open(fpath, 'w', encoding='utf-8') as ipynb_file:
1344
+ with open(fpath, "w", encoding="utf-8") as ipynb_file:
1182
1345
  nbformat.write(nb, ipynb_file)
1183
-
1346
+
1184
1347
  # def save_json(fpath, data, **kwargs):
1185
1348
  # with open(fpath, "w") as file:
1186
1349
  # json.dump(data, file, **kwargs)
1187
1350
 
1188
- def save_json(fpath_fname, var_dict_or_df):
1351
+ def save_json(fpath_fname, var_dict_or_df):
1189
1352
  with open(fpath_fname, "w") as f_json:
1190
1353
  # Check if var_dict_or_df is a DataFrame
1191
1354
  if isinstance(var_dict_or_df, pd.DataFrame):
1192
1355
  # Convert DataFrame to a list of dictionaries
1193
1356
  var_dict_or_df = var_dict_or_df.to_dict(orient="dict")
1194
-
1357
+
1195
1358
  # Check if var_dict_or_df is a dictionary
1196
1359
  if isinstance(var_dict_or_df, dict):
1197
1360
  # Convert NumPy arrays to lists
1198
1361
  for key, value in var_dict_or_df.items():
1199
1362
  if isinstance(value, np.ndarray):
1200
1363
  var_dict_or_df[key] = value.tolist()
1201
-
1364
+
1202
1365
  # Save the dictionary or list of dictionaries to a JSON file
1203
1366
  json.dump(var_dict_or_df, f_json, indent=4)
1367
+
1204
1368
  # # Example usage:
1205
1369
  # sets = {"title": "mse_path_ MSE"}
1206
1370
  # jsonsave("/.json", sets)
@@ -1210,7 +1374,6 @@ def fsave(
1210
1374
  with open(fpath, "w") as file:
1211
1375
  yaml.dump(data, file, **kwargs)
1212
1376
 
1213
-
1214
1377
  def save_xml(fpath, data):
1215
1378
  root = etree.Element("root")
1216
1379
  if isinstance(data, dict):
@@ -1239,18 +1402,18 @@ def fsave(
1239
1402
  "json",
1240
1403
  "xml",
1241
1404
  "yaml",
1242
- "ipynb"
1405
+ "ipynb",
1243
1406
  ]:
1244
1407
  print(
1245
1408
  f"Warning:\n{kind} is not in the supported list ['docx', 'txt', 'md', 'html', 'pdf', 'csv', 'xlsx', 'json', 'xml', 'yaml']"
1246
1409
  )
1247
1410
 
1248
- if kind == "docx" or kind=="doc":
1411
+ if kind == "docx" or kind == "doc":
1249
1412
  save_docx(fpath, content, font_name, font_size, spacing)
1250
1413
  elif kind == "txt":
1251
- save_txt_md(fpath, content, sep="",mode=mode)
1414
+ save_txt_md(fpath, content, sep="", mode=mode)
1252
1415
  elif kind == "md":
1253
- save_txt_md(fpath, content, sep="",mode=mode)
1416
+ save_txt_md(fpath, content, sep="", mode=mode)
1254
1417
  elif kind == "html":
1255
1418
  save_html(fpath, content, font_name, font_size)
1256
1419
  elif kind == "pdf":
@@ -1260,20 +1423,20 @@ def fsave(
1260
1423
  elif kind == "xlsx":
1261
1424
  save_xlsx(fpath, content, **kwargs)
1262
1425
  elif kind == "json":
1263
- save_json(fpath, content)
1426
+ save_json(fpath, content)
1264
1427
  elif kind == "xml":
1265
- save_xml(fpath, content)
1428
+ save_xml(fpath, content)
1266
1429
  elif kind == "yaml":
1267
1430
  save_yaml(fpath, content, **kwargs)
1268
1431
  elif kind == "ipynb":
1269
- save_ipynb(fpath, content, **kwargs)
1432
+ save_ipynb(fpath, content, **kwargs)
1270
1433
  else:
1271
- try:
1434
+ try:
1272
1435
  netfinder.downloader(url=content, dir_save=dirname(fpath), kind=kind)
1273
1436
  except:
1274
1437
  print(
1275
1438
  f"Error:\n{kind} is not in the supported list ['docx', 'txt', 'md', 'html', 'pdf', 'csv', 'xlsx', 'json', 'xml', 'yaml']"
1276
- )
1439
+ )
1277
1440
 
1278
1441
 
1279
1442
  # # Example usage
@@ -1294,47 +1457,61 @@ def fsave(
1294
1457
  # fsave(dir_save + "sample.yaml", yaml_content)
1295
1458
  # fsave(dir_save + "sample.xml", xml_content)
1296
1459
 
1460
+
1297
1461
  def addpath(fpath):
1298
- sys.path.insert(0,dir)
1462
+ sys.path.insert(0, dir)
1463
+
1464
+
1299
1465
  def dirname(fpath):
1300
1466
  """
1301
1467
  dirname: Extracting Directory Name from a File Path
1302
1468
  Args:
1303
- fpath (str): the file or directory path
1469
+ fpath (str): the file or directory path
1304
1470
  Returns:
1305
1471
  str: directory, without filename
1306
1472
  """
1307
- dirname_=os.path.dirname(fpath)
1308
- if not dirname_.endswith('/'):
1309
- dirname_=dirname_+"/"
1473
+ dirname_ = os.path.dirname(fpath)
1474
+ if not dirname_.endswith("/"):
1475
+ dirname_ = dirname_ + "/"
1310
1476
  return dirname_
1311
1477
 
1312
- def dir_name(fpath): # same as "dirname"
1478
+
1479
+ def dir_name(fpath): # same as "dirname"
1313
1480
  return dirname(fpath)
1481
+
1482
+
1314
1483
  def basename(fpath):
1315
1484
  """
1316
1485
  basename: # Output: file.txt
1317
1486
  Args:
1318
- fpath (str): the file or directory path
1487
+ fpath (str): the file or directory path
1319
1488
  Returns:
1320
1489
  str: # Output: file.txt
1321
1490
  """
1322
1491
  return os.path.basename(fpath)
1492
+
1493
+
1323
1494
  def flist(fpath, contains="all"):
1324
- all_files = [os.path.join(fpath, f) for f in os.listdir(fpath) if os.path.isfile(os.path.join(fpath, f))]
1495
+ all_files = [
1496
+ os.path.join(fpath, f)
1497
+ for f in os.listdir(fpath)
1498
+ if os.path.isfile(os.path.join(fpath, f))
1499
+ ]
1325
1500
  if isinstance(contains, list):
1326
1501
  filt_files = []
1327
1502
  for filter_ in contains:
1328
1503
  filt_files.extend(flist(fpath, filter_))
1329
1504
  return filt_files
1330
1505
  else:
1331
- if 'all' in contains.lower():
1506
+ if "all" in contains.lower():
1332
1507
  return all_files
1333
1508
  else:
1334
1509
  filt_files = [f for f in all_files if isa(f, contains)]
1335
1510
  return filt_files
1511
+
1512
+
1336
1513
  def sort_kind(df, by="name", ascending=True):
1337
- if df[by].dtype == 'object': # Check if the column contains string values
1514
+ if df[by].dtype == "object": # Check if the column contains string values
1338
1515
  if ascending:
1339
1516
  sorted_index = df[by].str.lower().argsort()
1340
1517
  else:
@@ -1347,7 +1524,8 @@ def sort_kind(df, by="name", ascending=True):
1347
1524
  sorted_df = df.iloc[sorted_index].reset_index(drop=True)
1348
1525
  return sorted_df
1349
1526
 
1350
- def isa(*args,**kwargs):
1527
+
1528
+ def isa(*args, **kwargs):
1351
1529
  """
1352
1530
  fpath, contains='img'
1353
1531
  containss file paths based on the specified contains.
@@ -1360,30 +1538,33 @@ def isa(*args,**kwargs):
1360
1538
  """
1361
1539
  for arg in args:
1362
1540
  if isinstance(arg, str):
1363
- if '/' in arg or '\\' in arg:
1541
+ if "/" in arg or "\\" in arg:
1364
1542
  fpath = arg
1365
1543
  else:
1366
- contains=arg
1367
- if 'img' in contains.lower() or 'image' in contains.lower():
1544
+ contains = arg
1545
+ if "img" in contains.lower() or "image" in contains.lower():
1368
1546
  return is_image(fpath)
1369
- elif 'doc' in contains.lower():
1547
+ elif "doc" in contains.lower():
1370
1548
  return is_document(fpath)
1371
- elif 'zip' in contains.lower():
1549
+ elif "zip" in contains.lower():
1372
1550
  return is_zip(fpath)
1373
- elif 'dir' in contains.lower() or ('f' in contains.lower() and 'd' in contains.lower()):
1551
+ elif "dir" in contains.lower() or (
1552
+ "f" in contains.lower() and "d" in contains.lower()
1553
+ ):
1374
1554
  return os.path.isdir(fpath)
1375
- elif 'fi' in contains.lower():#file
1555
+ elif "fi" in contains.lower(): # file
1376
1556
  return os.path.isfile(fpath)
1377
- elif 'num' in contains.lower():#file
1557
+ elif "num" in contains.lower(): # file
1378
1558
  return os.path.isfile(fpath)
1379
- elif 'text' in contains.lower() or 'txt' in contains.lower():#file
1559
+ elif "text" in contains.lower() or "txt" in contains.lower(): # file
1380
1560
  return is_text(fpath)
1381
- elif 'color' in contains.lower():#file
1561
+ elif "color" in contains.lower(): # file
1382
1562
  return is_str_color(fpath)
1383
1563
  else:
1384
1564
  print(f"{contains} was not set up correctly")
1385
1565
  return False
1386
1566
 
1567
+
1387
1568
  def listdir(
1388
1569
  rootdir,
1389
1570
  kind="folder",
@@ -1391,7 +1572,7 @@ def listdir(
1391
1572
  ascending=True,
1392
1573
  contains=None,
1393
1574
  orient="list",
1394
- output="df" # 'list','dict','records','index','series'
1575
+ output="df", # 'list','dict','records','index','series'
1395
1576
  ):
1396
1577
  if not kind.startswith("."):
1397
1578
  kind = "." + kind
@@ -1420,12 +1601,12 @@ def listdir(
1420
1601
  is_file = kind.lower() in file_extension.lower() and (
1421
1602
  os.path.isfile(item_path)
1422
1603
  )
1423
- if kind in ['.doc','.img','.zip']: #选择大的类别
1604
+ if kind in [".doc", ".img", ".zip"]: # 选择大的类别
1424
1605
  if kind != ".folder" and not isa(item_path, kind):
1425
1606
  continue
1426
- elif kind in ['.all']:
1607
+ elif kind in [".all"]:
1427
1608
  return flist(fpath, contains=contains)
1428
- else: #精确到文件的后缀
1609
+ else: # 精确到文件的后缀
1429
1610
  if not is_folder and not is_file:
1430
1611
  continue
1431
1612
  f["name"].append(filename)
@@ -1433,9 +1614,15 @@ def listdir(
1433
1614
  f["path"].append(os.path.join(os.path.dirname(item_path), item))
1434
1615
  fpath = os.path.join(os.path.dirname(item_path), item)
1435
1616
  f["size"].append(round(os.path.getsize(fpath) / 1024 / 1024, 3))
1436
- f["created_time"].append(pd.to_datetime(os.path.getctime(item_path), unit='s'))
1437
- f["modified_time"].append(pd.to_datetime(os.path.getmtime(item_path), unit='s'))
1438
- f['last_open_time'].append(pd.to_datetime(os.path.getatime(item_path), unit='s'))
1617
+ f["created_time"].append(
1618
+ pd.to_datetime(os.path.getctime(item_path), unit="s")
1619
+ )
1620
+ f["modified_time"].append(
1621
+ pd.to_datetime(os.path.getmtime(item_path), unit="s")
1622
+ )
1623
+ f["last_open_time"].append(
1624
+ pd.to_datetime(os.path.getatime(item_path), unit="s")
1625
+ )
1439
1626
  f["fname"].append(filename) # will be removed
1440
1627
  f["fpath"].append(fpath) # will be removed
1441
1628
  i += 1
@@ -1464,35 +1651,39 @@ def listdir(
1464
1651
  elif "s" in sort_by.lower() and "z" in sort_by.lower():
1465
1652
  f = sort_kind(f, by="size", ascending=ascending)
1466
1653
 
1467
- if 'df' in output:
1654
+ if "df" in output:
1468
1655
  return f
1469
1656
  else:
1470
- if 'l' in orient.lower(): # list # default
1657
+ if "l" in orient.lower(): # list # default
1471
1658
  res_output = Box(f.to_dict(orient="list"))
1472
1659
  return res_output
1473
- if 'd' in orient.lower(): # dict
1660
+ if "d" in orient.lower(): # dict
1474
1661
  return Box(f.to_dict(orient="dict"))
1475
- if 'r' in orient.lower(): # records
1662
+ if "r" in orient.lower(): # records
1476
1663
  return Box(f.to_dict(orient="records"))
1477
- if 'in' in orient.lower(): # records
1664
+ if "in" in orient.lower(): # records
1478
1665
  return Box(f.to_dict(orient="index"))
1479
- if 'se' in orient.lower(): # records
1666
+ if "se" in orient.lower(): # records
1480
1667
  return Box(f.to_dict(orient="series"))
1481
1668
 
1669
+
1482
1670
  # Example usage:
1483
1671
  # result = listdir('your_root_directory')
1484
1672
  # print(result)
1485
1673
  # df=listdir("/", contains='sss',sort_by='name',ascending=False)
1486
- # print(df.fname.to_list(),"\n",df.fpath.to_list())
1674
+ # print(df.fname.to_list(),"\n",df.fpath.to_list())
1487
1675
  def list_func(lib_name, opt="call"):
1488
1676
  if opt == "call":
1489
1677
  funcs = [func for func in dir(lib_name) if callable(getattr(lib_name, func))]
1490
1678
  else:
1491
1679
  funcs = dir(lib_name)
1492
1680
  return funcs
1681
+
1682
+
1493
1683
  def func_list(lib_name, opt="call"):
1494
1684
  return list_func(lib_name, opt=opt)
1495
1685
 
1686
+
1496
1687
  def mkdir(*args, **kwargs):
1497
1688
  """
1498
1689
  newfolder(pardir, chdir)
@@ -1503,17 +1694,17 @@ def mkdir(*args, **kwargs):
1503
1694
  Returns:
1504
1695
  mkdir, giving a option if exists_ok or not
1505
1696
  """
1506
- overwrite=kwargs.get("overwrite",False)
1697
+ overwrite = kwargs.get("overwrite", False)
1507
1698
  for arg in args:
1508
- if isinstance(arg, (str,list)):
1699
+ if isinstance(arg, (str, list)):
1509
1700
  if "/" in arg or "\\" in arg:
1510
- pardir=arg
1511
- print(f'pardir{pardir}')
1701
+ pardir = arg
1702
+ print(f"pardir{pardir}")
1512
1703
  else:
1513
1704
  chdir = arg
1514
- print(f'chdir{chdir}')
1515
- elif isinstance(arg,bool):
1516
- overwrite=arg
1705
+ print(f"chdir{chdir}")
1706
+ elif isinstance(arg, bool):
1707
+ overwrite = arg
1517
1708
  print(overwrite)
1518
1709
  else:
1519
1710
  print(f"{arg}Error: not support a {type(arg)} type")
@@ -1526,7 +1717,7 @@ def mkdir(*args, **kwargs):
1526
1717
  if isinstance(pardir, str): # Dir_parents should be 'str' type
1527
1718
  pardir = os.path.normpath(pardir)
1528
1719
  # Get the slash type: "/" or "\"
1529
- stype = '/' if '/' in pardir else '\\'
1720
+ stype = "/" if "/" in pardir else "\\"
1530
1721
  # Check if the parent directory exists and is a directory path
1531
1722
  if os.path.isdir(pardir):
1532
1723
  os.chdir(pardir) # Set current path
@@ -1538,80 +1729,83 @@ def mkdir(*args, **kwargs):
1538
1729
  # Check if the subfolder already exists
1539
1730
  child_tmp = os.path.join(pardir, folder)
1540
1731
  if not os.path.isdir(child_tmp):
1541
- os.mkdir('./' + folder)
1542
- print(f'\n {folder} was created successfully!\n')
1732
+ os.mkdir("./" + folder)
1733
+ print(f"\n {folder} was created successfully!\n")
1543
1734
  else:
1544
1735
  if overwrite:
1545
1736
  shutil.rmtree(child_tmp)
1546
- os.mkdir('./' + folder)
1547
- print(f'\n {folder} overwrite! \n')
1737
+ os.mkdir("./" + folder)
1738
+ print(f"\n {folder} overwrite! \n")
1548
1739
  else:
1549
- print(f'\n {folder} already exists! \n')
1740
+ print(f"\n {folder} already exists! \n")
1550
1741
  rootdir.append(child_tmp + stype) # Note down
1551
1742
  else:
1552
- print('\nWarning: Dir_child doesn\'t exist\n')
1743
+ print("\nWarning: Dir_child doesn't exist\n")
1553
1744
  else:
1554
- print('\nWarning: Dir_parent is not a directory path\n')
1745
+ print("\nWarning: Dir_parent is not a directory path\n")
1555
1746
  # Dir is the main output, if only one dir, then str type is inconvenient
1556
1747
  if len(rootdir) == 1:
1557
1748
  rootdir = rootdir[0]
1558
1749
  return rootdir
1559
1750
 
1560
- def figsave(*args,dpi=300):
1751
+
1752
+ def figsave(*args, dpi=300):
1561
1753
  dir_save = None
1562
- fname = None
1754
+ fname = None
1563
1755
  for arg in args:
1564
1756
  if isinstance(arg, str):
1565
- if '/' in arg or '\\' in arg:
1757
+ if "/" in arg or "\\" in arg:
1566
1758
  dir_save = arg
1567
- elif '/' not in arg and '\\' not in arg:
1759
+ elif "/" not in arg and "\\" not in arg:
1568
1760
  fname = arg
1569
1761
  # Backup original values
1570
- if '/' in dir_save:
1571
- if dir_save[-1] != '/':
1572
- dir_save = dir_save + '/'
1573
- elif '\\' in dir_save:
1574
- if dir_save[-1] != '\\':
1575
- dir_save = dir_save + '\\'
1762
+ if "/" in dir_save:
1763
+ if dir_save[-1] != "/":
1764
+ dir_save = dir_save + "/"
1765
+ elif "\\" in dir_save:
1766
+ if dir_save[-1] != "\\":
1767
+ dir_save = dir_save + "\\"
1576
1768
  else:
1577
- raise ValueError('Check the Path of dir_save Directory')
1578
- ftype = fname.split('.')[-1]
1579
- if len(fname.split('.')) == 1:
1580
- ftype = 'nofmt'
1581
- fname = dir_save + fname + '.' + ftype
1769
+ raise ValueError("Check the Path of dir_save Directory")
1770
+ ftype = fname.split(".")[-1]
1771
+ if len(fname.split(".")) == 1:
1772
+ ftype = "nofmt"
1773
+ fname = dir_save + fname + "." + ftype
1582
1774
  else:
1583
1775
  fname = dir_save + fname
1584
1776
  # Save figure based on file type
1585
- if ftype.lower() == 'eps':
1586
- plt.savefig(fname, format='eps', bbox_inches='tight')
1587
- plt.savefig(fname.replace('.eps', '.pdf'),
1588
- format='pdf', bbox_inches='tight',dpi=dpi)
1589
- elif ftype.lower() == 'nofmt': # default: both "tif" and "pdf"
1590
- fname_corr=fname.replace('nofmt','pdf')
1591
- plt.savefig(fname_corr, format='pdf', bbox_inches='tight',dpi=dpi)
1592
- fname=fname.replace('nofmt','tif')
1593
- plt.savefig(fname, format='tiff', dpi=dpi, bbox_inches='tight')
1777
+ if ftype.lower() == "eps":
1778
+ plt.savefig(fname, format="eps", bbox_inches="tight")
1779
+ plt.savefig(
1780
+ fname.replace(".eps", ".pdf"), format="pdf", bbox_inches="tight", dpi=dpi
1781
+ )
1782
+ elif ftype.lower() == "nofmt": # default: both "tif" and "pdf"
1783
+ fname_corr = fname.replace("nofmt", "pdf")
1784
+ plt.savefig(fname_corr, format="pdf", bbox_inches="tight", dpi=dpi)
1785
+ fname = fname.replace("nofmt", "tif")
1786
+ plt.savefig(fname, format="tiff", dpi=dpi, bbox_inches="tight")
1594
1787
  print(f"default saving filetype: both 'tif' and 'pdf")
1595
- elif ftype.lower() == 'pdf':
1596
- plt.savefig(fname, format='pdf', bbox_inches='tight',dpi=dpi)
1597
- elif ftype.lower() in ['jpg', 'jpeg']:
1598
- plt.savefig(fname, format='jpeg', dpi=dpi, bbox_inches='tight')
1599
- elif ftype.lower() == 'png':
1600
- plt.savefig(fname, format='png', dpi=dpi,
1601
- bbox_inches='tight', transparent=True)
1602
- elif ftype.lower() in ['tiff', 'tif']:
1603
- plt.savefig(fname, format='tiff', dpi=dpi, bbox_inches='tight')
1604
- elif ftype.lower() == 'emf':
1605
- plt.savefig(fname, format='emf', dpi=dpi, bbox_inches='tight')
1606
- elif ftype.lower() == 'fig':
1607
- plt.savefig(fname, format='pdf', bbox_inches='tight',dpi=dpi)
1608
- print(f'\nSaved @: dpi={dpi}\n{fname}')
1788
+ elif ftype.lower() == "pdf":
1789
+ plt.savefig(fname, format="pdf", bbox_inches="tight", dpi=dpi)
1790
+ elif ftype.lower() in ["jpg", "jpeg"]:
1791
+ plt.savefig(fname, format="jpeg", dpi=dpi, bbox_inches="tight")
1792
+ elif ftype.lower() == "png":
1793
+ plt.savefig(fname, format="png", dpi=dpi, bbox_inches="tight", transparent=True)
1794
+ elif ftype.lower() in ["tiff", "tif"]:
1795
+ plt.savefig(fname, format="tiff", dpi=dpi, bbox_inches="tight")
1796
+ elif ftype.lower() == "emf":
1797
+ plt.savefig(fname, format="emf", dpi=dpi, bbox_inches="tight")
1798
+ elif ftype.lower() == "fig":
1799
+ plt.savefig(fname, format="pdf", bbox_inches="tight", dpi=dpi)
1800
+ print(f"\nSaved @: dpi={dpi}\n{fname}")
1609
1801
 
1610
1802
 
1611
1803
  def is_str_color(s):
1612
1804
  # Regular expression pattern for hexadecimal color codes
1613
1805
  color_code_pattern = r"^#([A-Fa-f0-9]{6}|[A-Fa-f0-9]{8})$"
1614
1806
  return re.match(color_code_pattern, s) is not None
1807
+
1808
+
1615
1809
  def is_num(s):
1616
1810
  """
1617
1811
  Check if a string can be converted to a number (int or float).
@@ -1625,58 +1819,72 @@ def is_num(s):
1625
1819
  return True
1626
1820
  except ValueError:
1627
1821
  return False
1822
+
1823
+
1628
1824
  def isnum(s):
1629
1825
  return is_num(s)
1826
+
1827
+
1630
1828
  def is_image(fpath):
1631
1829
  mime_type, _ = mimetypes.guess_type(fpath)
1632
- if mime_type and mime_type.startswith('image'):
1830
+ if mime_type and mime_type.startswith("image"):
1633
1831
  return True
1634
1832
  else:
1635
1833
  return False
1834
+
1835
+
1636
1836
  def is_document(fpath):
1637
1837
  mime_type, _ = mimetypes.guess_type(fpath)
1638
1838
  if mime_type and (
1639
- mime_type.startswith('text/') or
1640
- mime_type == 'application/pdf' or
1641
- mime_type == 'application/msword' or
1642
- mime_type == 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' or
1643
- mime_type == 'application/vnd.ms-excel' or
1644
- mime_type == 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' or
1645
- mime_type == 'application/vnd.ms-powerpoint' or
1646
- mime_type == 'application/vnd.openxmlformats-officedocument.presentationml.presentation'
1839
+ mime_type.startswith("text/")
1840
+ or mime_type == "application/pdf"
1841
+ or mime_type == "application/msword"
1842
+ or mime_type
1843
+ == "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
1844
+ or mime_type == "application/vnd.ms-excel"
1845
+ or mime_type
1846
+ == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
1847
+ or mime_type == "application/vnd.ms-powerpoint"
1848
+ or mime_type
1849
+ == "application/vnd.openxmlformats-officedocument.presentationml.presentation"
1647
1850
  ):
1648
1851
  return True
1649
1852
  else:
1650
1853
  return False
1854
+
1855
+
1651
1856
  def is_zip(fpath):
1652
1857
  mime_type, _ = mimetypes.guess_type(fpath)
1653
- if mime_type == 'application/zip':
1858
+ if mime_type == "application/zip":
1654
1859
  return True
1655
1860
  else:
1656
1861
  return False
1657
1862
 
1658
1863
 
1659
- def adjust_spines(ax=None, spines=['left', 'bottom'],distance=2):
1864
+ def adjust_spines(ax=None, spines=["left", "bottom"], distance=2):
1660
1865
  if ax is None:
1661
1866
  ax = plt.gca()
1662
1867
  for loc, spine in ax.spines.items():
1663
1868
  if loc in spines:
1664
- spine.set_position(('outward', distance)) # outward by 2 points
1869
+ spine.set_position(("outward", distance)) # outward by 2 points
1665
1870
  # spine.set_smart_bounds(True)
1666
1871
  else:
1667
- spine.set_color('none') # don't draw spine
1872
+ spine.set_color("none") # don't draw spine
1668
1873
  # turn off ticks where there is no spine
1669
- if 'left' in spines:
1670
- ax.yaxis.set_ticks_position('left')
1874
+ if "left" in spines:
1875
+ ax.yaxis.set_ticks_position("left")
1671
1876
  else:
1672
1877
  ax.yaxis.set_ticks([])
1673
- if 'bottom' in spines:
1674
- ax.xaxis.set_ticks_position('bottom')
1878
+ if "bottom" in spines:
1879
+ ax.xaxis.set_ticks_position("bottom")
1675
1880
  else:
1676
1881
  # no xaxis ticks
1677
1882
  ax.xaxis.set_ticks([])
1883
+
1884
+
1678
1885
  # And then plot the data:
1679
1886
 
1887
+
1680
1888
  def add_colorbar(im, width=None, pad=None, **kwargs):
1681
1889
  # usage: add_colorbar(im, width=0.01, pad=0.005, label="PSD (dB)", shrink=0.8)
1682
1890
  l, b, w, h = im.axes.get_position().bounds # get boundaries
@@ -1685,6 +1893,8 @@ def add_colorbar(im, width=None, pad=None, **kwargs):
1685
1893
  fig = im.axes.figure # get figure of image
1686
1894
  cax = fig.add_axes([l + w + pad, b, width, h]) # define cbar Axes
1687
1895
  return fig.colorbar(im, cax=cax, **kwargs) # draw cbar
1896
+
1897
+
1688
1898
  # =============================================================================
1689
1899
  # # for plot figures: setting rcParams
1690
1900
  # usage: set_pub()
@@ -1697,13 +1907,16 @@ def add_colorbar(im, width=None, pad=None, **kwargs):
1697
1907
 
1698
1908
 
1699
1909
  def list2str(x_str):
1700
- s = ''.join(str(x) for x in x_str)
1910
+ s = "".join(str(x) for x in x_str)
1701
1911
  return s
1912
+
1913
+
1702
1914
  def str2list(str_):
1703
1915
  l = []
1704
1916
  [l.append(x) for x in str_]
1705
1917
  return l
1706
1918
 
1919
+
1707
1920
  def load_img(fpath):
1708
1921
  """
1709
1922
  Load an image from the specified file path.
@@ -1723,6 +1936,7 @@ def load_img(fpath):
1723
1936
  except OSError:
1724
1937
  raise OSError(f"Unable to open file '{fpath}' or it is not a valid image file.")
1725
1938
 
1939
+
1726
1940
  def apply_filter(img, *args):
1727
1941
  # def apply_filter(img, filter_name, filter_value=None):
1728
1942
  """
@@ -1734,42 +1948,47 @@ def apply_filter(img, *args):
1734
1948
  Returns:
1735
1949
  PIL.Image: The filtered image.
1736
1950
  """
1951
+
1737
1952
  def correct_filter_name(filter_name):
1738
- if 'bl' in filter_name.lower() and 'box' not in filter_name.lower():
1739
- return 'BLUR'
1740
- elif 'cont' in filter_name.lower():
1741
- return 'Contour'
1742
- elif 'det' in filter_name.lower():
1743
- return 'Detail'
1744
- elif 'edg' in filter_name.lower() and 'mo' not in filter_name.lower() and 'f' not in filter_name.lower():
1745
- return 'EDGE_ENHANCE'
1746
- elif 'edg' in filter_name.lower() and 'mo' in filter_name.lower():
1747
- return 'EDGE_ENHANCE_MORE'
1748
- elif 'emb' in filter_name.lower():
1749
- return 'EMBOSS'
1750
- elif 'edg' in filter_name.lower() and 'f' in filter_name.lower():
1751
- return 'FIND_EDGES'
1752
- elif 'sh' in filter_name.lower() and 'mo' not in filter_name.lower():
1753
- return 'SHARPEN'
1754
- elif 'sm' in filter_name.lower() and 'mo' not in filter_name.lower():
1755
- return 'SMOOTH'
1756
- elif 'sm' in filter_name.lower() and 'mo' in filter_name.lower():
1757
- return 'SMOOTH_MORE'
1758
- elif 'min' in filter_name.lower():
1759
- return 'MIN_FILTER'
1760
- elif 'max' in filter_name.lower():
1761
- return 'MAX_FILTER'
1762
- elif 'mod' in filter_name.lower():
1763
- return 'MODE_FILTER'
1764
- elif 'mul' in filter_name.lower():
1765
- return 'MULTIBAND_FILTER'
1766
- elif 'gau' in filter_name.lower():
1767
- return 'GAUSSIAN_BLUR'
1768
- elif 'box' in filter_name.lower():
1769
- return 'BOX_BLUR'
1770
- elif 'med' in filter_name.lower():
1771
- return 'MEDIAN_FILTER'
1772
- else:
1953
+ if "bl" in filter_name.lower() and "box" not in filter_name.lower():
1954
+ return "BLUR"
1955
+ elif "cont" in filter_name.lower():
1956
+ return "Contour"
1957
+ elif "det" in filter_name.lower():
1958
+ return "Detail"
1959
+ elif (
1960
+ "edg" in filter_name.lower()
1961
+ and "mo" not in filter_name.lower()
1962
+ and "f" not in filter_name.lower()
1963
+ ):
1964
+ return "EDGE_ENHANCE"
1965
+ elif "edg" in filter_name.lower() and "mo" in filter_name.lower():
1966
+ return "EDGE_ENHANCE_MORE"
1967
+ elif "emb" in filter_name.lower():
1968
+ return "EMBOSS"
1969
+ elif "edg" in filter_name.lower() and "f" in filter_name.lower():
1970
+ return "FIND_EDGES"
1971
+ elif "sh" in filter_name.lower() and "mo" not in filter_name.lower():
1972
+ return "SHARPEN"
1973
+ elif "sm" in filter_name.lower() and "mo" not in filter_name.lower():
1974
+ return "SMOOTH"
1975
+ elif "sm" in filter_name.lower() and "mo" in filter_name.lower():
1976
+ return "SMOOTH_MORE"
1977
+ elif "min" in filter_name.lower():
1978
+ return "MIN_FILTER"
1979
+ elif "max" in filter_name.lower():
1980
+ return "MAX_FILTER"
1981
+ elif "mod" in filter_name.lower():
1982
+ return "MODE_FILTER"
1983
+ elif "mul" in filter_name.lower():
1984
+ return "MULTIBAND_FILTER"
1985
+ elif "gau" in filter_name.lower():
1986
+ return "GAUSSIAN_BLUR"
1987
+ elif "box" in filter_name.lower():
1988
+ return "BOX_BLUR"
1989
+ elif "med" in filter_name.lower():
1990
+ return "MEDIAN_FILTER"
1991
+ else:
1773
1992
  supported_filters = [
1774
1993
  "BLUR",
1775
1994
  "CONTOUR",
@@ -1843,21 +2062,232 @@ def apply_filter(img, *args):
1843
2062
  return img.filter(supported_filters[filter_name](bands))
1844
2063
  else:
1845
2064
  if filter_value is not None:
1846
- print(f"{filter_name} doesn't require a value for {filter_value}, but it remains unaffected")
2065
+ print(
2066
+ f"{filter_name} doesn't require a value for {filter_value}, but it remains unaffected"
2067
+ )
1847
2068
  return img.filter(supported_filters[filter_name])
1848
2069
 
1849
2070
 
1850
- def imgsets(
1851
- img,
1852
- sets=None,
1853
- show=True,
1854
- show_axis=False,
1855
- size=None,
1856
- dpi=100,
1857
- figsize=None,
1858
- auto=False,
1859
- filter_kws=None,
1860
- ):
2071
+ # def imgsetss(
2072
+ # img,
2073
+ # sets=None,
2074
+ # show=True,
2075
+ # show_axis=False,
2076
+ # size=None,
2077
+ # dpi=100,
2078
+ # figsize=None,
2079
+ # auto=False,
2080
+ # filter_kws=None,
2081
+ # ):
2082
+ # """
2083
+ # Apply various enhancements and filters to an image using PIL's ImageEnhance and ImageFilter modules.
2084
+
2085
+ # Args:
2086
+ # img (PIL.Image): The input image.
2087
+ # sets (dict): A dictionary specifying the enhancements, filters, and their parameters.
2088
+ # show (bool): Whether to display the enhanced image.
2089
+ # show_axis (bool): Whether to display axes on the image plot.
2090
+ # size (tuple): The size of the thumbnail, cover, contain, or fit operation.
2091
+ # dpi (int): Dots per inch for the displayed image.
2092
+ # figsize (tuple): The size of the figure for displaying the image.
2093
+ # auto (bool): Whether to automatically enhance the image based on its characteristics.
2094
+
2095
+ # Returns:
2096
+ # PIL.Image: The enhanced image.
2097
+
2098
+ # Supported enhancements and filters:
2099
+ # - "sharpness": Adjusts the sharpness of the image. Values > 1 increase sharpness, while values < 1 decrease sharpness.
2100
+ # - "contrast": Adjusts the contrast of the image. Values > 1 increase contrast, while values < 1 decrease contrast.
2101
+ # - "brightness": Adjusts the brightness of the image. Values > 1 increase brightness, while values < 1 decrease brightness.
2102
+ # - "color": Adjusts the color saturation of the image. Values > 1 increase saturation, while values < 1 decrease saturation.
2103
+ # - "rotate": Rotates the image by the specified angle.
2104
+ # - "crop" or "cut": Crops the image. The value should be a tuple specifying the crop box as (left, upper, right, lower).
2105
+ # - "size": Resizes the image to the specified dimensions.
2106
+ # - "thumbnail": Resizes the image to fit within the given size while preserving aspect ratio.
2107
+ # - "cover": Resizes and crops the image to fill the specified size.
2108
+ # - "contain": Resizes the image to fit within the specified size, adding borders if necessary.
2109
+ # - "fit": Resizes and pads the image to fit within the specified size.
2110
+ # - "filter": Applies various filters to the image (e.g., BLUR, CONTOUR, EDGE_ENHANCE).
2111
+
2112
+ # Note:
2113
+ # The "color" and "enhance" enhancements are not implemented in this function.
2114
+ # """
2115
+ # supported_filters = [
2116
+ # "BLUR",
2117
+ # "CONTOUR",
2118
+ # "DETAIL",
2119
+ # "EDGE_ENHANCE",
2120
+ # "EDGE_ENHANCE_MORE",
2121
+ # "EMBOSS",
2122
+ # "FIND_EDGES",
2123
+ # "SHARPEN",
2124
+ # "SMOOTH",
2125
+ # "SMOOTH_MORE",
2126
+ # "MIN_FILTER",
2127
+ # "MAX_FILTER",
2128
+ # "MODE_FILTER",
2129
+ # "MULTIBAND_FILTER",
2130
+ # "GAUSSIAN_BLUR",
2131
+ # "BOX_BLUR",
2132
+ # "MEDIAN_FILTER",
2133
+ # ]
2134
+ # print("sets: a dict,'sharp:1.2','color','contrast:'auto' or 1.2','bright', 'crop: x_upperleft,y_upperleft, x_lowerright, y_lowerright','rotation','resize','rem or background'")
2135
+ # print(f"usage: filter_kws 'dict' below:")
2136
+ # pp([str(i).lower() for i in supported_filters])
2137
+ # print("\nlog:\n")
2138
+ # def confirm_rembg_models(model_name):
2139
+ # models_support = [
2140
+ # "u2net",
2141
+ # "u2netp",
2142
+ # "u2net_human_seg",
2143
+ # "u2net_cloth_seg",
2144
+ # "silueta",
2145
+ # "isnet-general-use",
2146
+ # "isnet-anime",
2147
+ # "sam",
2148
+ # ]
2149
+ # if model_name in models_support:
2150
+ # print(f"model_name: {model_name}")
2151
+ # return model_name
2152
+ # else:
2153
+ # print(f"{model_name} cannot be found, check the name:{models_support}, default('isnet-general-use') has been used")
2154
+ # return "isnet-general-use"
2155
+ # def auto_enhance(img):
2156
+ # """
2157
+ # Automatically enhances the image based on its characteristics.
2158
+ # Args:
2159
+ # img (PIL.Image): The input image.
2160
+ # Returns:
2161
+ # dict: A dictionary containing the optimal enhancement values.
2162
+ # """
2163
+ # # Determine the bit depth based on the image mode
2164
+ # if img.mode in ["1", "L", "P", "RGB", "YCbCr", "LAB", "HSV"]:
2165
+ # # 8-bit depth per channel
2166
+ # bit_depth = 8
2167
+ # elif img.mode in ["RGBA", "CMYK"]:
2168
+ # # 8-bit depth per channel + alpha (RGBA) or additional channels (CMYK)
2169
+ # bit_depth = 8
2170
+ # elif img.mode in ["I", "F"]:
2171
+ # # 16-bit depth per channel (integer or floating-point)
2172
+ # bit_depth = 16
2173
+ # else:
2174
+ # raise ValueError("Unsupported image mode")
2175
+ # # Calculate the brightness and contrast for each channel
2176
+ # num_channels = len(img.getbands())
2177
+ # brightness_factors = []
2178
+ # contrast_factors = []
2179
+ # for channel in range(num_channels):
2180
+ # channel_histogram = img.split()[channel].histogram()
2181
+ # brightness = sum(i * w for i, w in enumerate(channel_histogram))/sum(channel_histogram)
2182
+ # channel_min, channel_max = img.split()[channel].getextrema()
2183
+ # contrast = channel_max - channel_min
2184
+ # # Adjust calculations based on bit depth
2185
+ # normalization_factor = 2**bit_depth - 1 # Max value for the given bit depth
2186
+ # brightness_factor = (1.0 + (brightness - normalization_factor / 2) / normalization_factor)
2187
+ # contrast_factor = (1.0 + (contrast - normalization_factor / 2) / normalization_factor)
2188
+ # brightness_factors.append(brightness_factor)
2189
+ # contrast_factors.append(contrast_factor)
2190
+ # # Calculate the average brightness and contrast factors across channels
2191
+ # avg_brightness_factor = sum(brightness_factors) / num_channels
2192
+ # avg_contrast_factor = sum(contrast_factors) / num_channels
2193
+ # return {"brightness": avg_brightness_factor, "contrast": avg_contrast_factor}
2194
+ # # Load image if input is a file path
2195
+ # if isinstance(img, str):
2196
+ # img = load_img(img)
2197
+ # img_update = img.copy()
2198
+ # # Auto-enhance image if requested
2199
+ # if auto:
2200
+ # auto_params = auto_enhance(img_update)
2201
+ # sets.update(auto_params)
2202
+ # if sets is None:
2203
+ # sets = {}
2204
+ # for k, value in sets.items():
2205
+ # if "shar" in k.lower():
2206
+ # enhancer = ImageEnhance.Sharpness(img_update)
2207
+ # img_update = enhancer.enhance(value)
2208
+ # elif "col" in k.lower() and 'bg' not in k.lower():
2209
+ # enhancer = ImageEnhance.Color(img_update)
2210
+ # img_update = enhancer.enhance(value)
2211
+ # elif "contr" in k.lower():
2212
+ # if value and isinstance(value,(float,int)):
2213
+ # enhancer = ImageEnhance.Contrast(img_update)
2214
+ # img_update = enhancer.enhance(value)
2215
+ # else:
2216
+ # print('autocontrasted')
2217
+ # img_update = ImageOps.autocontrast(img_update)
2218
+ # elif "bri" in k.lower():
2219
+ # enhancer = ImageEnhance.Brightness(img_update)
2220
+ # img_update = enhancer.enhance(value)
2221
+ # elif "cro" in k.lower() or "cut" in k.lower():
2222
+ # img_update=img_update.crop(value)
2223
+ # elif "rota" in k.lower():
2224
+ # img_update = img_update.rotate(value)
2225
+ # elif "si" in k.lower():
2226
+ # img_update = img_update.resize(value)
2227
+ # elif "thum" in k.lower():
2228
+ # img_update.thumbnail(value)
2229
+ # elif "cover" in k.lower():
2230
+ # img_update = ImageOps.cover(img_update, size=value)
2231
+ # elif "contain" in k.lower():
2232
+ # img_update = ImageOps.contain(img_update, size=value)
2233
+ # elif "fit" in k.lower():
2234
+ # img_update = ImageOps.fit(img_update, size=value)
2235
+ # elif "pad" in k.lower():
2236
+ # img_update = ImageOps.pad(img_update, size=value)
2237
+ # elif 'rem' in k.lower() or 'rm' in k.lower() or 'back' in k.lower():
2238
+ # if value and isinstance(value,(int,float,list)):
2239
+ # print('example usage: {"rm":[alpha_matting_background_threshold(20),alpha_matting_foreground_threshold(270),alpha_matting_erode_sive(11)]}')
2240
+ # print("https://github.com/danielgatis/rembg/blob/main/USAGE.md")
2241
+ # # ### Parameters:
2242
+ # # data (Union[bytes, PILImage, np.ndarray]): The input image data.
2243
+ # # alpha_matting (bool, optional): Flag indicating whether to use alpha matting. Defaults to False.
2244
+ # # alpha_matting_foreground_threshold (int, optional): Foreground threshold for alpha matting. Defaults to 240.
2245
+ # # alpha_matting_background_threshold (int, optional): Background threshold for alpha matting. Defaults to 10.
2246
+ # # alpha_matting_erode_size (int, optional): Erosion size for alpha matting. Defaults to 10.
2247
+ # # session (Optional[BaseSession], optional): A session object for the 'u2net' model. Defaults to None.
2248
+ # # only_mask (bool, optional): Flag indicating whether to return only the binary masks. Defaults to False.
2249
+ # # post_process_mask (bool, optional): Flag indicating whether to post-process the masks. Defaults to False.
2250
+ # # bgcolor (Optional[Tuple[int, int, int, int]], optional): Background color for the cutout image. Defaults to None.
2251
+ # # ###
2252
+ # if isinstance(value,int):
2253
+ # value=[value]
2254
+ # if len(value) <2:
2255
+ # img_update = remove(img_update,alpha_matting=True,alpha_matting_background_threshold=value)
2256
+ # elif 2<=len(value)<3:
2257
+ # img_update = remove(img_update,alpha_matting=True,alpha_matting_background_threshold=value[0],alpha_matting_foreground_threshold=value[1])
2258
+ # elif 3<=len(value)<4:
2259
+ # img_update = remove(img_update,alpha_matting=True,alpha_matting_background_threshold=value[0],alpha_matting_foreground_threshold=value[1],alpha_matting_erode_size=value[2])
2260
+ # if isinstance(value,tuple): # replace the background color
2261
+ # if len(value)==3:
2262
+ # value+=(255,)
2263
+ # img_update = remove(img_update, bgcolor=value)
2264
+ # if isinstance(value,str):
2265
+ # if confirm_rembg_models(value):
2266
+ # img_update=remove(img_update,session=new_session(value))
2267
+ # else:
2268
+ # img_update=remove(img_update)
2269
+ # elif 'bgcolor' in k.lower():
2270
+ # if isinstance(value,list):
2271
+ # value=tuple(value)
2272
+ # if isinstance(value,tuple): # replace the background color
2273
+ # if len(value)==3:
2274
+ # value+=(255,)
2275
+ # img_update = remove(img_update, bgcolor=value)
2276
+ # if filter_kws:
2277
+ # for filter_name, filter_value in filter_kws.items():
2278
+ # img_update = apply_filter(img_update, filter_name, filter_value)
2279
+ # # Display the image if requested
2280
+ # if show:
2281
+ # if figsize is None:
2282
+ # plt.figure(dpi=dpi)
2283
+ # else:
2284
+ # plt.figure(figsize=figsize, dpi=dpi)
2285
+ # plt.imshow(img_update)
2286
+ # plt.axis("on") if show_axis else plt.axis("off")
2287
+ # return img_update
2288
+
2289
+
2290
+ def imgsets(img, **kwargs):
1861
2291
  """
1862
2292
  Apply various enhancements and filters to an image using PIL's ImageEnhance and ImageFilter modules.
1863
2293
 
@@ -1892,28 +2322,31 @@ def imgsets(
1892
2322
  The "color" and "enhance" enhancements are not implemented in this function.
1893
2323
  """
1894
2324
  supported_filters = [
1895
- "BLUR",
1896
- "CONTOUR",
1897
- "DETAIL",
1898
- "EDGE_ENHANCE",
1899
- "EDGE_ENHANCE_MORE",
1900
- "EMBOSS",
1901
- "FIND_EDGES",
1902
- "SHARPEN",
1903
- "SMOOTH",
1904
- "SMOOTH_MORE",
1905
- "MIN_FILTER",
1906
- "MAX_FILTER",
1907
- "MODE_FILTER",
1908
- "MULTIBAND_FILTER",
1909
- "GAUSSIAN_BLUR",
1910
- "BOX_BLUR",
1911
- "MEDIAN_FILTER",
1912
- ]
1913
- print("sets: a dict,'sharp:1.2','color','contrast:'auto' or 1.2','bright', 'crop: x_upperleft,y_upperleft, x_lowerright, y_lowerright','rotation','resize','rem or background'")
2325
+ "BLUR",
2326
+ "CONTOUR",
2327
+ "DETAIL",
2328
+ "EDGE_ENHANCE",
2329
+ "EDGE_ENHANCE_MORE",
2330
+ "EMBOSS",
2331
+ "FIND_EDGES",
2332
+ "SHARPEN",
2333
+ "SMOOTH",
2334
+ "SMOOTH_MORE",
2335
+ "MIN_FILTER",
2336
+ "MAX_FILTER",
2337
+ "MODE_FILTER",
2338
+ "MULTIBAND_FILTER",
2339
+ "GAUSSIAN_BLUR",
2340
+ "BOX_BLUR",
2341
+ "MEDIAN_FILTER",
2342
+ ]
2343
+ print(
2344
+ "sets: a dict,'sharp:1.2','color','contrast:'auto' or 1.2','bright', 'crop: x_upperleft,y_upperleft, x_lowerright, y_lowerright','rotation','resize','rem or background'"
2345
+ )
1914
2346
  print(f"usage: filter_kws 'dict' below:")
1915
2347
  pp([str(i).lower() for i in supported_filters])
1916
2348
  print("\nlog:\n")
2349
+
1917
2350
  def confirm_rembg_models(model_name):
1918
2351
  models_support = [
1919
2352
  "u2net",
@@ -1929,8 +2362,11 @@ def imgsets(
1929
2362
  print(f"model_name: {model_name}")
1930
2363
  return model_name
1931
2364
  else:
1932
- print(f"{model_name} cannot be found, check the name:{models_support}, default('isnet-general-use') has been used")
2365
+ print(
2366
+ f"{model_name} cannot be found, check the name:{models_support}, default('isnet-general-use') has been used"
2367
+ )
1933
2368
  return "isnet-general-use"
2369
+
1934
2370
  def auto_enhance(img):
1935
2371
  """
1936
2372
  Automatically enhances the image based on its characteristics.
@@ -1957,48 +2393,61 @@ def imgsets(
1957
2393
  contrast_factors = []
1958
2394
  for channel in range(num_channels):
1959
2395
  channel_histogram = img.split()[channel].histogram()
1960
- brightness = sum(i * w for i, w in enumerate(channel_histogram))/sum(channel_histogram)
2396
+ brightness = sum(i * w for i, w in enumerate(channel_histogram)) / sum(
2397
+ channel_histogram
2398
+ )
1961
2399
  channel_min, channel_max = img.split()[channel].getextrema()
1962
2400
  contrast = channel_max - channel_min
1963
2401
  # Adjust calculations based on bit depth
1964
2402
  normalization_factor = 2**bit_depth - 1 # Max value for the given bit depth
1965
- brightness_factor = (1.0 + (brightness - normalization_factor / 2) / normalization_factor)
1966
- contrast_factor = (1.0 + (contrast - normalization_factor / 2) / normalization_factor)
2403
+ brightness_factor = (
2404
+ 1.0 + (brightness - normalization_factor / 2) / normalization_factor
2405
+ )
2406
+ contrast_factor = (
2407
+ 1.0 + (contrast - normalization_factor / 2) / normalization_factor
2408
+ )
1967
2409
  brightness_factors.append(brightness_factor)
1968
2410
  contrast_factors.append(contrast_factor)
1969
2411
  # Calculate the average brightness and contrast factors across channels
1970
2412
  avg_brightness_factor = sum(brightness_factors) / num_channels
1971
2413
  avg_contrast_factor = sum(contrast_factors) / num_channels
1972
2414
  return {"brightness": avg_brightness_factor, "contrast": avg_contrast_factor}
2415
+
1973
2416
  # Load image if input is a file path
1974
2417
  if isinstance(img, str):
1975
2418
  img = load_img(img)
1976
2419
  img_update = img.copy()
1977
2420
  # Auto-enhance image if requested
2421
+
2422
+ auto = kwargs.get("auto", False)
2423
+ show = kwargs.get("show", True)
2424
+ show_axis = kwargs.get("show_axis", False)
2425
+ size = kwargs.get("size", None)
2426
+ figsize = kwargs.get("figsize", None)
2427
+ dpi = kwargs.get("dpi", 100)
2428
+
1978
2429
  if auto:
1979
- auto_params = auto_enhance(img_update)
1980
- sets.update(auto_params)
1981
- if sets is None:
1982
- sets = {}
1983
- for k, value in sets.items():
2430
+ kwargs = {**auto_enhance(img_update), **kwargs}
2431
+
2432
+ for k, value in kwargs.items():
1984
2433
  if "shar" in k.lower():
1985
2434
  enhancer = ImageEnhance.Sharpness(img_update)
1986
2435
  img_update = enhancer.enhance(value)
1987
- elif "col" in k.lower() and 'bg' not in k.lower():
2436
+ elif "col" in k.lower() and "bg" not in k.lower():
1988
2437
  enhancer = ImageEnhance.Color(img_update)
1989
2438
  img_update = enhancer.enhance(value)
1990
2439
  elif "contr" in k.lower():
1991
- if value and isinstance(value,(float,int)):
2440
+ if value and isinstance(value, (float, int)):
1992
2441
  enhancer = ImageEnhance.Contrast(img_update)
1993
2442
  img_update = enhancer.enhance(value)
1994
2443
  else:
1995
- print('autocontrasted')
2444
+ print("autocontrasted")
1996
2445
  img_update = ImageOps.autocontrast(img_update)
1997
2446
  elif "bri" in k.lower():
1998
2447
  enhancer = ImageEnhance.Brightness(img_update)
1999
2448
  img_update = enhancer.enhance(value)
2000
2449
  elif "cro" in k.lower() or "cut" in k.lower():
2001
- img_update=img_update.crop(value)
2450
+ img_update = img_update.crop(value)
2002
2451
  elif "rota" in k.lower():
2003
2452
  img_update = img_update.rotate(value)
2004
2453
  elif "si" in k.lower():
@@ -2010,12 +2459,22 @@ def imgsets(
2010
2459
  elif "contain" in k.lower():
2011
2460
  img_update = ImageOps.contain(img_update, size=value)
2012
2461
  elif "fit" in k.lower():
2013
- img_update = ImageOps.fit(img_update, size=value)
2462
+ if isinstance(value, dict):
2463
+ if filter_kws:
2464
+ for filter_name, filter_value in filter_kws.items():
2465
+ img_update = apply_filter(img_update, filter_name, filter_value)
2466
+ else:
2467
+ img_update = ImageOps.fit(img_update, size=value)
2014
2468
  elif "pad" in k.lower():
2015
2469
  img_update = ImageOps.pad(img_update, size=value)
2016
- elif 'rem' in k.lower() or 'rm' in k.lower() or 'back' in k.lower():
2017
- if value and isinstance(value,(int,float,list)):
2018
- print('example usage: {"rm":[alpha_matting_background_threshold(20),alpha_matting_foreground_threshold(270),alpha_matting_erode_sive(11)]}')
2470
+ elif "rem" in k.lower() or "rm" in k.lower() or "back" in k.lower():
2471
+ if isinstance(value, bool):
2472
+ session = new_session("isnet-general-use")
2473
+ img_update = remove(img_update, session=session)
2474
+ elif value and isinstance(value, (int, float, list)):
2475
+ print(
2476
+ 'example usage: {"rm":[alpha_matting_background_threshold(20),alpha_matting_foreground_threshold(270),alpha_matting_erode_sive(11)]}'
2477
+ )
2019
2478
  print("https://github.com/danielgatis/rembg/blob/main/USAGE.md")
2020
2479
  # ### Parameters:
2021
2480
  # data (Union[bytes, PILImage, np.ndarray]): The input image data.
@@ -2028,33 +2487,45 @@ def imgsets(
2028
2487
  # post_process_mask (bool, optional): Flag indicating whether to post-process the masks. Defaults to False.
2029
2488
  # bgcolor (Optional[Tuple[int, int, int, int]], optional): Background color for the cutout image. Defaults to None.
2030
2489
  # ###
2031
- if isinstance(value,int):
2032
- value=[value]
2033
- if len(value) <2:
2034
- img_update = remove(img_update,alpha_matting=True,alpha_matting_background_threshold=value)
2035
- elif 2<=len(value)<3:
2036
- img_update = remove(img_update,alpha_matting=True,alpha_matting_background_threshold=value[0],alpha_matting_foreground_threshold=value[1])
2037
- elif 3<=len(value)<4:
2038
- img_update = remove(img_update,alpha_matting=True,alpha_matting_background_threshold=value[0],alpha_matting_foreground_threshold=value[1],alpha_matting_erode_size=value[2])
2039
- if isinstance(value,tuple): # replace the background color
2040
- if len(value)==3:
2041
- value+=(255,)
2490
+ if isinstance(value, int):
2491
+ value = [value]
2492
+ if len(value) < 2:
2493
+ img_update = remove(
2494
+ img_update,
2495
+ alpha_matting=True,
2496
+ alpha_matting_background_threshold=value,
2497
+ )
2498
+ elif 2 <= len(value) < 3:
2499
+ img_update = remove(
2500
+ img_update,
2501
+ alpha_matting=True,
2502
+ alpha_matting_background_threshold=value[0],
2503
+ alpha_matting_foreground_threshold=value[1],
2504
+ )
2505
+ elif 3 <= len(value) < 4:
2506
+ img_update = remove(
2507
+ img_update,
2508
+ alpha_matting=True,
2509
+ alpha_matting_background_threshold=value[0],
2510
+ alpha_matting_foreground_threshold=value[1],
2511
+ alpha_matting_erode_size=value[2],
2512
+ )
2513
+ elif isinstance(value, tuple): # replace the background color
2514
+ if len(value) == 3:
2515
+ value += (255,)
2042
2516
  img_update = remove(img_update, bgcolor=value)
2043
- if isinstance(value,str):
2517
+ elif isinstance(value, str):
2044
2518
  if confirm_rembg_models(value):
2045
- img_update=remove(img_update,session=new_session(value))
2519
+ img_update = remove(img_update, session=new_session(value))
2046
2520
  else:
2047
- img_update=remove(img_update)
2048
- elif 'bgcolor' in k.lower():
2049
- if isinstance(value,list):
2050
- value=tuple(value)
2051
- if isinstance(value,tuple): # replace the background color
2052
- if len(value)==3:
2053
- value+=(255,)
2521
+ img_update = remove(img_update)
2522
+ elif "bg" in k.lower() and "color" in k.lower():
2523
+ if isinstance(value, list):
2524
+ value = tuple(value)
2525
+ if isinstance(value, tuple): # replace the background color
2526
+ if len(value) == 3:
2527
+ value += (255,)
2054
2528
  img_update = remove(img_update, bgcolor=value)
2055
- if filter_kws:
2056
- for filter_name, filter_value in filter_kws.items():
2057
- img_update = apply_filter(img_update, filter_name, filter_value)
2058
2529
  # Display the image if requested
2059
2530
  if show:
2060
2531
  if figsize is None:
@@ -2064,6 +2535,8 @@ def imgsets(
2064
2535
  plt.imshow(img_update)
2065
2536
  plt.axis("on") if show_axis else plt.axis("off")
2066
2537
  return img_update
2538
+
2539
+
2067
2540
  # # usage:
2068
2541
  # img = imgsets(
2069
2542
  # fpath,
@@ -2074,26 +2547,26 @@ def imgsets(
2074
2547
  # )
2075
2548
 
2076
2549
 
2077
- def thumbnail(dir_img_list,figsize=(10,10),dpi=100, dir_save=None, kind='.png'):
2550
+ def thumbnail(dir_img_list, figsize=(10, 10), dpi=100, dir_save=None, kind=".png"):
2078
2551
  """
2079
2552
  Display a thumbnail figure of all images in the specified directory.
2080
2553
  Args:
2081
2554
  dir_img_list (list): List of the Directory containing the images.
2082
2555
  """
2083
2556
  num_images = len(dir_img_list)
2084
- if not kind.startswith('.'):
2085
- kind='.'+kind
2557
+ if not kind.startswith("."):
2558
+ kind = "." + kind
2086
2559
 
2087
2560
  if num_images == 0:
2088
2561
  print("No images found to display.")
2089
2562
  return
2090
- grid_size = int(num_images ** 0.5) + 1 # Determine grid size
2091
- fig, axs = plt.subplots(grid_size, grid_size, figsize=figsize,dpi=dpi)
2563
+ grid_size = int(num_images**0.5) + 1 # Determine grid size
2564
+ fig, axs = plt.subplots(grid_size, grid_size, figsize=figsize, dpi=dpi)
2092
2565
  for ax, image_file in zip(axs.flatten(), dir_img_list):
2093
2566
  try:
2094
2567
  img = Image.open(image_file)
2095
2568
  ax.imshow(img)
2096
- ax.axis('off')
2569
+ ax.axis("off")
2097
2570
  except:
2098
2571
  continue
2099
2572
  # for ax in axs.flatten():
@@ -2104,13 +2577,15 @@ def thumbnail(dir_img_list,figsize=(10,10),dpi=100, dir_save=None, kind='.png'):
2104
2577
  plt.show()
2105
2578
  else:
2106
2579
  if basename(dir_save):
2107
- fname= basename(dir_save) +kind
2580
+ fname = basename(dir_save) + kind
2108
2581
  else:
2109
- fname= "_thumbnail_"+basename(dirname(dir_save)[:-1])+'.png'
2582
+ fname = "_thumbnail_" + basename(dirname(dir_save)[:-1]) + ".png"
2110
2583
  if dirname(dir_img_list[0]) == dirname(dir_save):
2111
- figsave(dirname(dir_save[:-1]),fname)
2584
+ figsave(dirname(dir_save[:-1]), fname)
2112
2585
  else:
2113
- figsave(dirname(dir_save),fname)
2586
+ figsave(dirname(dir_save), fname)
2587
+
2588
+
2114
2589
  # usage:
2115
2590
  # fpath = "/Users/macjianfeng/Dropbox/github/python/py2ls/tests/xample_netfinder/images/"
2116
2591
  # thumbnail(listdir(fpath,'png').fpath.to_list(),dir_save=dirname(fpath))
@@ -2127,6 +2602,8 @@ def read_mplstyle(style_file):
2127
2602
  for i, j in style_dict.items():
2128
2603
  print(f"\n{i}::::{j}")
2129
2604
  return style_dict
2605
+
2606
+
2130
2607
  # #example usage:
2131
2608
  # style_file = "/ std-colors.mplstyle"
2132
2609
  # style_dict = read_mplstyle(style_file)
@@ -2150,8 +2627,10 @@ def dir_lib(lib_oi):
2150
2627
  else:
2151
2628
  print(f"Cannot find the {lib_oi} in site-packages directory.")
2152
2629
  return dir_list
2630
+
2631
+
2153
2632
  # example usage:
2154
- # dir_lib("seaborn")
2633
+ # dir_lib("seaborn")
2155
2634
 
2156
2635
 
2157
2636
  # set up the colorlist, give the number, or the colormap's name
@@ -2162,6 +2641,7 @@ def get_color(n=1, cmap="auto", by="start"):
2162
2641
  colors = [cmap_(i) for i in range(cmap_.N)]
2163
2642
  return [matplotlib.colors.rgb2hex(color) for color in colors]
2164
2643
  # usage: clist = cmap2hex("viridis")
2644
+
2165
2645
  # cycle times, total number is n (defaultn=10)
2166
2646
  def cycle2list(colorlist, n=10):
2167
2647
  cycler_ = cycler(tmp=colorlist)
@@ -2171,18 +2651,21 @@ def get_color(n=1, cmap="auto", by="start"):
2171
2651
  if i > n:
2172
2652
  break
2173
2653
  return clist
2654
+
2174
2655
  def hue2rgb(hex_colors):
2175
2656
  def hex_to_rgb(hex_color):
2176
2657
  """Converts a hexadecimal color code to RGB values."""
2177
2658
  if hex_colors.startswith("#"):
2178
2659
  hex_color = hex_color.lstrip("#")
2179
2660
  return tuple(int(hex_color[i : i + 2], 16) / 255.0 for i in (0, 2, 4))
2661
+
2180
2662
  if isinstance(hex_colors, str):
2181
2663
  return hex_to_rgb(hex_colors)
2182
2664
  elif isinstance(hex_colors, (list)):
2183
2665
  """Converts a list of hexadecimal color codes to a list of RGB values."""
2184
2666
  rgb_values = [hex_to_rgb(hex_color) for hex_color in hex_colors]
2185
2667
  return rgb_values
2668
+
2186
2669
  if "aut" in cmap:
2187
2670
  colorlist = [
2188
2671
  "#474747",
@@ -2195,7 +2678,7 @@ def get_color(n=1, cmap="auto", by="start"):
2195
2678
  ]
2196
2679
  else:
2197
2680
  colorlist = cmap2hex(cmap)
2198
- if "st" in by.lower() or "be" in by.lower():
2681
+ if "st" in by.lower() or "be" in by.lower():
2199
2682
  # cycle it
2200
2683
  clist = cycle2list(colorlist, n=n)
2201
2684
  if "l" in by.lower() or "p" in by.lower():
@@ -2208,6 +2691,7 @@ def get_color(n=1, cmap="auto", by="start"):
2208
2691
  return clist # a color list
2209
2692
  # example usage: clist = get_color(4,cmap="auto", by="start") # get_color(4, cmap="hot", by="linspace")
2210
2693
 
2694
+
2211
2695
  """
2212
2696
  # n = 7
2213
2697
  # clist = get_color(n, cmap="auto", how="linspace") # get_color(100)
@@ -2222,7 +2706,18 @@ def get_color(n=1, cmap="auto", by="start"):
2222
2706
 
2223
2707
 
2224
2708
  class FileInfo:
2225
- def __init__(self, size, creation_time, ctime, mod_time, mtime, parent_dir, fname, kind, extra_info=None):
2709
+ def __init__(
2710
+ self,
2711
+ size,
2712
+ creation_time,
2713
+ ctime,
2714
+ mod_time,
2715
+ mtime,
2716
+ parent_dir,
2717
+ fname,
2718
+ kind,
2719
+ extra_info=None,
2720
+ ):
2226
2721
  self.size = size
2227
2722
  self.creation_time = creation_time
2228
2723
  self.ctime = ctime
@@ -2237,20 +2732,25 @@ class FileInfo:
2237
2732
  print("to show the res: 'finfo(fpath).show()'")
2238
2733
 
2239
2734
  def __repr__(self):
2240
- return (f"FileInfo(size={self.size} MB, creation_time='{self.creation_time}', "
2241
- f"ctime='{self.ctime}', mod_time='{self.mod_time}', mtime='{self.mtime}', "
2242
- f"parent_dir='{self.parent_dir}', fname='{self.fname}', kind='{self.kind}')")
2735
+ return (
2736
+ f"FileInfo(size={self.size} MB, creation_time='{self.creation_time}', "
2737
+ f"ctime='{self.ctime}', mod_time='{self.mod_time}', mtime='{self.mtime}', "
2738
+ f"parent_dir='{self.parent_dir}', fname='{self.fname}', kind='{self.kind}')"
2739
+ )
2243
2740
 
2244
2741
  def __str__(self):
2245
- return (f"FileInfo:\n"
2246
- f" Size: {self.size} MB\n"
2247
- f" Creation Time: {self.creation_time}\n"
2248
- f" CTime: {self.ctime}\n"
2249
- f" Modification Time: {self.mod_time}\n"
2250
- f" MTime: {self.mtime}\n"
2251
- f" Parent Directory: {self.parent_dir}\n"
2252
- f" File Name: {self.fname}\n"
2253
- f" Kind: {self.kind}")
2742
+ return (
2743
+ f"FileInfo:\n"
2744
+ f" Size: {self.size} MB\n"
2745
+ f" Creation Time: {self.creation_time}\n"
2746
+ f" CTime: {self.ctime}\n"
2747
+ f" Modification Time: {self.mod_time}\n"
2748
+ f" MTime: {self.mtime}\n"
2749
+ f" Parent Directory: {self.parent_dir}\n"
2750
+ f" File Name: {self.fname}\n"
2751
+ f" Kind: {self.kind}"
2752
+ )
2753
+
2254
2754
  def show(self):
2255
2755
  # Convert the object to a dictionary
2256
2756
  return {
@@ -2262,12 +2762,27 @@ class FileInfo:
2262
2762
  "parent_dir": self.parent_dir,
2263
2763
  "fname": self.fname,
2264
2764
  "kind": self.kind,
2265
- **{key: getattr(self, key) for key in vars(self) if key not in ["size", "creation_time", "ctime", "mod_time", "mtime", "parent_dir", "fname", "kind"]}
2765
+ **{
2766
+ key: getattr(self, key)
2767
+ for key in vars(self)
2768
+ if key
2769
+ not in [
2770
+ "size",
2771
+ "creation_time",
2772
+ "ctime",
2773
+ "mod_time",
2774
+ "mtime",
2775
+ "parent_dir",
2776
+ "fname",
2777
+ "kind",
2778
+ ]
2779
+ },
2266
2780
  }
2267
2781
 
2782
+
2268
2783
  def finfo(fpath):
2269
2784
  fname, fmt = os.path.splitext(fpath)
2270
- dir_par = os.path.dirname(fpath) + '/'
2785
+ dir_par = os.path.dirname(fpath) + "/"
2271
2786
  data = {
2272
2787
  "size": round(os.path.getsize(fpath) / 1024 / 1024, 3),
2273
2788
  "creation_time": time.ctime(os.path.getctime(fpath)),
@@ -2276,12 +2791,12 @@ def finfo(fpath):
2276
2791
  "mtime": time.ctime(os.path.getmtime(fpath)),
2277
2792
  "parent_dir": dir_par,
2278
2793
  "fname": fname.replace(dir_par, ""),
2279
- "kind": fmt
2794
+ "kind": fmt,
2280
2795
  }
2281
2796
  extra_info = {}
2282
2797
  if data["kind"] == ".pdf":
2283
2798
  extra_info = pdfinfo_from_path(fpath)
2284
-
2799
+
2285
2800
  return FileInfo(
2286
2801
  size=data["size"],
2287
2802
  creation_time=data["creation_time"],
@@ -2291,5 +2806,5 @@ def finfo(fpath):
2291
2806
  parent_dir=data["parent_dir"],
2292
2807
  fname=data["fname"],
2293
2808
  kind=data["kind"],
2294
- extra_info=extra_info
2295
- )
2809
+ extra_info=extra_info,
2810
+ )