py2ls 0.1.4.6__py3-none-any.whl → 0.1.4.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
py2ls/ips.py CHANGED
@@ -39,6 +39,382 @@ import mimetypes
39
39
  from pprint import pp
40
40
  from dateutil import parser
41
41
  from datetime import datetime
42
+ from collections import Counter
43
+ from fuzzywuzzy import fuzz,process
44
+ from py2ls import netfinder
45
+ from langdetect import detect
46
+ import shutil
47
+ from duckduckgo_search import DDGS
48
+
49
+
50
# Default output directory for downloads and AI chat logs.
# The original author's Dropbox folder is kept when it exists so that setup
# behaves as before; on any other machine fall back to ~/Downloads/.
# The trailing separator is deliberate: echo() treats a dir_save whose
# basename is non-empty as a file path rather than a directory.
_LEGACY_DIR_SAVE = '/Users/macjianfeng/Dropbox/Downloads/'
dir_save = (
    _LEGACY_DIR_SAVE
    if os.path.isdir(_LEGACY_DIR_SAVE)
    else os.path.join(os.path.expanduser('~'), 'Downloads', '')
)
51
+
52
def rm_folder(folder_path, verbose=True):
    """Recursively delete ``folder_path`` without ever raising.

    Args:
        folder_path (str): Directory tree to remove.
        verbose (bool): When True, print whether the removal succeeded or
            why it failed.

    Returns:
        None. Any exception raised by the removal is caught.
    """
    try:
        shutil.rmtree(folder_path)
    except Exception as e:
        message = f'Failed to delete {folder_path}. Reason: {e}'
    else:
        message = f'Successfully deleted {folder_path}'
    if verbose:
        print(message)
60
+
61
+
62
def search(query, limit=5, kind='text', output='df',verbose=False,download=True, dir_save=dir_save):
    """Run a DuckDuckGo text search and optionally download every hit.

    Args:
        query (str): Search terms.
        limit (int): Maximum number of results requested.
        kind (str): Search type; only values containing 'te' (text) are
            implemented.
        output (str): Currently unused; the result is always a DataFrame.
        verbose (bool): Print the result table and any download failure.
        download (bool): When True, pass all result links to
            ``netfinder.downloader``.
        dir_save (str): Destination directory for downloads.

    Returns:
        pandas.DataFrame: The search results with the 'href' column renamed
        to 'links'.

    Raises:
        ValueError: If ``kind`` is not a text search (previously this
            surfaced as an unbound-variable error).
    """
    if 'te' not in kind.lower():
        raise ValueError(f"unsupported search kind {kind!r}; only 'text' is implemented")

    from duckduckgo_search import DDGS

    results = DDGS().text(query, max_results=limit)
    res = pd.DataFrame(results)
    res.rename(columns={"href": "links"}, inplace=True)
    if verbose:
        print(f'searching "{query}": got the results below\n{res}')
    if download:
        # Downloading is best-effort: a failure must not discard the results.
        try:
            netfinder.downloader(url=res.links.tolist(), dir_save=dir_save, verbose=verbose)
        except Exception as e:
            if verbose:
                print(f"failed link: {e}")
    return res
77
+
78
def echo(*args,**kwargs):
    """
    query, model="gpt", verbose=True, log=True, dir_save=dir_save
    Ask a DuckDuckGo AI chat model a question and optionally log the exchange.

    Positional arguments are classified by type rather than position: an
    existing directory path becomes ``dir_save``, any other string of at most
    5 characters becomes ``model``, a longer string becomes ``query``, and a
    dict may carry ``verbose`` / ``log`` overrides. Because of the length
    rule, very short prompts must be passed as ``query=...``.

    Args:
        query (str): The prompt to send.
        model (str, optional): Full or partial model name. Defaults to "gpt".
        verbose (bool, optional): Pretty-print the answer. Defaults to True.
        log (bool, optional): Prepend the exchange to a markdown log. Defaults to True.
        dir_save (str, path, optional): Log location. A path whose basename is
            empty (i.e. ending in a separator) is treated as a directory and
            ``log_ai.md`` is used inside it; anything else is used as the log
            file itself. Defaults to the module-level dir_save.

    Returns:
        str: the answer from ai

    Raises:
        ValueError: If no query was supplied.
    """
    supported_models = ("claude-3-haiku", "gpt-3.5", "llama-3-70b", "mixtral-8x7b")

    # Keyword values first; positional arguments may override them below.
    query = kwargs.get('query', None)
    model = kwargs.get('model', 'gpt')
    verbose = kwargs.get('verbose', True)
    log = kwargs.get('log', True)
    # A local name is used on purpose so a call never rebinds the module-level default.
    save_dir = kwargs.get('dir_save', dir_save)

    for arg in args:
        if isinstance(arg, str):
            if os.path.isdir(arg):
                save_dir = arg
            elif len(arg) <= 5:
                model = arg
            else:
                query = arg
        elif isinstance(arg, dict):
            verbose = arg.get("verbose", verbose)
            log = arg.get("log", log)

    if query is None:
        raise ValueError(
            "echo() needs a query; pass it as query=... (positional strings of "
            "5 characters or fewer are interpreted as the model name)"
        )

    def valid_mod_name(str_fly):
        # Resolve a (partial, case-insensitive) name to a supported model id.
        key = str(str_fly).strip().lower()
        if key:
            for name in supported_models:
                if key in name:
                    return name
        print(f"not support your model{model}, supported models: 'claude','gpt(default)', 'llama','mixtral'")
        return "gpt-3.5" # default model

    model_valid = valid_mod_name(model)
    res = DDGS().chat(query, model=model_valid)
    if verbose:
        pp(res)
    if log:
        dt_str = datetime.now().strftime('%Y-%m-%d_%H:%M:%S')
        res_ = f"\n\n####Q:{query}\n\n#####Ans:{dt_str}\n\n>{res}\n"
        if os.path.basename(save_dir):
            fpath = save_dir
        else:
            os.makedirs(save_dir, exist_ok=True)
            fpath = os.path.join(save_dir, "log_ai.md")
        fupdate(fpath=fpath,content=res_)
        print(f"log file:{fpath}")
    return res
148
+
149
def chat(*args, **kwargs):
    """Alias of ``echo``: forward every positional and keyword argument unchanged.

    Forwarding positionally lets ``echo`` apply its own argument
    classification, so calls such as ``chat("a question", "llama")`` no
    longer lose their arguments.

    Returns:
        Whatever ``echo`` returns.
    """
    return echo(*args, **kwargs)
153
+
154
def ai(*args, **kwargs):
    """Alias of ``echo``: forward every positional and keyword argument unchanged.

    Forwarding positionally lets ``echo`` apply its own argument
    classification, so calls such as ``ai("a question", "llama")`` no longer
    lose their arguments.

    Returns:
        Whatever ``echo`` returns.
    """
    return echo(*args, **kwargs)
158
+
159
def detect_lang(text, output='lang',verbose=True):
    """Detect the language of ``text`` and report it as a name or a code.

    The code returned by ``detect`` is matched against the table below with
    ``strcmp`` (a best-match search), so a detected code that is missing from
    the table resolves to the closest entry rather than failing.

    Args:
        text (str): Text to analyse; it must pass ``is_text``.
        output (str): If it contains the letter 'c' the two-letter code is
            returned, otherwise the language name.
        verbose (bool): Forwarded to ``strcmp``.

    Returns:
        str: Language name or code. 'no' is returned when the text is not
        supported or detection fails; note that this value is also the
        table's code for Norwegian.
    """
    lang_code_iso639={'Abkhazian': 'ab',
                    'Afar': 'aa',
                    'Afrikaans': 'af',
                    'Akan': 'ak',
                    'Albanian': 'sq',
                    'Amharic': 'am',
                    'Arabic': 'ar',
                    'Armenian': 'hy',
                    'Assamese': 'as',
                    # 'Avaric': 'av',
                    'Aymara': 'ay',
                    'Azerbaijani': 'az',
                    'Bashkir': 'ba',
                    'Basque': 'eu',
                    'Belarusian': 'be',
                    'Bislama': 'bi',
                    'Breton': 'br',
                    'Burmese': 'my',
                    'Catalan, Valencian': 'ca',
                    'Chamorro': 'ch',
                    'Chichewa, Chewa, Nyanja': 'ny',
                    'Chinese': 'zh',
                    'Corsican': 'co',
                    'Cree': 'cr',
                    'Croatian': 'hr',
                    'Danish': 'da',
                    'Dutch, Flemish': 'nl',
                    'Dzongkha': 'dz',
                    'English': 'en',
                    'Finnish': 'fi',
                    'French': 'fr',
                    'Galician': 'gl',
                    'Georgian': 'ka',
                    'German': 'de',
                    'Greek, Modern (1453–)': 'el',
                    'Gujarati': 'gu',
                    'Hausa': 'ha',
                    'Hebrew': 'he',
                    'Hindi': 'hi',
                    'Hungarian': 'hu',
                    'Icelandic': 'is',
                    'Italian': 'it',
                    'Kikuyu, Gikuyu': 'ki',
                    'Korean': 'ko',
                    'Kurdish': 'ku',
                    'Latin': 'la',
                    'Limburgan, Limburger, Limburgish': 'li',
                    'Luba-Katanga': 'lu',
                    'Macedonian': 'mk',
                    'Malay': 'ms',
                    'Nauru': 'na',
                    'North Ndebele': 'nd',
                    'Nepali': 'ne',
                    'Norwegian': 'no',
                    'Norwegian Nynorsk': 'nn',
                    'Sichuan Yi, Nuosu': 'ii',
                    'Occitan': 'oc',
                    'Ojibwa': 'oj',
                    'Oriya': 'or',
                    'Ossetian, Ossetic': 'os',
                    'Persian': 'fa',
                    'Punjabi, Panjabi': 'pa',
                    'Quechua': 'qu',
                    'Romanian, Moldavian, Moldovan': 'ro',
                    'Russian': 'ru',
                    'Samoan': 'sm',
                    'Sanskrit': 'sa',
                    'Serbian': 'sr',
                    'Shona': 'sn',
                    'Sinhala, Sinhalese': 'si',
                    'Slovenian': 'sl',
                    'Somali': 'so',
                    'Sundanese': 'su',
                    'Swahili': 'sw',
                    'Swati': 'ss',
                    'Tajik': 'tg',
                    'Tamil': 'ta',
                    'Telugu': 'te',
                    'Thai': 'th',
                    'Tibetan': 'bo',
                    'Tigrinya': 'ti',
                    'Tonga (Tonga Islands)': 'to',
                    'Tsonga': 'ts',
                    'Twi': 'tw',
                    'Ukrainian': 'uk',
                    'Urdu': 'ur',
                    'Uzbek': 'uz',
                    'Venda': 've',
                    'Vietnamese': 'vi',
                    'Volapük': 'vo',
                    'Welsh': 'cy',
                    'Wolof': 'wo',
                    'Xhosa': 'xh',
                    'Yiddish': 'yi',
                    'Yoruba': 'yo',
                    'Zulu': 'zu'}
    # Parallel lists: l_lang[i] is the name belonging to the code l_code[i].
    l_lang = list(lang_code_iso639.keys())
    l_code = list(lang_code_iso639.values())
    try:
        if not is_text(text):
            print(f"{text} is not supported")
            return 'no'
        code_detect = detect(text)
        idx = strcmp(code_detect, l_code, verbose=verbose)[1]
        if 'c' in output.lower(): # return code
            return l_code[idx]
        return l_lang[idx]
    except Exception:
        # Detection is best-effort; any failure is reported as 'no'.
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        return 'no'
270
+
271
def is_text(s):
    """Return True when ``s`` mixes alphabetic and non-alphabetic characters.

    A string made only of letters (e.g. a single word) or containing no
    letters at all yields False, as does the empty string.
    """
    alpha_flags = [ch.isalpha() for ch in s]
    # At least one letter, and at least one character that is not a letter.
    return any(alpha_flags) and not all(alpha_flags)
276
+
277
def strcmp(search_term, candidates, ignore_case=True, verbose=True, scorer='WR'):
    """
    Compares a search term with candidate strings and finds the best match based on similarity score.

    Parameters:
    search_term (str): The term to be searched for.
    candidates (str or list/tuple of str): Candidate string(s) to compare against the search term.
    ignore_case (bool): If True, the comparison ignores case differences.
    verbose (bool): If True, prints the best match with its score and the ranked suggestions.
    scorer (str): Case-insensitive selector: a value containing 'part' uses fuzz.partial_ratio,
        one containing 'w' uses fuzz.WRatio, one containing 'ratio' uses fuzz.ratio;
        anything else (and the default 'WR') uses fuzz.WRatio.

    Returns:
    tuple: A tuple containing the best match and its index in the candidates list.
        For a single string candidate the tuple is (candidates, 0).

    Raises:
    ValueError: If candidates is an empty sequence.
    """
    # Pick the scoring function once. The selector is compared in lower case,
    # so the lookups must be lower case too; 'w' is tested before 'ratio'
    # because "wratio" contains both.
    key = scorer.lower()
    if 'part' in key:
        score_fn = fuzz.partial_ratio
    elif 'w' in key:
        score_fn = fuzz.WRatio
    elif 'ratio' in key:
        score_fn = fuzz.ratio
    else:
        score_fn = fuzz.WRatio

    # Treat a lone string as a one-element pool so both cases share one path.
    pool = [candidates] if isinstance(candidates, str) else list(candidates)
    if not pool:
        raise ValueError("strcmp() needs at least one candidate")

    if ignore_case:
        term_cmp = search_term.lower()
        pool_cmp = [word.lower() for word in pool]
    else:
        term_cmp = search_term
        pool_cmp = pool

    similarity_scores = [score_fn(term_cmp, word) for word in pool_cmp]
    best_match_score = max(similarity_scores)
    best_match_index = similarity_scores.index(best_match_score)  # first of any ties

    if verbose:
        print(f"\nbest_match is: {pool[best_match_index],best_match_score}")
        best_match = process.extract(search_term, pool)
        print(f"建议: {best_match}")
    return pool[best_match_index], best_match_index
325
+
326
+ # Example usage
327
+ # str1 = "plos biology"
328
+ # str2 = ['PLoS Computational Biology', 'PLOS BIOLOGY']
329
+ # best_match, idx = strcmp(str1, str2, ignore_case=1)
330
+
331
def counter(list_, verbose=True):
    """Count how often each element occurs in ``list_``.

    Args:
        list_ (iterable): Hashable items to tally.
        verbose (bool): When True, print one "item: count" line per distinct item.

    Returns:
        collections.Counter: Mapping of item -> number of occurrences.
    """
    tally = Counter(list_)
    if verbose:
        for item, count in tally.items():
            print(f"{item}: {count}")
    return tally
338
+ # usage:
339
+ # print(f"Return an iterator over elements repeating each as many times as its count:\n{sorted(c.elements())}")
340
+ # print(f"Return a list of the n most common elements:\n{c.most_common()}")
341
+ # print(f"Compute the sum of the counts:\n{c.total()}")
342
+
343
def is_num(s):
    """
    Check if a value can be converted to a number (int or float).
    Parameters:
    - s (str): The value to check.
    Returns:
    - bool: True if float(s) succeeds, False otherwise. Values float() cannot
      accept at all (None, lists, ...) also return False instead of raising.
    """
    try:
        float(s)  # Try converting the value to a float
    except (ValueError, TypeError):
        # ValueError: unparsable string; TypeError: unsupported type such as None.
        return False
    return True
356
def isnum(s):
    """Alias of ``is_num``: report whether ``s`` can be converted to a number."""
    verdict = is_num(s)
    return verdict
358
+
359
def str2time(time_str, fmt='24'):
    """
    Convert a time string into the specified format.

    The input may be "H:M" or "H:M:S", optionally followed by AM/PM; missing
    seconds are filled in as "00".

    Parameters:
    - time_str (str): The time string to be converted.
    - fmt (str): '12' selects "%I:%M:%S %p", '24' (default) selects "%H:%M:%S";
      any other value is used as a strftime format as given.
        %I represents the hour in 12-hour format.
        %H represents the hour in 24-hour format (00 through 23).
        %M represents the minute.
        %S represents the second.
        %p represents AM or PM.
    Returns:
    - str: The converted time string.
    Raises:
    - ValueError: If the time string cannot be parsed.
    """
    import re

    def time_len_corr(time_str):
        # Normalise to "H:M:S[ AM|PM]" from the numeric fields of the input.
        fields = re.findall(r'\d+', time_str)
        if len(fields) == 2:
            fields.append("00")
        if len(fields) != 3:
            raise ValueError("expected hour:minute or hour:minute:second")
        time_str_full = ":".join(fields)
        lowered = time_str.lower()
        if 'am' in lowered:
            time_str_full += " AM"
        elif 'pm' in lowered:
            time_str_full += " PM"
        return time_str_full

    if '12' in fmt:
        fmt = "%I:%M:%S %p"
    elif '24' in fmt:
        fmt = "%H:%M:%S"

    try:
        normalized = time_len_corr(time_str)
    except ValueError as e:
        raise ValueError(f"Unable to parse time string: {time_str}. Error: {e}")

    try:
        # A string without an AM/PM suffix parses as 24-hour time ...
        time_obj = datetime.strptime(normalized, '%H:%M:%S')
    except ValueError:
        try:
            # ... otherwise it must be 12-hour time with the suffix.
            time_obj = datetime.strptime(normalized, '%I:%M:%S %p')
        except ValueError as e:
            raise ValueError(f"Unable to parse time string: {time_str}. Error: {e}")

    # Format the time object to the desired output format
    return time_obj.strftime(fmt)
408
+
409
+ # # Example usage:
410
+ # time_str1 = "14:30:45"
411
+ # time_str2 = "02:30:45 PM"
412
+
413
+ # formatted_time1 = str2time(time_str1, fmt='12') # Convert to 12-hour format
414
+ # formatted_time2 = str2time(time_str2, fmt='24') # Convert to 24-hour format
415
+
416
+ # print(formatted_time1) # Output: 02:30:45 PM
417
+ # print(formatted_time2) # Output: 14:30:45
42
418
 
43
419
  def str2date(date_str, fmt='%Y-%m-%d_%H:%M:%S'):
44
420
  """
@@ -153,18 +529,33 @@ def num2str(num, *args):
153
529
  # print(num2str(7000.125, 2),type(num2str(7000.125, 2))) # Output: "7000.13"
154
530
  # print(num2str(12345.6789, ","),type(num2str(12345.6789, ","))) # Output: "12,345.6789"
155
531
  # print(num2str(7000.00, ","),type(num2str(7000.00, ","))) # Output: "7,000.00"
156
- def sreplace(text, dict_replace=None, robust=True):
532
+ def sreplace(*args,**kwargs):
157
533
  """
534
+ sreplace(text, by=None, robust=True)
158
535
  Replace specified substrings in the input text with provided replacements.
159
536
  Args:
160
537
  text (str): The input text where replacements will be made.
161
- dict_replace (dict, optional): A dictionary containing substrings to be replaced as keys
538
+ by (dict, optional): A dictionary containing substrings to be replaced as keys
162
539
  and their corresponding replacements as values. Defaults to {".com": "..come", "\n": " ", "\t": " ", " ": " "}.
163
540
  robust (bool, optional): If True, additional default replacements for newline and tab characters will be applied.
164
541
  Default is False.
165
542
  Returns:
166
543
  str: The text after replacements have been made.
167
544
  """
545
+ text = None
546
+ by = kwargs.get('by', None)
547
+ robust = kwargs.get('robust', True)
548
+
549
+ for arg in args:
550
+ if isinstance(arg,str):
551
+ text=arg
552
+ elif isinstance(arg,dict):
553
+ by=arg
554
+ elif isinstance(arg,bool):
555
+ robust=arg
556
+ else:
557
+ Error(f"{type(arg)} is not supported")
558
+
168
559
  # Default replacements for newline and tab characters
169
560
  default_replacements = {
170
561
  "\a": "",
@@ -185,19 +576,18 @@ def sreplace(text, dict_replace=None, robust=True):
185
576
  }
186
577
 
187
578
  # If dict_replace is None, use the default dictionary
188
- if dict_replace is None:
189
- dict_replace = {}
190
-
579
+ if by is None:
580
+ by = {}
191
581
  # If robust is True, update the dictionary with default replacements
192
582
  if robust:
193
- dict_replace.update(default_replacements)
583
+ by.update(default_replacements)
194
584
 
195
585
  # Iterate over each key-value pair in the dictionary and replace substrings accordingly
196
- for k, v in dict_replace.items():
586
+ for k, v in by.items():
197
587
  text = text.replace(k, v)
198
588
  return text
199
589
  # usage:
200
- # sreplace(text, dict_replace=dict(old_str='new_str'), robust=True)
590
+ # sreplace(text, by=dict(old_str='new_str'), robust=True)
201
591
 
202
592
  def paper_size(paper_type_str='a4'):
203
593
  df=pd.DataFrame({'a0':[841,1189],'a1':[594,841],'a2':[420,594],'a3':[297,420],'a4':[210,297],'a5':[148,210],'a6':[105,148],'a7':[74,105],
@@ -339,10 +729,10 @@ def ssplit(text, by="space", verbose=False, **kws):
339
729
  if verbose:
340
730
  print(f"split_by_word_length(text, length)")
341
731
  return split_by_word_length(text, **kws) # split_by_word_length(text, length)
342
- elif "," in by:
343
- if verbose:
344
- print(f"splited by ','")
345
- return text.split(",")
732
+ # elif "," in by:
733
+ # if verbose:
734
+ # print(f"splited by ','")
735
+ # return text.split(",")
346
736
  elif isinstance(by, list):
347
737
  if verbose:
348
738
  print(f"split_by_multiple_delimiters: ['|','&']")
@@ -600,6 +990,31 @@ def fload(fpath, kind=None, **kwargs):
600
990
  # xlsx_content = fload('sample.xlsx')
601
991
  # docx_content = fload('sample.docx')
602
992
 
993
def fupdate(fpath, content=None):
    """
    Update a file by adding new content at the top and moving the old content to the bottom.
    Parameters
    ----------
    fpath : str
        The file path where the content should be updated.
    content : str, optional
        The new content to add at the top of the file. If not provided, the function will not add any new content.
    Notes
    -----
    - If the file at `fpath` does not exist, it will be created.
    - The new content will be added at the top, followed by the old content of the file.
    - The file is read and written as UTF-8 so non-ASCII text does not depend on the
      platform's default encoding.
    """
    content = content or ""
    old_content = ''
    if os.path.exists(fpath):
        with open(fpath, 'r', encoding='utf-8') as file:
            old_content = file.read()

    with open(fpath, 'w', encoding='utf-8') as file:
        file.write(content)
        file.write(old_content)
1017
+
603
1018
  def fsave(
604
1019
  fpath,
605
1020
  content,
@@ -607,6 +1022,7 @@ def fsave(
607
1022
  font_name="Times",
608
1023
  font_size=10,
609
1024
  spacing=6,
1025
+ mode='w',
610
1026
  **kwargs,
611
1027
  ):
612
1028
  """
@@ -622,8 +1038,8 @@ def fsave(
622
1038
  Returns:
623
1039
  None
624
1040
  """
625
- def save_content(fpath, content):
626
- with open(fpath, "w", encoding='utf-8') as file:
1041
+ def save_content(fpath, content, mode=mode):
1042
+ with open(fpath, mode, encoding='utf-8') as file:
627
1043
  file.write(content)
628
1044
 
629
1045
 
@@ -642,19 +1058,19 @@ def fsave(
642
1058
  doc.save(fpath)
643
1059
 
644
1060
 
645
- def save_txt_md(fpath, content, sep="\n"):
1061
+ def save_txt_md(fpath, content, sep="\n",mode='w'):
646
1062
  # Ensure content is a single string
647
1063
  if isinstance(content, list):
648
1064
  content = sep.join(content)
649
- save_content(fpath, sep.join(content))
1065
+ save_content(fpath, sep.join(content),mode)
650
1066
 
651
1067
 
652
- def save_html(fpath, content, font_name, font_size):
1068
+ def save_html(fpath, content, font_name, font_size,mode='w'):
653
1069
  html_content = "<html><body>"
654
1070
  for paragraph_text in content:
655
1071
  html_content += f'<p style="font-family:{font_name}; font-size:{font_size}px;">{paragraph_text}</p>'
656
1072
  html_content += "</body></html>"
657
- save_content(fpath, html_content)
1073
+ save_content(fpath, html_content,mode)
658
1074
 
659
1075
 
660
1076
  def save_pdf(fpath, content, font_name, font_size):
@@ -737,16 +1153,16 @@ def fsave(
737
1153
  "xml",
738
1154
  "yaml",
739
1155
  ]:
740
- raise ValueError(
741
- f"Error:\n{kind} is not in the supported list ['docx', 'txt', 'md', 'html', 'pdf', 'csv', 'xlsx', 'json', 'xml', 'yaml']"
1156
+ print(
1157
+ f"Warning:\n{kind} is not in the supported list ['docx', 'txt', 'md', 'html', 'pdf', 'csv', 'xlsx', 'json', 'xml', 'yaml']"
742
1158
  )
743
1159
 
744
1160
  if kind == "docx" or kind=="doc":
745
1161
  save_docx(fpath, content, font_name, font_size, spacing)
746
1162
  elif kind == "txt":
747
- save_txt_md(fpath, content, sep="")
1163
+ save_txt_md(fpath, content, sep="",mode=mode)
748
1164
  elif kind == "md":
749
- save_txt_md(fpath, content, sep="")
1165
+ save_txt_md(fpath, content, sep="",mode=mode)
750
1166
  elif kind == "html":
751
1167
  save_html(fpath, content, font_name, font_size)
752
1168
  elif kind == "pdf":
@@ -766,9 +1182,12 @@ def fsave(
766
1182
  elif kind == "yaml":
767
1183
  save_yaml(fpath, content, **kwargs) # Assuming content is a serializable object
768
1184
  else:
769
- raise ValueError(
770
- f"Error:\n{kind} is not in the supported list ['docx', 'txt', 'md', 'html', 'pdf', 'csv', 'xlsx', 'json', 'xml', 'yaml']"
771
- )
1185
+ try:
1186
+ netfinder.downloader(url=content, dir_save=dirname(fpath), kind=kind)
1187
+ except:
1188
+ print(
1189
+ f"Error:\n{kind} is not in the supported list ['docx', 'txt', 'md', 'html', 'pdf', 'csv', 'xlsx', 'json', 'xml', 'yaml']"
1190
+ )
772
1191
 
773
1192
 
774
1193
  # # Example usage
@@ -792,15 +1211,116 @@ def fsave(
792
1211
def addpath(fpath):
    """Put a directory at the front of ``sys.path``.

    Args:
        fpath (str): A directory, or a file whose containing directory should
            be added.

    The previous body inserted the builtin ``dir`` function instead of the
    argument, which left ``sys.path`` with an unusable entry.
    """
    target = os.path.dirname(fpath) if os.path.isfile(fpath) else fpath
    sys.path.insert(0, target)
794
1213
def dirname(fpath):
    """
    dirname: Extracting Directory Name from a File Path
    Args:
        fpath (str): the file or directory path
    Returns:
        str: directory, without filename, always ending in "/".
            A bare filename (no directory part) yields "./" rather than "/",
            so the result never points at the filesystem root by accident.
    """
    dirname_ = os.path.dirname(fpath)
    if not dirname_:
        # No directory component: refer to the current directory.
        return "./"
    if not dirname_.endswith('/'):
        dirname_ = dirname_ + "/"
    return dirname_
799
- def dir_name(fpath):
1225
+
1226
def dir_name(fpath):
    """Alias of ``dirname``: return the directory part of ``fpath``."""
    directory = dirname(fpath)
    return directory
801
1228
def basename(fpath):
    """
    basename: return the final component of a path.
    Args:
        fpath (str): the file or directory path
    Returns:
        str: the part after the last separator, e.g. "file.txt";
            empty when the path ends with a separator.
    """
    _head, tail = os.path.split(fpath)
    return tail
1237
def flist(fpath, contains="all"):
    """List the regular files directly inside ``fpath``, optionally filtered.

    Args:
        fpath (str): Directory to scan (not recursive).
        contains (str or list of str): A filter whose text contains "all"
            returns every file; any other string is passed to ``isa`` for
            each file. A list applies each filter in turn and concatenates
            the results, so a file may appear more than once.

    Returns:
        list of str: Full paths of the matching files.
    """
    candidates = (os.path.join(fpath, name) for name in os.listdir(fpath))
    all_files = [path for path in candidates if os.path.isfile(path)]

    if isinstance(contains, list):
        collected = []
        for single_filter in contains:
            collected += flist(fpath, single_filter)
        return collected

    if 'all' in contains.lower():
        return all_files
    return [path for path in all_files if isa(path, contains)]
1250
def sort_kind(df, by="name", ascending=True):
    """Return ``df`` sorted by one column, ignoring case for object columns.

    Args:
        df (pandas.DataFrame): Table to sort.
        by (str): Column to sort on.
        ascending (bool): Sort direction; False reverses the ascending order.

    Returns:
        pandas.DataFrame: A sorted copy with a fresh 0..n-1 index.
    """
    column = df[by]
    # Object columns are compared on their lower-cased text.
    sort_keys = column.str.lower() if column.dtype == 'object' else column
    sorted_index = sort_keys.argsort()
    if not ascending:
        sorted_index = sorted_index[::-1]
    return df.iloc[sorted_index].reset_index(drop=True)
803
1263
 
1264
def isa(*args,**kwargs):
    """
    fpath, contains='img'
    Check whether a path belongs to the category named by ``contains``.

    Arguments may be given by keyword (``fpath=``, ``contains=``) or
    positionally in either order: a string containing a path separator is
    taken as the path, any other string as the category. When that leaves the
    path unset and exactly two strings were passed (e.g. a bare filename such
    as "photo.png"), the first is the path and the second the category.

    Args:
        fpath (str): Path to the file.
        contains (str): Category to test. Default is 'img' for images. Other options include
                      'doc' for documents, 'zip' for ZIP archives, 'dir'/'folder', and 'file'.
    Returns:
        bool: True if the path matches the category, False otherwise
            (including for an unrecognised category).
    Raises:
        ValueError: If no path was supplied.
    """
    fpath = kwargs.get('fpath')
    contains = kwargs.get('contains')

    text_args = [arg for arg in args if isinstance(arg, str)]
    for arg in text_args:
        if '/' in arg or '\\' in arg:
            fpath = arg
        else:
            contains = arg
    if fpath is None and len(text_args) == 2:
        # Neither string looked like a path: fall back to (fpath, contains) order.
        fpath, contains = text_args
    if fpath is None:
        raise ValueError("isa() needs a file path")
    if contains is None:
        contains = 'img'

    kind = contains.lower()
    if 'img' in kind or 'image' in kind:
        return is_image(fpath)
    elif 'doc' in kind:
        return is_document(fpath)
    elif 'zip' in kind:
        return is_zip(fpath)
    elif 'dir' in kind or ('f' in kind and 'd' in kind):
        # NOTE(review): this only checks that the string contains a path
        # separator, not that a directory exists; kept as-is for callers.
        return bool(('/' in fpath) or ('\\' in fpath))
    elif 'fi' in kind:#file
        return os.path.isfile(fpath)
    else:
        print(f"{contains} was not set up correctly")
        return False
1294
+
1295
def is_image(fpath):
    """Return True when the MIME type guessed from the filename starts with 'image'."""
    guessed = mimetypes.guess_type(fpath)[0]
    return bool(guessed) and guessed.startswith('image')
1301
+
1302
def is_document(fpath):
    """Return True when the filename's guessed MIME type is a document type.

    Any ``text/*`` type counts, as do PDF and the Word, Excel and PowerPoint
    types (legacy and OpenXML) listed below.
    """
    document_types = (
        'application/pdf',
        'application/msword',
        'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        'application/vnd.ms-excel',
        'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        'application/vnd.ms-powerpoint',
        'application/vnd.openxmlformats-officedocument.presentationml.presentation',
    )
    guessed = mimetypes.guess_type(fpath)[0]
    if not guessed:
        return False
    return guessed.startswith('text/') or guessed in document_types
1317
+
1318
def is_zip(fpath):
    """Return True when the MIME type guessed from the filename is 'application/zip'."""
    guessed = mimetypes.guess_type(fpath)[0]
    return guessed == 'application/zip'
804
1324
  def listdir(
805
1325
  rootdir,
806
1326
  kind="folder",
@@ -810,83 +1330,6 @@ def listdir(
810
1330
  orient="list",
811
1331
  output="df"
812
1332
  ):
813
- def sort_kind(df, by="name", ascending=True):
814
- if df[by].dtype == 'object': # Check if the column contains string values
815
- if ascending:
816
- sorted_index = df[by].str.lower().argsort()
817
- else:
818
- sorted_index = df[by].str.lower().argsort()[::-1]
819
- else:
820
- if ascending:
821
- sorted_index = df[by].argsort()
822
- else:
823
- sorted_index = df[by].argsort()[::-1]
824
- sorted_df = df.iloc[sorted_index].reset_index(drop=True)
825
- return sorted_df
826
-
827
- def flist(fpath, filter="all"):
828
- all_files = [os.path.join(fpath, f) for f in os.listdir(fpath) if os.path.isfile(os.path.join(fpath, f))]
829
- if isinstance(filter, list):
830
- filt_files = []
831
- for filter_ in filter:
832
- filt_files.extend(flist(fpath, filter_))
833
- return filt_files
834
- else:
835
- if 'all' in filter.lower():
836
- return all_files
837
- else:
838
- filt_files = [f for f in all_files if istype(f, filter)]
839
- return filt_files
840
-
841
- def istype(fpath, filter='img'):
842
- """
843
- Filters file paths based on the specified filter.
844
- Args:
845
- fpath (str): Path to the file.
846
- filter (str): Filter of file to filter. Default is 'img' for images. Other options include 'doc' for documents,
847
- 'zip' for ZIP archives, and 'other' for other types of files.
848
- Returns:
849
- bool: True if the file matches the filter, False otherwise.
850
- """
851
- if 'img' in filter.lower():
852
- return is_image(fpath)
853
- elif 'doc' in filter.lower():
854
- return is_document(fpath)
855
- elif 'zip' in filter.lower():
856
- return is_zip(fpath)
857
- else:
858
- return False
859
-
860
- def is_image(fpath):
861
- mime_type, _ = mimetypes.guess_type(fpath)
862
- if mime_type and mime_type.startswith('image'):
863
- return True
864
- else:
865
- return False
866
-
867
- def is_document(fpath):
868
- mime_type, _ = mimetypes.guess_type(fpath)
869
- if mime_type and (
870
- mime_type.startswith('text/') or
871
- mime_type == 'application/pdf' or
872
- mime_type == 'application/msword' or
873
- mime_type == 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' or
874
- mime_type == 'application/vnd.ms-excel' or
875
- mime_type == 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' or
876
- mime_type == 'application/vnd.ms-powerpoint' or
877
- mime_type == 'application/vnd.openxmlformats-officedocument.presentationml.presentation'
878
- ):
879
- return True
880
- else:
881
- return False
882
-
883
- def is_zip(fpath):
884
- mime_type, _ = mimetypes.guess_type(fpath)
885
- if mime_type == 'application/zip':
886
- return True
887
- else:
888
- return False
889
-
890
1333
  if not kind.startswith("."):
891
1334
  kind = "." + kind
892
1335
 
@@ -915,8 +1358,10 @@ def listdir(
915
1358
  os.path.isfile(item_path)
916
1359
  )
917
1360
  if kind in ['.doc','.img','.zip']: #选择大的类别
918
- if kind != ".folder" and not istype(item_path, kind):
1361
+ if kind != ".folder" and not isa(item_path, kind):
919
1362
  continue
1363
+ elif kind in ['.all']:
1364
+ return flist(fpath, contains=contains)
920
1365
  else: #精确到文件的后缀
921
1366
  if not is_folder and not is_file:
922
1367
  continue
@@ -989,26 +1434,45 @@ def func_list(lib_name, opt="call"):
989
1434
  funcs = dir(lib_name)
990
1435
  return funcs
991
1436
 
992
- def newfolder(pardir, chdir):
993
- import os
1437
+ def newfolder(*args, **kwargs):
1438
+ """
1439
+ newfolder(pardir, chdir)
1440
+
1441
+ Args:
1442
+ pardir (dir): parent dir
1443
+ chdir (str): children dir
1444
+ overwrite (bool): overwrite?
1445
+
1446
+ Returns:
1447
+ mkdir, giving a option if exists_ok or not
1448
+ """
1449
+ overwrite=kwargs.get("overwrite",False)
1450
+ for arg in args:
1451
+ if isinstance(arg, str):
1452
+ if "/" in arg or "\\" in arg:
1453
+ pardir=arg
1454
+ print(f'pardir{pardir}')
1455
+ else:
1456
+ chdir = arg
1457
+ print(f'chdir{chdir}')
1458
+ elif isinstance(arg,bool):
1459
+ overwrite=arg
1460
+ print(overwrite)
1461
+ else:
1462
+ print(f"{arg}Error: not support a {type(arg)} type")
994
1463
  rootdir = []
995
1464
  # Convert string to list
996
1465
  if isinstance(chdir, str):
997
1466
  chdir = [chdir]
998
-
999
1467
  # Subfoldername should be unique
1000
1468
  chdir = list(set(chdir))
1001
-
1002
1469
  if isinstance(pardir, str): # Dir_parents should be 'str' type
1003
1470
  pardir = os.path.normpath(pardir)
1004
-
1005
1471
  # Get the slash type: "/" or "\"
1006
1472
  stype = '/' if '/' in pardir else '\\'
1007
-
1008
1473
  # Check if the parent directory exists and is a directory path
1009
1474
  if os.path.isdir(pardir):
1010
1475
  os.chdir(pardir) # Set current path
1011
-
1012
1476
  # Check if subdirectories are not empty
1013
1477
  if chdir:
1014
1478
  chdir.sort()
@@ -1020,22 +1484,21 @@ def newfolder(pardir, chdir):
1020
1484
  os.mkdir('./' + folder)
1021
1485
  print(f'\n {folder} was created successfully!\n')
1022
1486
  else:
1023
- print(f'\n {folder} already exists! \n')
1024
-
1487
+ if overwrite:
1488
+ shutil.rmtree(child_tmp)
1489
+ os.mkdir('./' + folder)
1490
+ print(f'\n {folder} overwrite! \n')
1491
+ else:
1492
+ print(f'\n {folder} already exists! \n')
1025
1493
  rootdir.append(child_tmp + stype) # Note down
1026
-
1027
1494
  else:
1028
1495
  print('\nWarning: Dir_child doesn\'t exist\n')
1029
-
1030
1496
  else:
1031
1497
  print('\nWarning: Dir_parent is not a directory path\n')
1032
-
1033
1498
  # Dir is the main output, if only one dir, then str type is inconvenient
1034
1499
  if len(rootdir) == 1:
1035
1500
  rootdir = rootdir[0]
1036
-
1037
1501
  return rootdir
1038
-
1039
1502
 
1040
1503
  def figsave(*args,dpi=300):
1041
1504
  DirSave = None