py2ls 0.1.4.6__py3-none-any.whl → 0.1.4.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- py2ls/.git/config +1 -0
- py2ls/ips.py +581 -118
- py2ls/netfinder.py +452 -128
- py2ls/translator.py +172 -121
- {py2ls-0.1.4.6.dist-info → py2ls-0.1.4.8.dist-info}/METADATA +1 -1
- {py2ls-0.1.4.6.dist-info → py2ls-0.1.4.8.dist-info}/RECORD +7 -7
- {py2ls-0.1.4.6.dist-info → py2ls-0.1.4.8.dist-info}/WHEEL +1 -1
py2ls/ips.py
CHANGED
@@ -39,6 +39,382 @@ import mimetypes
|
|
39
39
|
from pprint import pp
|
40
40
|
from dateutil import parser
|
41
41
|
from datetime import datetime
|
42
|
+
from collections import Counter
|
43
|
+
from fuzzywuzzy import fuzz,process
|
44
|
+
from py2ls import netfinder
|
45
|
+
from langdetect import detect
|
46
|
+
import shutil
|
47
|
+
from duckduckgo_search import DDGS
|
48
|
+
|
49
|
+
|
50
|
+
# Default output directory used by search() and echo() when the caller does not
# supply dir_save.
# NOTE(review): this is a machine-specific absolute path; on any other machine the
# directory will not exist unless created — consider deriving it from the user's
# home directory.
dir_save='/Users/macjianfeng/Dropbox/Downloads/'
|
51
|
+
|
52
|
+
def rm_folder(folder_path, verbose=True):
    """
    Delete a directory tree, reporting the outcome instead of raising.

    Args:
        folder_path (str): directory to remove together with its contents.
        verbose (bool, optional): print a success/failure message. Defaults to True.

    Returns:
        None. Failures are swallowed and, when verbose, printed.
    """
    try:
        shutil.rmtree(folder_path)
    except Exception as err:
        failure = err
    else:
        failure = None

    if not verbose:
        return
    if failure is None:
        print(f'Successfully deleted {folder_path}')
    else:
        print(f'Failed to delete {folder_path}. Reason: {failure}')
|
60
|
+
|
61
|
+
|
62
|
+
def search(query, limit=5, kind='text', output='df',verbose=False,download=True, dir_save=dir_save):
    """
    Run a DuckDuckGo text search and return the hits as a DataFrame.

    Args:
        query (str): search keywords.
        limit (int, optional): passed to DDGS().text as max_results. Defaults to 5.
        kind (str, optional): search type. Only text search (any value containing
            'te', case-insensitive) is implemented. Defaults to 'text'.
        output (str, optional): currently unused; kept for API compatibility.
        verbose (bool, optional): print the results and any download failure.
        download (bool, optional): hand the result links to netfinder.downloader.
            Defaults to True.
        dir_save (str, optional): directory passed to the downloader.

    Returns:
        pandas.DataFrame: built from the DDGS text results, with the 'href'
        column renamed to 'links'.

    Raises:
        ValueError: if `kind` does not select the text search (the result would
            otherwise be undefined).
    """
    # Reject unsupported kinds up front instead of failing later on an unbound name.
    if 'te' not in kind.lower():
        raise ValueError(f"search kind '{kind}' is not supported; only 'text' is implemented")

    from duckduckgo_search import DDGS

    results = DDGS().text(query, max_results=limit)
    res = pd.DataFrame(results)
    res.rename(columns={"href": "links"}, inplace=True)
    if verbose:
        print(f'searching "{query}": got the results below\n{res}')
    if download:
        # Downloading is best-effort: a failure must not lose the search results,
        # but Ctrl-C / SystemExit are no longer swallowed.
        try:
            netfinder.downloader(url=res.links.tolist(), dir_save=dir_save, verbose=verbose)
        except Exception as e:
            if verbose:
                print(f"failed link: {e}")
    return res
|
77
|
+
|
78
|
+
def echo(*args,**kwargs):
    """
    Ask a DuckDuckGo AI-chat model a question and optionally log the answer.

    Usage:
        echo(query, model="gpt", verbose=True, log=True, dir_save=dir_save)

    Positional arguments are sorted by type and content:
        * a string naming an existing directory -> dir_save
        * any other string of at most 5 characters -> model
        * any longer string -> query
        * a dict -> may carry "verbose" and "log"

    Args:
        query (str): the question. May be given by keyword or as a positional
            string longer than 5 characters (a positional one wins).
        model (str, optional): model hint, matched as a substring of the
            supported model names. Defaults to "gpt".
        verbose (bool, optional): pretty-print the answer. Defaults to True.
        log (bool, optional): prepend the question/answer to a log file.
            Defaults to True.
        dir_save (str, optional): log location. A directory (existing, or a
            path ending in a separator) receives "log_ai.md"; any other path
            is used as the log file itself. Defaults to the module-level
            dir_save.

    Returns:
        str: the answer from ai

    Raises:
        ValueError: if no query was supplied.
    """
    query = kwargs.get('query', None)
    model = kwargs.get('model', 'gpt')
    verbose = kwargs.get('verbose', True)
    log = kwargs.get('log', True)
    # Read the module-level default without rebinding it, so a per-call
    # dir_save does not leak into later calls.
    save_target = kwargs.get('dir_save', dir_save)

    for arg in args:
        if isinstance(arg, str):
            if os.path.isdir(arg):
                save_target = arg
            elif len(arg) <= 5:
                model = arg
            else:
                query = arg
        elif isinstance(arg, dict):
            verbose = arg.get("verbose", verbose)
            log = arg.get("log", log)

    if query is None:
        raise ValueError("echo() needs a query: pass query='...' or a string longer than 5 characters")

    def is_in_any(str_candi_short, str_full, ignore_case=True):
        # True if any candidate is a non-empty substring of str_full.
        if isinstance(str_candi_short, str):
            str_candi_short = [str_candi_short]
        if ignore_case:
            str_full = str_full.lower()
            str_candi_short = [str(i).lower() for i in str_candi_short]
        return any(i and i in str_full for i in str_candi_short)

    def valid_mod_name(str_fly):
        # Map a user hint such as "gpt" or "llama" onto a full model name.
        for full_name in ("claude-3-haiku", "gpt-3.5", "llama-3-70b", "mixtral-8x7b"):
            if is_in_any(str_fly, full_name):
                return full_name
        print(f"not support your model{model}, supported models: 'claude','gpt(default)', 'llama','mixtral'")
        return "gpt-3.5" # default model

    model_valid = valid_mod_name(model)
    res = DDGS().chat(query, model=model_valid)
    if verbose:
        pp(res)
    if log:
        dt_str = datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d_%H:%M:%S')
        res_ = f"\n\n####Q:{query}\n\n#####Ans:{dt_str}\n\n>{res}\n"
        # A path with a final component that is not an existing directory is
        # taken as the log file itself; everything else is a directory.
        if os.path.basename(save_target) and not os.path.isdir(save_target):
            fpath = save_target
        else:
            os.makedirs(save_target, exist_ok=True)
            fpath = os.path.join(save_target, "log_ai.md")
        fupdate(fpath=fpath, content=res_)
        print(f"log file:{fpath}")
    return res
|
148
|
+
|
149
|
+
def chat(*args, **kwargs):
    """
    Convenience wrapper around echo().

    A lone string argument is passed to echo() as the `query` keyword. Any
    other combination of positional arguments is forwarded unchanged rather
    than being discarded, so echo() can interpret them itself.

    Returns:
        Whatever echo() returns.
    """
    if len(args) == 1 and isinstance(args[0], str):
        kwargs['query'] = args[0]
        return echo(**kwargs)
    return echo(*args, **kwargs)
|
153
|
+
|
154
|
+
def ai(*args, **kwargs):
    """
    Convenience wrapper around echo().

    A lone string argument is passed to echo() as the `query` keyword. Any
    other combination of positional arguments is forwarded unchanged rather
    than being discarded, so echo() can interpret them itself.

    Returns:
        Whatever echo() returns.
    """
    if len(args) == 1 and isinstance(args[0], str):
        kwargs['query'] = args[0]
        return echo(**kwargs)
    return echo(*args, **kwargs)
|
158
|
+
|
159
|
+
def detect_lang(text, output='lang',verbose=True):
    """
    Detect the language of `text` with langdetect and report it by name or code.

    Args:
        text (str): text to analyse; it must pass is_text().
        output (str, optional): any value containing 'c' (e.g. 'code') returns
            the two-letter code, anything else returns the language name.
            Defaults to 'lang'.
        verbose (bool, optional): forwarded to strcmp() when the detected code
            is not in the table and the closest code has to be guessed.

    Returns:
        str: language name or code. The string 'no' is returned when the text
        is rejected by is_text() or detection fails.
        NOTE(review): 'no' is also the code for Norwegian, so callers asking
        for codes cannot tell the two apart.
    """
    lang_code_iso639={'Abkhazian': 'ab',
                      'Afar': 'aa',
                      'Afrikaans': 'af',
                      'Akan': 'ak',
                      'Albanian': 'sq',
                      'Amharic': 'am',
                      'Arabic': 'ar',
                      'Armenian': 'hy',
                      'Assamese': 'as',
                      # 'Avaric': 'av',
                      'Aymara': 'ay',
                      'Azerbaijani': 'az',
                      'Bashkir': 'ba',
                      'Basque': 'eu',
                      'Belarusian': 'be',
                      'Bislama': 'bi',
                      'Breton': 'br',
                      'Burmese': 'my',
                      'Catalan, Valencian': 'ca',
                      'Chamorro': 'ch',
                      'Chichewa, Chewa, Nyanja': 'ny',
                      'Chinese': 'zh',
                      'Corsican': 'co',
                      'Cree': 'cr',
                      'Croatian': 'hr',
                      'Danish': 'da',
                      'Dutch, Flemish': 'nl',
                      'Dzongkha': 'dz',
                      'English': 'en',
                      'Finnish': 'fi',
                      'French': 'fr',
                      'Galician': 'gl',
                      'Georgian': 'ka',
                      'German': 'de',
                      'Greek, Modern (1453–)': 'el',
                      'Gujarati': 'gu',
                      'Hausa': 'ha',
                      'Hebrew': 'he',
                      'Hindi': 'hi',
                      'Hungarian': 'hu',
                      'Icelandic': 'is',
                      'Italian': 'it',
                      'Kikuyu, Gikuyu': 'ki',
                      'Korean': 'ko',
                      'Kurdish': 'ku',
                      'Latin': 'la',
                      'Limburgan, Limburger, Limburgish': 'li',
                      'Luba-Katanga': 'lu',
                      'Macedonian': 'mk',
                      'Malay': 'ms',
                      'Nauru': 'na',
                      'North Ndebele': 'nd',
                      'Nepali': 'ne',
                      'Norwegian': 'no',
                      'Norwegian Nynorsk': 'nn',
                      'Sichuan Yi, Nuosu': 'ii',
                      'Occitan': 'oc',
                      'Ojibwa': 'oj',
                      'Oriya': 'or',
                      'Ossetian, Ossetic': 'os',
                      'Persian': 'fa',
                      'Punjabi, Panjabi': 'pa',
                      'Quechua': 'qu',
                      'Romanian, Moldavian, Moldovan': 'ro',
                      'Russian': 'ru',
                      'Samoan': 'sm',
                      'Sanskrit': 'sa',
                      'Serbian': 'sr',
                      'Shona': 'sn',
                      'Sinhala, Sinhalese': 'si',
                      'Slovenian': 'sl',
                      'Somali': 'so',
                      'Sundanese': 'su',
                      'Swahili': 'sw',
                      'Swati': 'ss',
                      'Tajik': 'tg',
                      'Tamil': 'ta',
                      'Telugu': 'te',
                      'Thai': 'th',
                      'Tibetan': 'bo',
                      'Tigrinya': 'ti',
                      'Tonga (Tonga Islands)': 'to',
                      'Tsonga': 'ts',
                      'Twi': 'tw',
                      'Ukrainian': 'uk',
                      'Urdu': 'ur',
                      'Uzbek': 'uz',
                      'Venda': 've',
                      'Vietnamese': 'vi',
                      'Volapük': 'vo',
                      'Welsh': 'cy',
                      'Wolof': 'wo',
                      'Xhosa': 'xh',
                      'Yiddish': 'yi',
                      'Yoruba': 'yo',
                      'Zulu': 'zu',
                      # Codes langdetect can return that were missing from the
                      # table, which made the lookup report an unrelated language.
                      'Bulgarian': 'bg',
                      'Bengali': 'bn',
                      'Czech': 'cs',
                      'Spanish': 'es',
                      'Estonian': 'et',
                      'Indonesian': 'id',
                      'Japanese': 'ja',
                      'Kannada': 'kn',
                      'Lithuanian': 'lt',
                      'Latvian': 'lv',
                      'Malayalam': 'ml',
                      'Marathi': 'mr',
                      'Polish': 'pl',
                      'Portuguese': 'pt',
                      'Slovak': 'sk',
                      'Swedish': 'sv',
                      'Tagalog': 'tl',
                      'Turkish': 'tr'}
    name_by_code = {code: name for name, code in lang_code_iso639.items()}
    want_code = 'c' in output.lower() # return code
    try:
        if not is_text(text):
            print(f"{text} is not supported")
            return 'no'
        code_detect = detect(text)
        # Region-tagged results such as 'zh-cn' are looked up by primary subtag.
        primary = code_detect.split('-')[0].lower()
        if primary in name_by_code:
            return primary if want_code else name_by_code[primary]
        # Unknown code: fall back to the closest known code, as before.
        best_code = strcmp(code_detect, list(name_by_code), verbose=verbose)[0]
        return best_code if want_code else name_by_code[best_code]
    except Exception:
        # Detection is best-effort; any failure is reported as 'no'.
        return 'no'
|
270
|
+
|
271
|
+
def is_text(s):
    """
    Tell whether `s` mixes alphabetic and non-alphabetic characters.

    Returns:
        bool: True only if `s` contains at least one alphabetic character and
        at least one non-alphabetic character (space, digit, punctuation, ...).
    """
    # Collect which kinds of characters occur; both kinds must be present.
    kinds_seen = {ch.isalpha() for ch in s}
    return kinds_seen == {True, False}
|
276
|
+
|
277
|
+
def strcmp(search_term, candidates, ignore_case=True, verbose=True, scorer='WR'):
    """
    Compares a search term with a list of candidate strings and finds the best match based on similarity score.

    Parameters:
    search_term (str): The term to be searched for.
    candidates (list of str | tuple of str | str): Candidate strings to compare
        against the search term. A single string is treated as the only candidate.
    ignore_case (bool): If True, the comparison ignores case differences.
    verbose (bool): If True, prints the similarity score and the best match.
    scorer (str): Scoring function, matched case-insensitively: a value
        containing 'part' uses fuzz.partial_ratio, one containing 'w' uses
        fuzz.WRatio, one containing 'ratio' uses fuzz.ratio; anything else
        (and the default 'WR') uses fuzz.WRatio.

    Returns:
    tuple: A tuple containing the best match and its index in the candidates list
        (index 0 when a single string was given).

    Raises:
    ValueError: If `candidates` is an empty list or tuple.
    """
    def to_lower(s, ignore_case=True):
        #Converts a string or list of strings to lowercase if ignore_case is True.
        if ignore_case:
            if isinstance(s, str):
                return s.lower()
            elif isinstance(s, (list, tuple)):
                return [elem.lower() for elem in s]
        return s

    # Pick the scoring function once; comparisons are on the lowercased name.
    scorer_key = scorer.lower()
    if 'part' in scorer_key:
        score = fuzz.partial_ratio
    elif 'w' in scorer_key:
        score = fuzz.WRatio
    elif 'ratio' in scorer_key:
        score = fuzz.ratio
    else:
        score = fuzz.WRatio

    str1_ = to_lower(search_term, ignore_case)
    if isinstance(candidates, (list, tuple)):
        if not candidates:
            raise ValueError("strcmp() needs at least one candidate")
        similarity_scores = [score(str1_, word) for word in to_lower(candidates, ignore_case)]
        best_match_index = similarity_scores.index(max(similarity_scores))
        best_match_score = similarity_scores[best_match_index]
        best_candidate = candidates[best_match_index]
        pool = candidates
    else:
        # A lone string is its own best match; do not index into its characters.
        best_match_index = 0
        best_match_score = score(str1_, to_lower(candidates, ignore_case))
        best_candidate = candidates
        pool = [candidates]
    if verbose:
        print(f"\nbest_match is: {best_candidate,best_match_score}")
        best_match = process.extract(search_term, pool)
        print(f"建议: {best_match}")
    return best_candidate, best_match_index
|
325
|
+
|
326
|
+
# Example usage
|
327
|
+
# str1 = "plos biology"
|
328
|
+
# str2 = ['PLoS Computational Biology', 'PLOS BIOLOGY']
|
329
|
+
# best_match, idx = strcmp(str1, str2, ignore_case=1)
|
330
|
+
|
331
|
+
def counter(list_, verbose=True):
    """
    Count how often each element occurs in `list_`.

    Args:
        list_ (iterable): hashable items to tally.
        verbose (bool, optional): print one "item: count" line per distinct
            item. Defaults to True.

    Returns:
        collections.Counter: the tally.
    """
    tally = Counter(list_)
    if verbose:
        for element, occurrences in tally.items():
            print(f"{element}: {occurrences}")
    return tally
|
338
|
+
# usage:
|
339
|
+
# print(f"Return an iterator over elements repeating each as many times as its count:\n{sorted(c.elements())}")
|
340
|
+
# print(f"Return a list of the n most common elements:\n{c.most_common()}")
|
341
|
+
# print(f"Compute the sum of the counts:\n{c.total()}")
|
342
|
+
|
343
|
+
def is_num(s):
    """
    Check if a value can be converted to a number (int or float).
    Parameters:
    - s (str): The string to check. Non-string values are accepted too.
    Returns:
    - bool: True if float(s) succeeds, False otherwise (including for values
      such as None or a list, which float() rejects with TypeError).
    """
    try:
        float(s)  # Try converting the value to a float
    except (TypeError, ValueError):
        return False
    return True
|
356
|
+
def isnum(s):
    """Alias of is_num(): report whether `s` can be converted to a number."""
    numeric = is_num(s)
    return numeric
|
358
|
+
|
359
|
+
def str2time(time_str, fmt='24'):
    """
    Convert a time string into the specified format.
    Parameters:
    - time_str (str): The time string to be converted, e.g. "14:30", "14:30:45"
      or "02:30:45 PM". It must contain ':'-separated hour and minute fields.
    - fmt (str): Output format. A value containing '12' selects "%I:%M:%S %p",
      a value containing '24' selects "%H:%M:%S"; any other value is used
      as a strftime pattern as-is. Defaults to '24'.
        %I represents the hour in 12-hour format.
        %H represents the hour in 24-hour format (00 through 23).
        %M represents the minute.
        %S represents the second.
        %p represents AM or PM.
    Returns:
    - str: The converted time string.
    Raises:
    - ValueError: If the time string cannot be parsed.
    """
    def time_len_corr(time_str):
        # Normalise the input to "H:M:S", followed by " AM"/" PM" when present.
        if ':' not in time_str:
            raise ValueError("missing ':' separator")
        time_str_split = [i for i in ssplit(time_str,by=[':'," ","digital_num"]) if is_num(i)]
        if len(time_str_split) == 2:
            # Seconds omitted: assume zero.
            time_str_split.append("00")
        if len(time_str_split) != 3:
            raise ValueError(f"expected 2 or 3 numeric fields, got {len(time_str_split)}")
        time_str_full = ":".join(time_str_split)
        if 'am' in time_str.lower():
            time_str_full += " AM"
        elif "pm" in time_str.lower():
            time_str_full += " PM"
        return time_str_full

    if '12' in fmt:
        fmt = "%I:%M:%S %p"
    elif '24' in fmt:
        fmt = "%H:%M:%S"

    try:
        normalized = time_len_corr(time_str)
    except ValueError as e:
        raise ValueError(f"Unable to parse time string: {time_str}. Error: {e}") from e

    try:
        # Try the 24-hour layout first, then fall back to 12-hour with AM/PM
        time_obj = datetime.strptime(normalized, '%H:%M:%S')
    except ValueError:
        try:
            time_obj = datetime.strptime(normalized, '%I:%M:%S %p')
        except ValueError as e:
            raise ValueError(f"Unable to parse time string: {time_str}. Error: {e}") from e

    # Format the time object to the desired output format
    formatted_time = time_obj.strftime(fmt)
    return formatted_time
|
408
|
+
|
409
|
+
# # Example usage:
|
410
|
+
# time_str1 = "14:30:45"
|
411
|
+
# time_str2 = "02:30:45 PM"
|
412
|
+
|
413
|
+
# formatted_time1 = str2time(time_str1, fmt='12') # Convert to 12-hour format
|
414
|
+
# formatted_time2 = str2time(time_str2, fmt='24') # Convert to 24-hour format
|
415
|
+
|
416
|
+
# print(formatted_time1) # Output: 02:30:45 PM
|
417
|
+
# print(formatted_time2) # Output: 14:30:45
|
42
418
|
|
43
419
|
def str2date(date_str, fmt='%Y-%m-%d_%H:%M:%S'):
|
44
420
|
"""
|
@@ -153,18 +529,33 @@ def num2str(num, *args):
|
|
153
529
|
# print(num2str(7000.125, 2),type(num2str(7000.125, 2))) # Output: "7000.13"
|
154
530
|
# print(num2str(12345.6789, ","),type(num2str(12345.6789, ","))) # Output: "12,345.6789"
|
155
531
|
# print(num2str(7000.00, ","),type(num2str(7000.00, ","))) # Output: "7,000.00"
|
156
|
-
def sreplace(
|
532
|
+
def sreplace(*args,**kwargs):
|
157
533
|
"""
|
534
|
+
sreplace(text, by=None, robust=True)
|
158
535
|
Replace specified substrings in the input text with provided replacements.
|
159
536
|
Args:
|
160
537
|
text (str): The input text where replacements will be made.
|
161
|
-
|
538
|
+
by (dict, optional): A dictionary containing substrings to be replaced as keys
|
162
539
|
and their corresponding replacements as values. Defaults to {".com": "..come", "\n": " ", "\t": " ", " ": " "}.
|
163
540
|
robust (bool, optional): If True, additional default replacements for newline and tab characters will be applied.
|
164
541
|
Default is False.
|
165
542
|
Returns:
|
166
543
|
str: The text after replacements have been made.
|
167
544
|
"""
|
545
|
+
text = None
|
546
|
+
by = kwargs.get('by', None)
|
547
|
+
robust = kwargs.get('robust', True)
|
548
|
+
|
549
|
+
for arg in args:
|
550
|
+
if isinstance(arg,str):
|
551
|
+
text=arg
|
552
|
+
elif isinstance(arg,dict):
|
553
|
+
by=arg
|
554
|
+
elif isinstance(arg,bool):
|
555
|
+
robust=arg
|
556
|
+
else:
|
557
|
+
Error(f"{type(arg)} is not supported")
|
558
|
+
|
168
559
|
# Default replacements for newline and tab characters
|
169
560
|
default_replacements = {
|
170
561
|
"\a": "",
|
@@ -185,19 +576,18 @@ def sreplace(text, dict_replace=None, robust=True):
|
|
185
576
|
}
|
186
577
|
|
187
578
|
# If dict_replace is None, use the default dictionary
|
188
|
-
if
|
189
|
-
|
190
|
-
|
579
|
+
if by is None:
|
580
|
+
by = {}
|
191
581
|
# If robust is True, update the dictionary with default replacements
|
192
582
|
if robust:
|
193
|
-
|
583
|
+
by.update(default_replacements)
|
194
584
|
|
195
585
|
# Iterate over each key-value pair in the dictionary and replace substrings accordingly
|
196
|
-
for k, v in
|
586
|
+
for k, v in by.items():
|
197
587
|
text = text.replace(k, v)
|
198
588
|
return text
|
199
589
|
# usage:
|
200
|
-
# sreplace(text,
|
590
|
+
# sreplace(text, by=dict(old_str='new_str'), robust=True)
|
201
591
|
|
202
592
|
def paper_size(paper_type_str='a4'):
|
203
593
|
df=pd.DataFrame({'a0':[841,1189],'a1':[594,841],'a2':[420,594],'a3':[297,420],'a4':[210,297],'a5':[148,210],'a6':[105,148],'a7':[74,105],
|
@@ -339,10 +729,10 @@ def ssplit(text, by="space", verbose=False, **kws):
|
|
339
729
|
if verbose:
|
340
730
|
print(f"split_by_word_length(text, length)")
|
341
731
|
return split_by_word_length(text, **kws) # split_by_word_length(text, length)
|
342
|
-
elif "," in by:
|
343
|
-
|
344
|
-
|
345
|
-
|
732
|
+
# elif "," in by:
|
733
|
+
# if verbose:
|
734
|
+
# print(f"splited by ','")
|
735
|
+
# return text.split(",")
|
346
736
|
elif isinstance(by, list):
|
347
737
|
if verbose:
|
348
738
|
print(f"split_by_multiple_delimiters: ['|','&']")
|
@@ -600,6 +990,31 @@ def fload(fpath, kind=None, **kwargs):
|
|
600
990
|
# xlsx_content = fload('sample.xlsx')
|
601
991
|
# docx_content = fload('sample.docx')
|
602
992
|
|
993
|
+
def fupdate(fpath, content=None):
    """
    Update a file by adding new content at the top and moving the old content to the bottom.
    Parameters
    ----------
    fpath : str
        The file path where the content should be updated.
    content : str, optional
        The new content to add at the top of the file. If not provided, the function will not add any new content.
    Notes
    -----
    - If the file at `fpath` does not exist, it will be created.
    - The new content will be added at the top, followed by the old content of the file.
    - The file is read and written as UTF-8, so non-ASCII content does not
      depend on the platform's default encoding.
    """
    content = content or ""
    if os.path.exists(fpath):
        with open(fpath, 'r', encoding='utf-8') as file:
            old_content = file.read()
    else:
        old_content = ''

    # Rewrite the whole file: new content first, previous content after it.
    with open(fpath, 'w', encoding='utf-8') as file:
        file.write(content)
        file.write(old_content)
|
1017
|
+
|
603
1018
|
def fsave(
|
604
1019
|
fpath,
|
605
1020
|
content,
|
@@ -607,6 +1022,7 @@ def fsave(
|
|
607
1022
|
font_name="Times",
|
608
1023
|
font_size=10,
|
609
1024
|
spacing=6,
|
1025
|
+
mode='w',
|
610
1026
|
**kwargs,
|
611
1027
|
):
|
612
1028
|
"""
|
@@ -622,8 +1038,8 @@ def fsave(
|
|
622
1038
|
Returns:
|
623
1039
|
None
|
624
1040
|
"""
|
625
|
-
def save_content(fpath, content):
|
626
|
-
with open(fpath,
|
1041
|
+
def save_content(fpath, content, mode=mode):
|
1042
|
+
with open(fpath, mode, encoding='utf-8') as file:
|
627
1043
|
file.write(content)
|
628
1044
|
|
629
1045
|
|
@@ -642,19 +1058,19 @@ def fsave(
|
|
642
1058
|
doc.save(fpath)
|
643
1059
|
|
644
1060
|
|
645
|
-
def save_txt_md(fpath, content, sep="\n"):
|
1061
|
+
def save_txt_md(fpath, content, sep="\n",mode='w'):
|
646
1062
|
# Ensure content is a single string
|
647
1063
|
if isinstance(content, list):
|
648
1064
|
content = sep.join(content)
|
649
|
-
save_content(fpath, sep.join(content))
|
1065
|
+
save_content(fpath, sep.join(content),mode)
|
650
1066
|
|
651
1067
|
|
652
|
-
def save_html(fpath, content, font_name, font_size):
|
1068
|
+
def save_html(fpath, content, font_name, font_size,mode='w'):
|
653
1069
|
html_content = "<html><body>"
|
654
1070
|
for paragraph_text in content:
|
655
1071
|
html_content += f'<p style="font-family:{font_name}; font-size:{font_size}px;">{paragraph_text}</p>'
|
656
1072
|
html_content += "</body></html>"
|
657
|
-
save_content(fpath, html_content)
|
1073
|
+
save_content(fpath, html_content,mode)
|
658
1074
|
|
659
1075
|
|
660
1076
|
def save_pdf(fpath, content, font_name, font_size):
|
@@ -737,16 +1153,16 @@ def fsave(
|
|
737
1153
|
"xml",
|
738
1154
|
"yaml",
|
739
1155
|
]:
|
740
|
-
|
741
|
-
f"
|
1156
|
+
print(
|
1157
|
+
f"Warning:\n{kind} is not in the supported list ['docx', 'txt', 'md', 'html', 'pdf', 'csv', 'xlsx', 'json', 'xml', 'yaml']"
|
742
1158
|
)
|
743
1159
|
|
744
1160
|
if kind == "docx" or kind=="doc":
|
745
1161
|
save_docx(fpath, content, font_name, font_size, spacing)
|
746
1162
|
elif kind == "txt":
|
747
|
-
save_txt_md(fpath, content, sep="")
|
1163
|
+
save_txt_md(fpath, content, sep="",mode=mode)
|
748
1164
|
elif kind == "md":
|
749
|
-
save_txt_md(fpath, content, sep="")
|
1165
|
+
save_txt_md(fpath, content, sep="",mode=mode)
|
750
1166
|
elif kind == "html":
|
751
1167
|
save_html(fpath, content, font_name, font_size)
|
752
1168
|
elif kind == "pdf":
|
@@ -766,9 +1182,12 @@ def fsave(
|
|
766
1182
|
elif kind == "yaml":
|
767
1183
|
save_yaml(fpath, content, **kwargs) # Assuming content is a serializable object
|
768
1184
|
else:
|
769
|
-
|
770
|
-
|
771
|
-
|
1185
|
+
try:
|
1186
|
+
netfinder.downloader(url=content, dir_save=dirname(fpath), kind=kind)
|
1187
|
+
except:
|
1188
|
+
print(
|
1189
|
+
f"Error:\n{kind} is not in the supported list ['docx', 'txt', 'md', 'html', 'pdf', 'csv', 'xlsx', 'json', 'xml', 'yaml']"
|
1190
|
+
)
|
772
1191
|
|
773
1192
|
|
774
1193
|
# # Example usage
|
@@ -792,15 +1211,116 @@ def fsave(
|
|
792
1211
|
def addpath(fpath):
    """
    Put `fpath` at the front of the module search path.

    Args:
        fpath (str): directory to add to sys.path.
    """
    # Insert the argument itself (the builtin `dir` was inserted before).
    sys.path.insert(0, fpath)
|
794
1213
|
def dirname(fpath):
    """
    dirname: Extracting Directory Name from a File Path
    Args:
        fpath (str): the file or directory path
    Returns:
        str: directory, without filename, always ending in "/". A bare
        filename with no directory part yields "./" (the current directory)
        rather than "/" (the filesystem root).
    """
    dirname_=os.path.dirname(fpath)
    if not dirname_:
        # No directory component: refer to the current directory, not root.
        return "./"
    if not dirname_.endswith('/'):
        dirname_=dirname_+"/"
    return dirname_
|
799
|
-
|
1225
|
+
|
1226
|
+
def dir_name(fpath):
    """Alias of dirname(): return the directory part of `fpath`."""
    parent = dirname(fpath)
    return parent
|
801
1228
|
def basename(fpath):
    """
    Return the final component of a path.

    Args:
        fpath (str): the file or directory path
    Returns:
        str: the part after the last separator, e.g. "file.txt" for
        "/dir/file.txt"; empty when the path ends with a separator.
    """
    _head, tail = os.path.split(fpath)
    return tail
|
1237
|
+
def flist(fpath, contains="all"):
    """
    List the regular files directly inside a directory, optionally filtered.

    Args:
        fpath (str): directory to scan (not recursive).
        contains (str | list): "all" (any string containing it,
            case-insensitive) returns every file; any other string is passed
            to isa() as the filter; a list applies each filter in turn and
            concatenates the results.

    Returns:
        list: full paths of the matching files.
    """
    files_here = []
    for entry in os.listdir(fpath):
        full_path = os.path.join(fpath, entry)
        if os.path.isfile(full_path):
            files_here.append(full_path)

    if isinstance(contains, list):
        # One pass per filter; results are appended in filter order.
        matched = []
        for single_filter in contains:
            matched += flist(fpath, single_filter)
        return matched

    if 'all' in contains.lower():
        return files_here
    return [candidate for candidate in files_here if isa(candidate, contains)]
|
1250
|
+
def sort_kind(df, by="name", ascending=True):
    """
    Return `df` sorted by one column, comparing strings case-insensitively.

    Args:
        df (pandas.DataFrame): table to sort.
        by (str): column to sort on. Defaults to "name".
        ascending (bool): sort direction. Defaults to True.

    Returns:
        pandas.DataFrame: reordered copy with a fresh 0..n-1 index.
    """
    sort_keys = df[by]
    if sort_keys.dtype == 'object':  # string-like column: ignore case
        sort_keys = sort_keys.str.lower()
    positions = sort_keys.argsort()
    if not ascending:
        positions = positions[::-1]
    return df.iloc[positions].reset_index(drop=True)
|
803
1263
|
|
1264
|
+
def isa(*args,**kwargs):
    """
    isa(fpath, contains='img')
    Check whether a path belongs to the category named by `contains`.

    Positional strings may come in either order: one containing a path
    separator is the path, one without is the category. When no positional
    string has a separator and a category is also given (as a second
    positional string or as `contains=`), the first string is taken as the path.

    Args:
        fpath (str): Path to the file.
        contains (str): category to test, matched by substring and ignoring
            case. Default is 'img' for images. Other options include 'doc' for
            documents, 'zip' for ZIP archives, 'dir'/'folder' for directories
            and 'file' for regular files.
    Returns:
        bool: True if the file matches the contains, False otherwise (also for
        an unrecognised category, after printing a message).
    Raises:
        ValueError: if no path was supplied.
    """
    fpath = kwargs.get('fpath')
    contains = kwargs.get('contains', 'img')
    plain = []  # positional strings without a path separator
    for arg in args:
        if isinstance(arg, str):
            if '/' in arg or '\\' in arg:
                fpath = arg
            else:
                plain.append(arg)
    # A separator-free string is the path only if a category is given as well.
    if fpath is None and plain and (len(plain) > 1 or 'contains' in kwargs):
        fpath = plain.pop(0)
    if plain:
        contains = plain[-1]
    if fpath is None:
        raise ValueError("isa() needs a file path")

    kind = contains.lower()
    if 'img' in kind or 'image' in kind:
        return is_image(fpath)
    elif 'doc' in kind:
        return is_document(fpath)
    elif 'zip' in kind:
        return is_zip(fpath)
    elif 'dir' in kind or ('f' in kind and 'd' in kind):
        # Ask the filesystem, mirroring the 'file' branch below.
        return os.path.isdir(fpath)
    elif 'fi' in kind:#file
        return os.path.isfile(fpath)
    else:
        print(f"{contains} was not set up correctly")
        return False
|
1294
|
+
|
1295
|
+
def is_image(fpath):
    """
    Report whether the file name maps to an image MIME type.

    The decision comes from mimetypes.guess_type(), i.e. from the name's
    extension; the file itself is not opened.
    """
    guessed = mimetypes.guess_type(fpath)[0]
    return bool(guessed and guessed.startswith('image'))
|
1301
|
+
|
1302
|
+
def is_document(fpath):
    """
    Report whether the file name maps to a document MIME type.

    Any 'text/*' type counts, as do PDF and the Word, Excel and PowerPoint
    types listed below. The decision comes from mimetypes.guess_type(), i.e.
    from the name's extension; the file itself is not opened.
    """
    document_types = (
        'application/pdf',
        'application/msword',
        'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        'application/vnd.ms-excel',
        'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        'application/vnd.ms-powerpoint',
        'application/vnd.openxmlformats-officedocument.presentationml.presentation',
    )
    guessed = mimetypes.guess_type(fpath)[0]
    if not guessed:
        return False
    return guessed.startswith('text/') or guessed in document_types
|
1317
|
+
|
1318
|
+
def is_zip(fpath):
    """
    Report whether the file name maps to the 'application/zip' MIME type.

    The decision comes from mimetypes.guess_type(), i.e. from the name's
    extension; the file itself is not opened.
    """
    guessed = mimetypes.guess_type(fpath)[0]
    return guessed == 'application/zip'
|
804
1324
|
def listdir(
|
805
1325
|
rootdir,
|
806
1326
|
kind="folder",
|
@@ -810,83 +1330,6 @@ def listdir(
|
|
810
1330
|
orient="list",
|
811
1331
|
output="df"
|
812
1332
|
):
|
813
|
-
def sort_kind(df, by="name", ascending=True):
|
814
|
-
if df[by].dtype == 'object': # Check if the column contains string values
|
815
|
-
if ascending:
|
816
|
-
sorted_index = df[by].str.lower().argsort()
|
817
|
-
else:
|
818
|
-
sorted_index = df[by].str.lower().argsort()[::-1]
|
819
|
-
else:
|
820
|
-
if ascending:
|
821
|
-
sorted_index = df[by].argsort()
|
822
|
-
else:
|
823
|
-
sorted_index = df[by].argsort()[::-1]
|
824
|
-
sorted_df = df.iloc[sorted_index].reset_index(drop=True)
|
825
|
-
return sorted_df
|
826
|
-
|
827
|
-
def flist(fpath, filter="all"):
|
828
|
-
all_files = [os.path.join(fpath, f) for f in os.listdir(fpath) if os.path.isfile(os.path.join(fpath, f))]
|
829
|
-
if isinstance(filter, list):
|
830
|
-
filt_files = []
|
831
|
-
for filter_ in filter:
|
832
|
-
filt_files.extend(flist(fpath, filter_))
|
833
|
-
return filt_files
|
834
|
-
else:
|
835
|
-
if 'all' in filter.lower():
|
836
|
-
return all_files
|
837
|
-
else:
|
838
|
-
filt_files = [f for f in all_files if istype(f, filter)]
|
839
|
-
return filt_files
|
840
|
-
|
841
|
-
def istype(fpath, filter='img'):
|
842
|
-
"""
|
843
|
-
Filters file paths based on the specified filter.
|
844
|
-
Args:
|
845
|
-
fpath (str): Path to the file.
|
846
|
-
filter (str): Filter of file to filter. Default is 'img' for images. Other options include 'doc' for documents,
|
847
|
-
'zip' for ZIP archives, and 'other' for other types of files.
|
848
|
-
Returns:
|
849
|
-
bool: True if the file matches the filter, False otherwise.
|
850
|
-
"""
|
851
|
-
if 'img' in filter.lower():
|
852
|
-
return is_image(fpath)
|
853
|
-
elif 'doc' in filter.lower():
|
854
|
-
return is_document(fpath)
|
855
|
-
elif 'zip' in filter.lower():
|
856
|
-
return is_zip(fpath)
|
857
|
-
else:
|
858
|
-
return False
|
859
|
-
|
860
|
-
def is_image(fpath):
|
861
|
-
mime_type, _ = mimetypes.guess_type(fpath)
|
862
|
-
if mime_type and mime_type.startswith('image'):
|
863
|
-
return True
|
864
|
-
else:
|
865
|
-
return False
|
866
|
-
|
867
|
-
def is_document(fpath):
|
868
|
-
mime_type, _ = mimetypes.guess_type(fpath)
|
869
|
-
if mime_type and (
|
870
|
-
mime_type.startswith('text/') or
|
871
|
-
mime_type == 'application/pdf' or
|
872
|
-
mime_type == 'application/msword' or
|
873
|
-
mime_type == 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' or
|
874
|
-
mime_type == 'application/vnd.ms-excel' or
|
875
|
-
mime_type == 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' or
|
876
|
-
mime_type == 'application/vnd.ms-powerpoint' or
|
877
|
-
mime_type == 'application/vnd.openxmlformats-officedocument.presentationml.presentation'
|
878
|
-
):
|
879
|
-
return True
|
880
|
-
else:
|
881
|
-
return False
|
882
|
-
|
883
|
-
def is_zip(fpath):
|
884
|
-
mime_type, _ = mimetypes.guess_type(fpath)
|
885
|
-
if mime_type == 'application/zip':
|
886
|
-
return True
|
887
|
-
else:
|
888
|
-
return False
|
889
|
-
|
890
1333
|
if not kind.startswith("."):
|
891
1334
|
kind = "." + kind
|
892
1335
|
|
@@ -915,8 +1358,10 @@ def listdir(
|
|
915
1358
|
os.path.isfile(item_path)
|
916
1359
|
)
|
917
1360
|
if kind in ['.doc','.img','.zip']: #选择大的类别
|
918
|
-
if kind != ".folder" and not
|
1361
|
+
if kind != ".folder" and not isa(item_path, kind):
|
919
1362
|
continue
|
1363
|
+
elif kind in ['.all']:
|
1364
|
+
return flist(fpath, contains=contains)
|
920
1365
|
else: #精确到文件的后缀
|
921
1366
|
if not is_folder and not is_file:
|
922
1367
|
continue
|
@@ -989,26 +1434,45 @@ def func_list(lib_name, opt="call"):
|
|
989
1434
|
funcs = dir(lib_name)
|
990
1435
|
return funcs
|
991
1436
|
|
992
|
-
def newfolder(
|
993
|
-
|
1437
|
+
def newfolder(*args, **kwargs):
|
1438
|
+
"""
|
1439
|
+
newfolder(pardir, chdir)
|
1440
|
+
|
1441
|
+
Args:
|
1442
|
+
pardir (dir): parent dir
|
1443
|
+
chdir (str): children dir
|
1444
|
+
overwrite (bool): overwrite?
|
1445
|
+
|
1446
|
+
Returns:
|
1447
|
+
mkdir, giving a option if exists_ok or not
|
1448
|
+
"""
|
1449
|
+
overwrite=kwargs.get("overwrite",False)
|
1450
|
+
for arg in args:
|
1451
|
+
if isinstance(arg, str):
|
1452
|
+
if "/" in arg or "\\" in arg:
|
1453
|
+
pardir=arg
|
1454
|
+
print(f'pardir{pardir}')
|
1455
|
+
else:
|
1456
|
+
chdir = arg
|
1457
|
+
print(f'chdir{chdir}')
|
1458
|
+
elif isinstance(arg,bool):
|
1459
|
+
overwrite=arg
|
1460
|
+
print(overwrite)
|
1461
|
+
else:
|
1462
|
+
print(f"{arg}Error: not support a {type(arg)} type")
|
994
1463
|
rootdir = []
|
995
1464
|
# Convert string to list
|
996
1465
|
if isinstance(chdir, str):
|
997
1466
|
chdir = [chdir]
|
998
|
-
|
999
1467
|
# Subfoldername should be unique
|
1000
1468
|
chdir = list(set(chdir))
|
1001
|
-
|
1002
1469
|
if isinstance(pardir, str): # Dir_parents should be 'str' type
|
1003
1470
|
pardir = os.path.normpath(pardir)
|
1004
|
-
|
1005
1471
|
# Get the slash type: "/" or "\"
|
1006
1472
|
stype = '/' if '/' in pardir else '\\'
|
1007
|
-
|
1008
1473
|
# Check if the parent directory exists and is a directory path
|
1009
1474
|
if os.path.isdir(pardir):
|
1010
1475
|
os.chdir(pardir) # Set current path
|
1011
|
-
|
1012
1476
|
# Check if subdirectories are not empty
|
1013
1477
|
if chdir:
|
1014
1478
|
chdir.sort()
|
@@ -1020,22 +1484,21 @@ def newfolder(pardir, chdir):
|
|
1020
1484
|
os.mkdir('./' + folder)
|
1021
1485
|
print(f'\n {folder} was created successfully!\n')
|
1022
1486
|
else:
|
1023
|
-
|
1024
|
-
|
1487
|
+
if overwrite:
|
1488
|
+
shutil.rmtree(child_tmp)
|
1489
|
+
os.mkdir('./' + folder)
|
1490
|
+
print(f'\n {folder} overwrite! \n')
|
1491
|
+
else:
|
1492
|
+
print(f'\n {folder} already exists! \n')
|
1025
1493
|
rootdir.append(child_tmp + stype) # Note down
|
1026
|
-
|
1027
1494
|
else:
|
1028
1495
|
print('\nWarning: Dir_child doesn\'t exist\n')
|
1029
|
-
|
1030
1496
|
else:
|
1031
1497
|
print('\nWarning: Dir_parent is not a directory path\n')
|
1032
|
-
|
1033
1498
|
# Dir is the main output, if only one dir, then str type is inconvenient
|
1034
1499
|
if len(rootdir) == 1:
|
1035
1500
|
rootdir = rootdir[0]
|
1036
|
-
|
1037
1501
|
return rootdir
|
1038
|
-
|
1039
1502
|
|
1040
1503
|
def figsave(*args,dpi=300):
|
1041
1504
|
DirSave = None
|