py2ls 0.1.4.7__py3-none-any.whl → 0.1.4.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- py2ls/.git/config +1 -0
- py2ls/ips.py +509 -121
- py2ls/netfinder.py +452 -128
- py2ls/translator.py +172 -121
- {py2ls-0.1.4.7.dist-info → py2ls-0.1.4.8.dist-info}/METADATA +1 -1
- {py2ls-0.1.4.7.dist-info → py2ls-0.1.4.8.dist-info}/RECORD +7 -7
- {py2ls-0.1.4.7.dist-info → py2ls-0.1.4.8.dist-info}/WHEEL +1 -1
py2ls/ips.py
CHANGED
@@ -39,6 +39,306 @@ import mimetypes
|
|
39
39
|
from pprint import pp
|
40
40
|
from dateutil import parser
|
41
41
|
from datetime import datetime
|
42
|
+
from collections import Counter
|
43
|
+
from fuzzywuzzy import fuzz,process
|
44
|
+
from py2ls import netfinder
|
45
|
+
from langdetect import detect
|
46
|
+
import shutil
|
47
|
+
from duckduckgo_search import DDGS
|
48
|
+
|
49
|
+
|
50
|
+
# Module-level default output directory: used as the default `dir_save`
# argument of search() and as the fallback (via `global`) inside echo().
# NOTE(review): this is an absolute path into one user's home directory;
# presumably callers on other machines are expected to override it - confirm.
dir_save='/Users/macjianfeng/Dropbox/Downloads/'
|
51
|
+
|
52
|
+
def rm_folder(folder_path, verbose=True):
    """
    Recursively delete a folder; failures are reported, never raised.

    Args:
        folder_path (str): directory to remove together with its content.
        verbose (bool, optional): print the outcome. Defaults to True.
    """
    try:
        shutil.rmtree(folder_path)
    except Exception as err:
        # Best-effort removal: a failure is only reported on request.
        if verbose:
            print(f'Failed to delete {folder_path}. Reason: {err}')
    else:
        if verbose:
            print(f'Successfully deleted {folder_path}')
|
60
|
+
|
61
|
+
|
62
|
+
def search(query, limit=5, kind='text', output='df',verbose=False,download=True, dir_save=dir_save):
    """
    Run a DuckDuckGo text search and optionally download the result links.

    Args:
        query (str): the search phrase.
        limit (int, optional): maximum number of results. Defaults to 5.
        kind (str, optional): search type; only text search (any value
            containing 'te') is implemented. Defaults to 'text'.
        output (str, optional): currently unused; kept for compatibility.
        verbose (bool, optional): print the result table and download errors.
        download (bool, optional): pass the result links to
            netfinder.downloader. Defaults to True.
        dir_save (str, optional): download directory.

    Returns:
        pandas.DataFrame: the search results, with column 'href' renamed to 'links'.

    Raises:
        ValueError: if `kind` is not a text search (previously this ended in
            an UnboundLocalError on `res`).
    """
    if 'te' not in kind.lower():
        raise ValueError(f"search kind '{kind}' is not supported; only 'text' is implemented")

    from duckduckgo_search import DDGS

    results = DDGS().text(query, max_results=limit)
    res = pd.DataFrame(results)
    res.rename(columns={"href": "links"}, inplace=True)
    if verbose:
        print(f'searching "{query}": got the results below\n{res}')
    # An empty result set has no 'links' column, so there is nothing to download.
    if download and "links" in res.columns:
        try:
            netfinder.downloader(url=res.links.tolist(), dir_save=dir_save, verbose=verbose)
        except Exception as err:
            # Downloading is best-effort; the search results are still returned.
            if verbose:
                print(f"failed link: {err}")
    return res
|
77
|
+
|
78
|
+
def echo(*args,**kwargs):
    """
    query, model="gpt", verbose=True, log=True, dir_save=dir_save
    An AI chat tool built on DDGS().chat.

    Positional arguments are sorted by type: a string naming an existing
    directory becomes `dir_save`, any other string of at most 5 characters
    becomes `model`, any longer string becomes `query`; a dict may carry
    'verbose' and 'log'.

    Args:
        query (str): the question to ask.
        model (str, optional): (part of) a supported model name. Defaults to "gpt".
        verbose (bool, optional): pretty-print the answer. Defaults to True.
        log (bool, optional): prepend question and answer to a log file. Defaults to True.
        dir_save (str, path, optional): log directory, or a log file path.
            Defaults to the module-level dir_save.

    Returns:
        str: the answer from ai

    Raises:
        ValueError: if no query was supplied.
    """
    # NOTE(review): the assignments below rebind the module-level dir_save, so
    # a directory passed once stays the default for later calls; kept as-is
    # because callers may rely on it - confirm this is intended.
    global dir_save

    # Fix: the query may also arrive as a keyword (chat()/ai() pass it that way).
    query = kwargs.get('query', None)
    model = kwargs.get('model', 'gpt')
    verbose = kwargs.get('verbose', True)
    log = kwargs.get('log', True)
    dir_save = kwargs.get('dir_save', dir_save)
    for arg in args:
        if isinstance(arg, str):
            if os.path.isdir(arg):
                dir_save = arg
            elif len(arg) <= 5:
                model = arg
            else:
                query = arg
        elif isinstance(arg, dict):
            verbose = arg.get("verbose", verbose)
            log = arg.get("log", log)

    if not query:
        raise ValueError("echo() needs a query: pass it as query=... or as a string longer than 5 characters")

    supported_models = ("claude-3-haiku", "gpt-3.5", "llama-3-70b", "mixtral-8x7b")

    def valid_mod_name(str_fly):
        # Resolve a (partial, case-insensitive) name to a full model name.
        key = str_fly.strip().lower() if isinstance(str_fly, str) else ""
        if key:  # an empty name must not match the first model by accident
            for full_name in supported_models:
                if key in full_name:
                    return full_name
        print(f"not support your model{model}, supported models: 'claude','gpt(default)', 'llama','mixtral'")
        return "gpt-3.5"  # default model

    model_valid = valid_mod_name(model)
    res = DDGS().chat(query, model=model_valid)
    if verbose:
        pp(res)
    if log:
        dt_str = datetime.now().strftime('%Y-%m-%d_%H:%M:%S')
        res_ = f"\n\n####Q:{query}\n\n#####Ans:{dt_str}\n\n>{res}\n"
        # A path with a final component is taken as the log file itself,
        # unless it is an existing directory given without a trailing slash.
        if os.path.basename(dir_save) and not os.path.isdir(dir_save):
            fpath = dir_save
        else:
            os.makedirs(dir_save, exist_ok=True)
            fpath = os.path.join(dir_save, "log_ai.md")
        fupdate(fpath=fpath, content=res_)
        print(f"log file:{fpath}")
    return res
|
148
|
+
|
149
|
+
def chat(*args, **kwargs):
    """
    Alias of echo().

    A single string argument is passed on as the keyword `query`; any other
    combination of positional arguments is forwarded unchanged (previously
    it was silently dropped).

    Returns:
        whatever echo() returns.
    """
    if len(args) == 1 and isinstance(args[0], str):
        kwargs['query'] = args[0]
        return echo(**kwargs)
    return echo(*args, **kwargs)
|
153
|
+
|
154
|
+
def ai(*args, **kwargs):
    """
    Alias of echo().

    A single string argument is passed on as the keyword `query`; any other
    combination of positional arguments is forwarded unchanged (previously
    it was silently dropped).

    Returns:
        whatever echo() returns.
    """
    if len(args) == 1 and isinstance(args[0], str):
        kwargs['query'] = args[0]
        return echo(**kwargs)
    return echo(*args, **kwargs)
|
158
|
+
|
159
|
+
def detect_lang(text, output='lang',verbose=True):
    """
    Detect the language of a text with langdetect.

    The detected code is first looked up exactly (ignoring a region suffix
    such as '-cn'); only codes missing from the table fall back to the fuzzy
    strcmp() match, which is a guess.

    Args:
        text (str): the text to analyse; must pass is_text().
        output (str, optional): a value containing 'c' (e.g. 'code') returns
            the ISO 639-1 code, anything else the language name. Defaults to 'lang'.
        verbose (bool, optional): print the detected language. Defaults to True.

    Returns:
        str: the language name or code, or 'no' when the text is rejected or
        detection fails. NOTE(review): 'no' is also the code for Norwegian,
        so callers asking for codes cannot tell the two apart.
    """
    lang_code_iso639 = {
        'Abkhazian': 'ab',
        'Afar': 'aa',
        'Afrikaans': 'af',
        'Akan': 'ak',
        'Albanian': 'sq',
        'Amharic': 'am',
        'Arabic': 'ar',
        'Armenian': 'hy',
        'Assamese': 'as',
        # 'Avaric': 'av',
        'Aymara': 'ay',
        'Azerbaijani': 'az',
        'Bashkir': 'ba',
        'Basque': 'eu',
        'Belarusian': 'be',
        'Bislama': 'bi',
        'Breton': 'br',
        'Burmese': 'my',
        'Catalan, Valencian': 'ca',
        'Chamorro': 'ch',
        'Chichewa, Chewa, Nyanja': 'ny',
        'Chinese': 'zh',
        'Corsican': 'co',
        'Cree': 'cr',
        'Croatian': 'hr',
        'Danish': 'da',
        'Dutch, Flemish': 'nl',
        'Dzongkha': 'dz',
        'English': 'en',
        'Finnish': 'fi',
        'French': 'fr',
        'Galician': 'gl',
        'Georgian': 'ka',
        'German': 'de',
        'Greek, Modern (1453–)': 'el',
        'Gujarati': 'gu',
        'Hausa': 'ha',
        'Hebrew': 'he',
        'Hindi': 'hi',
        'Hungarian': 'hu',
        'Icelandic': 'is',
        'Italian': 'it',
        'Kikuyu, Gikuyu': 'ki',
        'Korean': 'ko',
        'Kurdish': 'ku',
        'Latin': 'la',
        'Limburgan, Limburger, Limburgish': 'li',
        'Luba-Katanga': 'lu',
        'Macedonian': 'mk',
        'Malay': 'ms',
        'Nauru': 'na',
        'North Ndebele': 'nd',
        'Nepali': 'ne',
        'Norwegian': 'no',
        'Norwegian Nynorsk': 'nn',
        'Sichuan Yi, Nuosu': 'ii',
        'Occitan': 'oc',
        'Ojibwa': 'oj',
        'Oriya': 'or',
        'Ossetian, Ossetic': 'os',
        'Persian': 'fa',
        'Punjabi, Panjabi': 'pa',
        'Quechua': 'qu',
        'Romanian, Moldavian, Moldovan': 'ro',
        'Russian': 'ru',
        'Samoan': 'sm',
        'Sanskrit': 'sa',
        'Serbian': 'sr',
        'Shona': 'sn',
        'Sinhala, Sinhalese': 'si',
        'Slovenian': 'sl',
        'Somali': 'so',
        'Sundanese': 'su',
        'Swahili': 'sw',
        'Swati': 'ss',
        'Tajik': 'tg',
        'Tamil': 'ta',
        'Telugu': 'te',
        'Thai': 'th',
        'Tibetan': 'bo',
        'Tigrinya': 'ti',
        'Tonga (Tonga Islands)': 'to',
        'Tsonga': 'ts',
        'Twi': 'tw',
        'Ukrainian': 'uk',
        'Urdu': 'ur',
        'Uzbek': 'uz',
        'Venda': 've',
        'Vietnamese': 'vi',
        'Volapük': 'vo',
        'Welsh': 'cy',
        'Wolof': 'wo',
        'Xhosa': 'xh',
        'Yiddish': 'yi',
        'Yoruba': 'yo',
        'Zulu': 'zu',
        # Languages langdetect can report that were missing from the table;
        # without them the fuzzy fallback silently returned another language.
        'Bulgarian': 'bg',
        'Bengali': 'bn',
        'Czech': 'cs',
        'Spanish': 'es',
        'Estonian': 'et',
        'Indonesian': 'id',
        'Japanese': 'ja',
        'Kannada': 'kn',
        'Lithuanian': 'lt',
        'Latvian': 'lv',
        'Malayalam': 'ml',
        'Marathi': 'mr',
        'Polish': 'pl',
        'Portuguese': 'pt',
        'Slovak': 'sk',
        'Swedish': 'sv',
        'Tagalog': 'tl',
        'Turkish': 'tr',
    }
    code_to_lang = {code: name for name, code in lang_code_iso639.items()}
    try:
        if not is_text(text):
            print(f"{text} is not supported")
            return 'no'
        code_detect = detect(text)
        # langdetect reports e.g. 'zh-cn' / 'zh-tw'; the table is keyed by 'zh'.
        base_code = code_detect.split('-')[0].lower()
        if base_code in code_to_lang:
            code, name = base_code, code_to_lang[base_code]
            if verbose:
                print(f"detected language: {name} ({code})")
        else:
            # Unknown code: keep the original best-effort fuzzy match.
            l_lang = list(lang_code_iso639)
            l_code = list(lang_code_iso639.values())
            idx = strcmp(code_detect, l_code, verbose=verbose)[1]
            code, name = l_code[idx], l_lang[idx]
        return code if 'c' in output.lower() else name
    except Exception:
        # Detection is best-effort (langdetect raises on undetectable input).
        return 'no'
|
270
|
+
|
271
|
+
def is_text(s):
    """
    Tell whether `s` mixes alphabetic and non-alphabetic characters.

    Returns:
        bool: True only if `s` holds at least one alphabetic character AND at
        least one non-alphabetic one (space, digit, punctuation, ...); a
        purely alphabetic string or an empty one gives False.
    """
    kinds_seen = {ch.isalpha() for ch in s}
    return True in kinds_seen and False in kinds_seen
|
276
|
+
|
277
|
+
def strcmp(search_term, candidates, ignore_case=True, verbose=True, scorer='WR'):
    """
    Compares a search term with a list of candidate strings and finds the best match based on similarity score.

    Parameters:
    search_term (str): The term to be searched for.
    candidates (list of str | str): Candidate strings to compare against the
        search term; a single string is treated as a one-element list.
    ignore_case (bool): If True, the comparison ignores case differences.
    verbose (bool): If True, prints the similarity score and the best match.
    scorer (str): Case-insensitive scorer selector: a value containing 'part'
        uses fuzz.partial_ratio, one containing 'w' uses fuzz.WRatio, one
        containing 'ratio' uses fuzz.ratio; anything else uses fuzz.WRatio.

    Returns:
    tuple: A tuple containing the best match and its index in the candidates list.

    Raises:
    ValueError: If candidates is empty.
    """
    # Fix: a bare string used to be indexed, returning only its first character.
    if isinstance(candidates, str):
        candidates = [candidates]
    else:
        candidates = list(candidates)
    if not candidates:
        raise ValueError("strcmp() needs at least one candidate to compare against")

    def to_lower(s):
        # Lower-case strings only when the comparison ignores case.
        return s.lower() if ignore_case and isinstance(s, str) else s

    # Fix: the selector was lower-cased but tested against 'W' / 'Ratio', so
    # those branches never ran, and fuzz.Ratio does not exist (fuzz.ratio does).
    scorer_key = scorer.lower()
    if 'part' in scorer_key:
        score = fuzz.partial_ratio
    elif 'w' in scorer_key:
        score = fuzz.WRatio
    elif 'ratio' in scorer_key:
        score = fuzz.ratio
    else:
        score = fuzz.WRatio

    str1_ = to_lower(search_term)
    similarity_scores = [score(str1_, to_lower(word)) for word in candidates]
    best_match_score = max(similarity_scores)
    best_match_index = similarity_scores.index(best_match_score)  # first best wins
    if verbose:
        print(f"\nbest_match is: {candidates[best_match_index],best_match_score}")
        best_match = process.extract(search_term, candidates)
        print(f"建议: {best_match}")
    return candidates[best_match_index], best_match_index
|
325
|
+
|
326
|
+
# Example usage
|
327
|
+
# str1 = "plos biology"
|
328
|
+
# str2 = ['PLoS Computational Biology', 'PLOS BIOLOGY']
|
329
|
+
# best_match, idx = strcmp(str1, str2, ignore_case=1)
|
330
|
+
|
331
|
+
def counter(list_, verbose=True):
    """
    Count how often each element occurs.

    Args:
        list_ (iterable): the elements to count.
        verbose (bool, optional): print one "item: count" line per distinct
            element. Defaults to True.

    Returns:
        collections.Counter: the occurrence counts.
    """
    tally = Counter(list_)
    if verbose:
        for element, occurrences in tally.items():
            print(f"{element}: {occurrences}")
    return tally
|
338
|
+
# usage:
|
339
|
+
# print(f"Return an iterator over elements repeating each as many times as its count:\n{sorted(c.elements())}")
|
340
|
+
# print(f"Return a list of the n most common elements:\n{c.most_common()}")
|
341
|
+
# print(f"Compute the sum of the counts:\n{c.total()}")
|
42
342
|
|
43
343
|
def is_num(s):
|
44
344
|
"""
|
@@ -81,7 +381,8 @@ def str2time(time_str, fmt='24'):
|
|
81
381
|
elif len(time_str_split)==3:
|
82
382
|
H,M,S=time_str_split
|
83
383
|
time_str_full=H+":"+M+":"+S
|
84
|
-
|
384
|
+
else:
|
385
|
+
time_str_full=time_str_
|
85
386
|
if 'am' in time_str.lower():
|
86
387
|
time_str_full+=" AM"
|
87
388
|
elif "pm"in time_str.lower():
|
@@ -94,10 +395,10 @@ def str2time(time_str, fmt='24'):
|
|
94
395
|
|
95
396
|
try:
|
96
397
|
# Try to parse the time string assuming it could be in 24-hour or 12-hour format
|
97
|
-
time_obj = datetime.strptime(time_str, '%H:%M:%S')
|
398
|
+
time_obj = datetime.strptime(time_len_corr(time_str), '%H:%M:%S')
|
98
399
|
except ValueError:
|
99
400
|
try:
|
100
|
-
time_obj = datetime.strptime(time_str, '%I:%M:%S %p')
|
401
|
+
time_obj = datetime.strptime(time_len_corr(time_str), '%I:%M:%S %p')
|
101
402
|
except ValueError as e:
|
102
403
|
raise ValueError(f"Unable to parse time string: {time_str}. Error: {e}")
|
103
404
|
|
@@ -228,18 +529,33 @@ def num2str(num, *args):
|
|
228
529
|
# print(num2str(7000.125, 2),type(num2str(7000.125, 2))) # Output: "7000.13"
|
229
530
|
# print(num2str(12345.6789, ","),type(num2str(12345.6789, ","))) # Output: "12,345.6789"
|
230
531
|
# print(num2str(7000.00, ","),type(num2str(7000.00, ","))) # Output: "7,000.00"
|
231
|
-
def sreplace(
|
532
|
+
def sreplace(*args,**kwargs):
|
232
533
|
"""
|
534
|
+
sreplace(text, by=None, robust=True)
|
233
535
|
Replace specified substrings in the input text with provided replacements.
|
234
536
|
Args:
|
235
537
|
text (str): The input text where replacements will be made.
|
236
|
-
|
538
|
+
by (dict, optional): A dictionary containing substrings to be replaced as keys
|
237
539
|
and their corresponding replacements as values. Defaults to {".com": "..come", "\n": " ", "\t": " ", " ": " "}.
|
238
540
|
robust (bool, optional): If True, additional default replacements for newline and tab characters will be applied.
|
239
541
|
Default is False.
|
240
542
|
Returns:
|
241
543
|
str: The text after replacements have been made.
|
242
544
|
"""
|
545
|
+
text = None
|
546
|
+
by = kwargs.get('by', None)
|
547
|
+
robust = kwargs.get('robust', True)
|
548
|
+
|
549
|
+
for arg in args:
|
550
|
+
if isinstance(arg,str):
|
551
|
+
text=arg
|
552
|
+
elif isinstance(arg,dict):
|
553
|
+
by=arg
|
554
|
+
elif isinstance(arg,bool):
|
555
|
+
robust=arg
|
556
|
+
else:
|
557
|
+
Error(f"{type(arg)} is not supported")
|
558
|
+
|
243
559
|
# Default replacements for newline and tab characters
|
244
560
|
default_replacements = {
|
245
561
|
"\a": "",
|
@@ -260,19 +576,18 @@ def sreplace(text, dict_replace=None, robust=True):
|
|
260
576
|
}
|
261
577
|
|
262
578
|
# If dict_replace is None, use the default dictionary
|
263
|
-
if
|
264
|
-
|
265
|
-
|
579
|
+
if by is None:
|
580
|
+
by = {}
|
266
581
|
# If robust is True, update the dictionary with default replacements
|
267
582
|
if robust:
|
268
|
-
|
583
|
+
by.update(default_replacements)
|
269
584
|
|
270
585
|
# Iterate over each key-value pair in the dictionary and replace substrings accordingly
|
271
|
-
for k, v in
|
586
|
+
for k, v in by.items():
|
272
587
|
text = text.replace(k, v)
|
273
588
|
return text
|
274
589
|
# usage:
|
275
|
-
# sreplace(text,
|
590
|
+
# sreplace(text, by=dict(old_str='new_str'), robust=True)
|
276
591
|
|
277
592
|
def paper_size(paper_type_str='a4'):
|
278
593
|
df=pd.DataFrame({'a0':[841,1189],'a1':[594,841],'a2':[420,594],'a3':[297,420],'a4':[210,297],'a5':[148,210],'a6':[105,148],'a7':[74,105],
|
@@ -414,10 +729,10 @@ def ssplit(text, by="space", verbose=False, **kws):
|
|
414
729
|
if verbose:
|
415
730
|
print(f"split_by_word_length(text, length)")
|
416
731
|
return split_by_word_length(text, **kws) # split_by_word_length(text, length)
|
417
|
-
elif "," in by:
|
418
|
-
|
419
|
-
|
420
|
-
|
732
|
+
# elif "," in by:
|
733
|
+
# if verbose:
|
734
|
+
# print(f"splited by ','")
|
735
|
+
# return text.split(",")
|
421
736
|
elif isinstance(by, list):
|
422
737
|
if verbose:
|
423
738
|
print(f"split_by_multiple_delimiters: ['|','&']")
|
@@ -675,6 +990,31 @@ def fload(fpath, kind=None, **kwargs):
|
|
675
990
|
# xlsx_content = fload('sample.xlsx')
|
676
991
|
# docx_content = fload('sample.docx')
|
677
992
|
|
993
|
+
def fupdate(fpath, content=None, encoding='utf-8'):
    """
    Update a file by adding new content at the top and moving the old content to the bottom.
    Parameters
    ----------
    fpath : str
        The file path where the content should be updated.
    content : str, optional
        The new content to add at the top of the file. If not provided, the function will not add any new content.
    encoding : str, optional
        Text encoding used for both reading and writing. Defaults to 'utf-8'
        so that non-ASCII content does not depend on the platform locale.
    Notes
    -----
    - If the file at `fpath` does not exist, it will be created.
    - The new content will be added at the top, followed by the old content of the file.
    """
    content = content or ""
    if os.path.exists(fpath):
        with open(fpath, 'r', encoding=encoding) as file:
            old_content = file.read()
    else:
        old_content = ''

    with open(fpath, 'w', encoding=encoding) as file:
        file.write(content)
        file.write(old_content)
|
1017
|
+
|
678
1018
|
def fsave(
|
679
1019
|
fpath,
|
680
1020
|
content,
|
@@ -682,6 +1022,7 @@ def fsave(
|
|
682
1022
|
font_name="Times",
|
683
1023
|
font_size=10,
|
684
1024
|
spacing=6,
|
1025
|
+
mode='w',
|
685
1026
|
**kwargs,
|
686
1027
|
):
|
687
1028
|
"""
|
@@ -697,8 +1038,8 @@ def fsave(
|
|
697
1038
|
Returns:
|
698
1039
|
None
|
699
1040
|
"""
|
700
|
-
def save_content(fpath, content):
|
701
|
-
with open(fpath,
|
1041
|
+
def save_content(fpath, content, mode=mode):
|
1042
|
+
with open(fpath, mode, encoding='utf-8') as file:
|
702
1043
|
file.write(content)
|
703
1044
|
|
704
1045
|
|
@@ -717,19 +1058,19 @@ def fsave(
|
|
717
1058
|
doc.save(fpath)
|
718
1059
|
|
719
1060
|
|
720
|
-
def save_txt_md(fpath, content, sep="\n"):
|
1061
|
+
def save_txt_md(fpath, content, sep="\n",mode='w'):
|
721
1062
|
# Ensure content is a single string
|
722
1063
|
if isinstance(content, list):
|
723
1064
|
content = sep.join(content)
|
724
|
-
save_content(fpath, sep.join(content))
|
1065
|
+
save_content(fpath, sep.join(content),mode)
|
725
1066
|
|
726
1067
|
|
727
|
-
def save_html(fpath, content, font_name, font_size):
|
1068
|
+
def save_html(fpath, content, font_name, font_size,mode='w'):
|
728
1069
|
html_content = "<html><body>"
|
729
1070
|
for paragraph_text in content:
|
730
1071
|
html_content += f'<p style="font-family:{font_name}; font-size:{font_size}px;">{paragraph_text}</p>'
|
731
1072
|
html_content += "</body></html>"
|
732
|
-
save_content(fpath, html_content)
|
1073
|
+
save_content(fpath, html_content,mode)
|
733
1074
|
|
734
1075
|
|
735
1076
|
def save_pdf(fpath, content, font_name, font_size):
|
@@ -812,16 +1153,16 @@ def fsave(
|
|
812
1153
|
"xml",
|
813
1154
|
"yaml",
|
814
1155
|
]:
|
815
|
-
|
816
|
-
f"
|
1156
|
+
print(
|
1157
|
+
f"Warning:\n{kind} is not in the supported list ['docx', 'txt', 'md', 'html', 'pdf', 'csv', 'xlsx', 'json', 'xml', 'yaml']"
|
817
1158
|
)
|
818
1159
|
|
819
1160
|
if kind == "docx" or kind=="doc":
|
820
1161
|
save_docx(fpath, content, font_name, font_size, spacing)
|
821
1162
|
elif kind == "txt":
|
822
|
-
save_txt_md(fpath, content, sep="")
|
1163
|
+
save_txt_md(fpath, content, sep="",mode=mode)
|
823
1164
|
elif kind == "md":
|
824
|
-
save_txt_md(fpath, content, sep="")
|
1165
|
+
save_txt_md(fpath, content, sep="",mode=mode)
|
825
1166
|
elif kind == "html":
|
826
1167
|
save_html(fpath, content, font_name, font_size)
|
827
1168
|
elif kind == "pdf":
|
@@ -841,9 +1182,12 @@ def fsave(
|
|
841
1182
|
elif kind == "yaml":
|
842
1183
|
save_yaml(fpath, content, **kwargs) # Assuming content is a serializable object
|
843
1184
|
else:
|
844
|
-
|
845
|
-
|
846
|
-
|
1185
|
+
try:
|
1186
|
+
netfinder.downloader(url=content, dir_save=dirname(fpath), kind=kind)
|
1187
|
+
except:
|
1188
|
+
print(
|
1189
|
+
f"Error:\n{kind} is not in the supported list ['docx', 'txt', 'md', 'html', 'pdf', 'csv', 'xlsx', 'json', 'xml', 'yaml']"
|
1190
|
+
)
|
847
1191
|
|
848
1192
|
|
849
1193
|
# # Example usage
|
@@ -867,15 +1211,116 @@ def fsave(
|
|
867
1211
|
def addpath(fpath):
    """
    Put a directory at the front of the module search path.

    Args:
        fpath (str): the directory to insert at sys.path[0].
    """
    # Fix: the builtin `dir` function was inserted instead of the argument.
    sys.path.insert(0, fpath)
|
869
1213
|
def dirname(fpath):
    """
    dirname: Extracting Directory Name from a File Path
    Args:
        fpath (str): the file or directory path
    Returns:
        str: directory, without filename, always ending in '/'. A bare file
        name (no directory part) gives './'.
    """
    dirname_ = os.path.dirname(fpath)
    if not dirname_:
        # Fix: an empty directory part used to become '/', i.e. the
        # filesystem root, instead of the current directory.
        return "./"
    if not dirname_.endswith('/'):
        dirname_ = dirname_ + "/"
    return dirname_
|
874
|
-
|
1225
|
+
|
1226
|
+
def dir_name(fpath):
    """Alias of dirname(): return the directory part of `fpath`."""
    directory = dirname(fpath)
    return directory
|
876
1228
|
def basename(fpath):
    """
    basename: return the final component of a path.
    Args:
        fpath (str): the file or directory path
    Returns:
        str: the part after the last separator, e.g. 'file.txt'; empty when
        the path ends with a separator.
    """
    _, tail = os.path.split(fpath)
    return tail
|
1237
|
+
def flist(fpath, contains="all"):
    """
    List the regular files directly inside a directory.

    Args:
        fpath (str): the directory to scan (not recursive).
        contains (str | list): 'all' (any value containing it) returns every
            file; any other string is handed to isa() as the category to
            keep; a list of such values concatenates the individual results.

    Returns:
        list of str: the matching file paths, joined onto `fpath`.
    """
    entries = (os.path.join(fpath, name) for name in os.listdir(fpath))
    every_file = [path for path in entries if os.path.isfile(path)]

    if isinstance(contains, list):
        collected = []
        for single in contains:
            collected += flist(fpath, single)
        return collected

    if 'all' in contains.lower():
        return every_file
    return [path for path in every_file if isa(path, contains)]
|
1250
|
+
def sort_kind(df, by="name", ascending=True):
    """
    Sort a DataFrame by one column, ignoring case for object (string) columns.

    Args:
        df (pandas.DataFrame): the table to sort.
        by (str, optional): column to sort on. Defaults to "name".
        ascending (bool, optional): sort direction. Defaults to True.

    Returns:
        pandas.DataFrame: a re-ordered copy with a fresh 0..n-1 index.
    """
    column = df[by]
    # Object columns are compared case-insensitively.
    sort_keys = column.str.lower() if column.dtype == 'object' else column
    order = sort_keys.argsort()
    if not ascending:
        order = order[::-1]
    return df.iloc[order].reset_index(drop=True)
|
1263
|
+
|
1264
|
+
def isa(*args,**kwargs):
    """
    isa(fpath, contains='img')
    Check whether a file path belongs to the given category.

    Positional strings are sorted by content: one containing '/' or '\\' is
    the path, any other is the category. If no positional string contains a
    separator, the first one is taken as the path (e.g. isa('a.png', 'img')).
    Both values may also be passed as keywords.

    Args:
        fpath (str): Path to the file.
        contains (str): category to test for. Default is 'img' for images.
            Other options include 'doc' for documents, 'zip' for ZIP
            archives, 'dir'/'folder' and 'file'.
    Returns:
        bool: True if the file matches the category, False otherwise
        (including for an unknown category).
    Raises:
        ValueError: if no path was given (previously an UnboundLocalError).
    """
    fpath = kwargs.get('fpath')
    contains = kwargs.get('contains')
    bare_words = []  # positional strings without a path separator
    for arg in args:
        if not isinstance(arg, str):
            continue
        if '/' in arg or '\\' in arg:
            fpath = arg
        else:
            bare_words.append(arg)
    if bare_words and fpath is None:
        fpath = bare_words.pop(0)
    if bare_words:
        contains = bare_words[-1]
    if fpath is None:
        raise ValueError("isa() needs a file path")
    if contains is None:
        contains = 'img'

    kind = contains.lower()
    if 'img' in kind or 'image' in kind:
        return is_image(fpath)
    elif 'doc' in kind:
        return is_document(fpath)
    elif 'zip' in kind:
        return is_zip(fpath)
    elif 'dir' in kind or ('f' in kind and 'd' in kind):
        # NOTE(review): this only checks that the text contains a path
        # separator, not that the directory exists - confirm intended.
        return bool(('/' in fpath) or ('\\' in fpath))
    elif 'fi' in kind:  # file
        return os.path.isfile(fpath)
    else:
        print(f"{contains} was not set up correctly")
        return False
|
1294
|
+
|
1295
|
+
def is_image(fpath):
    """
    Tell from the file name whether the path refers to an image.

    Args:
        fpath (str): path or file name; only its extension is inspected
            (via mimetypes.guess_type), the file is not opened.
    Returns:
        bool: True if the guessed MIME type starts with 'image'.
    """
    guessed = mimetypes.guess_type(fpath)[0]
    return bool(guessed) and guessed.startswith('image')
|
878
1301
|
|
1302
|
+
def is_document(fpath):
    """
    Tell from the file name whether the path refers to a document.

    Args:
        fpath (str): path or file name; only its extension is inspected
            (via mimetypes.guess_type), the file is not opened.
    Returns:
        bool: True for any 'text/*' type, PDF, and the Word / Excel /
        PowerPoint MIME types (legacy and OpenXML); False otherwise.
    """
    office_and_pdf = (
        'application/pdf',
        'application/msword',
        'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        'application/vnd.ms-excel',
        'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        'application/vnd.ms-powerpoint',
        'application/vnd.openxmlformats-officedocument.presentationml.presentation',
    )
    guessed = mimetypes.guess_type(fpath)[0]
    if not guessed:
        return False
    return guessed.startswith('text/') or guessed in office_and_pdf
|
1317
|
+
|
1318
|
+
def is_zip(fpath):
    """
    Tell from the file name whether the path refers to a ZIP archive.

    Args:
        fpath (str): path or file name; only its extension is inspected
            (via mimetypes.guess_type), the file is not opened.
    Returns:
        bool: True if the guessed MIME type is 'application/zip'.
    """
    guessed = mimetypes.guess_type(fpath)[0]
    return guessed == 'application/zip'
|
879
1324
|
def listdir(
|
880
1325
|
rootdir,
|
881
1326
|
kind="folder",
|
@@ -885,83 +1330,6 @@ def listdir(
|
|
885
1330
|
orient="list",
|
886
1331
|
output="df"
|
887
1332
|
):
|
888
|
-
def sort_kind(df, by="name", ascending=True):
|
889
|
-
if df[by].dtype == 'object': # Check if the column contains string values
|
890
|
-
if ascending:
|
891
|
-
sorted_index = df[by].str.lower().argsort()
|
892
|
-
else:
|
893
|
-
sorted_index = df[by].str.lower().argsort()[::-1]
|
894
|
-
else:
|
895
|
-
if ascending:
|
896
|
-
sorted_index = df[by].argsort()
|
897
|
-
else:
|
898
|
-
sorted_index = df[by].argsort()[::-1]
|
899
|
-
sorted_df = df.iloc[sorted_index].reset_index(drop=True)
|
900
|
-
return sorted_df
|
901
|
-
|
902
|
-
def flist(fpath, filter="all"):
|
903
|
-
all_files = [os.path.join(fpath, f) for f in os.listdir(fpath) if os.path.isfile(os.path.join(fpath, f))]
|
904
|
-
if isinstance(filter, list):
|
905
|
-
filt_files = []
|
906
|
-
for filter_ in filter:
|
907
|
-
filt_files.extend(flist(fpath, filter_))
|
908
|
-
return filt_files
|
909
|
-
else:
|
910
|
-
if 'all' in filter.lower():
|
911
|
-
return all_files
|
912
|
-
else:
|
913
|
-
filt_files = [f for f in all_files if istype(f, filter)]
|
914
|
-
return filt_files
|
915
|
-
|
916
|
-
def istype(fpath, filter='img'):
|
917
|
-
"""
|
918
|
-
Filters file paths based on the specified filter.
|
919
|
-
Args:
|
920
|
-
fpath (str): Path to the file.
|
921
|
-
filter (str): Filter of file to filter. Default is 'img' for images. Other options include 'doc' for documents,
|
922
|
-
'zip' for ZIP archives, and 'other' for other types of files.
|
923
|
-
Returns:
|
924
|
-
bool: True if the file matches the filter, False otherwise.
|
925
|
-
"""
|
926
|
-
if 'img' in filter.lower():
|
927
|
-
return is_image(fpath)
|
928
|
-
elif 'doc' in filter.lower():
|
929
|
-
return is_document(fpath)
|
930
|
-
elif 'zip' in filter.lower():
|
931
|
-
return is_zip(fpath)
|
932
|
-
else:
|
933
|
-
return False
|
934
|
-
|
935
|
-
def is_image(fpath):
|
936
|
-
mime_type, _ = mimetypes.guess_type(fpath)
|
937
|
-
if mime_type and mime_type.startswith('image'):
|
938
|
-
return True
|
939
|
-
else:
|
940
|
-
return False
|
941
|
-
|
942
|
-
def is_document(fpath):
|
943
|
-
mime_type, _ = mimetypes.guess_type(fpath)
|
944
|
-
if mime_type and (
|
945
|
-
mime_type.startswith('text/') or
|
946
|
-
mime_type == 'application/pdf' or
|
947
|
-
mime_type == 'application/msword' or
|
948
|
-
mime_type == 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' or
|
949
|
-
mime_type == 'application/vnd.ms-excel' or
|
950
|
-
mime_type == 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' or
|
951
|
-
mime_type == 'application/vnd.ms-powerpoint' or
|
952
|
-
mime_type == 'application/vnd.openxmlformats-officedocument.presentationml.presentation'
|
953
|
-
):
|
954
|
-
return True
|
955
|
-
else:
|
956
|
-
return False
|
957
|
-
|
958
|
-
def is_zip(fpath):
|
959
|
-
mime_type, _ = mimetypes.guess_type(fpath)
|
960
|
-
if mime_type == 'application/zip':
|
961
|
-
return True
|
962
|
-
else:
|
963
|
-
return False
|
964
|
-
|
965
1333
|
if not kind.startswith("."):
|
966
1334
|
kind = "." + kind
|
967
1335
|
|
@@ -990,8 +1358,10 @@ def listdir(
|
|
990
1358
|
os.path.isfile(item_path)
|
991
1359
|
)
|
992
1360
|
if kind in ['.doc','.img','.zip']: #选择大的类别
|
993
|
-
if kind != ".folder" and not
|
1361
|
+
if kind != ".folder" and not isa(item_path, kind):
|
994
1362
|
continue
|
1363
|
+
elif kind in ['.all']:
|
1364
|
+
return flist(fpath, contains=contains)
|
995
1365
|
else: #精确到文件的后缀
|
996
1366
|
if not is_folder and not is_file:
|
997
1367
|
continue
|
@@ -1064,26 +1434,45 @@ def func_list(lib_name, opt="call"):
|
|
1064
1434
|
funcs = dir(lib_name)
|
1065
1435
|
return funcs
|
1066
1436
|
|
1067
|
-
def newfolder(
|
1068
|
-
|
1437
|
+
def newfolder(*args, **kwargs):
|
1438
|
+
"""
|
1439
|
+
newfolder(pardir, chdir)
|
1440
|
+
|
1441
|
+
Args:
|
1442
|
+
pardir (dir): parent dir
|
1443
|
+
chdir (str): children dir
|
1444
|
+
overwrite (bool): overwrite?
|
1445
|
+
|
1446
|
+
Returns:
|
1447
|
+
mkdir, giving a option if exists_ok or not
|
1448
|
+
"""
|
1449
|
+
overwrite=kwargs.get("overwrite",False)
|
1450
|
+
for arg in args:
|
1451
|
+
if isinstance(arg, str):
|
1452
|
+
if "/" in arg or "\\" in arg:
|
1453
|
+
pardir=arg
|
1454
|
+
print(f'pardir{pardir}')
|
1455
|
+
else:
|
1456
|
+
chdir = arg
|
1457
|
+
print(f'chdir{chdir}')
|
1458
|
+
elif isinstance(arg,bool):
|
1459
|
+
overwrite=arg
|
1460
|
+
print(overwrite)
|
1461
|
+
else:
|
1462
|
+
print(f"{arg}Error: not support a {type(arg)} type")
|
1069
1463
|
rootdir = []
|
1070
1464
|
# Convert string to list
|
1071
1465
|
if isinstance(chdir, str):
|
1072
1466
|
chdir = [chdir]
|
1073
|
-
|
1074
1467
|
# Subfoldername should be unique
|
1075
1468
|
chdir = list(set(chdir))
|
1076
|
-
|
1077
1469
|
if isinstance(pardir, str): # Dir_parents should be 'str' type
|
1078
1470
|
pardir = os.path.normpath(pardir)
|
1079
|
-
|
1080
1471
|
# Get the slash type: "/" or "\"
|
1081
1472
|
stype = '/' if '/' in pardir else '\\'
|
1082
|
-
|
1083
1473
|
# Check if the parent directory exists and is a directory path
|
1084
1474
|
if os.path.isdir(pardir):
|
1085
1475
|
os.chdir(pardir) # Set current path
|
1086
|
-
|
1087
1476
|
# Check if subdirectories are not empty
|
1088
1477
|
if chdir:
|
1089
1478
|
chdir.sort()
|
@@ -1095,22 +1484,21 @@ def newfolder(pardir, chdir):
|
|
1095
1484
|
os.mkdir('./' + folder)
|
1096
1485
|
print(f'\n {folder} was created successfully!\n')
|
1097
1486
|
else:
|
1098
|
-
|
1099
|
-
|
1487
|
+
if overwrite:
|
1488
|
+
shutil.rmtree(child_tmp)
|
1489
|
+
os.mkdir('./' + folder)
|
1490
|
+
print(f'\n {folder} overwrite! \n')
|
1491
|
+
else:
|
1492
|
+
print(f'\n {folder} already exists! \n')
|
1100
1493
|
rootdir.append(child_tmp + stype) # Note down
|
1101
|
-
|
1102
1494
|
else:
|
1103
1495
|
print('\nWarning: Dir_child doesn\'t exist\n')
|
1104
|
-
|
1105
1496
|
else:
|
1106
1497
|
print('\nWarning: Dir_parent is not a directory path\n')
|
1107
|
-
|
1108
1498
|
# Dir is the main output, if only one dir, then str type is inconvenient
|
1109
1499
|
if len(rootdir) == 1:
|
1110
1500
|
rootdir = rootdir[0]
|
1111
|
-
|
1112
1501
|
return rootdir
|
1113
|
-
|
1114
1502
|
|
1115
1503
|
def figsave(*args,dpi=300):
|
1116
1504
|
DirSave = None
|