oafuncs 0.0.89__py2.py3-none-any.whl → 0.0.91__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oafuncs/data_store/OAFuncs.png +0 -0
- oafuncs/oa_data.py +9 -82
- oafuncs/oa_down/__init__.py +1 -0
- oafuncs/oa_down/hycom_3hourly.py +315 -401
- oafuncs/oa_down/idm.py +50 -0
- oafuncs/oa_down/literature.py +53 -29
- oafuncs/oa_down/user_agent.py +0 -3
- oafuncs/oa_file.py +80 -21
- oafuncs/oa_help.py +8 -1
- oafuncs/oa_nc.py +20 -18
- oafuncs/oa_tool/__init__.py +6 -6
- oafuncs/oa_tool/parallel.py +90 -0
- {oafuncs-0.0.89.dist-info → oafuncs-0.0.91.dist-info}/METADATA +1 -1
- oafuncs-0.0.91.dist-info/RECORD +28 -0
- {oafuncs-0.0.89.dist-info → oafuncs-0.0.91.dist-info}/WHEEL +1 -1
- oafuncs-0.0.89.dist-info/RECORD +0 -26
- {oafuncs-0.0.89.dist-info → oafuncs-0.0.91.dist-info}/LICENSE.txt +0 -0
- {oafuncs-0.0.89.dist-info → oafuncs-0.0.91.dist-info}/top_level.txt +0 -0
oafuncs/oa_down/idm.py
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# coding=utf-8
|
3
|
+
"""
|
4
|
+
Author: Liu Kun && 16031215@qq.com
|
5
|
+
Date: 2025-01-11 16:19:12
|
6
|
+
LastEditors: Liu Kun && 16031215@qq.com
|
7
|
+
LastEditTime: 2025-01-11 16:25:47
|
8
|
+
FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\oa_down\\idm.py
|
9
|
+
Description:
|
10
|
+
EditPlatform: vscode
|
11
|
+
ComputerInfo: XPS 15 9510
|
12
|
+
SystemInfo: Windows 11
|
13
|
+
Python Version: 3.12
|
14
|
+
"""
|
15
|
+
|
16
|
+
import datetime
|
17
|
+
import os
|
18
|
+
from subprocess import call
|
19
|
+
|
20
|
+
from rich import print
|
21
|
+
|
22
|
+
__all__ = ["downloader"]
|
23
|
+
|
24
|
+
|
25
|
+
def downloader(task_url, folder_path, file_name, idm_engine=r"D:\Programs\Internet Download Manager\IDMan.exe"):
    r"""
    Description:
        Use IDM (Internet Download Manager) to download a file.

        The target folder is created when missing, the task is added to the
        IDM queue and the queue is then started. The exit codes returned by
        the two IDM invocations are not inspected.
    Parameter:
        task_url: str
            The download link of the file.
        folder_path: str
            The path of the folder where the file is saved.
        file_name: str
            The name of the file to be saved.
        idm_engine: str
            The path of the IDM engine. Note: "IDMan.exe"
    Return:
        None
    Example:
        downloader("https://www.test.com/data.nc", r"E:\Data", "test.nc", r"D:\Programs\Internet Download Manager\IDMan.exe")
    """
    # NOTE: the docstring is a raw string because it contains Windows paths;
    # in a normal string "\D", "\P" and "\I" are invalid escape sequences.
    os.makedirs(folder_path, exist_ok=True)
    # Add the task to the queue
    call([idm_engine, "/d", task_url, "/p", folder_path, "/f", file_name, "/a"])
    # Start the task queue
    call([idm_engine, "/s"])
    print("[purple]-" * 50 + f"\n{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n" + "[purple]-" * 50)
    print(f"[green]IDM Downloader: {file_name} download task has been added to the queue...[/green]")
|
oafuncs/oa_down/literature.py
CHANGED
@@ -64,7 +64,21 @@ class _Downloader:
|
|
64
64
|
r"https://sci-hub.se",
|
65
65
|
r"https://sci-hub.ren",
|
66
66
|
r"https://sci-hub.st",
|
67
|
-
r"https://sci-hub.ru",
|
67
|
+
r"https://sci-hub.ru", # 最好用的一个网站
|
68
|
+
# ------------------------------------- 以下网站没验证
|
69
|
+
r"https://sci-hub.wf",
|
70
|
+
r"https://sci-hub.yt",
|
71
|
+
r"https://sci-hub.ee",
|
72
|
+
r"https://sci-hub.cat",
|
73
|
+
r"https://sci-hub.in",
|
74
|
+
r"https://www.pismin.com",
|
75
|
+
r"https://sci-hub.vkif.top",
|
76
|
+
r"https://www.bothonce.com",
|
77
|
+
r"https://sci-hub.et-fine.com",
|
78
|
+
r"https://sci-hub.hkvisa.net",
|
79
|
+
# r"https://sci-hub.3800808.com", # 这个只能手动保存
|
80
|
+
r"https://sci-hub.zidianzhan.net",
|
81
|
+
r"https://sci-hub.usualwant.com",
|
68
82
|
]
|
69
83
|
self.base_url = None
|
70
84
|
self.url = None
|
@@ -86,33 +100,37 @@ class _Downloader:
|
|
86
100
|
self.try_times = 0
|
87
101
|
|
88
102
|
def get_pdf_url(self):
    """
    Request ``self.url`` and extract the direct PDF link into ``self.pdf_url``.

    On an HTTP 200 response the cookies are stored in ``self.cookies`` and the
    page text (with backslashes removed) is searched for an ``onclick``
    handler whose ``location.href`` ends in ``.pdf?download=true``. The
    matched link is normalised as follows:
        - protocol-relative ("//host/...") -> prefixed with "https:"
        - any other link without "http"    -> prefixed with ``self.base_url``
        - a link containing "http"         -> used as is

    On any failure (non-200 status, no match, or an exception such as a
    connection error or timeout) ``self.try_times`` is pushed past
    ``self.try_times_each_url_max``; presumably the caller uses this to move
    on to the next mirror (the caller is not visible here).

    Return:
        None
    """
    print("[bold #E6E6FA]-" * 120)
    print(f"DOI: {self.doi}")
    print(f"Requesting: {self.url}...")
    try:
        # A timeout is required: without one, requests waits indefinitely on
        # an unresponsive mirror and the whole download loop stalls.
        response = requests.get(self.url, headers=self.headers, timeout=30)
        if response.status_code == 200:
            self.cookies = response.cookies
            text = response.text.replace("\\", "")
            pattern = re.compile(r'onclick = "location.href=\'(.*?\.pdf\?download=true)\'"')
            match = pattern.search(text)
            if match:
                got_url = match.group(1)
                if "http" in got_url:
                    self.pdf_url = got_url
                elif got_url.startswith("//"):
                    # Protocol-relative link
                    self.pdf_url = "https:" + got_url
                else:
                    # Site-relative link
                    self.pdf_url = self.base_url + got_url
                print(f"URL: {self.pdf_url}")
            else:
                print(f"[bold #AFEEEE]The website {self.url_list[self.url_index]} do not inlcude the PDF file.")
                self.try_times = self.try_times_each_url_max + 1
        else:
            print(f"Failed to retrieve the webpage. Status code: {response.status_code}")
            print(f"[bold #AFEEEE]The website {self.url_list[self.url_index]} do not inlcude the PDF file.")
            self.try_times = self.try_times_each_url_max + 1
    except Exception as e:
        # Deliberately broad: any failure just gives up on this site.
        print(f"Failed to retrieve the webpage. Error: {e}")
        self.try_times = self.try_times_each_url_max + 1
|
117
135
|
|
118
136
|
def url_iterate(self):
|
@@ -129,6 +147,12 @@ class _Downloader:
|
|
129
147
|
# break
|
130
148
|
|
131
149
|
def write_wrong_record(self):
    """
    Append ``self.doi`` to the failure record file unless it is already listed.

    The record file (``self.wrong_record_file``) holds one DOI per line.
    Existing lines are compared with their surrounding whitespace stripped,
    because lines read from the file still carry the trailing newline and
    would otherwise never equal the bare DOI.

    Return:
        None
    """
    # Read the existing records first; do not write the DOI again if present
    if self.wrong_record_file.exists():
        with open(self.wrong_record_file, "r") as f:
            recorded = {line.strip() for line in f}
        if self.doi.strip() in recorded:
            return
    with open(self.wrong_record_file, "a") as f:
        f.write(self.doi + "\n")
|
134
158
|
|
@@ -140,7 +164,7 @@ class _Downloader:
|
|
140
164
|
os.remove(self.fpath)
|
141
165
|
print(f"[bold yellow]The PDF file {self.fpath} is only {fsize:.2f} KB. It will be deleted and retry.")
|
142
166
|
else:
|
143
|
-
print("[bold #E6E6FA]-" *
|
167
|
+
print("[bold #E6E6FA]-" * 120)
|
144
168
|
print(f"[bold purple]The PDF file {self.fpath} already exists.")
|
145
169
|
return
|
146
170
|
self.url_index = 0
|
@@ -230,11 +254,11 @@ def download5doi(store_path=None, doi_list=None, txt_file=None, excel_file=None,
|
|
230
254
|
|
231
255
|
Example:
|
232
256
|
download5doi(doi_list='10.3389/feart.2021.698876')
|
233
|
-
download5doi(store_path=
|
234
|
-
download5doi(store_path=
|
235
|
-
download5doi(store_path=
|
236
|
-
download5doi(store_path=
|
237
|
-
download5doi(store_path=
|
257
|
+
download5doi(store_path='I:\\Delete\\ref_pdf', doi_list='10.3389/feart.2021.698876')
|
258
|
+
download5doi(store_path='I:\\Delete\\ref_pdf', doi_list=['10.3389/feart.2021.698876', '10.3389/feart.2021.698876'])
|
259
|
+
download5doi(store_path='I:\\Delete\\ref_pdf', txt_file='I:\\Delete\\ref_pdf\\wrong_record.txt')
|
260
|
+
download5doi(store_path='I:\\Delete\\ref_pdf', excel_file='I:\\Delete\\ref_pdf\\wrong_record.xlsx')
|
261
|
+
download5doi(store_path='I:\\Delete\\ref_pdf', excel_file='I:\\Delete\\ref_pdf\\wrong_record.xlsx', col_name='DOI')
|
238
262
|
"""
|
239
263
|
if not store_path:
|
240
264
|
store_path = Path.cwd()
|
@@ -257,7 +281,7 @@ def download5doi(store_path=None, doi_list=None, txt_file=None, excel_file=None,
|
|
257
281
|
|
258
282
|
|
259
283
|
if __name__ == "__main__":
|
260
|
-
store_path = r"
|
261
|
-
excel_file = r"
|
284
|
+
store_path = r"F:\AAA-Delete\DOI_Reference\pdf"
|
285
|
+
excel_file = r"F:\AAA-Delete\DOI_Reference\savedrecs.xls"
|
262
286
|
# download5doi(store_path, doi_list='10.1007/s00382-022-06260-x')
|
263
287
|
download5doi(store_path, excel_file=excel_file)
|
oafuncs/oa_down/user_agent.py
CHANGED
oafuncs/oa_file.py
CHANGED
@@ -19,7 +19,7 @@ import re
|
|
19
19
|
import shutil
|
20
20
|
from rich import print
|
21
21
|
|
22
|
-
__all__ = ["find_file", "link_file", "copy_file", "rename_file", "make_folder", "clear_folder", "remove_empty_folder", "remove", "file_size"]
|
22
|
+
__all__ = ["find_file", "link_file", "copy_file", "rename_file", "make_folder", "clear_folder", "remove_empty_folder", "remove", "file_size", "mean_size", "make_dir"]
|
23
23
|
|
24
24
|
|
25
25
|
# ** 查找文件,支持通配符
|
@@ -191,7 +191,7 @@ def rename_file(directory, old_str, new_str):
|
|
191
191
|
|
192
192
|
|
193
193
|
# ** 创建子文件夹(可选清空)
|
194
|
-
def make_folder(rootpath=None, folder_name=None, clear=
|
194
|
+
def make_folder(rootpath=None, folder_name=None, clear=False) -> str:
|
195
195
|
"""
|
196
196
|
# 描述:创建子文件夹(可选清空)
|
197
197
|
# 使用示例
|
@@ -210,6 +210,26 @@ def make_folder(rootpath=None, folder_name=None, clear=0) -> str:
|
|
210
210
|
return folder_path
|
211
211
|
|
212
212
|
|
213
|
+
# ** 创建路径
|
214
|
+
def make_dir(directory):
    r"""
    Description:
        Create a directory (including any missing parent directories) if it
        does not exist. An already existing directory is left untouched.

    Parameters:
        directory: The directory path to create (str or path-like; it is
                   converted with str())

    Returns:
        None

    Example:
        make_dir(r"E:\Data\2024\09\17\var1")
    """
    # NOTE: the docstring is raw; otherwise "\2024", "\09", "\17" and "\v" in
    # the example path would be interpreted as escape sequences.
    directory = str(directory)
    os.makedirs(directory, exist_ok=True)
    print(f"Created directory: {directory}")
|
231
|
+
|
232
|
+
|
213
233
|
# ** 清空文件夹
|
214
234
|
def clear_folder(folder_path):
|
215
235
|
"""
|
@@ -270,27 +290,45 @@ def remove_empty_folder(path, print_info=1):
|
|
270
290
|
# ** 删除相关文件,可使用通配符
|
271
291
|
def remove(pattern):
    r"""
    Delete files or directories that match the given wildcard pattern.

    Directories are removed recursively. Symbolic links (including dangling
    ones and links that point to directories) are unlinked; their targets are
    never followed or deleted. Failures are reported and do not stop the
    processing of the remaining matches.

    Parameters:
        pattern : str
            File path or string containing wildcards. For example:
            - r'E:\Code\Python\Model\WRF\Radar2\bzip2-radar-0*'
            - 'bzip2-radar-0*' (assuming you are already in the target directory)

    Returns:
        None

    Usage examples:
        remove(r'E:\Code\Python\Model\WRF\Radar2\bzip2-radar-0*')
        or
        os.chdir(r'E:\Code\Python\Model\WRF\Radar2')
        remove('bzip2-radar-0*')
    """
    # NOTE: the docstring is raw; the example paths contain "\b" and other
    # backslash sequences that must not be interpreted as escapes.
    pattern = str(pattern)

    # Use glob.glob to get all matching files or directories
    file_list = glob.glob(pattern)

    if not file_list:
        print(f"No files or directories found matching '{pattern}'.")
        return

    for file_path in file_list:
        # lexists instead of exists: glob also returns dangling symlinks,
        # which exists() reports as missing.
        if not os.path.lexists(file_path):
            print(f"File or directory does not exist: {file_path}")
            continue
        try:
            # rmtree refuses to operate on a symlink, so links go to os.remove
            if os.path.isdir(file_path) and not os.path.islink(file_path):
                shutil.rmtree(file_path)
                print(f"Successfully deleted directory: {file_path}")
            else:
                os.remove(file_path)
                print(f"Successfully deleted file: {file_path}")
        except Exception as e:
            print(f"Deletion failed: {file_path}")
            print(f"Error message: {e}")
|
294
332
|
|
295
333
|
|
296
334
|
# ** 获取文件大小
|
@@ -307,7 +345,10 @@ def file_size(file_path, unit="KB"):
|
|
307
345
|
"""
|
308
346
|
# 检查文件是否存在
|
309
347
|
if not os.path.exists(file_path):
|
310
|
-
return "文件不存在"
|
348
|
+
# return "文件不存在"
|
349
|
+
# print(f"文件不存在: {file_path}\n返回0.0")
|
350
|
+
print(f'File does not exist: {file_path}\nReturn 0.0')
|
351
|
+
return 0.0
|
311
352
|
|
312
353
|
# 获取文件大小(字节)
|
313
354
|
file_size = os.path.getsize(file_path)
|
@@ -317,7 +358,10 @@ def file_size(file_path, unit="KB"):
|
|
317
358
|
|
318
359
|
# 检查传入的单位是否合法
|
319
360
|
if unit not in unit_dict:
|
320
|
-
return "单位不合法,请选择PB、TB、GB、MB、KB中的一个"
|
361
|
+
# return "单位不合法,请选择PB、TB、GB、MB、KB中的一个"
|
362
|
+
# print("单位不合法,请选择PB、TB、GB、MB、KB中的一个\n返回0.0")
|
363
|
+
print("Invalid unit, please choose one of PB, TB, GB, MB, KB\nReturn 0.0")
|
364
|
+
return 0.0
|
321
365
|
|
322
366
|
# 转换文件大小到指定单位
|
323
367
|
converted_size = file_size / unit_dict[unit]
|
@@ -326,16 +370,31 @@ def file_size(file_path, unit="KB"):
|
|
326
370
|
|
327
371
|
|
328
372
|
# ** 计算文件夹下指定相关文件的平均大小
|
329
|
-
def mean_size(parent_path,fname):
|
373
|
+
def mean_size(parent_path, fname, max_num=None, unit="KB"):
    """
    Description:
        Calculate the average size of the specified related files in the folder.
        Files whose reported size is not greater than 0 are left out of the
        average.

    Parameters:
        parent_path: The parent path where the files are located
        fname: The file name pattern to search for
        max_num: The maximum number of files to take into account
                 (None or 0 means all files that were found)
        unit: The unit of the file size, default is "KB"

    Returns:
        The average size in the requested unit, or 0.0 when no file was found
        or no file has a positive size
    """
    flist = find_file(parent_path, fname)
    if not flist:
        return 0.0
    if max_num:
        flist = flist[: int(max_num)]
    # Query each file exactly once: a second call would repeat the stat and
    # any message file_size prints for a problematic file.
    size_list = [size for size in (file_size(f, unit) for f in flist) if size > 0]
    if not size_list:
        return 0.0
    return sum(size_list) / len(size_list)
|
339
398
|
|
340
399
|
|
341
400
|
if __name__ == "__main__":
|
oafuncs/oa_help.py
CHANGED
@@ -118,9 +118,16 @@ def log():
|
|
118
118
|
print("更新日志:")
|
119
119
|
print(
|
120
120
|
"""
|
121
|
-
2025-01-
|
121
|
+
2025-01-15
|
122
|
+
1. 优化了doi下载文献函数,增加下载途径及优化异常处理
|
123
|
+
"""
|
124
|
+
)
|
125
|
+
print(
|
126
|
+
"""
|
127
|
+
2025-01-07
|
122
128
|
1. 测试Python版本最低为3.9
|
123
129
|
2. 优化了部分函数说明
|
130
|
+
3. 优化hycom_3hourly模块,滑动判断文件是否正常
|
124
131
|
"""
|
125
132
|
)
|
126
133
|
print(
|
oafuncs/oa_nc.py
CHANGED
@@ -20,7 +20,7 @@ import numpy as np
|
|
20
20
|
import xarray as xr
|
21
21
|
from rich import print
|
22
22
|
|
23
|
-
__all__ = ["get_var", "extract", "save", "merge", "modify", "rename", "
|
23
|
+
__all__ = ["get_var", "extract", "save", "merge", "modify", "rename", "check", "convert_longitude", "isel"]
|
24
24
|
|
25
25
|
|
26
26
|
def get_var(file, *vars):
|
@@ -222,8 +222,10 @@ def merge(file_list, var_name=None, dim_name=None, target_filename=None):
|
|
222
222
|
merged_data = {}
|
223
223
|
|
224
224
|
# 遍历文件列表
|
225
|
+
print('Reading file ...')
|
225
226
|
for i, file in enumerate(file_list):
|
226
|
-
|
227
|
+
# 更新track描述进度
|
228
|
+
# print(f"\rReading file {i + 1}/{len(file_list)}...", end="")
|
227
229
|
ds = xr.open_dataset(file)
|
228
230
|
for var_name in var_names:
|
229
231
|
var = ds[var_name]
|
@@ -239,17 +241,17 @@ def merge(file_list, var_name=None, dim_name=None, target_filename=None):
|
|
239
241
|
merged_data[var_name] = var
|
240
242
|
ds.close()
|
241
243
|
|
242
|
-
print("\nMerging data...")
|
244
|
+
print("\nMerging data ...")
|
243
245
|
for var_name in merged_data:
|
244
246
|
if isinstance(merged_data[var_name], list):
|
245
247
|
merged_data[var_name] = xr.concat(merged_data[var_name], dim=dim_name)
|
246
248
|
|
247
249
|
merged_data = xr.Dataset(merged_data)
|
248
250
|
|
249
|
-
print("Writing data to file...")
|
251
|
+
print("Writing data to file ...")
|
250
252
|
if os.path.exists(target_filename):
|
251
253
|
print("Warning: The target file already exists.")
|
252
|
-
print("Removing existing file...")
|
254
|
+
print("Removing existing file ...")
|
253
255
|
os.remove(target_filename)
|
254
256
|
merged_data.to_netcdf(target_filename)
|
255
257
|
print(f'File "{target_filename}" has been created.')
|
@@ -370,38 +372,38 @@ def rename(ncfile_path, old_name, new_name):
|
|
370
372
|
print(f"An error occurred: {e}")
|
371
373
|
|
372
374
|
|
373
|
-
def
|
374
|
-
|
375
|
+
def check(ncfile, if_delete=False):
|
376
|
+
"""
|
375
377
|
Description:
|
376
|
-
Check if the NetCDF file is corrupted.
|
377
|
-
|
378
|
+
Check if the NetCDF file is corrupted using xarray.
|
379
|
+
|
378
380
|
Parameters:
|
379
381
|
ncfile (str): The path to the NetCDF file.
|
380
382
|
if_delete (bool): Whether to delete the file if it is corrupted, default is False.
|
381
|
-
|
383
|
+
|
382
384
|
Returns:
|
383
385
|
bool: True if the file is not corrupted, False otherwise.
|
384
|
-
|
386
|
+
"""
|
385
387
|
if not os.path.exists(ncfile):
|
386
388
|
return False
|
387
389
|
|
388
390
|
try:
|
389
|
-
with
|
390
|
-
|
391
|
-
if len(f.variables) > 0:
|
391
|
+
with xr.open_dataset(ncfile) as ds:
|
392
|
+
if len(ds.variables) > 0:
|
392
393
|
return True
|
393
394
|
else:
|
394
|
-
|
395
|
-
|
395
|
+
print(f"File {ncfile} is empty or corrupted.")
|
396
|
+
if if_delete:
|
397
|
+
os.remove(ncfile)
|
398
|
+
print(f"File {ncfile} has been deleted.")
|
399
|
+
return False
|
396
400
|
except OSError as e:
|
397
|
-
# 捕获文件打开时可能发生的OSError
|
398
401
|
print(f"An error occurred while opening the file: {e}")
|
399
402
|
if if_delete:
|
400
403
|
os.remove(ncfile)
|
401
404
|
print(f"File {ncfile} has been deleted.")
|
402
405
|
return False
|
403
406
|
except Exception as e:
|
404
|
-
# 捕获其他可能的异常
|
405
407
|
print(f"An unexpected error occurred: {e}")
|
406
408
|
if if_delete:
|
407
409
|
os.remove(ncfile)
|
oafuncs/oa_tool/__init__.py
CHANGED
@@ -1,18 +1,18 @@
|
|
1
1
|
#!/usr/bin/env python
|
2
2
|
# coding=utf-8
|
3
|
-
|
3
|
+
"""
|
4
4
|
Author: Liu Kun && 16031215@qq.com
|
5
5
|
Date: 2024-11-21 09:48:00
|
6
6
|
LastEditors: Liu Kun && 16031215@qq.com
|
7
|
-
LastEditTime:
|
8
|
-
FilePath: \\Python\\My_Funcs\\OAFuncs\\
|
9
|
-
Description:
|
7
|
+
LastEditTime: 2025-01-11 20:09:09
|
8
|
+
FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\oa_tool\\__init__.py
|
9
|
+
Description:
|
10
10
|
EditPlatform: vscode
|
11
11
|
ComputerInfo: XPS 15 9510
|
12
12
|
SystemInfo: Windows 11
|
13
13
|
Python Version: 3.12
|
14
|
-
|
15
|
-
|
14
|
+
"""
|
16
15
|
|
17
16
|
# 会导致OAFuncs直接导入所有函数,不符合模块化设计
|
18
17
|
from .email import *
|
18
|
+
from .parallel import *
|
@@ -0,0 +1,90 @@
|
|
1
|
+
import logging
|
2
|
+
import multiprocessing as mp
|
3
|
+
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
|
4
|
+
|
5
|
+
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
|
6
|
+
|
7
|
+
__all__ = ["ParallelExecutor"]
|
8
|
+
|
9
|
+
|
10
|
+
class ParallelExecutor:
    """
    A class for parallel execution of tasks using threads or processes.

    If mode is "process", the tasks are executed in separate processes.
    If mode is "thread", the tasks are executed in separate threads.

    Parameters:
        mode (str): The execution mode. Supported values are "process" and "thread".
                    process ~ The function must be defined at module top level
                              (it has to be picklable); do not use in Jupyter notebook
                    thread  ~ The function does not need to be a top-level function;
                              can be used in Jupyter notebook
        max_workers (int): The maximum number of workers to use. Defaults to CPU count - 1
                           (at least 1; limited to 61 for process mode on Windows).

    Note:!!!
        If Jupyter notebook is used, the mode should be "thread" to avoid hanging issues.
    """

    # ProcessPoolExecutor raises ValueError on Windows when max_workers > 61.
    _WINDOWS_MAX_PROCESS_WORKERS = 61

    def __init__(self, mode="process", max_workers=None):
        import sys  # local import: only needed for the platform check below

        if mode not in {"process", "thread"}:
            raise ValueError("Invalid mode. Supported values are 'process' and 'thread'.")
        self.mode = mode
        if not max_workers:
            max_workers = max(1, mp.cpu_count() - 1)
            # Only the computed default is capped; an explicit value is passed through unchanged.
            if mode == "process" and sys.platform == "win32":
                max_workers = min(max_workers, self._WINDOWS_MAX_PROCESS_WORKERS)
        self.max_workers = max_workers
        self.executor_class = ProcessPoolExecutor if mode == "process" else ThreadPoolExecutor

    def run(self, func, param_list):
        """
        Run a function in parallel using the specified executor.

        Args:
            func (callable): The function to execute.
            param_list (list): A list of parameter tuples to pass to the function.

        Returns:
            list: Results of the function execution, in the same order as
                  param_list. When a task raises, the exception object is
                  stored at that task's position instead of a result.

        Raises:
            ValueError: If func is not callable or param_list is not a list of tuples.
        """
        if not callable(func):
            raise ValueError("func must be callable.")
        if not isinstance(param_list, list) or not all(isinstance(p, tuple) for p in param_list):
            raise ValueError("param_list must be a list of tuples.")

        results = [None] * len(param_list)
        logging.info("Starting parallel execution in %s mode with %d workers.", self.mode, self.max_workers)

        with self.executor_class(max_workers=self.max_workers) as executor:
            # Remember each future's position so results keep the input order
            # even though tasks complete in arbitrary order.
            future_to_index = {executor.submit(func, *params): idx for idx, params in enumerate(param_list)}

            for future in as_completed(future_to_index):
                idx = future_to_index[future]
                try:
                    results[idx] = future.result()
                except Exception as e:
                    logging.error("Task %d failed with error: %s", idx, e)
                    results[idx] = e

        logging.info("Parallel execution completed.")
        return results
|
68
|
+
|
69
|
+
def _compute_square(x):
    """Return x multiplied by itself (task function used by the process-mode demo)."""
    squared = x * x
    return squared
|
71
|
+
|
72
|
+
def _example():
    """
    Demonstrate ParallelExecutor in both modes and print the results.

    Process mode runs the module-level _compute_square; thread mode runs a
    function defined locally inside this demo.
    """

    def _compute_sum(a, b):
        return a + b

    # Process mode: square the numbers 0..9 with 4 workers
    square_args = [(value,) for value in range(10)]
    process_runner = ParallelExecutor(mode="process", max_workers=4)
    print("Results (compute_square):", process_runner.run(_compute_square, square_args))

    # Thread mode: add three pairs with 2 workers
    sum_args = [(1, 2), (3, 4), (5, 6)]
    thread_runner = ParallelExecutor(mode="thread", max_workers=2)
    print("Results (compute_sum):", thread_runner.run(_compute_sum, sum_args))
|
85
|
+
|
86
|
+
|
87
|
+
if __name__ == "__main__":
|
88
|
+
_example()
|
89
|
+
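# The decorator can also be dropped; running this directly works fine. Just avoid using it inside an .ipynb notebook: prefer a plain .py script, or move this function into a standalone .py file and run it there.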
|
90
|
+
# Alternatively, in Jupyter use mode="thread" instead of mode="process", because process mode makes Jupyter hang.
|
@@ -0,0 +1,28 @@
|
|
1
|
+
oafuncs/__init__.py,sha256=glcIlhQ9xSK4WtL58dq7Od2S3JPqsuEyhUQ-VWO8hOc,1426
|
2
|
+
oafuncs/oa_cmap.py,sha256=azVg9QR_IlG9lXCCXXVs1LS1kFci8yjxDmb_VA_TdTQ,7408
|
3
|
+
oafuncs/oa_data.py,sha256=nENfszcOaALRse70fWFKi2vKH35EhRSCr65oIAKHiS8,12774
|
4
|
+
oafuncs/oa_draw.py,sha256=QypQp4vJIrbAyFddEVxd9K9Q4d85PRYqYQi9xDUmSZw,11150
|
5
|
+
oafuncs/oa_file.py,sha256=FVffpW3p6C8l1zrDrNr9aQeuCrA1qt4u4YssSwcTkkE,14106
|
6
|
+
oafuncs/oa_help.py,sha256=loyzTbjU_0VpSIBvAEUA_tqxG8MVsO0xFE_2hgQ3zMw,4188
|
7
|
+
oafuncs/oa_nc.py,sha256=CVZlv2EIehdgzrf1MHXYOUFcNkdOnmE1GYQYLldzrk0,17499
|
8
|
+
oafuncs/oa_python.py,sha256=Q-6UGGw_dJff7Ef8i87fsLPoGeHV5jBzfb-7HP4THR0,4018
|
9
|
+
oafuncs/data_store/OAFuncs.png,sha256=Cc0TDi9H5mWBporXYw9K0bUWC0oSsI-Qj3FGAXUtGKM,3332020
|
10
|
+
oafuncs/oa_down/User_Agent-list.txt,sha256=pazxSip8_lphEBOPHG902zmIBUg8sBKXgmqp_g6j_E4,661062
|
11
|
+
oafuncs/oa_down/__init__.py,sha256=kRX5eTUCbAiz3zTaQM1501paOYS_3fizDN4Pa0mtNUA,585
|
12
|
+
oafuncs/oa_down/hycom_3hourly.py,sha256=ZJpsx2D_x-C1Z4R1Wwr2vzUuT6iNPTZVDxusCG_q330,62113
|
13
|
+
oafuncs/oa_down/idm.py,sha256=lOiDQ5i5JPhj5ca3uDM9dw5DnHtj1EyJ17owhy7luLg,1666
|
14
|
+
oafuncs/oa_down/literature.py,sha256=n9pvL_N7pk-MZHHNIqc8OUYK_9ycASjDq0-D0wLSZ3s,11329
|
15
|
+
oafuncs/oa_down/test_ua.py,sha256=0IQq3NjqfNr7KkyjS_U-a4mYu-r-E7gzawwo4IfEa6Y,10851
|
16
|
+
oafuncs/oa_down/user_agent.py,sha256=TsPcAxFmMTYAEHRFjurI1bQBJfDhcA70MdHoUPwQmks,785
|
17
|
+
oafuncs/oa_sign/__init__.py,sha256=QKqTFrJDFK40C5uvk48GlRRbGFzO40rgkYwu6dYxatM,563
|
18
|
+
oafuncs/oa_sign/meteorological.py,sha256=mLbupsZSq427HTfVbZMvIlFzDHwSzQAbK3X19o8anFY,6525
|
19
|
+
oafuncs/oa_sign/ocean.py,sha256=xrW-rWD7xBWsB5PuCyEwQ1Q_RDKq2KCLz-LOONHgldU,5932
|
20
|
+
oafuncs/oa_sign/scientific.py,sha256=a4JxOBgm9vzNZKpJ_GQIQf7cokkraV5nh23HGbmTYKw,5064
|
21
|
+
oafuncs/oa_tool/__init__.py,sha256=bNTy9abznDhg3k_Irx0YieXl37r-oDRMtTAxf57Stzs,487
|
22
|
+
oafuncs/oa_tool/email.py,sha256=4lJxV_KUzhxgLYfVwYTqp0qxRugD7fvsZkXDe5WkUKo,3052
|
23
|
+
oafuncs/oa_tool/parallel.py,sha256=kYbiIFDB7EoxasmXGSomaEDVUsg9Rfvdgbw93lBOY7o,3770
|
24
|
+
oafuncs-0.0.91.dist-info/LICENSE.txt,sha256=rMtLpVg8sKiSlwClfR9w_Dd_5WubTQgoOzE2PDFxzs4,1074
|
25
|
+
oafuncs-0.0.91.dist-info/METADATA,sha256=KT2rJ-ZeMPNYiqfyN5tRDdvf7eP5DB5NnesLZVsFG5A,3321
|
26
|
+
oafuncs-0.0.91.dist-info/WHEEL,sha256=M1ikteR9eetPNvm1LyQ3rpXxNYuGd90oakQO1a-ohSk,109
|
27
|
+
oafuncs-0.0.91.dist-info/top_level.txt,sha256=bgC35QkXbN4EmPHEveg_xGIZ5i9NNPYWqtJqaKqTPsQ,8
|
28
|
+
oafuncs-0.0.91.dist-info/RECORD,,
|
oafuncs-0.0.89.dist-info/RECORD
DELETED
@@ -1,26 +0,0 @@
|
|
1
|
-
oafuncs/__init__.py,sha256=glcIlhQ9xSK4WtL58dq7Od2S3JPqsuEyhUQ-VWO8hOc,1426
|
2
|
-
oafuncs/oa_cmap.py,sha256=azVg9QR_IlG9lXCCXXVs1LS1kFci8yjxDmb_VA_TdTQ,7408
|
3
|
-
oafuncs/oa_data.py,sha256=21HC_7GVFAtU9AMYKGSSzY9J6_0Ju-5n8dJKwOOx5HI,15641
|
4
|
-
oafuncs/oa_draw.py,sha256=QypQp4vJIrbAyFddEVxd9K9Q4d85PRYqYQi9xDUmSZw,11150
|
5
|
-
oafuncs/oa_file.py,sha256=tnENr1PZlwF_pbFL4tOfBkfoVn8SoaOrhkPIua9wPX8,12301
|
6
|
-
oafuncs/oa_help.py,sha256=1S6b4LoiXYkoQCcvI8qbZvRQIXxXYRyziEZiNCnyDGE,3967
|
7
|
-
oafuncs/oa_nc.py,sha256=m_80xWzoyY2niupfpTSvej1D_k4WvTnDYlnlYbIfqGI,17525
|
8
|
-
oafuncs/oa_python.py,sha256=Q-6UGGw_dJff7Ef8i87fsLPoGeHV5jBzfb-7HP4THR0,4018
|
9
|
-
oafuncs/data_store/OAFuncs.png,sha256=w2pR7MUyeeWrT8BVTSy40EIRDIrfpdo1QvWvvjLOgjM,3258809
|
10
|
-
oafuncs/oa_down/User_Agent-list.txt,sha256=pazxSip8_lphEBOPHG902zmIBUg8sBKXgmqp_g6j_E4,661062
|
11
|
-
oafuncs/oa_down/__init__.py,sha256=pKPqxD0z09NEXWCemuemfgTct7Kcu3APPJqqB1FPXRM,565
|
12
|
-
oafuncs/oa_down/hycom_3hourly.py,sha256=wYOZrV5mZVSTfx6a5QLay8Jg2sL2x3sWKWmxwidRzMo,62928
|
13
|
-
oafuncs/oa_down/literature.py,sha256=Txv1YGSG-Z7m4o7FGHvXOR40EFxYozMsyM0-gy5CMEg,10086
|
14
|
-
oafuncs/oa_down/test_ua.py,sha256=0IQq3NjqfNr7KkyjS_U-a4mYu-r-E7gzawwo4IfEa6Y,10851
|
15
|
-
oafuncs/oa_down/user_agent.py,sha256=NSJjB2LAqPKS0hErvN5FfwsQeb58XjlOc68emiAK66c,893
|
16
|
-
oafuncs/oa_sign/__init__.py,sha256=QKqTFrJDFK40C5uvk48GlRRbGFzO40rgkYwu6dYxatM,563
|
17
|
-
oafuncs/oa_sign/meteorological.py,sha256=mLbupsZSq427HTfVbZMvIlFzDHwSzQAbK3X19o8anFY,6525
|
18
|
-
oafuncs/oa_sign/ocean.py,sha256=xrW-rWD7xBWsB5PuCyEwQ1Q_RDKq2KCLz-LOONHgldU,5932
|
19
|
-
oafuncs/oa_sign/scientific.py,sha256=a4JxOBgm9vzNZKpJ_GQIQf7cokkraV5nh23HGbmTYKw,5064
|
20
|
-
oafuncs/oa_tool/__init__.py,sha256=IKOlqpWlb4cMDCtq2VKR_RTxQHDNqR_vfqqsOsp_lKQ,466
|
21
|
-
oafuncs/oa_tool/email.py,sha256=4lJxV_KUzhxgLYfVwYTqp0qxRugD7fvsZkXDe5WkUKo,3052
|
22
|
-
oafuncs-0.0.89.dist-info/LICENSE.txt,sha256=rMtLpVg8sKiSlwClfR9w_Dd_5WubTQgoOzE2PDFxzs4,1074
|
23
|
-
oafuncs-0.0.89.dist-info/METADATA,sha256=E4bEI5HiNc4-4xadqlRWwkUI3TCwdhroNQqgmwAvFZM,3321
|
24
|
-
oafuncs-0.0.89.dist-info/WHEEL,sha256=pxeNX5JdtCe58PUSYP9upmc7jdRPgvT0Gm9kb1SHlVw,109
|
25
|
-
oafuncs-0.0.89.dist-info/top_level.txt,sha256=bgC35QkXbN4EmPHEveg_xGIZ5i9NNPYWqtJqaKqTPsQ,8
|
26
|
-
oafuncs-0.0.89.dist-info/RECORD,,
|
File without changes
|
File without changes
|