oafuncs 0.0.98.3__py3-none-any.whl → 0.0.98.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oafuncs/_script/parallel.py +158 -509
- oafuncs/_script/parallel_test.py +14 -0
- oafuncs/oa_down/User_Agent-list.txt +1 -1611
- oafuncs/oa_down/hycom_3hourly.py +109 -75
- oafuncs/oa_down/hycom_3hourly_20250416.py +1191 -0
- oafuncs/oa_down/test_ua.py +27 -138
- oafuncs/oa_tool.py +118 -30
- {oafuncs-0.0.98.3.dist-info → oafuncs-0.0.98.4.dist-info}/METADATA +2 -1
- {oafuncs-0.0.98.3.dist-info → oafuncs-0.0.98.4.dist-info}/RECORD +12 -11
- oafuncs/_script/parallel_example_usage.py +0 -83
- {oafuncs-0.0.98.3.dist-info → oafuncs-0.0.98.4.dist-info}/WHEEL +0 -0
- {oafuncs-0.0.98.3.dist-info → oafuncs-0.0.98.4.dist-info}/licenses/LICENSE.txt +0 -0
- {oafuncs-0.0.98.3.dist-info → oafuncs-0.0.98.4.dist-info}/top_level.txt +0 -0
oafuncs/_script/parallel.py
CHANGED
@@ -1,565 +1,214 @@
|
|
1
|
-
|
2
|
-
# coding=utf-8
|
3
|
-
"""
|
4
|
-
Author: Liu Kun && 16031215@qq.com
|
5
|
-
Date: 2025-04-04 20:19:23
|
6
|
-
LastEditors: Liu Kun && 16031215@qq.com
|
7
|
-
LastEditTime: 2025-04-04 20:19:23
|
8
|
-
FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\_script\\parallel.py
|
9
|
-
Description:
|
10
|
-
EditPlatform: vscode
|
11
|
-
ComputerInfo: XPS 15 9510
|
12
|
-
SystemInfo: Windows 11
|
13
|
-
Python Version: 3.12
|
14
|
-
"""
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
import contextlib
|
1
|
+
import atexit
|
19
2
|
import logging
|
20
3
|
import multiprocessing as mp
|
21
|
-
import os
|
22
4
|
import platform
|
5
|
+
import threading
|
23
6
|
import time
|
24
7
|
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
|
8
|
+
from typing import Any, Callable, Dict, List, Optional, Tuple
|
25
9
|
|
26
10
|
import psutil
|
27
11
|
|
28
12
|
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
|
29
13
|
|
30
|
-
__all__ = ["
|
31
|
-
|
32
|
-
|
33
|
-
class Simple_ParallelExecutor:
|
34
|
-
"""
|
35
|
-
A class for parallel execution of tasks using threads or processes.
|
36
|
-
|
37
|
-
If mode is "process", the tasks are executed in separate processes.
|
38
|
-
If mode is "thread", the tasks are executed in separate threads.
|
39
|
-
|
40
|
-
Parameters:
|
41
|
-
mode (str): The execution mode. Supported values are "process" and "thread".
|
42
|
-
process ~ Must use top function to run, can't use in jupyter notebook
|
43
|
-
thread ~ Function can not be top function, can use in jupyter notebook
|
44
|
-
max_workers (int): The maximum number of workers to use. Defaults to CPU count - 1.
|
45
|
-
|
46
|
-
Note:!!!
|
47
|
-
If Jupyter notebook is used, the mode should be "thread" to avoid hanging issues.
|
48
|
-
"""
|
49
|
-
|
50
|
-
def __init__(self, mode="process", max_workers=None):
|
51
|
-
if mode not in {"process", "thread"}:
|
52
|
-
raise ValueError("Invalid mode. Supported values are 'process' and 'thread'.")
|
53
|
-
# process: Must use top function to run, can't use in jupyter notebook
|
54
|
-
# thread: Can use in jupyter notebook
|
55
|
-
self.mode = mode
|
56
|
-
self.max_workers = max_workers or max(1, mp.cpu_count() - 1)
|
57
|
-
self.executor_class = ProcessPoolExecutor if mode == "process" else ThreadPoolExecutor
|
58
|
-
|
59
|
-
def run(self, func, param_list):
|
60
|
-
"""
|
61
|
-
Run a function in parallel using the specified executor.
|
62
|
-
|
63
|
-
Args:
|
64
|
-
func (callable): The function to execute.
|
65
|
-
param_list (list): A list of parameter tuples to pass to the function.
|
66
|
-
|
67
|
-
Returns:
|
68
|
-
list: Results of the function execution.
|
69
|
-
"""
|
70
|
-
if not callable(func):
|
71
|
-
raise ValueError("func must be callable.")
|
72
|
-
if not isinstance(param_list, list) or not all(isinstance(p, tuple) for p in param_list):
|
73
|
-
raise ValueError("param_list must be a list of tuples.")
|
74
|
-
|
75
|
-
results = [None] * len(param_list)
|
76
|
-
logging.info("Starting parallel execution in %s mode with %d workers.", self.mode, self.max_workers)
|
77
|
-
|
78
|
-
with self.executor_class(max_workers=self.max_workers) as executor:
|
79
|
-
future_to_index = {executor.submit(func, *params): idx for idx, params in enumerate(param_list)}
|
80
|
-
|
81
|
-
for future in as_completed(future_to_index):
|
82
|
-
idx = future_to_index[future]
|
83
|
-
try:
|
84
|
-
results[idx] = future.result()
|
85
|
-
except Exception as e:
|
86
|
-
logging.error("Task %d failed with error: %s", idx, e)
|
87
|
-
results[idx] = e
|
88
|
-
|
89
|
-
logging.info("Parallel execution completed.")
|
90
|
-
return results
|
91
|
-
|
92
|
-
|
93
|
-
def _compute_square(x):
|
94
|
-
return x * x
|
95
|
-
|
96
|
-
|
97
|
-
def _example():
|
98
|
-
def _compute_sum(a, b):
|
99
|
-
return a + b
|
100
|
-
|
101
|
-
executor1 = Simple_ParallelExecutor(mode="process", max_workers=4)
|
102
|
-
params1 = [(i,) for i in range(10)]
|
103
|
-
results1 = executor1.run(_compute_square, params1)
|
104
|
-
print("Results (compute_square):", results1)
|
105
|
-
|
106
|
-
executor2 = Simple_ParallelExecutor(mode="thread", max_workers=2)
|
107
|
-
params2 = [(1, 2), (3, 4), (5, 6)]
|
108
|
-
results2 = executor2.run(_compute_sum, params2)
|
109
|
-
print("Results (compute_sum):", results2)
|
14
|
+
__all__ = ["ParallelExecutor"]
|
110
15
|
|
111
16
|
|
112
17
|
class ParallelExecutor:
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
- 自动故障转移机制
|
122
|
-
"""
|
123
|
-
|
124
|
-
def __init__(self):
|
125
|
-
# 检测平台
|
18
|
+
def __init__(
|
19
|
+
self,
|
20
|
+
max_workers: Optional[int] = None,
|
21
|
+
chunk_size: Optional[int] = None,
|
22
|
+
mem_per_process: float = 1.0, # GB
|
23
|
+
timeout_per_task: int = 3600,
|
24
|
+
max_retries: int = 3,
|
25
|
+
):
|
126
26
|
self.platform = self._detect_platform()
|
127
|
-
|
128
|
-
self.
|
129
|
-
|
27
|
+
self.mem_per_process = mem_per_process
|
28
|
+
self.timeout_per_task = timeout_per_task
|
29
|
+
self.max_retries = max_retries
|
30
|
+
self.running = True
|
31
|
+
self.task_history = []
|
130
32
|
self._executor = None
|
131
|
-
self.executor_class = ProcessPoolExecutor if self.mode == "process" else ThreadPoolExecutor
|
132
|
-
# 进程池重用策略
|
133
|
-
self.reuse_pool = self.mode == "process" and self.platform != "windows"
|
134
|
-
|
135
|
-
# 特定于平台的优化参数
|
136
|
-
self.mp_context = None
|
137
|
-
self.chunk_size = self._get_default_chunk_size()
|
138
|
-
self.timeout_per_task = 3600 # 默认任务超时时间(秒)
|
139
|
-
self.worker_init_func = None
|
140
|
-
|
141
|
-
# 针对Linux的特定优化
|
142
|
-
if self.platform == "linux":
|
143
|
-
self._setup_linux_optimizations()
|
144
|
-
# 针对Windows的特定优化
|
145
|
-
elif self.platform == "windows":
|
146
|
-
self._setup_windows_optimizations()
|
147
33
|
|
148
|
-
|
34
|
+
self.mode, default_workers = self._determine_optimal_settings()
|
35
|
+
self.max_workers = max_workers or default_workers
|
36
|
+
self.chunk_size = chunk_size or self._get_default_chunk_size()
|
149
37
|
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
if system == "linux":
|
154
|
-
return "linux"
|
155
|
-
elif system == "windows":
|
156
|
-
return "windows"
|
157
|
-
elif system == "darwin":
|
158
|
-
return "macos"
|
159
|
-
else:
|
160
|
-
return "unknown"
|
38
|
+
self._init_platform_settings()
|
39
|
+
self._start_resource_monitor()
|
40
|
+
atexit.register(self.shutdown)
|
161
41
|
|
162
|
-
|
163
|
-
"""确定最佳执行模式和工作线程/进程数量"""
|
164
|
-
mode = "process" # 默认使用进程模式
|
42
|
+
logging.info(f"Initialized {self.__class__.__name__} on {self.platform} (mode={self.mode}, workers={self.max_workers})")
|
165
43
|
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
# 检查是否在容器中运行(如Docker)
|
172
|
-
in_container = self._is_in_container()
|
173
|
-
|
174
|
-
# 获取物理和逻辑CPU核心数
|
175
|
-
physical_cores = psutil.cpu_count(logical=False) or 1
|
176
|
-
logical_cores = psutil.cpu_count(logical=True) or 1
|
177
|
-
|
178
|
-
# 获取系统内存信息
|
179
|
-
mem = psutil.virtual_memory()
|
180
|
-
# total_mem_gb = mem.total / (1024**3)
|
181
|
-
available_mem_gb = mem.available / (1024**3)
|
182
|
-
|
183
|
-
# 每个进程估计内存使用(根据应用程序特性调整)
|
184
|
-
est_mem_per_process_gb = 0.5
|
185
|
-
|
186
|
-
# 根据可用内存限制工作进程数
|
187
|
-
mem_limited_workers = max(1, int(available_mem_gb / est_mem_per_process_gb))
|
188
|
-
|
189
|
-
# 在容器环境中更保守一些
|
190
|
-
if in_container:
|
191
|
-
max_workers = min(physical_cores, mem_limited_workers, 4)
|
192
|
-
else:
|
193
|
-
max_workers = min(logical_cores, mem_limited_workers)
|
44
|
+
def _detect_platform(self) -> str:
|
45
|
+
system = platform.system().lower()
|
46
|
+
if system == "linux":
|
47
|
+
return "wsl" if "microsoft" in platform.release().lower() else "linux"
|
48
|
+
return system
|
194
49
|
|
195
|
-
|
50
|
+
def _init_platform_settings(self):
|
51
|
+
if self.platform in ["linux", "wsl"]:
|
52
|
+
self.mp_context = mp.get_context("fork")
|
196
53
|
elif self.platform == "windows":
|
197
|
-
|
198
|
-
|
199
|
-
mode = "process" # 默认也使用进程模式,因为通常更可靠
|
200
|
-
|
201
|
-
# Windows通常使用超线程,所以我们可以使用逻辑核心数
|
202
|
-
logical_cores = psutil.cpu_count(logical=True) or 1
|
203
|
-
|
204
|
-
# Windows建议使用更少的进程以减少开销
|
205
|
-
if logical_cores > 4:
|
206
|
-
max_workers = logical_cores - 1
|
207
|
-
else:
|
208
|
-
max_workers = max(1, logical_cores)
|
209
|
-
|
210
|
-
# macOS平台优化
|
211
|
-
elif self.platform == "macos":
|
212
|
-
mode = "process"
|
213
|
-
logical_cores = psutil.cpu_count(logical=True) or 1
|
214
|
-
max_workers = max(1, logical_cores - 1)
|
215
|
-
|
216
|
-
# 未知平台的保守设置
|
54
|
+
mp.set_start_method("spawn", force=True)
|
55
|
+
self.mp_context = mp.get_context("spawn")
|
217
56
|
else:
|
218
|
-
mode = "process"
|
219
|
-
max_workers = max(1, (psutil.cpu_count(logical=True) or 2) - 1)
|
220
|
-
|
221
|
-
return mode, max_workers
|
222
|
-
|
223
|
-
def _is_in_container(self):
|
224
|
-
"""检测是否在容器环境中运行"""
|
225
|
-
# 检查常见的容器环境指标
|
226
|
-
if os.path.exists("/.dockerenv"):
|
227
|
-
return True
|
228
|
-
|
229
|
-
try:
|
230
|
-
with open("/proc/1/cgroup", "rt") as f:
|
231
|
-
return any(("docker" in line or "kubepods" in line) for line in f)
|
232
|
-
except Exception:
|
233
|
-
pass
|
234
|
-
|
235
|
-
return False
|
236
|
-
|
237
|
-
def _setup_linux_optimizations(self):
|
238
|
-
"""设置Linux特定的优化参数"""
|
239
|
-
try:
|
240
|
-
# 在Linux上,选择最适合的多进程上下文
|
241
|
-
# fork: 最快但可能会导致多线程程序出现问题
|
242
|
-
# spawn: 更安全但更慢
|
243
|
-
# forkserver: 中间解决方案
|
244
|
-
|
245
|
-
# 根据应用程序特性选择合适的上下文
|
246
|
-
self.mp_context = mp.get_context("fork")
|
247
|
-
|
248
|
-
# 设置进程初始化函数来设置CPU亲和性
|
249
|
-
self.worker_init_func = self._linux_worker_init
|
250
|
-
|
251
|
-
except Exception as e:
|
252
|
-
logging.warning(f"Failed to set Linux optimizations: {e}")
|
253
57
|
self.mp_context = None
|
254
58
|
|
255
|
-
def
|
256
|
-
|
257
|
-
#
|
258
|
-
# 进程创建和启动开销在Windows上较高,因此增加每批的任务数
|
259
|
-
self.chunk_size = 10
|
260
|
-
# Windows通常不需要特殊的工作进程初始化
|
261
|
-
self.worker_init_func = None
|
59
|
+
def _determine_optimal_settings(self) -> Tuple[str, int]:
|
60
|
+
logical_cores = psutil.cpu_count(logical=True) or 1
|
61
|
+
available_mem = psutil.virtual_memory().available / 1024**3 # GB
|
262
62
|
|
263
|
-
|
264
|
-
""
|
265
|
-
try:
|
266
|
-
# 获取当前进程
|
267
|
-
p = psutil.Process()
|
63
|
+
mem_limit = max(1, int(available_mem / self.mem_per_process))
|
64
|
+
return ("process", min(logical_cores, mem_limit))
|
268
65
|
|
269
|
-
|
270
|
-
|
66
|
+
def _get_default_chunk_size(self) -> int:
|
67
|
+
return max(10, 100 // (psutil.cpu_count() or 1))
|
271
68
|
|
272
|
-
|
273
|
-
|
69
|
+
def _start_resource_monitor(self):
|
70
|
+
def monitor():
|
71
|
+
threshold = self.mem_per_process * 1024**3
|
72
|
+
while self.running:
|
73
|
+
try:
|
74
|
+
if psutil.virtual_memory().available < threshold:
|
75
|
+
self._scale_down_workers()
|
76
|
+
time.sleep(1)
|
77
|
+
except Exception as e:
|
78
|
+
logging.error(f"Resource monitor error: {e}")
|
274
79
|
|
275
|
-
|
276
|
-
# 需要root权限,所以只是尝试一下
|
277
|
-
try:
|
278
|
-
os.system(f"ionice -c 2 -n 4 -p {os.getpid()} > /dev/null 2>&1")
|
279
|
-
except Exception:
|
280
|
-
pass
|
281
|
-
|
282
|
-
except Exception as e:
|
283
|
-
logging.debug(f"Worker initialization warning (non-critical): {e}")
|
284
|
-
pass # 失败不中断程序运行
|
285
|
-
|
286
|
-
def _get_default_chunk_size(self):
|
287
|
-
"""获取默认任务分块大小"""
|
288
|
-
if self.platform == "linux":
|
289
|
-
# Linux下进程创建较快,可以使用较小的块大小
|
290
|
-
return 5
|
291
|
-
elif self.platform == "windows":
|
292
|
-
# Windows下进程创建较慢,使用较大的块大小
|
293
|
-
return 10
|
294
|
-
else:
|
295
|
-
return 5
|
80
|
+
threading.Thread(target=monitor, daemon=True).start()
|
296
81
|
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
kwargs["mp_context"] = self.mp_context
|
82
|
+
def _scale_down_workers(self):
|
83
|
+
if self.max_workers > 1:
|
84
|
+
new_count = self.max_workers - 1
|
85
|
+
logging.warning(f"Scaling down workers from {self.max_workers} to {new_count}")
|
86
|
+
self.max_workers = new_count
|
87
|
+
self._restart_executor()
|
304
88
|
|
305
|
-
|
306
|
-
|
89
|
+
def _restart_executor(self):
|
90
|
+
if self._executor:
|
91
|
+
self._executor.shutdown(wait=False)
|
92
|
+
self._executor = None
|
307
93
|
|
308
|
-
|
94
|
+
def _get_executor(self):
|
95
|
+
if not self._executor:
|
96
|
+
Executor = ThreadPoolExecutor if self.mode == "thread" else ProcessPoolExecutor
|
97
|
+
self._executor = Executor(max_workers=self.max_workers, mp_context=self.mp_context if self.mode == "process" else None)
|
309
98
|
return self._executor
|
310
99
|
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
def run(self, func, param_list, chunk_size=None, fallback_on_failure=True):
|
328
|
-
"""
|
329
|
-
并行执行函数
|
330
|
-
|
331
|
-
Args:
|
332
|
-
func (callable): 要执行的函数
|
333
|
-
param_list (list): 参数元组列表
|
334
|
-
chunk_size (int, optional): 任务分块大小,None表示使用默认值
|
335
|
-
fallback_on_failure (bool): 如果主执行模式失败,是否尝试其他模式
|
336
|
-
|
337
|
-
Returns:
|
338
|
-
list: 函数执行结果
|
339
|
-
"""
|
340
|
-
if not callable(func):
|
341
|
-
raise ValueError("func must be callable.")
|
342
|
-
if not isinstance(param_list, list):
|
343
|
-
raise ValueError("param_list must be a list.")
|
344
|
-
|
345
|
-
# 空列表直接返回
|
346
|
-
if not param_list:
|
100
|
+
def run(self, func: Callable, params: List[Tuple], chunk_size: Optional[int] = None) -> List[Any]:
|
101
|
+
chunk_size = chunk_size or self.chunk_size
|
102
|
+
for retry in range(self.max_retries + 1):
|
103
|
+
try:
|
104
|
+
start_time = time.monotonic()
|
105
|
+
results = self._execute_batch(func, params, chunk_size)
|
106
|
+
self._update_settings(time.monotonic() - start_time, len(params))
|
107
|
+
return results
|
108
|
+
except Exception as e:
|
109
|
+
logging.error(f"Attempt {retry + 1} failed: {e}")
|
110
|
+
self._handle_failure()
|
111
|
+
raise RuntimeError(f"Failed after {self.max_retries} retries")
|
112
|
+
|
113
|
+
def _execute_batch(self, func: Callable, params: List[Tuple], chunk_size: int) -> List[Any]:
|
114
|
+
if not params:
|
347
115
|
return []
|
348
116
|
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
# 任务分块处理
|
353
|
-
if effective_chunk_size and len(param_list) > effective_chunk_size * 2:
|
354
|
-
return self._run_chunked(func, param_list, effective_chunk_size)
|
355
|
-
|
356
|
-
try:
|
357
|
-
return self._execute(func, param_list)
|
358
|
-
except Exception as e:
|
359
|
-
if fallback_on_failure:
|
360
|
-
logging.warning(f"Execution failed with {self.mode} mode: {e}. Trying fallback...")
|
361
|
-
# 如果当前模式失败,尝试其他模式
|
362
|
-
old_mode = self.mode
|
363
|
-
self.mode = "thread" if old_mode == "process" else "process"
|
364
|
-
self.executor_class = ProcessPoolExecutor if self.mode == "process" else ThreadPoolExecutor
|
365
|
-
self._executor = None # 重置执行器
|
117
|
+
if len(params) > chunk_size * 2:
|
118
|
+
return self._chunked_execution(func, params, chunk_size)
|
366
119
|
|
120
|
+
results = [None] * len(params)
|
121
|
+
with self._get_executor() as executor:
|
122
|
+
futures = {executor.submit(func, *args): idx for idx, args in enumerate(params)}
|
123
|
+
for future in as_completed(futures):
|
124
|
+
idx = futures[future]
|
367
125
|
try:
|
368
|
-
results = self._execute(func, param_list)
|
369
|
-
logging.info(f"Fallback to {self.mode} mode succeeded.")
|
370
|
-
return results
|
371
|
-
except Exception as e2:
|
372
|
-
logging.error(f"Fallback also failed: {e2}")
|
373
|
-
# 恢复原始模式
|
374
|
-
self.mode = old_mode
|
375
|
-
self.executor_class = ProcessPoolExecutor if self.mode == "process" else ThreadPoolExecutor
|
376
|
-
self._executor = None
|
377
|
-
raise
|
378
|
-
else:
|
379
|
-
raise
|
380
|
-
|
381
|
-
def _execute(self, func, param_list):
|
382
|
-
"""内部执行方法"""
|
383
|
-
results = [None] * len(param_list)
|
384
|
-
logging.info("Starting parallel execution in %s mode with %d workers.", self.mode, self.max_workers)
|
385
|
-
|
386
|
-
start_time = time.time()
|
387
|
-
|
388
|
-
with self.get_executor() as executor:
|
389
|
-
future_to_index = {executor.submit(func, *params): idx for idx, params in enumerate(param_list)}
|
390
|
-
|
391
|
-
for future in as_completed(future_to_index):
|
392
|
-
idx = future_to_index[future]
|
393
|
-
try:
|
394
|
-
# 添加超时保护
|
395
126
|
results[idx] = future.result(timeout=self.timeout_per_task)
|
396
127
|
except Exception as e:
|
397
|
-
|
398
|
-
results[idx] = e
|
399
|
-
|
400
|
-
elapsed = time.time() - start_time
|
401
|
-
logging.info("Parallel execution completed in %.2f seconds.", elapsed)
|
128
|
+
results[idx] = self._handle_error(e, func, params[idx])
|
402
129
|
return results
|
403
130
|
|
404
|
-
def
|
405
|
-
|
131
|
+
def _chunked_execution(self, func: Callable, params: List[Tuple], chunk_size: int) -> List[Any]:
|
132
|
+
results = []
|
133
|
+
with self._get_executor() as executor:
|
134
|
+
futures = []
|
135
|
+
for i in range(0, len(params), chunk_size):
|
136
|
+
chunk = params[i : i + chunk_size]
|
137
|
+
futures.append(executor.submit(self._process_chunk, func, chunk))
|
406
138
|
|
407
|
-
|
408
|
-
|
139
|
+
for future in as_completed(futures):
|
140
|
+
try:
|
141
|
+
results.extend(future.result(timeout=self.timeout_per_task))
|
142
|
+
except Exception as e:
|
143
|
+
logging.error(f"Chunk failed: {e}")
|
144
|
+
results.extend([None] * chunk_size)
|
145
|
+
return results
|
146
|
+
|
147
|
+
@staticmethod
|
148
|
+
def _process_chunk(func: Callable, chunk: List[Tuple]) -> List[Any]:
|
149
|
+
return [func(*args) for args in chunk]
|
409
150
|
|
410
|
-
|
411
|
-
|
151
|
+
def _update_settings(self, duration: float, task_count: int):
|
152
|
+
self.task_history.append((duration, task_count))
|
153
|
+
self.chunk_size = max(5, min(100, self.chunk_size + (1 if duration < 5 else -1)))
|
412
154
|
|
413
|
-
|
155
|
+
def _handle_error(self, error: Exception, func: Callable, args: Tuple) -> Any:
|
156
|
+
if isinstance(error, TimeoutError):
|
157
|
+
logging.warning(f"Timeout processing {func.__name__}{args}")
|
158
|
+
elif isinstance(error, MemoryError):
|
159
|
+
logging.warning("Memory error detected")
|
160
|
+
self._scale_down_workers()
|
161
|
+
else:
|
162
|
+
logging.error(f"Error processing {func.__name__}{args}: {str(error)}")
|
163
|
+
return None
|
414
164
|
|
415
|
-
|
165
|
+
def _handle_failure(self):
|
166
|
+
if self.max_workers > 2:
|
167
|
+
self.max_workers = max(1, self.max_workers // 2)
|
168
|
+
self._restart_executor()
|
416
169
|
|
417
|
-
|
418
|
-
|
170
|
+
def shutdown(self):
|
171
|
+
self.running = False
|
172
|
+
if self._executor:
|
173
|
+
try:
|
174
|
+
self._executor.shutdown(wait=False)
|
175
|
+
except Exception as e:
|
176
|
+
logging.error(f"Shutdown error: {e}")
|
177
|
+
finally:
|
178
|
+
self._executor = None
|
419
179
|
|
420
|
-
def
|
421
|
-
|
422
|
-
类似于内置map函数的并行版本
|
180
|
+
def __enter__(self):
|
181
|
+
return self
|
423
182
|
|
424
|
-
|
425
|
-
|
426
|
-
*iterables: 一个或多个可迭代对象
|
427
|
-
timeout: 每个任务的超时时间
|
428
|
-
chunk_size: 任务分块大小
|
183
|
+
def __exit__(self, *exc_info):
|
184
|
+
self.shutdown()
|
429
185
|
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
186
|
+
def get_stats(self) -> Dict[str, Any]:
|
187
|
+
stats = {
|
188
|
+
"platform": self.platform,
|
189
|
+
"mode": self.mode,
|
190
|
+
"workers": self.max_workers,
|
191
|
+
"chunk_size": self.chunk_size,
|
192
|
+
"total_tasks": sum(count for _, count in self.task_history),
|
193
|
+
}
|
194
|
+
if self.task_history:
|
195
|
+
total_time = sum(time for time, _ in self.task_history)
|
196
|
+
stats["avg_task_throughput"] = stats["total_tasks"] / total_time if total_time else 0
|
197
|
+
return stats
|
435
198
|
|
436
|
-
# 临时存储超时设置
|
437
|
-
original_timeout = self.timeout_per_task
|
438
|
-
if timeout:
|
439
|
-
self.timeout_per_task = timeout
|
440
199
|
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
yield r
|
445
|
-
finally:
|
446
|
-
# 恢复原超时设置
|
447
|
-
self.timeout_per_task = original_timeout
|
200
|
+
def _test_func(a, b):
|
201
|
+
time.sleep(0.01)
|
202
|
+
return a + b
|
448
203
|
|
449
|
-
def __del__(self):
|
450
|
-
"""确保资源被正确释放"""
|
451
|
-
self.shutdown()
|
452
204
|
|
453
|
-
|
454
|
-
|
455
|
-
if self._executor:
|
456
|
-
try:
|
457
|
-
self._executor.shutdown(wait=True)
|
458
|
-
except Exception:
|
459
|
-
pass
|
460
|
-
self._executor = None
|
205
|
+
if __name__ == "__main__":
|
206
|
+
params = [(i, i * 2) for i in range(1000)]
|
461
207
|
|
462
|
-
|
463
|
-
|
464
|
-
类似concurrent.futures.Executor.map的接口,但返回迭代器
|
465
|
-
"""
|
466
|
-
return self.map(func, *iterables, timeout=timeout, chunk_size=chunk_size)
|
467
|
-
|
468
|
-
def imap_unordered(self, func, *iterables, timeout=None, chunk_size=None):
|
469
|
-
"""
|
470
|
-
类似multiprocessing.Pool.imap_unordered的接口,结果可能乱序返回
|
471
|
-
"""
|
472
|
-
# 将zip后的可迭代对象转换为参数元组列表
|
473
|
-
param_list = [(args,) for args in zip(*iterables)]
|
474
|
-
|
475
|
-
# 空列表直接返回
|
476
|
-
if not param_list:
|
477
|
-
return
|
478
|
-
|
479
|
-
# 临时存储超时设置
|
480
|
-
original_timeout = self.timeout_per_task
|
481
|
-
if timeout:
|
482
|
-
self.timeout_per_task = timeout
|
483
|
-
|
484
|
-
try:
|
485
|
-
# 使用默认分块大小或自定义大小
|
486
|
-
effective_chunk_size = chunk_size or self.chunk_size
|
487
|
-
|
488
|
-
# 任务分块处理
|
489
|
-
if effective_chunk_size and len(param_list) > effective_chunk_size * 2:
|
490
|
-
chunks = [param_list[i : i + effective_chunk_size] for i in range(0, len(param_list), effective_chunk_size)]
|
491
|
-
|
492
|
-
with self.get_executor() as executor:
|
493
|
-
futures = [executor.submit(self._process_chunk_for_imap, func, chunk) for chunk in chunks]
|
494
|
-
|
495
|
-
for future in as_completed(futures):
|
496
|
-
try:
|
497
|
-
chunk_results = future.result(timeout=self.timeout_per_task)
|
498
|
-
for result in chunk_results:
|
499
|
-
yield result
|
500
|
-
except Exception as e:
|
501
|
-
logging.error(f"Chunk processing failed: {e}")
|
502
|
-
else:
|
503
|
-
with self.get_executor() as executor:
|
504
|
-
futures = [executor.submit(func, *params) for params in param_list]
|
505
|
-
|
506
|
-
for future in as_completed(futures):
|
507
|
-
try:
|
508
|
-
yield future.result(timeout=self.timeout_per_task)
|
509
|
-
except Exception as e:
|
510
|
-
logging.error(f"Task failed: {e}")
|
511
|
-
yield e
|
512
|
-
finally:
|
513
|
-
# 恢复原超时设置
|
514
|
-
self.timeout_per_task = original_timeout
|
515
|
-
|
516
|
-
def _process_chunk_for_imap(self, func, chunk):
|
517
|
-
"""处理imap_unordered的数据块"""
|
518
|
-
return [func(*params) for params in chunk]
|
519
|
-
|
520
|
-
def starmap(self, func, iterable, timeout=None, chunk_size=None):
|
521
|
-
"""
|
522
|
-
类似于内置starmap函数的并行版本
|
523
|
-
|
524
|
-
Args:
|
525
|
-
func: 要应用于每个元素的函数
|
526
|
-
iterable: 可迭代对象,每个元素是函数参数的元组
|
527
|
-
timeout: 每个任务的超时时间
|
528
|
-
chunk_size: 任务分块大小
|
529
|
-
|
530
|
-
Returns:
|
531
|
-
生成器,产生结果
|
532
|
-
"""
|
533
|
-
|
534
|
-
# 将每个元素转换为单参数函数调用
|
535
|
-
def wrapper(args):
|
536
|
-
return func(*args)
|
537
|
-
|
538
|
-
# 使用map实现
|
539
|
-
return self.map(wrapper, iterable, timeout=timeout, chunk_size=chunk_size)
|
540
|
-
|
541
|
-
def gather(self, funcs_and_args):
|
542
|
-
"""
|
543
|
-
并行执行多个不同的函数,类似于asyncio.gather
|
544
|
-
|
545
|
-
Args:
|
546
|
-
funcs_and_args: 列表,每个元素是(func, args)元组,
|
547
|
-
其中args是要传递给func的参数元组
|
548
|
-
|
549
|
-
Returns:
|
550
|
-
list: 函数执行结果,顺序与输入相同
|
551
|
-
"""
|
552
|
-
if not isinstance(funcs_and_args, list):
|
553
|
-
raise ValueError("funcs_and_args must be a list of (func, args) tuples")
|
554
|
-
|
555
|
-
def wrapper(func_and_args):
|
556
|
-
func, args = func_and_args
|
557
|
-
return func(*args)
|
558
|
-
|
559
|
-
return self.run(wrapper, [(item,) for item in funcs_and_args])
|
208
|
+
with ParallelExecutor() as executor:
|
209
|
+
results = executor.run(_test_func, params)
|
560
210
|
|
211
|
+
# print("Results:", results)
|
561
212
|
|
562
|
-
|
563
|
-
|
564
|
-
# 也可以不要装饰器,直接运行没啥问题,就是避免在ipynb中使用,最好使用ipynb,或者把这个函数放到一个独立的py文件中运行
|
565
|
-
# 或者,jupyter中使用thread,不要使用process,因为process会导致jupyter挂掉
|
213
|
+
print(f"Processed {len(results)} tasks")
|
214
|
+
print("Execution stats:", executor.get_stats())
|