oafuncs 0.0.98.3__py3-none-any.whl → 0.0.98.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,565 +1,214 @@
1
- #!/usr/bin/env python
2
- # coding=utf-8
3
- """
4
- Author: Liu Kun && 16031215@qq.com
5
- Date: 2025-04-04 20:19:23
6
- LastEditors: Liu Kun && 16031215@qq.com
7
- LastEditTime: 2025-04-04 20:19:23
8
- FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\_script\\parallel.py
9
- Description:
10
- EditPlatform: vscode
11
- ComputerInfo: XPS 15 9510
12
- SystemInfo: Windows 11
13
- Python Version: 3.12
14
- """
15
-
16
-
17
-
18
- import contextlib
1
+ import atexit
19
2
  import logging
20
3
  import multiprocessing as mp
21
- import os
22
4
  import platform
5
+ import threading
23
6
  import time
24
7
  from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
8
+ from typing import Any, Callable, Dict, List, Optional, Tuple
25
9
 
26
10
  import psutil
27
11
 
28
12
  logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
29
13
 
30
- __all__ = ["Simple_ParallelExecutor", "ParallelExecutor"]
31
-
32
-
33
class Simple_ParallelExecutor:
    """
    Minimal fan-out helper built on ``concurrent.futures``.

    ``mode="process"`` runs every task in a separate process, ``mode="thread"``
    runs every task in a separate thread.

    Parameters:
        mode (str): Either "process" or "thread".
            process ~ the function must be defined at module top level; not usable in a Jupyter notebook
            thread  ~ the function does not have to be top level; usable in a Jupyter notebook
        max_workers (int): Upper bound on the number of workers. Defaults to CPU count - 1.

    Note:
        Inside a Jupyter notebook use "thread" mode to avoid hangs.
    """

    def __init__(self, mode="process", max_workers=None):
        pool_types = {"process": ProcessPoolExecutor, "thread": ThreadPoolExecutor}
        if mode not in pool_types:
            raise ValueError("Invalid mode. Supported values are 'process' and 'thread'.")

        self.mode = mode
        if max_workers:
            self.max_workers = max_workers
        else:
            # Leave one core free, but never go below a single worker.
            self.max_workers = max(1, mp.cpu_count() - 1)
        self.executor_class = pool_types[mode]

    def run(self, func, param_list):
        """
        Call ``func(*params)`` for every tuple in ``param_list`` in parallel.

        Args:
            func (callable): The function to execute.
            param_list (list): A list of parameter tuples, one per call.

        Returns:
            list: One entry per input tuple, in input order. A call that raised
            contributes the exception object instead of a return value.

        Raises:
            ValueError: If ``func`` is not callable or ``param_list`` is not a
                list of tuples.
        """
        if not callable(func):
            raise ValueError("func must be callable.")
        is_list_of_tuples = isinstance(param_list, list) and all(isinstance(item, tuple) for item in param_list)
        if not is_list_of_tuples:
            raise ValueError("param_list must be a list of tuples.")

        logging.info("Starting parallel execution in %s mode with %d workers.", self.mode, self.max_workers)

        outcome_by_position = {}
        with self.executor_class(max_workers=self.max_workers) as pool:
            position_of = {}
            for position, call_args in enumerate(param_list):
                position_of[pool.submit(func, *call_args)] = position

            for finished in as_completed(position_of):
                position = position_of[finished]
                try:
                    outcome = finished.result()
                except Exception as exc:
                    logging.error("Task %d failed with error: %s", position, exc)
                    outcome = exc
                outcome_by_position[position] = outcome

        logging.info("Parallel execution completed.")
        return [outcome_by_position[position] for position in range(len(param_list))]
91
-
92
-
93
- def _compute_square(x):
94
- return x * x
95
-
96
-
97
def _example():
    """Demo: run a top-level function in process mode and a nested one in thread mode."""

    def _compute_sum(a, b):
        return a + b

    # Process mode needs a module-level callable, hence _compute_square.
    process_runner = Simple_ParallelExecutor(mode="process", max_workers=4)
    square_args = [(value,) for value in range(10)]
    print("Results (compute_square):", process_runner.run(_compute_square, square_args))

    # Thread mode accepts the nested helper because nothing is pickled.
    thread_runner = Simple_ParallelExecutor(mode="thread", max_workers=2)
    sum_args = [(1, 2), (3, 4), (5, 6)]
    print("Results (compute_sum):", thread_runner.run(_compute_sum, sum_args))
14
+ __all__ = ["ParallelExecutor"]
110
15
 
111
16
 
112
17
class ParallelExecutor:
    """Run one function over many argument tuples on a reusable worker pool.

    The worker count is derived from the logical CPU count and the memory
    currently available. A daemon thread lowers the worker count while free
    memory is below ``mem_per_process`` GB, and a batch that raises is retried
    with a fresh (and possibly smaller) pool. Inputs longer than twice the chunk
    size are split into chunks so that each submitted task performs several
    calls.

    The instance is a context manager; leaving the ``with`` block calls
    :meth:`shutdown`. ``shutdown`` is also registered with ``atexit``.

    Args:
        max_workers: Worker count; ``None``/0 selects the computed default.
        chunk_size: Calls per chunk; ``None``/0 selects the computed default.
        mem_per_process: Memory budget per worker in GB. A non-positive value
            disables the memory-based cap on the default worker count.
        timeout_per_task: Timeout in seconds passed to ``Future.result``.
        max_retries: Number of additional attempts after a failed batch.
    """

    def __init__(
        self,
        max_workers: Optional[int] = None,
        chunk_size: Optional[int] = None,
        mem_per_process: float = 1.0,  # GB
        timeout_per_task: int = 3600,
        max_retries: int = 3,
    ):
        self.platform = self._detect_platform()
        self.mem_per_process = mem_per_process
        self.timeout_per_task = timeout_per_task
        self.max_retries = max_retries
        self.running = True  # cleared by shutdown(); stops the monitor thread
        self.task_history = []  # (duration_seconds, task_count) per successful run
        self._executor = None  # created lazily by _get_executor()

        self.mode, default_workers = self._determine_optimal_settings()
        self.max_workers = max_workers or default_workers
        self.chunk_size = chunk_size or self._get_default_chunk_size()

        self._init_platform_settings()
        self._start_resource_monitor()
        atexit.register(self.shutdown)

        logging.info(f"Initialized {self.__class__.__name__} on {self.platform} (mode={self.mode}, workers={self.max_workers})")

    def _detect_platform(self) -> str:
        """Return the lower-cased OS name, reporting Linux under WSL as ``"wsl"``."""
        system = platform.system().lower()
        if system == "linux":
            return "wsl" if "microsoft" in platform.release().lower() else "linux"
        return system

    def _init_platform_settings(self):
        """Pick the multiprocessing context used for process pools.

        Only a private context is selected. The global start method is left
        alone so that constructing an executor does not change how unrelated
        code in the same interpreter starts processes.
        """
        if self.platform in ["linux", "wsl"]:
            # NOTE(review): this object also runs a monitor thread; forking a
            # multi-threaded process is discouraged by the multiprocessing
            # docs - confirm "fork" is still the desired context here.
            self.mp_context = mp.get_context("fork")
        elif self.platform == "windows":
            self.mp_context = mp.get_context("spawn")
        else:
            self.mp_context = None

    def _determine_optimal_settings(self) -> Tuple[str, int]:
        """Return ``(mode, worker_count)`` bounded by CPU count and free memory."""
        logical_cores = psutil.cpu_count(logical=True) or 1
        if self.mem_per_process > 0:
            available_mem = psutil.virtual_memory().available / 1024**3  # GB
            mem_limit = max(1, int(available_mem / self.mem_per_process))
        else:
            # A zero/negative budget cannot be divided by; apply no memory cap.
            mem_limit = logical_cores
        return ("process", min(logical_cores, mem_limit))

    def _get_default_chunk_size(self) -> int:
        """Return the default chunk size: ``100 // cpu_count``, at least 10."""
        return max(10, 100 // (psutil.cpu_count() or 1))

    def _start_resource_monitor(self):
        """Start a daemon thread that scales workers down while memory is low."""

        def monitor():
            threshold = self.mem_per_process * 1024**3
            while self.running:
                try:
                    if psutil.virtual_memory().available < threshold:
                        self._scale_down_workers()
                except Exception as e:
                    logging.error(f"Resource monitor error: {e}")
                # Sleep outside the try block so a persistent error cannot
                # turn this loop into a busy spin that floods the log.
                time.sleep(1)

        threading.Thread(target=monitor, daemon=True).start()

    def _scale_down_workers(self):
        """Reduce the worker count by one (never below 1) and drop the pool."""
        if self.max_workers > 1:
            new_count = self.max_workers - 1
            logging.warning(f"Scaling down workers from {self.max_workers} to {new_count}")
            self.max_workers = new_count
            self._restart_executor()

    def _restart_executor(self):
        """Discard the cached pool; the next ``_get_executor`` call builds a new one."""
        # Detach first, then shut down: a caller racing with the monitor thread
        # never observes a cached executor that has already been shut down.
        executor, self._executor = self._executor, None
        if executor is not None:
            executor.shutdown(wait=False)

    def _get_executor(self):
        """Return the cached pool, creating it with the current settings if needed."""
        executor = self._executor
        if executor is None:
            if self.mode == "thread":
                # ThreadPoolExecutor does not accept an mp_context argument.
                executor = ThreadPoolExecutor(max_workers=self.max_workers)
            else:
                executor = ProcessPoolExecutor(max_workers=self.max_workers, mp_context=self.mp_context)
            self._executor = executor
        return executor

    def run(self, func: Callable, params: List[Tuple], chunk_size: Optional[int] = None) -> List[Any]:
        """Call ``func(*args)`` for every tuple in ``params`` and return the results.

        Args:
            func: Callable applied to each argument tuple.
            params: Argument tuples, one per call.
            chunk_size: Calls per chunk for this run; defaults to ``self.chunk_size``.

        Returns:
            Results in the same order as ``params``. A call (or chunk) that
            failed contributes ``None``.

        Raises:
            RuntimeError: If every attempt raised; the last error is chained.
        """
        chunk_size = chunk_size or self.chunk_size
        last_error = None
        for retry in range(self.max_retries + 1):
            try:
                start_time = time.monotonic()
                results = self._execute_batch(func, params, chunk_size)
                self._update_settings(time.monotonic() - start_time, len(params))
                return results
            except Exception as e:
                last_error = e
                logging.error(f"Attempt {retry + 1} failed: {e}")
                self._handle_failure()
        raise RuntimeError(f"Failed after {self.max_retries} retries") from last_error

    def _execute_batch(self, func: Callable, params: List[Tuple], chunk_size: int) -> List[Any]:
        """Run one attempt, submitting either one task per call or one per chunk."""
        if not params:
            return []

        if len(params) > chunk_size * 2:
            return self._chunked_execution(func, params, chunk_size)

        results = [None] * len(params)
        # The pool is deliberately not used as a context manager: leaving a
        # ``with`` block shuts it down, which would break the next run().
        executor = self._get_executor()
        futures = {executor.submit(func, *args): idx for idx, args in enumerate(params)}
        for future in as_completed(futures):
            idx = futures[future]
            try:
                # NOTE: futures yielded by as_completed are already finished,
                # so this timeout does not bound a task's running time.
                results[idx] = future.result(timeout=self.timeout_per_task)
            except Exception as e:
                results[idx] = self._handle_error(e, func, params[idx])
        return results

    def _chunked_execution(self, func: Callable, params: List[Tuple], chunk_size: int) -> List[Any]:
        """Run ``params`` in chunks and return results aligned with ``params``.

        A chunk that fails leaves ``None`` in each of its positions, so the
        returned list always has ``len(params)`` entries.
        """
        chunk_size = max(1, chunk_size)
        results: List[Any] = [None] * len(params)
        executor = self._get_executor()

        # Remember where each chunk belongs; completion order is arbitrary.
        span_of = {}
        for start in range(0, len(params), chunk_size):
            chunk = params[start : start + chunk_size]
            span_of[executor.submit(self._process_chunk, func, chunk)] = (start, len(chunk))

        for future in as_completed(span_of):
            start, length = span_of[future]
            try:
                chunk_results = future.result(timeout=self.timeout_per_task)
            except Exception as e:
                logging.error(f"Chunk failed: {e}")
                continue
            results[start : start + length] = chunk_results
        return results

    @staticmethod
    def _process_chunk(func: Callable, chunk: List[Tuple]) -> List[Any]:
        """Call ``func(*args)`` for each tuple in ``chunk`` (executed inside a worker)."""
        return [func(*args) for args in chunk]

    def _update_settings(self, duration: float, task_count: int):
        """Record a finished run and nudge ``chunk_size`` by one within [5, 100]."""
        self.task_history.append((duration, task_count))
        # Runs shorter than 5 s grow the chunk size, longer runs shrink it.
        self.chunk_size = max(5, min(100, self.chunk_size + (1 if duration < 5 else -1)))

    def _handle_error(self, error: Exception, func: Callable, args: Tuple) -> Any:
        """Log a failed call and return ``None`` as its placeholder result."""
        # functools.partial objects and some callables have no __name__.
        name = getattr(func, "__name__", repr(func))
        if isinstance(error, TimeoutError):
            logging.warning(f"Timeout processing {name}{args}")
        elif isinstance(error, MemoryError):
            logging.warning("Memory error detected")
            self._scale_down_workers()
        else:
            logging.error(f"Error processing {name}{args}: {str(error)}")
        return None

    def _handle_failure(self):
        """Prepare a retry: halve the worker count when above 2 and replace the pool."""
        if self.max_workers > 2:
            self.max_workers = max(1, self.max_workers // 2)
        # Always replace the pool: a pool that raised may no longer accept work.
        self._restart_executor()

    def shutdown(self):
        """Stop the monitor thread and release the pool. Safe to call repeatedly."""
        self.running = False
        executor, self._executor = self._executor, None
        if executor is not None:
            try:
                executor.shutdown(wait=False)
            except Exception as e:
                logging.error(f"Shutdown error: {e}")

    def __enter__(self):
        return self

    def __exit__(self, *exc_info):
        self.shutdown()

    def get_stats(self) -> Dict[str, Any]:
        """Return current settings plus totals accumulated over successful runs.

        ``avg_task_throughput`` (tasks per second) is only present once at
        least one run has been recorded.
        """
        stats = {
            "platform": self.platform,
            "mode": self.mode,
            "workers": self.max_workers,
            "chunk_size": self.chunk_size,
            "total_tasks": sum(count for _, count in self.task_history),
        }
        if self.task_history:
            total_time = sum(duration for duration, _ in self.task_history)
            stats["avg_task_throughput"] = stats["total_tasks"] / total_time if total_time else 0
        return stats
435
198
 
436
- # 临时存储超时设置
437
- original_timeout = self.timeout_per_task
438
- if timeout:
439
- self.timeout_per_task = timeout
440
199
 
441
- try:
442
- results = self.run(lambda x: func(x), param_list, chunk_size=chunk_size)
443
- for r in results:
444
- yield r
445
- finally:
446
- # 恢复原超时设置
447
- self.timeout_per_task = original_timeout
200
+ def _test_func(a, b):
201
+ time.sleep(0.01)
202
+ return a + b
448
203
 
449
- def __del__(self):
450
- """确保资源被正确释放"""
451
- self.shutdown()
452
204
 
453
- def shutdown(self):
454
- """显式关闭执行器"""
455
- if self._executor:
456
- try:
457
- self._executor.shutdown(wait=True)
458
- except Exception:
459
- pass
460
- self._executor = None
205
if __name__ == "__main__":
    # Smoke test: push 1000 small additions through the executor and report
    # how many results came back together with the executor statistics.
    params = list(zip(range(1000), range(0, 2000, 2)))

    with ParallelExecutor() as executor:
        results = executor.run(_test_func, params)

    # print("Results:", results)

    processed = len(results)
    print(f"Processed {processed} tasks")
    print("Execution stats:", executor.get_stats())