oafuncs 0.0.98.36__py3-none-any.whl → 0.0.98.37__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oafuncs/_script/parallel.py +39 -2
- oafuncs/_script/parallel_bak.py +230 -0
- oafuncs/oa_tool.py +1 -1
- {oafuncs-0.0.98.36.dist-info → oafuncs-0.0.98.37.dist-info}/METADATA +1 -1
- {oafuncs-0.0.98.36.dist-info → oafuncs-0.0.98.37.dist-info}/RECORD +8 -8
- oafuncs/_script/parallel_test.py +0 -14
- {oafuncs-0.0.98.36.dist-info → oafuncs-0.0.98.37.dist-info}/WHEEL +0 -0
- {oafuncs-0.0.98.36.dist-info → oafuncs-0.0.98.37.dist-info}/licenses/LICENSE.txt +0 -0
- {oafuncs-0.0.98.36.dist-info → oafuncs-0.0.98.37.dist-info}/top_level.txt +0 -0
oafuncs/_script/parallel.py
CHANGED
@@ -20,7 +20,7 @@ class ParallelExecutor:
|
|
20
20
|
self,
|
21
21
|
max_workers: Optional[int] = None,
|
22
22
|
chunk_size: Optional[int] = None,
|
23
|
-
mem_per_process: float =
|
23
|
+
mem_per_process: float = 3.0, # GB
|
24
24
|
timeout_per_task: int = 3600,
|
25
25
|
max_retries: int = 3,
|
26
26
|
):
|
@@ -130,18 +130,50 @@ class ParallelExecutor:
|
|
130
130
|
return self._chunked_execution(func, params, chunk_size)
|
131
131
|
|
132
132
|
results = [None] * len(params)
|
133
|
+
|
134
|
+
# 创建进度条 - 使用 range 作为占位符,手动控制进度
|
135
|
+
progress_bar = pbar(
|
136
|
+
iterable=range(len(params)), # 使用 range 作为占位符
|
137
|
+
description="Parallel Tasks",
|
138
|
+
total=len(params),
|
139
|
+
completed=0,
|
140
|
+
next_line=False,
|
141
|
+
)
|
142
|
+
# 手动开始任务
|
143
|
+
progress_bar.task.start()
|
144
|
+
|
133
145
|
with self._get_executor() as executor:
|
134
146
|
futures = {executor.submit(func, *args): idx for idx, args in enumerate(params)}
|
135
|
-
|
147
|
+
|
148
|
+
for future in as_completed(futures):
|
136
149
|
idx = futures[future]
|
137
150
|
try:
|
138
151
|
results[idx] = future.result(timeout=self.timeout_per_task)
|
139
152
|
except Exception as e:
|
140
153
|
results[idx] = self._handle_error(e, func, params[idx])
|
154
|
+
|
155
|
+
# 实时更新进度条
|
156
|
+
progress_bar.update(1)
|
157
|
+
progress_bar.refresh()
|
158
|
+
print('\n') # 结束进度条输出
|
141
159
|
return results
|
142
160
|
|
143
161
|
def _chunked_execution(self, func: Callable, params: List[Tuple], chunk_size: int) -> List[Any]:
|
162
|
+
from oafuncs.oa_tool import pbar
|
163
|
+
|
144
164
|
results = []
|
165
|
+
chunk_count = (len(params) + chunk_size - 1) // chunk_size
|
166
|
+
|
167
|
+
# 为分块执行创建进度条
|
168
|
+
progress_bar = pbar(
|
169
|
+
iterable=range(chunk_count), # 使用 range 作为占位符
|
170
|
+
description="Parallel Chunks",
|
171
|
+
total=chunk_count,
|
172
|
+
completed=0,
|
173
|
+
next_line=False,
|
174
|
+
)
|
175
|
+
progress_bar.task.start()
|
176
|
+
|
145
177
|
with self._get_executor() as executor:
|
146
178
|
futures = []
|
147
179
|
for i in range(0, len(params), chunk_size):
|
@@ -154,6 +186,11 @@ class ParallelExecutor:
|
|
154
186
|
except Exception as e:
|
155
187
|
logging.error(f"Chunk failed: {e}")
|
156
188
|
results.extend([None] * chunk_size)
|
189
|
+
|
190
|
+
# 更新分块进度
|
191
|
+
progress_bar.update(1)
|
192
|
+
progress_bar.refresh()
|
193
|
+
print('\n')
|
157
194
|
return results
|
158
195
|
|
159
196
|
@staticmethod
|
@@ -0,0 +1,230 @@
|
|
1
|
+
import logging
|
2
|
+
import multiprocessing as mp
|
3
|
+
import platform
|
4
|
+
import threading
|
5
|
+
import time
|
6
|
+
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
|
7
|
+
from typing import Any, Callable, Dict, List, Optional, Tuple
|
8
|
+
|
9
|
+
|
10
|
+
import psutil
|
11
|
+
|
12
|
+
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
|
13
|
+
|
14
|
+
__all__ = ["ParallelExecutor"]
|
15
|
+
|
16
|
+
|
17
|
+
class ParallelExecutor:
    """Run a callable over many argument tuples on a thread or process pool.

    The worker count defaults to the smaller of the logical core count and
    the number of ``mem_per_process``-sized slices that fit in the currently
    available memory.  A daemon thread polls available memory about once per
    second and reduces the worker count when it drops below
    ``mem_per_process``.

    Results returned by :meth:`run` are positionally aligned with the input
    ``params``; a task (or chunk of tasks) that failed is reported as ``None``.
    """

    def __init__(
        self,
        max_workers: Optional[int] = None,
        chunk_size: Optional[int] = None,
        mem_per_process: float = 3.0,  # GB
        timeout_per_task: int = 3600,
        max_retries: int = 3,
    ):
        """Configure the executor and start the background memory monitor.

        Args:
            max_workers: Pool size; falls back to the computed default when
                ``None`` (or 0).
            chunk_size: Tasks per chunk for chunked execution; falls back to
                :meth:`_get_default_chunk_size` when ``None`` (or 0).
            mem_per_process: Memory budget per worker in GB, used both to cap
                the default worker count and as the low-memory threshold.
            timeout_per_task: Timeout in seconds passed to ``Future.result``.
            max_retries: Number of additional attempts :meth:`run` makes after
                the first one fails.
        """
        self.platform = self._detect_platform()
        self.mem_per_process = mem_per_process
        self.timeout_per_task = timeout_per_task
        self.max_retries = max_retries
        self.running = True  # cleared by shutdown() to stop the monitor thread
        self.task_history = []  # (duration_seconds, task_count) per successful run
        self._executor = None
        self._shutdown_called = False

        # Must run after mem_per_process is set: the default depends on it.
        self.mode, default_workers = self._determine_optimal_settings()
        self.max_workers = max_workers or default_workers
        self.chunk_size = chunk_size or self._get_default_chunk_size()

        self._init_platform_settings()
        self._start_resource_monitor()

        logging.info(f"Initialized {self.__class__.__name__} on {self.platform} (mode={self.mode}, workers={self.max_workers})")

    def _detect_platform(self) -> str:
        """Return the lowercase OS name, or ``"wsl"`` for Linux on a Microsoft kernel."""
        system = platform.system().lower()
        if system == "linux":
            return "wsl" if "microsoft" in platform.release().lower() else "linux"
        return system

    def _init_platform_settings(self):
        """Select the multiprocessing context used for process pools."""
        if self.platform in ["linux", "wsl"]:
            self.mp_context = mp.get_context("fork")
        elif self.platform == "windows":
            # NOTE: this also changes the process-wide default start method.
            mp.set_start_method("spawn", force=True)
            self.mp_context = mp.get_context("spawn")
        else:
            # Other platforms use the interpreter's default context.
            self.mp_context = None

    def _determine_optimal_settings(self) -> Tuple[str, int]:
        """Return ``(mode, default_worker_count)``; the mode is always ``"process"``."""
        logical_cores = psutil.cpu_count(logical=True) or 1
        available_mem = psutil.virtual_memory().available / 1024**3  # GB

        mem_limit = max(1, int(available_mem / self.mem_per_process))
        return ("process", min(logical_cores, mem_limit))

    def _get_default_chunk_size(self) -> int:
        """Return the default chunk size: ``100 // cpu_count``, but at least 10."""
        return max(10, 100 // (psutil.cpu_count() or 1))

    def _start_resource_monitor(self):
        """Start a daemon thread that scales workers down while memory is low."""

        def monitor():
            threshold = self.mem_per_process * 1024**3  # bytes
            while self.running:
                try:
                    if psutil.virtual_memory().available < threshold:
                        self._scale_down_workers()
                except Exception as e:
                    logging.error(f"Resource monitor error: {e}")
                # Sleep outside the try so a persistently failing probe does
                # not turn this loop into a busy spin of error logs.
                time.sleep(1)

        threading.Thread(target=monitor, daemon=True).start()

    def _scale_down_workers(self):
        """Reduce the worker count by one (never below 1) and drop the current pool."""
        if self.max_workers > 1:
            new_count = self.max_workers - 1
            logging.warning(f"Scaling down workers from {self.max_workers} to {new_count}")
            self.max_workers = new_count
            self._restart_executor()

    def _restart_executor(self):
        """Shut down the cached pool without waiting; the next use creates a new one."""
        if self._executor:
            self._executor.shutdown(wait=False)
            self._executor = None

    def _get_executor(self):
        """Return the cached pool, creating it for the current mode if needed."""
        if not self._executor:
            if self.mode == "thread":
                # ThreadPoolExecutor does not accept ``mp_context``.
                self._executor = ThreadPoolExecutor(max_workers=self.max_workers)
            else:
                self._executor = ProcessPoolExecutor(max_workers=self.max_workers, mp_context=self.mp_context)
        return self._executor

    def _release_executor(self, executor) -> None:
        """Forget ``executor`` if it is still the cached pool.

        Using a pool as a context manager shuts it down on exit; keeping the
        dead pool cached would make every later ``submit`` fail.
        """
        if self._executor is executor:
            self._executor = None

    def run(self, func: Callable, params: List[Tuple], chunk_size: Optional[int] = None) -> List[Any]:
        """Execute ``func(*args)`` for every tuple in ``params``.

        Args:
            func: Callable to invoke; must be picklable in process mode.
            params: One positional-argument tuple per task.
            chunk_size: Overrides the instance chunk size for this call.

        Returns:
            A list aligned with ``params``; failed tasks yield ``None``.

        Raises:
            RuntimeError: If every attempt (``max_retries + 1``) raised; the
                last failure is attached as ``__cause__``.
        """
        chunk_size = chunk_size or self.chunk_size
        last_error = None
        try:
            for retry in range(self.max_retries + 1):
                try:
                    start_time = time.monotonic()
                    results = self._execute_batch(func, params, chunk_size)
                    self._update_settings(time.monotonic() - start_time, len(params))
                    return results
                except Exception as e:
                    last_error = e
                    logging.error(f"Attempt {retry + 1} failed: {e}")
                    self._handle_failure()
            raise RuntimeError(f"Failed after {self.max_retries} retries") from last_error
        finally:
            # Only shut down the current executor; keep the resource monitor
            # and other running state alive for subsequent calls.
            if self._executor:
                try:
                    self._executor.shutdown(wait=True)
                except Exception as e:
                    logging.error(f"Executor shutdown error: {e}")
                finally:
                    self._executor = None

    def _execute_batch(self, func: Callable, params: List[Tuple], chunk_size: int) -> List[Any]:
        """Run one attempt: per-task submission, or chunked when ``params`` is large.

        Chunked execution is used when there are more than ``2 * chunk_size``
        tasks.  Per-task failures are routed through :meth:`_handle_error`.
        """
        if not params:
            return []

        if len(params) > chunk_size * 2:
            return self._chunked_execution(func, params, chunk_size)

        # Imported lazily and only on the path that draws a progress bar.
        from oafuncs.oa_tool import pbar

        results = [None] * len(params)
        executor = self._get_executor()
        try:
            with executor:
                futures = {executor.submit(func, *args): idx for idx, args in enumerate(params)}
                for future in pbar(as_completed(futures), "Parallel Tasks", total=len(futures)):
                    idx = futures[future]
                    try:
                        results[idx] = future.result(timeout=self.timeout_per_task)
                    except Exception as e:
                        results[idx] = self._handle_error(e, func, params[idx])
        finally:
            self._release_executor(executor)
        return results

    def _chunked_execution(self, func: Callable, params: List[Tuple], chunk_size: int) -> List[Any]:
        """Run ``params`` in chunks of ``chunk_size`` and return results in input order.

        Each chunk is one pool task.  If a chunk raises, every task in that
        chunk is reported as ``None`` (padded to the chunk's actual length).
        """
        chunks = [params[i : i + chunk_size] for i in range(0, len(params), chunk_size)]
        chunk_results: List[Optional[List[Any]]] = [None] * len(chunks)

        executor = self._get_executor()
        try:
            with executor:
                # Remember each chunk's position: as_completed yields in
                # completion order, not submission order.
                futures = {executor.submit(self._process_chunk, func, chunk): idx for idx, chunk in enumerate(chunks)}

                for future in as_completed(futures):
                    idx = futures[future]
                    try:
                        chunk_results[idx] = future.result(timeout=self.timeout_per_task)
                    except Exception as e:
                        logging.error(f"Chunk failed: {e}")
                        chunk_results[idx] = [None] * len(chunks[idx])
        finally:
            self._release_executor(executor)

        results = []
        for part in chunk_results:
            results.extend(part)
        return results

    @staticmethod
    def _process_chunk(func: Callable, chunk: List[Tuple]) -> List[Any]:
        """Apply ``func`` to each argument tuple in ``chunk``, sequentially."""
        return [func(*args) for args in chunk]

    def _update_settings(self, duration: float, task_count: int):
        """Record a successful run and nudge ``chunk_size`` by one within [5, 100].

        Runs faster than 5 seconds grow the chunk size; slower runs shrink it.
        """
        self.task_history.append((duration, task_count))
        self.chunk_size = max(5, min(100, self.chunk_size + (1 if duration < 5 else -1)))

    def _handle_error(self, error: Exception, func: Callable, args: Tuple) -> Any:
        """Log a per-task failure and return ``None`` as that task's result.

        A ``MemoryError`` additionally triggers a worker scale-down.
        """
        # concurrent.futures.TimeoutError is only an alias of the builtin
        # TimeoutError from Python 3.11 on; accept both.
        from concurrent.futures import TimeoutError as FuturesTimeoutError

        # Callables such as functools.partial objects have no __name__.
        func_name = getattr(func, "__name__", repr(func))

        if isinstance(error, (TimeoutError, FuturesTimeoutError)):
            logging.warning(f"Timeout processing {func_name}{args}")
        elif isinstance(error, MemoryError):
            logging.warning("Memory error detected")
            self._scale_down_workers()
        else:
            logging.error(f"Error processing {func_name}{args}: {str(error)}")
        return None

    def _handle_failure(self):
        """After a failed attempt, halve the worker count (if above 2) and reset the pool."""
        if self.max_workers > 2:
            self.max_workers = max(1, self.max_workers // 2)
            self._restart_executor()

    def shutdown(self):
        """Stop the resource monitor and shut down the pool; safe to call repeatedly."""
        if self._shutdown_called:
            return
        self._shutdown_called = True
        self.running = False
        # The base class no longer logs here; subclasses handle it uniformly.
        if self._executor:
            try:
                self._executor.shutdown(wait=True)
            except Exception as e:
                logging.error(f"Shutdown error: {e}")
            finally:
                self._executor = None

    def __enter__(self):
        """Return ``self`` so the executor can be used in a ``with`` block."""
        return self

    def __exit__(self, *exc_info):
        """Call :meth:`shutdown`; exceptions from the block are not suppressed."""
        self.shutdown()

    def get_stats(self) -> Dict[str, Any]:
        """Return the current configuration and aggregate throughput.

        ``avg_task_throughput`` (tasks per second over all recorded runs) is
        present only once at least one run has succeeded.
        """
        stats = {
            "platform": self.platform,
            "mode": self.mode,
            "workers": self.max_workers,
            "chunk_size": self.chunk_size,
            "total_tasks": sum(count for _, count in self.task_history),
        }
        if self.task_history:
            total_time = sum(duration for duration, _ in self.task_history)
            stats["avg_task_throughput"] = stats["total_tasks"] / total_time if total_time else 0
        return stats
|
214
|
+
|
215
|
+
|
216
|
+
def _test_func(a, b):
|
217
|
+
time.sleep(0.01)
|
218
|
+
return a + b
|
219
|
+
|
220
|
+
|
221
|
+
if __name__ == "__main__":
    # Smoke test: push 1000 small additions through the executor, then
    # report how many results came back and the executor's statistics.
    params = [(n, 2 * n) for n in range(1000)]

    with ParallelExecutor() as executor:
        results = executor.run(_test_func, params)

    processed = len(results)
    print(f"Processed {processed} tasks")
    # get_stats() only reads instance attributes, so it is still usable
    # after the with-block has shut the executor down.
    print("Execution stats:", executor.get_stats())
|
oafuncs/oa_tool.py
CHANGED
@@ -37,7 +37,7 @@ class PEx(ParallelExecutor):
|
|
37
37
|
self,
|
38
38
|
max_workers: Optional[int] = None,
|
39
39
|
chunk_size: Optional[int] = None,
|
40
|
-
mem_per_process: float =
|
40
|
+
mem_per_process: float = 3.0, # 调大默认内存限制
|
41
41
|
timeout_per_task: int = 7200, # 延长默认超时时间
|
42
42
|
max_retries: int = 5, # 增加默认重试次数
|
43
43
|
progress_callback: Optional[Callable[[int, int], None]] = None,
|
@@ -7,7 +7,7 @@ oafuncs/oa_file.py,sha256=fLb0gRhq2AiPl-5ASDHMrx6Z267FmhqNcTV7CdCxTdI,16934
|
|
7
7
|
oafuncs/oa_help.py,sha256=0J5VaZX-cB0c090KxgmktQJBc0o00FsY-4wB8l5y00k,4178
|
8
8
|
oafuncs/oa_nc.py,sha256=mKNxQ9jPxfRH7xINyrX7tBhitG5gmOKm6Dn7stk5mdw,15279
|
9
9
|
oafuncs/oa_python.py,sha256=xYMQnM0cGq9xUCtcoMpnN0LG5Rc_s94tai5nC6CNJ3E,4831
|
10
|
-
oafuncs/oa_tool.py,sha256=
|
10
|
+
oafuncs/oa_tool.py,sha256=VHx15VqpbzNlVXh0-3nJqcDgLVaECMD1FvxJ_CrV39E,8046
|
11
11
|
oafuncs/_data/hycom.png,sha256=MadKs6Gyj5n9-TOu7L4atQfTXtF9dvN9w-tdU9IfygI,10945710
|
12
12
|
oafuncs/_data/oafuncs.png,sha256=o3VD7wm-kwDea5E98JqxXl04_78cBX7VcdUt7uQXGiU,3679898
|
13
13
|
oafuncs/_script/cprogressbar.py,sha256=nIOs42t6zURLTNjJglavqrI2TKO9GWD0AmZ_DOBmXDU,15949
|
@@ -16,8 +16,8 @@ oafuncs/_script/email.py,sha256=l5xDgdVj8O5V0J2SwjsHKdUuxOH2jZvwdMO_P0dImHU,2684
|
|
16
16
|
oafuncs/_script/netcdf_merge.py,sha256=tM9ePqLiEsE7eIsNM5XjEYeXwxjYOdNz5ejnEuI7xKw,6066
|
17
17
|
oafuncs/_script/netcdf_modify.py,sha256=XDlAEToe_lwfAetkBSENqU5df-wnH7MGuxNTjG1gwHY,4178
|
18
18
|
oafuncs/_script/netcdf_write.py,sha256=GvyUyUhzMonzSp3y4pT8ZAfbQrsh5J3dLnmINYJKhuE,21422
|
19
|
-
oafuncs/_script/parallel.py,sha256=
|
20
|
-
oafuncs/_script/
|
19
|
+
oafuncs/_script/parallel.py,sha256=glEeZEg6HU3q1E6kUF-9k4l__KmIa3KlOglbOUcqubU,10047
|
20
|
+
oafuncs/_script/parallel_bak.py,sha256=2ySmYZ9e_PLhhMocWCCFWCYZD3Gs_mxl0HxEzbIuQvA,8861
|
21
21
|
oafuncs/_script/plot_dataset.py,sha256=QrA4vOCzWbAJp3hf5YYzgIRUZdJB5_ugepgyT_YfnaY,16327
|
22
22
|
oafuncs/_script/replace_file_content.py,sha256=wIwvaISFNYWG58BLZHZP9ZgbC5OhoZ-cpR3y25U1EUM,5601
|
23
23
|
oafuncs/oa_down/User_Agent-list.txt,sha256=pHaMlElMvZ8TG4vf4BqkZYKqe0JIGkr4kCN0lM1Y9FQ,514295
|
@@ -37,8 +37,8 @@ oafuncs/oa_sign/__init__.py,sha256=JSx1fcWpmNhQBvX_Bmq3xysfSkkFMrjbJASxV_V6aqE,1
|
|
37
37
|
oafuncs/oa_sign/meteorological.py,sha256=3MSjy7HTcvz2zsITkjUMr_0Y027Gas1LFE9pk99990k,6110
|
38
38
|
oafuncs/oa_sign/ocean.py,sha256=3uYEzaq-27yVy23IQoqy-clhWu1I_fhPFBAQyT-OF4M,5562
|
39
39
|
oafuncs/oa_sign/scientific.py,sha256=moIl2MEY4uitbXoD596JmXookXGQtQsS-8_1NBBTx84,4689
|
40
|
-
oafuncs-0.0.98.
|
41
|
-
oafuncs-0.0.98.
|
42
|
-
oafuncs-0.0.98.
|
43
|
-
oafuncs-0.0.98.
|
44
|
-
oafuncs-0.0.98.
|
40
|
+
oafuncs-0.0.98.37.dist-info/licenses/LICENSE.txt,sha256=rMtLpVg8sKiSlwClfR9w_Dd_5WubTQgoOzE2PDFxzs4,1074
|
41
|
+
oafuncs-0.0.98.37.dist-info/METADATA,sha256=EuBjhFOpWRAcZeADmTIFmg5FUSholH23O3qn5CsCR5U,4326
|
42
|
+
oafuncs-0.0.98.37.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
43
|
+
oafuncs-0.0.98.37.dist-info/top_level.txt,sha256=bgC35QkXbN4EmPHEveg_xGIZ5i9NNPYWqtJqaKqTPsQ,8
|
44
|
+
oafuncs-0.0.98.37.dist-info/RECORD,,
|
oafuncs/_script/parallel_test.py
DELETED
@@ -1,14 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python
|
2
|
-
# coding=utf-8
|
3
|
-
"""
|
4
|
-
Author: Liu Kun && 16031215@qq.com
|
5
|
-
Date: 2025-04-08 16:18:49
|
6
|
-
LastEditors: Liu Kun && 16031215@qq.com
|
7
|
-
LastEditTime: 2025-04-08 16:18:50
|
8
|
-
FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\_script\\parallel_test.py
|
9
|
-
Description:
|
10
|
-
EditPlatform: vscode
|
11
|
-
ComputerInfo: XPS 15 9510
|
12
|
-
SystemInfo: Windows 11
|
13
|
-
Python Version: 3.12
|
14
|
-
"""
|
File without changes
|
File without changes
|
File without changes
|