pysmartpool-0.1.0.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pysmartpool-0.1.0/LICENSE +21 -0
- pysmartpool-0.1.0/PKG-INFO +322 -0
- pysmartpool-0.1.0/README.md +305 -0
- pysmartpool-0.1.0/pyproject.toml +26 -0
- pysmartpool-0.1.0/pysmartpool.egg-info/PKG-INFO +322 -0
- pysmartpool-0.1.0/pysmartpool.egg-info/SOURCES.txt +25 -0
- pysmartpool-0.1.0/pysmartpool.egg-info/dependency_links.txt +1 -0
- pysmartpool-0.1.0/pysmartpool.egg-info/requires.txt +2 -0
- pysmartpool-0.1.0/pysmartpool.egg-info/top_level.txt +1 -0
- pysmartpool-0.1.0/setup.cfg +4 -0
- pysmartpool-0.1.0/smartpool/__init__.py +4 -0
- pysmartpool-0.1.0/smartpool/gpuinfos.py +333 -0
- pysmartpool-0.1.0/smartpool/interpreterpool/__init__.py +1 -0
- pysmartpool-0.1.0/smartpool/interpreterpool/interpreterpool.py +83 -0
- pysmartpool-0.1.0/smartpool/interpreterpool/interpreterworker.py +64 -0
- pysmartpool-0.1.0/smartpool/module_deps.py +47 -0
- pysmartpool-0.1.0/smartpool/pool.py +443 -0
- pysmartpool-0.1.0/smartpool/processpool/__init__.py +1 -0
- pysmartpool-0.1.0/smartpool/processpool/processpool.py +119 -0
- pysmartpool-0.1.0/smartpool/processpool/processworker.py +113 -0
- pysmartpool-0.1.0/smartpool/sysinfo.py +73 -0
- pysmartpool-0.1.0/smartpool/task.py +53 -0
- pysmartpool-0.1.0/smartpool/threadpool/__init__.py +1 -0
- pysmartpool-0.1.0/smartpool/threadpool/threadpool.py +156 -0
- pysmartpool-0.1.0/smartpool/threadpool/threadworker.py +72 -0
- pysmartpool-0.1.0/smartpool/utils.py +108 -0
- pysmartpool-0.1.0/smartpool/worker.py +120 -0
pysmartpool-0.1.0/LICENSE
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2026 王炳辉

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
pysmartpool-0.1.0/PKG-INFO
@@ -0,0 +1,322 @@
Metadata-Version: 2.4
Name: pysmartpool
Version: 0.1.0
Summary: Smart thread/process/interpreter pool implementation.
Author-email: "王炳辉 (Bing-Hui WANG)" <binghui.wang@foxmail.com>
License: MIT
Project-URL: Homepage, https://github.com/Time-Coder/smartpool
Project-URL: Repository, https://github.com/Time-Coder/smartpool.git
Classifier: Programming Language :: Python :: 3
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent
Description-Content-Type: text/markdown
License-File: LICENSE
Requires-Dist: nvidia-ml-py
Requires-Dist: psutil
Dynamic: license-file
pysmartpool-0.1.0/README.md
@@ -0,0 +1,305 @@
# SmartPool

SmartPool is a Python library that provides intelligent, resource-aware pooling mechanisms for parallel computing. It automatically manages CPU and GPU resources to optimize performance while preventing resource exhaustion.

## Features

- **Multiple Pool Types**: ProcessPool, ThreadPool, and InterpreterPool for different use cases
- **Intuitive API Design**: Almost the same usage as `concurrent.futures` pools
- **Automatic Resource Management**: Monitors and manages CPU cores, memory, and GPU resources
- **Hardware-Aware Scheduling**: Automatically detects system resources and schedules tasks accordingly
- **PyTorch Integration**: Support for PyTorch multiprocessing with tensor sharing to avoid serialization
- **Training Hot Migration**: Automatically moves CPU training tasks to GPU when `best_device()` changes

## Installation

The distribution is named `pysmartpool`, while the importable package is `smartpool`:

```bash
pip install pysmartpool
```
## Examples

### Basic Usage

```python
from smartpool import ProcessPool

# Create a process pool that automatically manages system resources
with ProcessPool() as pool:
    # Submit tasks; arguments are passed as an explicit `args` tuple
    futures = [pool.submit(expensive_computation, args=(arg,)) for arg in arguments]

    # Get results
    results = [future.result() for future in futures]
```
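
For comparison, the same flow with the standard library: `concurrent.futures` passes call arguments positionally to `submit`, whereas SmartPool's `submit` takes an explicit `args` tuple (plus the resource hints shown below).

```python
from concurrent.futures import ProcessPoolExecutor

# Standard-library equivalent of the SmartPool example above;
# expensive_computation and arguments are the same placeholders.
with ProcessPoolExecutor() as pool:
    futures = [pool.submit(expensive_computation, arg) for arg in arguments]
    results = [future.result() for future in futures]
```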
### Resource-Aware Task Scheduling

```python
from smartpool import ProcessPool, DataSize

# Tasks can specify their resource requirements
def memory_intensive_task(data):
    # Your computation here
    return processed_data

with ProcessPool(use_torch=True) as pool:
    # Pool automatically schedules tasks based on available memory
    future = pool.submit(
        memory_intensive_task,
        args=(large_dataset,),
        need_cpu_cores=2,               # Request 2 CPU cores
        need_cpu_mem=1*DataSize.GB,     # Request 1 GB of RAM
        need_gpu_cores=1024,            # Request 1024 CUDA cores (NOT a percentage)
        need_gpu_mem=1*DataSize.GB      # Request 1 GB of GPU memory
    )
```
### PyTorch Training Hot Migration from CPU to GPU

SmartPool automatically migrates training tasks from CPU to GPU when better devices become available:

```python
# Complete setup for training with optimizer migration
from smartpool import (
    limit_num_single_thread,
    best_device,
    move_optimizer_to,
    ProcessPool
)
# Critical: Call before importing torch/numpy
limit_num_single_thread()

import torch

def training_task(model, optimizer, data_loader, epochs):
    device = best_device()  # <-- get the best suitable device at init time
    old_device = device

    for epoch in range(epochs):
        for x, y in data_loader:
            device = best_device()  # <-- get the best suitable device at each batch
            x, y = x.to(device), y.to(device)

            if old_device != device:
                model.to(device)                      # move model to the new device
                move_optimizer_to(optimizer, device)  # move optimizer to the new device
                old_device = device

            do_other_things()

with ProcessPool(use_torch=True) as pool:
    future = pool.submit(training_task, args=(model, optimizer, data_loader, epochs))
```
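
For intuition about what migration involves: besides `model.to(device)`, the optimizer's state tensors (e.g. Adam's moment buffers) must follow the parameters. A generic sketch of that pattern, not the package's actual `move_optimizer_to` implementation:

```python
import torch

def move_optimizer_state(optimizer, device):
    # optimizer.state maps each parameter to a dict of state tensors;
    # every tensor in it has to move along with the model parameters.
    for state in optimizer.state.values():
        for key, value in state.items():
            if torch.is_tensor(value):
                state[key] = value.to(device)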
## API

### ProcessPool

Each worker runs as a separate process with its own GIL. Suitable for CPU-intensive tasks.

```python
class ProcessPool:

    def __init__(
        self, max_workers:int=0,
        process_name_prefix:str="ProcessPool.worker:",
        mp_context:str="spawn",
        initializer:Optional[Callable[..., Any]]=None,
        initargs:Tuple[Any, ...]=(),
        initkwargs:Optional[Dict[str, Any]]=None,
        *,
        max_tasks_per_child:Optional[int]=None,
        use_torch:bool=False
    ): ...
    """
    Initializes a new ProcessPool instance.

    Args:
        max_workers: The maximum number of processes that can be used to
            execute the given calls. If 0 or not given, as many worker
            processes will be created as the machine has processors.
        mp_context: The process start method, one of ['fork', 'spawn', 'forkserver'].
        initializer: A callable used to initialize worker processes.
        initargs: A tuple of arguments to pass to the initializer.
        initkwargs: A dictionary of keyword arguments to pass to the initializer.
        max_tasks_per_child: The maximum number of tasks a worker process
            can complete before it will exit and be replaced with a fresh
            worker process. The default of None means worker processes will
            live as long as the executor. Requires a non-'fork' mp_context
            start method. When given, we default to using 'spawn' if no
            mp_context is supplied.
        use_torch: Whether to use PyTorch multiprocessing with tensor sharing
            and GPU device support.
    """

    def submit(
        self, func:Callable[..., Any],
        args:Optional[Tuple[Any, ...]]=None,
        kwargs:Optional[Dict[str, Any]]=None,
        need_cpu_cores:int=1, need_cpu_mem:int=0,
        need_gpu_cores:int=0, need_gpu_mem:int=0
    )->concurrent.futures.Future: ...
    """
    Submits a callable to be executed with the given arguments.

    Schedules the callable to be executed as func(*args, **kwargs) and returns
    a Future instance representing the execution of the callable.

    Args:
        func: The callable to execute.
        args: The positional arguments to pass to the callable.
        kwargs: The keyword arguments to pass to the callable.
        need_cpu_cores: The number of CPU cores required for the task.
        need_cpu_mem: The amount of CPU memory required for the task.
        need_gpu_cores: The number of CUDA cores required for the task.
        need_gpu_mem: The amount of GPU memory required for the task.

    Returns:
        A concurrent.futures.Future representing the given call.
    """

    def map(
        self, func:Callable[..., Any],
        iterable:Iterable[Any],
        need_cpu_cores:Union[int, Iterable[int]]=1,
        need_cpu_mem:Union[int, Iterable[int]]=0,
        need_gpu_cores:Union[int, Iterable[int]]=0,
        need_gpu_mem:Union[int, Iterable[int]]=0,
        timeout:Optional[Union[float, int]]=None,
        chunksize:int=1
    )->Iterable[Any]: ...
    """
    Returns an iterator equivalent to map(func, iterable).

    Args:
        func: A callable that will take as many arguments as there are
            passed iterables.
        iterable: An iterable whose items will be passed to func as arguments.
        need_cpu_cores: The number of CPU cores required for each task.
        need_cpu_mem: The amount of CPU memory required for each task.
        need_gpu_cores: The number of CUDA cores required for each task.
        need_gpu_mem: The amount of GPU memory required for each task.
        timeout: The maximum number of seconds to wait. If None, then there
            is no limit on the wait time.
        chunksize: If greater than one, the iterable will be chopped into
            chunks of size chunksize and submitted to the process pool.
            If set to one, the items will be sent one at a time.

    Returns:
        An iterator equivalent to: map(func, iterable).

    Raises:
        TimeoutError: If the entire result iterator could not be generated
            before the given timeout.
        Exception: If func(*args) raises for any values.
    """

    def starmap(
        self, func:Callable[..., Any],
        iterable:Iterable[Any],
        need_cpu_cores:Union[int, Iterable[int]]=1,
        need_cpu_mem:Union[int, Iterable[int]]=0,
        need_gpu_cores:Union[int, Iterable[int]]=0,
        need_gpu_mem:Union[int, Iterable[int]]=0,
        timeout:Optional[Union[float, int]]=None,
        chunksize:int=1
    )->Iterable[Any]: ...
    """
    Like the `map()` method, but the elements of `iterable` are expected to
    be iterables as well and will be unpacked as arguments. Hence
    `func` and `(a, b)` becomes `func(a, b)`.
    """

    def shutdown(self, wait:bool=True, *, cancel_futures:bool=False)->None: ...
    """
    Clean up the resources associated with the executor.

    It is safe to call this method several times; no other methods can be
    called after this one.

    Args:
        wait: If True then shutdown will not return until all running
            futures have finished executing and the resources used by the
            executor have been reclaimed.
        cancel_futures: If True then shutdown will cancel all pending
            futures. Futures that are completed or running will not be
            cancelled.
    """

    def __enter__(self)->ProcessPool: ...

    def __exit__(self, exc_type, exc_val, exc_tb)->None: ...
```
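
The `Union[int, Iterable[int]]` signatures suggest that per-task requirements can be given either as a single value applied to every task or as an iterable aligned with the input. A minimal sketch under that reading (`analyze` and the file names are illustrative, not part of the package):

```python
from smartpool import ProcessPool, DataSize

def analyze(path):
    ...  # placeholder for a per-file computation

paths = ["part-0.bin", "part-1.bin", "part-2.bin"]

with ProcessPool() as pool:
    # need_cpu_cores: one int for all tasks;
    # need_cpu_mem: one requirement per item of `paths`.
    results = list(pool.map(
        analyze,
        paths,
        need_cpu_cores=1,
        need_cpu_mem=[1*DataSize.GB, 2*DataSize.GB, 1*DataSize.GB],
    ))
```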
### ThreadPool

Each worker runs as a thread. Suitable for IO-intensive tasks.

```python
class ThreadPool:

    def __init__(
        self, max_workers:int=0,
        thread_name_prefix:str="ThreadPool.worker:",
        initializer:Optional[Callable[..., Any]]=None,
        initargs:Tuple[Any, ...]=(),
        initkwargs:Optional[Dict[str, Any]]=None,
        *,
        max_tasks_per_child:Optional[int]=None,
        use_torch:bool=False
    ): ...
    """
    Initializes a new ThreadPool instance.

    Arguments are the same as for ProcessPool.
    """

    def submit(self, ...): ...
    def map(self, ...): ...
    def starmap(self, ...): ...
    def shutdown(self, ...): ...
    def __enter__(self): ...
    def __exit__(self, exc_type, exc_val, exc_tb): ...
    """
    All the same as for ProcessPool.
    """
```
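
A minimal IO-bound sketch, assuming `ThreadPool` is exported from the top-level package like `ProcessPool` (the URLs and the `fetch` helper are illustrative):

```python
from urllib.request import urlopen

from smartpool import ThreadPool

def fetch(url):
    # IO-bound work suits threads: the GIL is released while the
    # worker waits on the network.
    with urlopen(url) as response:
        return url, response.status

urls = ["https://example.com/", "https://www.python.org/"]

with ThreadPool(max_workers=8) as pool:
    for url, status in pool.map(fetch, urls):
        print(url, status)
```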
### InterpreterPool (Python 3.14+)

Each worker runs as a thread within an isolated interpreter with its own GIL. Suitable for CPU-intensive tasks.
Lower overhead than ProcessPool for worker creation/destruction and task switching,
but no support for numpy/torch.

```python
class InterpreterPool:

    def __init__(
        self, max_workers:int=0,
        initializer:Optional[Callable[..., Any]]=None,
        initargs:Tuple[Any, ...]=(),
        initkwargs:Optional[Dict[str, Any]]=None,
        *,
        max_tasks_per_child:Optional[int]=None,
        use_torch:bool=False
    ): ...
    """
    Initializes a new InterpreterPool instance.

    Arguments are the same as for ProcessPool.
    """

    def submit(self, ...): ...
    def map(self, ...): ...
    def starmap(self, ...): ...
    def shutdown(self, ...): ...
    def __enter__(self): ...
    def __exit__(self, exc_type, exc_val, exc_tb): ...
    """
    All the same as for ProcessPool.
    """
```
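
A CPU-bound sketch, assuming `InterpreterPool` is exported from the top level like the other pools and that you are running Python 3.14+:

```python
import sys

assert sys.version_info >= (3, 14), "InterpreterPool requires Python 3.14+"

from smartpool import InterpreterPool

def count_primes(limit):
    # Pure-Python CPU-bound work: each worker runs in an isolated
    # interpreter with its own GIL, so tasks execute in parallel.
    return sum(
        all(n % d for d in range(2, int(n**0.5) + 1))
        for n in range(2, limit)
    )

with InterpreterPool(max_workers=4) as pool:
    futures = [pool.submit(count_primes, args=(200_000,)) for _ in range(4)]
    print([future.result() for future in futures])
```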
## License

MIT License
pysmartpool-0.1.0/pyproject.toml
@@ -0,0 +1,26 @@
[build-system]
requires = ["setuptools>=45", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "pysmartpool"
version = "0.1.0"
description = "Smart thread/process/interpreter pool implementation."
readme = "README.md"
authors = [
    { name = "王炳辉 (Bing-Hui WANG)", email = "binghui.wang@foxmail.com" }
]
license = { text = "MIT" }
classifiers = [
    "Programming Language :: Python :: 3",
    "License :: OSI Approved :: MIT License",
    "Operating System :: OS Independent",
]
dependencies = [
    "nvidia-ml-py",
    "psutil"
]

[project.urls]
Homepage = "https://github.com/Time-Coder/smartpool"
Repository = "https://github.com/Time-Coder/smartpool.git"