bsplyne-1.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bsplyne/__init__.py +55 -0
- bsplyne/b_spline.py +2464 -0
- bsplyne/b_spline_basis.py +1000 -0
- bsplyne/geometries_in_3D.py +1193 -0
- bsplyne/multi_patch_b_spline.py +1731 -0
- bsplyne/my_wide_product.py +209 -0
- bsplyne/parallel_utils.py +378 -0
- bsplyne/save_utils.py +141 -0
- bsplyne-1.0.0.dist-info/METADATA +91 -0
- bsplyne-1.0.0.dist-info/RECORD +13 -0
- bsplyne-1.0.0.dist-info/WHEEL +5 -0
- bsplyne-1.0.0.dist-info/licenses/LICENSE.txt +70 -0
- bsplyne-1.0.0.dist-info/top_level.txt +1 -0
bsplyne/my_wide_product.py
@@ -0,0 +1,209 @@
import numpy as np
import scipy.sparse as sps
import numba as nb


@nb.njit(cache=True)
def _wide_product_max_nnz(
    a_indptr: np.ndarray, b_indptr: np.ndarray, height: int
) -> int:
    """
    Compute the maximum number of nonzeros in the result.

    Parameters
    ----------
    a_indptr : np.ndarray
        CSR pointer array for matrix A.
    b_indptr : np.ndarray
        CSR pointer array for matrix B.
    height : int
        Number of rows (must be the same for both A and B).

    Returns
    -------
    max_nnz : int
        Total number of nonzero elements in the resulting matrix.
    """
    max_nnz = 0
    for i in range(height):
        nnz_a = a_indptr[i + 1] - a_indptr[i]
        nnz_b = b_indptr[i + 1] - b_indptr[i]
        max_nnz += nnz_a * nnz_b
    return max_nnz


@nb.njit(cache=True)
def _wide_product_row(
    a_data: np.ndarray,
    a_indices: np.ndarray,
    b_data: np.ndarray,
    b_indices: np.ndarray,
    b_width: int,
    out_data: np.ndarray,
    out_indices: np.ndarray,
) -> int:
    """
    Compute the wide product for one row.

    For each nonzero in the row of A and each nonzero in the row of B, it computes:

        out_index = a_indices[i] * b_width + b_indices[j]
        out_value = a_data[i] * b_data[j]

    Parameters
    ----------
    a_data : np.ndarray
        Nonzero values for the row in A.
    a_indices : np.ndarray
        Column indices for the row in A.
    b_data : np.ndarray
        Nonzero values for the row in B.
    b_indices : np.ndarray
        Column indices for the row in B.
    b_width : int
        Number of columns in B.
    out_data : np.ndarray
        Preallocated output array for the row's data.
    out_indices : np.ndarray
        Preallocated output array for the row's indices.

    Returns
    -------
    off : int
        Number of nonzero entries computed for this row.
    """
    off = 0
    for i in range(a_data.shape[0]):
        for j in range(b_data.shape[0]):
            out_indices[off] = a_indices[i] * b_width + b_indices[j]
            out_data[off] = a_data[i] * b_data[j]
            off += 1
    return off


@nb.njit(cache=True)
def _wide_product_numba(
    height: int,
    a_data: np.ndarray,
    a_indices: np.ndarray,
    a_indptr: np.ndarray,
    a_width: int,
    b_data: np.ndarray,
    b_indices: np.ndarray,
    b_indptr: np.ndarray,
    b_width: int,
):
    """
    Compute the row-wise wide (Khatri-Rao) product for two CSR matrices.

    For each row i, the result[i, :] = kron(A[i, :], B[i, :]), i.e. the Kronecker product
    of the i-th rows of A and B.

    Parameters
    ----------
    height : int
        Number of rows in A and B.
    a_data : np.ndarray
        Data array for matrix A (CSR format).
    a_indices : np.ndarray
        Indices array for matrix A.
    a_indptr : np.ndarray
        Index pointer array for matrix A.
    a_width : int
        Number of columns in A.
    b_data : np.ndarray
        Data array for matrix B (CSR format).
    b_indices : np.ndarray
        Indices array for matrix B.
    b_indptr : np.ndarray
        Index pointer array for matrix B.
    b_width : int
        Number of columns in B.

    Returns
    -------
    out_data : np.ndarray
        Data array for the resulting CSR matrix.
    out_indices : np.ndarray
        Indices array for the resulting CSR matrix.
    out_indptr : np.ndarray
        Index pointer array for the resulting CSR matrix.
    total_nnz : int
        Total number of nonzero entries computed.
    """
    max_nnz = _wide_product_max_nnz(a_indptr, b_indptr, height)
    out_data = np.empty(max_nnz, dtype=a_data.dtype)
    out_indices = np.empty(max_nnz, dtype=a_indices.dtype)
    out_indptr = np.empty(height + 1, dtype=a_indptr.dtype)

    off = 0
    for i in range(height):
        out_indptr[i] = off
        a_start = a_indptr[i]
        a_end = a_indptr[i + 1]
        b_start = b_indptr[i]
        b_end = b_indptr[i + 1]

        row_nnz = _wide_product_row(
            a_data[a_start:a_end],
            a_indices[a_start:a_end],
            b_data[b_start:b_end],
            b_indices[b_start:b_end],
            b_width,
            out_data[off:],
            out_indices[off:],
        )
        off += row_nnz
    out_indptr[height] = off
    return out_data[:off], out_indices[:off], out_indptr, off


def my_wide_product(A: sps.spmatrix, B: sps.spmatrix) -> sps.csr_matrix:
    """
    Compute a "1D" Kronecker product row by row.

    For each row i, the result C[i, :] = kron(A[i, :], B[i, :]).
    Matrices A and B must have the same number of rows.

    Parameters
    ----------
    A : scipy.sparse.spmatrix
        Input sparse matrix A in CSR format.
    B : scipy.sparse.spmatrix
        Input sparse matrix B in CSR format.

    Returns
    -------
    C : scipy.sparse.csr_matrix
        Resulting sparse matrix in CSR format with shape (A.shape[0], A.shape[1]*B.shape[1]).
    """
    if A.shape[0] != B.shape[0]:
        raise ValueError("A and B must have the same number of rows")

    # Ensure matrices are in CSR format for fast row slicing.
    if not sps.isspmatrix_csr(A):
        A = A.tocsr()
    if not sps.isspmatrix_csr(B):
        B = B.tocsr()

    height = A.shape[0]
    a_width = A.shape[1]
    b_width = B.shape[1]

    out_data, out_indices, out_indptr, total_nnz = _wide_product_numba(
        height,
        A.data,
        A.indices,
        A.indptr,
        a_width,
        B.data,
        B.indices,
        B.indptr,
        b_width,
    )

    # Build the resulting CSR matrix.
    C = sps.csr_matrix(
        (out_data, out_indices, out_indptr), shape=(height, a_width * b_width)
    )

    return C
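A minimal usage sketch (not part of the package), assuming `bsplyne` is installed so that `my_wide_product` is importable from `bsplyne.my_wide_product`; it checks each output row against `scipy.sparse.kron` applied to the corresponding input rows:

# Illustrative check of the row-wise Kronecker contract (hypothetical script).
import numpy as np
import scipy.sparse as sps
from bsplyne.my_wide_product import my_wide_product

rng = np.random.default_rng(0)
A = sps.random(5, 3, density=0.5, format="csr", random_state=rng)
B = sps.random(5, 4, density=0.5, format="csr", random_state=rng)
C = my_wide_product(A, B)  # shape (5, 3 * 4) = (5, 12)
for i in range(A.shape[0]):
    # Row i of C equals kron(A[i, :], B[i, :]).
    expected = sps.kron(A[i, :], B[i, :]).toarray()
    assert np.allclose(C[i, :].toarray(), expected)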
bsplyne/parallel_utils.py
@@ -0,0 +1,378 @@
import multiprocessing as mp
from multiprocessing import shared_memory
import threading
import queue
from typing import Iterable, Union, Callable
from tqdm import tqdm
import numpy as np
import tempfile
import os
import time
import gc


def _save_worker(save_queue: queue.Queue):
    """
    Background worker that saves data to disk from a queue.

    This function runs in a separate thread. It waits for filename-result pairs
    pushed into the `save_queue`, and writes them to disk using `np.save`.
    When it receives a `None` sentinel value, it terminates.

    Parameters
    ----------
    save_queue : queue.Queue
        A thread-safe queue containing `(fname, result)` pairs to be saved. The thread
        stops when it receives a `None` item.

    Notes
    -----
    - Each result is saved as a `numpy.ndarray` with `dtype=object` using `np.save`.
    - The queue can be bounded to control memory usage in the main thread.
    """
    while True:
        item = save_queue.get()
        if item is None:
            break  # stop signal
        fname, result = item
        to_save = np.empty(1, dtype=object)
        to_save[0] = result
        np.save(fname, to_save, allow_pickle=True)
        save_queue.task_done()


def _worker(
    func: Iterable[Callable],
    block_args: Iterable[tuple],
    idx: int,
    temp_dir: str,
    verbose: bool,
    jupyter: bool,
    pbar_title_prefix: str,
    shared_mem: Union[tuple[str, tuple[int, ...], type], None],
) -> str:
    """
    Apply functions to a sequence of argument tuples, saving results asynchronously during computation.

    This function iterates over `block_args`, applying `func[i]` to each tuple of arguments,
    and delegates saving to disk to a background thread. This allows computation to proceed
    without waiting for disk writes to complete, reducing idle CPU time and improving throughput.

    Parameters
    ----------
    func : Iterable[Callable]
        Functions to apply to each tuple in `block_args`. Each function should accept unpacked arguments
        from its corresponding tuple.
    block_args : Iterable[tuple]
        Iterable of `tuple` arguments to be passed to `func`. Each `tuple` is unpacked as arguments.
    idx : int
        Index of the current block, used for progress bar positioning and output file naming.
    temp_dir : str
        Path to the temporary directory where result files will be saved.
    verbose : bool
        If `True`, enables the progress bar. If `False`, disables it.
    jupyter : bool
        If `True`, sets the progress bar position to `0` for Jupyter notebook compatibility.
        If `False`, uses `idx` as the position.
    pbar_title_prefix : str
        Prefix string for the progress bar description.
    shared_mem : tuple[str, tuple[int, ...], type] or None
        Shared-memory descriptor `(name, shape, dtype)`. If provided, the shared array is
        attached and passed as the last argument of each function call.

    Returns
    -------
    str
        Path to the folder containing the saved `.npy` result files for this block.

    Notes
    -----
    - A background thread is launched to handle saving to disk via `np.save`.
    - A bounded queue is used to limit memory usage when saving is slower than computation.
    - Result files are saved as `"res_{i}.npy"` in a folder named `"block_{idx}"` within `temp_dir`.
    - Each file contains a single `numpy.ndarray` with `dtype=object` and one element.
    - Explicit garbage collection is triggered after saving.
    """
    position = 0 if jupyter else idx
    block_folder = os.path.join(temp_dir, f"block_{idx}")
    os.makedirs(block_folder, exist_ok=True)

    # Bounded save queue to avoid memory overload.
    save_queue = queue.Queue(maxsize=10)
    writer_thread = threading.Thread(target=_save_worker, args=(save_queue,))
    writer_thread.start()

    with tqdm(
        block_args,
        desc=f"{pbar_title_prefix}: Block {idx}",
        disable=not verbose,
        position=position,
    ) as pbar:
        if shared_mem is not None:
            shm_name, shape, dtype = shared_mem
            shm = shared_memory.SharedMemory(name=shm_name)
            try:
                array = np.ndarray(shape, dtype=dtype, buffer=shm.buf)
                for i, args in enumerate(pbar):
                    result = func[i](*args, array)
                    fname = os.path.join(block_folder, f"res_{i}.npy")
                    save_queue.put((fname, result))  # enqueue (blocks if full)
            finally:
                shm.close()
        else:
            for i, args in enumerate(pbar):
                result = func[i](*args)
                fname = os.path.join(block_folder, f"res_{i}.npy")
                save_queue.put((fname, result))  # enqueue (blocks if full)

    # Clean shutdown.
    save_queue.put(None)  # stop signal
    writer_thread.join()  # wait until everything has been written
    gc.collect()
    return block_folder


def parallel_blocks_inner(
    funcs: Iterable[Callable],
    all_args: Iterable[tuple],
    num_blocks: int,
    verbose: bool,
    pbar_title: str,
    disable_parallel: bool,
    shared_mem_last_arg: Union[np.ndarray, None],
) -> list:
    """
    Execute a list of functions with their corresponding argument tuples, optionally in parallel blocks.

    This function performs the actual execution of tasks either sequentially or in parallel,
    depending on the `disable_parallel` flag. When running in parallel, the tasks are divided into
    `num_blocks` groups (blocks), each executed by a separate worker process. Intermediate results
    are temporarily saved to disk as `.npy` files to limit memory usage and are reloaded sequentially
    after all processes complete.

    Parameters
    ----------
    funcs : Iterable[Callable]
        List of functions to execute. Must have the same length as `all_args`.
        Each function is called as `func(*args)` for its corresponding argument tuple.
    all_args : Iterable[tuple]
        List of tuples, each containing the arguments for the corresponding function in `funcs`.
    num_blocks : int
        Number of parallel blocks (i.e., worker processes) to use when `disable_parallel` is False.
        Determines how many subsets of tasks will be distributed among processes.
    verbose : bool
        If True, enables progress bars and displays information about block processing and result gathering.
    pbar_title : str
        Title prefix used for progress bar descriptions.
    disable_parallel : bool
        If True, all tasks are executed sequentially in the current process. If False, tasks are divided
        into blocks and processed in parallel using a multiprocessing pool.
    shared_mem_last_arg : Union[np.ndarray, None]
        Optional NumPy array placed in shared memory and appended automatically as the last argument
        of each task. This is useful for sharing large read-only data (e.g., images, meshes) without
        duplicating memory across processes.

    Returns
    -------
    list
        List of results obtained from applying each function to its corresponding argument tuple,
        preserving the original task order.

    Notes
    -----
    - **Sequential mode:** if `disable_parallel` is True, all functions are executed in the current
      process with an optional progress bar.
    - **Parallel mode:**
        * The tasks are split into `num_blocks` subsets.
        * Each subset is processed by a separate worker via `multiprocessing.Pool`.
        * Each worker writes its results as `.npy` files inside a temporary subfolder.
        * After all workers complete, results are reloaded in the original order, and temporary files
          and folders are deleted.
    - **Shared memory:** if `shared_mem_last_arg` is provided, it is stored once in shared memory
      and accessible by all workers, avoiding redundant copies of large data arrays.
    - Compatible with both standard Python terminals and Jupyter notebooks (adaptive progress bars).
    - Intended for internal use by higher-level orchestration functions such as `parallel_blocks()`.
    """

    n_tasks = len(all_args)

    # Detect if running in a Jupyter environment
    try:
        from IPython import get_ipython

        jupyter = get_ipython().__class__.__name__ == "ZMQInteractiveShell"
    except Exception:
        jupyter = False

    # Run sequentially if requested
    if disable_parallel:
        if shared_mem_last_arg is not None:
            all_args = [list(args) + [shared_mem_last_arg] for args in all_args]
        results = []
        for func, args in tqdm(
            zip(funcs, all_args),
            total=n_tasks,
            desc=pbar_title,
            disable=not verbose,
            position=0,
        ):
            results.append(func(*args))
        return results

    # Create the shared memory buffer if necessary
    if shared_mem_last_arg is not None:
        shm = shared_memory.SharedMemory(create=True, size=shared_mem_last_arg.nbytes)
        np.ndarray(
            shared_mem_last_arg.shape, dtype=shared_mem_last_arg.dtype, buffer=shm.buf
        )[:] = shared_mem_last_arg
        shared_mem = (shm.name, shared_mem_last_arg.shape, shared_mem_last_arg.dtype)
    else:
        shared_mem = None

    # Split the functions and arguments into blocks
    nb_each, extras = divmod(n_tasks, num_blocks)
    sizes = extras * [nb_each + 1] + (num_blocks - extras) * [nb_each]
    blocks = []
    funcs_blocks = []
    start = 0
    for size in sizes:
        end = start + size
        blocks.append(all_args[start:end])
        funcs_blocks.append(funcs[start:end])
        start = end

    temp_dir = tempfile.mkdtemp(prefix="parallel_blocks_")
    args = [
        (func_block, block, i, temp_dir, verbose, jupyter, pbar_title, shared_mem)
        for i, (func_block, block) in enumerate(zip(funcs_blocks, blocks))
    ]

    # Start the worker processes
    with mp.Pool(num_blocks) as pool:
        files = pool.starmap(_worker, args)

    # Free the memory space allocated to the shared array
    if shared_mem_last_arg is not None:
        shm.close()
        shm.unlink()

    # Load and collect results from each file
    with tqdm(total=n_tasks, desc=f"{pbar_title}: Gather", disable=not verbose) as pbar:
        results = []
        for idx, block_folder in enumerate(files):
            for i in range(len(blocks[idx])):
                fpath = os.path.join(block_folder, f"res_{i}.npy")
                results.append(np.load(fpath, allow_pickle=True)[0])
                os.remove(fpath)
                pbar.update(1)
            os.rmdir(block_folder)
    os.rmdir(temp_dir)
    return results


def parallel_blocks(
    funcs: Union[Callable, Iterable[Callable]],
    all_args: Union[Iterable[tuple], None] = None,
    num_blocks: Union[int, None] = None,
    verbose: bool = True,
    pbar_title: str = "Processing blocks",
    disable_parallel: bool = False,
    est_proc_cost: float = 0.5,
    shared_mem_last_arg: Union[np.ndarray, None] = None,
) -> list:
    """
    Execute a set of independent tasks sequentially or in parallel, depending on their estimated cost.

    The function evaluates the runtime of the first task to decide whether parallelization is worth
    the overhead of process creation. If parallel execution is deemed beneficial, the remaining tasks
    are distributed across several blocks processed in parallel. Otherwise, all tasks are executed
    sequentially. This strategy is especially useful when task runtimes are variable or short compared
    to process spawning costs.

    Parameters
    ----------
    funcs : Union[Callable, Iterable[Callable]]
        Function or list of functions to execute.
        - If a single function is provided, it will be applied to each argument tuple in `all_args`.
        - If a list of functions is provided, it must have the same length as `all_args`, allowing each
          task to use a distinct callable.
    all_args : Union[Iterable[tuple], None], optional
        Iterable of tuples containing the positional arguments for each function call.
        If the function takes no arguments, set `all_args` to `None` (defaults to empty tuples).
    num_blocks : Union[int, None], optional
        Number of parallel blocks (i.e., worker processes) to use. Defaults to half the number of CPU cores.
        A value of 1 forces sequential execution.
    verbose : bool, optional
        If True, displays timing information and progress bars. Default is True.
    pbar_title : str, optional
        Title prefix displayed in the progress bar. Default is "Processing blocks".
    disable_parallel : bool, optional
        If True, forces all computations to run sequentially regardless of estimated profitability.
        Default is False.
    est_proc_cost : float, optional
        Estimated process creation cost in seconds. Used to determine whether parallelization
        will yield a net speedup. Default is 0.5 s.
    shared_mem_last_arg : Union[np.ndarray, None], optional
        Shared-memory NumPy array to be appended automatically as the last argument in each task.
        This allows tasks to read from a large, read-only array without duplicating it in memory.
        Default is None.

    Returns
    -------
    list
        List of results, one per task, preserving the input order.

    Notes
    -----
    - The first task is executed sequentially to estimate its runtime.
    - Parallelization is enabled only if the estimated time saved exceeds the cost of process creation.
    - When parallel mode is used, tasks are executed in blocks, and intermediate results are stored
      temporarily on disk to limit memory usage, then reloaded and combined sequentially.
    - Compatible with Jupyter progress bars (`tqdm.notebook`).
    """
    if num_blocks is None:
        num_blocks = max(1, os.cpu_count() // 2)

    if callable(funcs):
        assert (
            all_args is not None
        ), "If 'funcs' is a single callable, 'all_args' must be provided as a list of argument tuples."
        funcs = [funcs] * len(all_args)
    else:
        if all_args is None:
            all_args = [()] * len(funcs)
        assert len(funcs) == len(
            all_args
        ), "If 'funcs' is an iterable of callables, its length must match the number of argument tuples in 'all_args'."

    n_tasks = len(all_args)
    if disable_parallel or num_blocks == 1 or n_tasks <= 1:
        return parallel_blocks_inner(
            funcs, all_args, num_blocks, verbose, pbar_title, True, shared_mem_last_arg
        )

    t0 = time.time()
    first_result = (
        funcs[0](*all_args[0])
        if shared_mem_last_arg is None
        else funcs[0](*all_args[0], shared_mem_last_arg)
    )
    t_first = time.time() - t0
    # Parallelizing pays off when the sequential time saved, roughly
    # n_tasks * t_first * (num_blocks - 1) / num_blocks, exceeds the process-creation
    # cost num_blocks * est_proc_cost; solving for t_first gives this threshold.
    t_thresh = (num_blocks / (num_blocks - 1)) * (num_blocks / n_tasks) * est_proc_cost
    disable_parallel = t_first <= t_thresh
    if verbose:
        print(
            f"First task time: {t_first:.3f}s, t_thresh: {t_thresh:.3f}s -> "
            f"{'Sequential' if disable_parallel else 'Parallel'}"
        )
    results_rest = parallel_blocks_inner(
        funcs[1:],
        all_args[1:],
        num_blocks,
        verbose,
        pbar_title,
        disable_parallel,
        shared_mem_last_arg,
    )
    results = [first_result] + results_rest

    return results
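A minimal usage sketch (not part of the package), assuming `bsplyne` is installed so that `parallel_blocks` is importable from `bsplyne.parallel_utils`; `row_norm` and the array sizes are illustrative. The shared array is appended automatically as the last argument of each task, and the `__main__` guard is needed because workers are spawned with `multiprocessing`:

# Illustrative driver script (hypothetical).
import numpy as np
from bsplyne.parallel_utils import parallel_blocks

def row_norm(i, big_array):
    # `big_array` arrives as the last argument, read from shared memory.
    return np.linalg.norm(big_array[i])

if __name__ == "__main__":
    big = np.random.rand(2000, 2000)  # large read-only data, stored once
    all_args = [(i,) for i in range(big.shape[0])]
    norms = parallel_blocks(row_norm, all_args, shared_mem_last_arg=big)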