numba-mpi 0.41__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- numba_mpi/__init__.py +5 -1
- numba_mpi/api/initialized.py +2 -1
- numba_mpi/api/rank.py +3 -2
- numba_mpi/api/requests.py +18 -0
- numba_mpi/api/size.py +3 -2
- {numba_mpi-0.41.dist-info → numba_mpi-1.0.0.dist-info}/METADATA +41 -26
- {numba_mpi-0.41.dist-info → numba_mpi-1.0.0.dist-info}/RECORD +10 -10
- {numba_mpi-0.41.dist-info → numba_mpi-1.0.0.dist-info}/WHEEL +1 -1
- {numba_mpi-0.41.dist-info → numba_mpi-1.0.0.dist-info}/LICENSE +0 -0
- {numba_mpi-0.41.dist-info → numba_mpi-1.0.0.dist-info}/top_level.txt +0 -0
numba_mpi/__init__.py
CHANGED
@@ -1,4 +1,6 @@
|
|
1
|
-
"""
|
1
|
+
"""
|
2
|
+
.. include::../README.md
|
3
|
+
"""
|
2
4
|
|
3
5
|
from importlib.metadata import PackageNotFoundError, version
|
4
6
|
|
@@ -18,6 +20,8 @@ from .api.size import size
|
|
18
20
|
from .api.wtime import wtime
|
19
21
|
from .common import RequestType
|
20
22
|
|
23
|
+
SUCCESS = 0
|
24
|
+
|
21
25
|
try:
|
22
26
|
__version__ = version(__name__)
|
23
27
|
except PackageNotFoundError:
|
numba_mpi/api/initialized.py
CHANGED
numba_mpi/api/rank.py
CHANGED
@@ -15,8 +15,9 @@ _MPI_Comm_rank.argtypes = [_MpiComm, ctypes.c_void_p]
|
|
15
15
|
|
16
16
|
@numba.njit()
|
17
17
|
def rank():
|
18
|
-
"""wrapper for MPI_Comm_rank()"""
|
18
|
+
"""wrapper for MPI_Comm_rank(), in case of failure returns 0"""
|
19
19
|
value = np.empty(1, dtype=np.intc)
|
20
20
|
status = _MPI_Comm_rank(_mpi_addr(_MPI_Comm_World_ptr), value.ctypes.data)
|
21
|
-
|
21
|
+
if status != 0:
|
22
|
+
value[0] = 0
|
22
23
|
return value[0]
|
numba_mpi/api/requests.py
CHANGED
@@ -34,6 +34,9 @@ def wait(request):
|
|
34
34
|
"""Wrapper for MPI_Wait. Returns integer status code (0 == MPI_SUCCESS).
|
35
35
|
Status is currently not handled. Requires 'request' parameter to be a
|
36
36
|
c-style pointer to MPI_Request (such as returned by 'isend'/'irecv').
|
37
|
+
|
38
|
+
Uninitialized contents of 'request' (e.g., from numpy.empty()) may
|
39
|
+
cause invalid pointer dereference and segmentation faults.
|
37
40
|
"""
|
38
41
|
|
39
42
|
status_buffer = create_status_buffer()
|
@@ -64,6 +67,9 @@ def waitall(requests):
|
|
64
67
|
"""Wrapper for MPI_Waitall. Returns integer status code (0 == MPI_SUCCESS).
|
65
68
|
Status is currently not handled. Requires 'requests' parameter to be an
|
66
69
|
array or tuple of MPI_Request objects.
|
70
|
+
|
71
|
+
Uninitialized contents of 'requests' (e.g., from numpy.empty()) may
|
72
|
+
cause invalid pointer dereference and segmentation faults.
|
67
73
|
"""
|
68
74
|
if isinstance(requests, np.ndarray):
|
69
75
|
return _waitall_array_impl(requests)
|
@@ -123,6 +129,9 @@ def waitany(requests):
|
|
123
129
|
status; second - the index of request that was completed. Status is
|
124
130
|
currently not handled. Requires 'requests' parameter to be an array
|
125
131
|
or tuple of MPI_Request objects.
|
132
|
+
|
133
|
+
Uninitialized contents of 'requests' (e.g., from numpy.empty()) may
|
134
|
+
cause invalid pointer dereference and segmentation faults.
|
126
135
|
"""
|
127
136
|
|
128
137
|
if isinstance(requests, np.ndarray):
|
@@ -167,6 +176,9 @@ def test(request):
|
|
167
176
|
flag that indicates whether given request is completed. Status is currently
|
168
177
|
not handled. Requires 'request' parameter to be a c-style pointer to
|
169
178
|
MPI_Request (such as returned by 'isend'/'irecv').
|
179
|
+
|
180
|
+
Uninitialized contents of 'request' (e.g., from numpy.empty()) may
|
181
|
+
cause invalid pointer dereference and segmentation faults.
|
170
182
|
"""
|
171
183
|
|
172
184
|
status_buffer = create_status_buffer()
|
@@ -203,6 +215,9 @@ def testall(requests):
|
|
203
215
|
flag that indicates whether given request is completed. Status is currently
|
204
216
|
not handled. Requires 'requests' parameter to be an array or tuple of
|
205
217
|
MPI_Request objects.
|
218
|
+
|
219
|
+
Uninitialized contents of 'requests' (e.g., from numpy.empty()) may
|
220
|
+
cause invalid pointer dereference and segmentation faults.
|
206
221
|
"""
|
207
222
|
if isinstance(requests, np.ndarray):
|
208
223
|
return _testall_array_impl(requests)
|
@@ -269,6 +284,9 @@ def testany(requests):
|
|
269
284
|
that indicates whether any of requests is completed, and index of request
|
270
285
|
that is guaranteed to be completed. Requires 'requests' parameter to be an
|
271
286
|
array or tuple of MPI_Request objects.
|
287
|
+
|
288
|
+
Uninitialized contents of 'requests' (e.g., from numpy.empty()) may
|
289
|
+
cause invalid pointer dereference and segmentation faults.
|
272
290
|
"""
|
273
291
|
|
274
292
|
if isinstance(requests, np.ndarray):
|
numba_mpi/api/size.py
CHANGED
@@ -15,8 +15,9 @@ _MPI_Comm_size.argtypes = [_MpiComm, ctypes.c_void_p]
|
|
15
15
|
|
16
16
|
@numba.njit()
|
17
17
|
def size():
|
18
|
-
"""wrapper for MPI_Comm_size()"""
|
18
|
+
"""wrapper for MPI_Comm_size(), in case of failure returns 0"""
|
19
19
|
value = np.empty(1, dtype=np.intc)
|
20
20
|
status = _MPI_Comm_size(_mpi_addr(_MPI_Comm_World_ptr), value.ctypes.data)
|
21
|
-
|
21
|
+
if status != 0:
|
22
|
+
value[0] = 0
|
22
23
|
return value[0]
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: numba-mpi
|
3
|
-
Version: 0.
|
3
|
+
Version: 1.0.0
|
4
4
|
Summary: Numba @njittable MPI wrappers tested on Linux, macOS and Windows
|
5
5
|
Home-page: https://github.com/numba-mpi/numba-mpi
|
6
6
|
Author: https://github.com/numba-mpi/numba-mpi/graphs/contributors
|
@@ -8,6 +8,7 @@ License: GPL v3
|
|
8
8
|
Project-URL: Tracker, https://github.com/numba-mpi/numba-mpi/issues
|
9
9
|
Project-URL: Documentation, https://numba-mpi.github.io/numba-mpi
|
10
10
|
Project-URL: Source, https://github.com/numba-mpi/numba-mpi
|
11
|
+
Requires-Python: >=3.8
|
11
12
|
Description-Content-Type: text/markdown
|
12
13
|
License-File: LICENSE
|
13
14
|
Requires-Dist: numba
|
@@ -16,6 +17,7 @@ Requires-Dist: mpi4py
|
|
16
17
|
Requires-Dist: psutil
|
17
18
|
Provides-Extra: tests
|
18
19
|
Requires-Dist: pytest <8.0.0 ; extra == 'tests'
|
20
|
+
Requires-Dist: py-pde ; extra == 'tests'
|
19
21
|
|
20
22
|
# <img src="https://raw.githubusercontent.com/numba-mpi/numba-mpi/main/.github/numba_mpi_logo.svg" style="height:50pt" alt="numba-mpi logo"> numba-mpi
|
21
23
|
|
@@ -79,40 +81,46 @@ hello()
|
|
79
81
|
|
80
82
|
### Example comparing numba-mpi vs. mpi4py performance:
|
81
83
|
|
82
|
-
The example below compares Numba
|
83
|
-
The sample code estimates $\pi$ by integration of
|
84
|
+
The example below compares `Numba`+`mpi4py` vs. `Numba`+`numba-mpi` performance.
|
85
|
+
The sample code estimates $\pi$ by numerical integration of $\int_0^1 (4/(1+x^2))dx=\pi$
|
84
86
|
dividing the workload into `n_intervals` handled by separate MPI processes
|
85
|
-
and then obtaining a sum using `allreduce
|
86
|
-
The computation is carried out in a JIT-compiled function and is repeated
|
87
|
-
`N_TIMES
|
88
|
-
inside of the JIT-compiled block for mpi4py and numba-mpi
|
87
|
+
and then obtaining a sum using `allreduce` (see, e.g., analogous [Matlab docs example](https://www.mathworks.com/help/parallel-computing/numerical-estimation-of-pi-using-message-passing.html)).
|
88
|
+
The computation is carried out in a JIT-compiled function `get_pi_part()` and is repeated
|
89
|
+
`N_TIMES`. The repetitions and the MPI-handled reduction are done outside or
|
90
|
+
inside of the JIT-compiled block for `mpi4py` and `numba-mpi`, respectively.
|
89
91
|
Timing is repeated `N_REPEAT` times and the minimum time is reported.
|
90
|
-
The generated plot shown below depicts the speedup obtained by replacing mpi4py
|
91
|
-
with numba_mpi as a function of `n_intervals` - the
|
92
|
-
|
93
|
-
|
92
|
+
The generated plot shown below depicts the speedup obtained by replacing `mpi4py`
|
93
|
+
with `numba_mpi`, plotted as a function of `N_TIMES / n_intervals` - the number of MPI calls per
|
94
|
+
interval. The speedup, which stems from avoiding roundtrips between JIT-compiled
|
95
|
+
and Python code, is significant (150%-300%) in all cases. The more often communication
|
96
|
+
is needed (smaller `n_intervals`), the larger the measured speedup. Note that nothing
|
97
|
+
in the actual number crunching (within the `get_pi_part()` function) or in the employed communication logic
|
98
|
+
(handled by the same MPI library) differs between the `mpi4py` and `numba-mpi` solutions.
|
99
|
+
The measured differences in execution time stem from the overhead of `mpi4py`
|
100
|
+
higher-level abstractions and the overhead of repeatedly entering and leaving the
|
101
|
+
JIT-compiled block when using `mpi4py`, both of which can be eliminated by
|
102
|
+
using `numba-mpi`.
|
94
103
|
```python
|
95
104
|
import timeit, mpi4py, numba, numpy as np, numba_mpi
|
96
105
|
|
97
106
|
N_TIMES = 10000
|
98
|
-
N_REPEAT = 10
|
99
107
|
RTOL = 1e-3
|
100
108
|
|
101
|
-
@numba.
|
102
|
-
def get_pi_part(
|
109
|
+
@numba.jit
|
110
|
+
def get_pi_part(n_intervals=1000000, rank=0, size=1):
|
103
111
|
h = 1 / n_intervals
|
104
112
|
partial_sum = 0.0
|
105
113
|
for i in range(rank + 1, n_intervals, size):
|
106
114
|
x = h * (i - 0.5)
|
107
115
|
partial_sum += 4 / (1 + x**2)
|
108
|
-
|
116
|
+
return h * partial_sum
|
109
117
|
|
110
|
-
@numba.
|
118
|
+
@numba.jit
|
111
119
|
def pi_numba_mpi(n_intervals):
|
112
120
|
pi = np.array([0.])
|
113
121
|
part = np.empty_like(pi)
|
114
122
|
for _ in range(N_TIMES):
|
115
|
-
get_pi_part(
|
123
|
+
part[0] = get_pi_part(n_intervals, numba_mpi.rank(), numba_mpi.size())
|
116
124
|
numba_mpi.allreduce(part, pi, numba_mpi.Operator.SUM)
|
117
125
|
assert abs(pi[0] - np.pi) / np.pi < RTOL
|
118
126
|
|
@@ -120,30 +128,30 @@ def pi_mpi4py(n_intervals):
|
|
120
128
|
pi = np.array([0.])
|
121
129
|
part = np.empty_like(pi)
|
122
130
|
for _ in range(N_TIMES):
|
123
|
-
get_pi_part(
|
131
|
+
part[0] = get_pi_part(n_intervals, mpi4py.MPI.COMM_WORLD.rank, mpi4py.MPI.COMM_WORLD.size)
|
124
132
|
mpi4py.MPI.COMM_WORLD.Allreduce(part, (pi, mpi4py.MPI.DOUBLE), op=mpi4py.MPI.SUM)
|
125
133
|
assert abs(pi[0] - np.pi) / np.pi < RTOL
|
126
134
|
|
127
|
-
plot_x = [
|
135
|
+
plot_x = [x for x in range(1, 11)]
|
128
136
|
plot_y = {'numba_mpi': [], 'mpi4py': []}
|
129
|
-
for
|
137
|
+
for x in plot_x:
|
130
138
|
for impl in plot_y:
|
131
139
|
plot_y[impl].append(min(timeit.repeat(
|
132
|
-
f"pi_{impl}({
|
140
|
+
f"pi_{impl}(n_intervals={N_TIMES // x})",
|
133
141
|
globals=locals(),
|
134
142
|
number=1,
|
135
|
-
repeat=
|
143
|
+
repeat=10
|
136
144
|
)))
|
137
145
|
|
138
146
|
if numba_mpi.rank() == 0:
|
139
147
|
from matplotlib import pyplot
|
140
148
|
pyplot.figure(figsize=(8.3, 3.5), tight_layout=True)
|
141
149
|
pyplot.plot(plot_x, np.array(plot_y['mpi4py'])/np.array(plot_y['numba_mpi']), marker='o')
|
142
|
-
pyplot.xlabel('
|
143
|
-
pyplot.ylabel('wall
|
150
|
+
pyplot.xlabel('number of MPI calls per interval')
|
151
|
+
pyplot.ylabel('mpi4py/numba-mpi wall-time ratio')
|
144
152
|
pyplot.title(f'mpiexec -np {numba_mpi.size()}')
|
145
153
|
pyplot.grid()
|
146
|
-
pyplot.savefig('readme_plot.
|
154
|
+
pyplot.savefig('readme_plot.svg')
|
147
155
|
```
|
148
156
|
|
149
157
|

|
@@ -161,6 +169,7 @@ if numba_mpi.rank() == 0:
|
|
161
169
|
- Intel MPI: https://intel.com/content/www/us/en/developer/tools/oneapi/mpi-library-documentation.html
|
162
170
|
- MPI bindings:
|
163
171
|
- Python: https://mpi4py.readthedocs.io
|
172
|
+
- Python/JAX: https://mpi4jax.readthedocs.io
|
164
173
|
- Julia: https://juliaparallel.org/MPI.jl
|
165
174
|
- Rust: https://docs.rs/mpi
|
166
175
|
- C++: https://boost.org/doc/html/mpi.html
|
@@ -168,5 +177,11 @@ if numba_mpi.rank() == 0:
|
|
168
177
|
|
169
178
|
### Acknowledgements:
|
170
179
|
|
171
|
-
|
180
|
+
We thank [all contributors](https://github.com/numba-mpi/numba-mpi/graphs/contributors) and users who provided feedback to the project
|
181
|
+
through [GitHub issues](https://github.com/numba-mpi/numba-mpi/issues).
|
182
|
+
|
183
|
+
Development of numba-mpi has been supported by the [Polish National Science Centre](https://ncn.gov.pl/en) (grant no. 2020/39/D/ST10/01220),
|
184
|
+
the [Max Planck Society](https://www.mpg.de/en) and the [European Union](https://erc.europa.eu/) (ERC, EmulSim, 101044662).
|
185
|
+
We further acknowledge Poland’s high-performance computing infrastructure [PLGrid](https://plgrid.pl) (HPC Centers: [ACK Cyfronet AGH](https://www.cyfronet.pl/en))
|
186
|
+
for providing computer facilities and support within computational grant no. PLG/2023/016369.
|
172
187
|
|
@@ -1,23 +1,23 @@
|
|
1
|
-
numba_mpi/__init__.py,sha256=
|
1
|
+
numba_mpi/__init__.py,sha256=_DsPxgrR80KiJTLqzZRNMVsK_TUJt7EfNFy_MWvBOWk,754
|
2
2
|
numba_mpi/common.py,sha256=2JJoUrd3Qa6GIFk6Zlt2NudS7ZurPxpVwBLRGSkCg5E,2266
|
3
3
|
numba_mpi/utils.py,sha256=gfGFuzmGgs4FnBqzPI91ftAq4UHgXb_HFkvxrVWkcIo,1866
|
4
4
|
numba_mpi/api/__init__.py,sha256=Zj5df4lWeGpxAXV8jKGFnmtLBQ50HwNU8dPf-os06X8,51
|
5
5
|
numba_mpi/api/allreduce.py,sha256=szS7YzrQ5a90LlKDiefyxVEiAXnsHbni5g2M1of0TmE,3261
|
6
6
|
numba_mpi/api/barrier.py,sha256=9VSJPBC4V0H-xo47uzlT8Hp4xmQhTNLxg5bAcX3Y03g,461
|
7
7
|
numba_mpi/api/bcast.py,sha256=8SsYFj9qRjx4l3Q9367JMZd469izlf4if1qusuYILqU,1843
|
8
|
-
numba_mpi/api/initialized.py,sha256=
|
8
|
+
numba_mpi/api/initialized.py,sha256=oKXpZzHeips0VU1U9wEF_578kOrfKb_IEXxD_aQ2c2E,497
|
9
9
|
numba_mpi/api/irecv.py,sha256=r4JvE7JJPN_hFpS79-idYL3dtp8tR0y0VoIRuHJ29lM,1120
|
10
10
|
numba_mpi/api/isend.py,sha256=2mpP4FhMk0GrikjDluKwRnpVywdLj9RD4HVVEMSj9A8,1080
|
11
11
|
numba_mpi/api/operator.py,sha256=3VTPZAdOP05bxdqt3lA0hRDICM-iaBMa4m-krEdO91s,342
|
12
|
-
numba_mpi/api/rank.py,sha256=
|
12
|
+
numba_mpi/api/rank.py,sha256=1xZvHUclsK20aMtK07JzXYxW5F4Er8HZgOmcf495sjo,597
|
13
13
|
numba_mpi/api/recv.py,sha256=YsYK-q7PNfi3zt0ftVddM363VsnJ4XFfmgMq8aeCr-o,1260
|
14
|
-
numba_mpi/api/requests.py,sha256=
|
14
|
+
numba_mpi/api/requests.py,sha256=5EhgFyeQCGP8YclSPwxP95c2AhBo19CLlShK0TxCR2U,9114
|
15
15
|
numba_mpi/api/scatter_gather.py,sha256=goZn4BxMKakWQHjfXIOdjzK3DJ-lTeaiQQwgnyQeZ_s,2410
|
16
16
|
numba_mpi/api/send.py,sha256=jn1hPw0YHBHOaeJop_ZbjaBChaqgfw3nM1xGhW9sabI,909
|
17
|
-
numba_mpi/api/size.py,sha256
|
17
|
+
numba_mpi/api/size.py,sha256=-RX-FtcIH4qDxCoGOhZjjgEWXpytt79vsH0YX9dtZuY,597
|
18
18
|
numba_mpi/api/wtime.py,sha256=qrTqlefW7K7hqnAQKkGYm8kgdiRGuSAGiHmPcTrhLzE,279
|
19
|
-
numba_mpi-0.
|
20
|
-
numba_mpi-0.
|
21
|
-
numba_mpi-0.
|
22
|
-
numba_mpi-0.
|
23
|
-
numba_mpi-0.
|
19
|
+
numba_mpi-1.0.0.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
20
|
+
numba_mpi-1.0.0.dist-info/METADATA,sha256=X9KbPWSmXB953BPIUua1e9ZVeXP2XeDM9Ppp0ailOKI,9687
|
21
|
+
numba_mpi-1.0.0.dist-info/WHEEL,sha256=y4mX-SOX4fYIkonsAGA5N0Oy-8_gI4FXw5HNI1xqvWg,91
|
22
|
+
numba_mpi-1.0.0.dist-info/top_level.txt,sha256=yb_ktLmrfuhOZS0rjS81FFNC-gK_4c19WbLG2ViP73g,10
|
23
|
+
numba_mpi-1.0.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|