numba-mpi 0.41__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
numba_mpi/__init__.py CHANGED
@@ -1,4 +1,6 @@
- """ Numba @njittable MPI wrappers tested on Linux, macOS and Windows """
+ """
+ .. include::../README.md
+ """
  
  from importlib.metadata import PackageNotFoundError, version
  
@@ -18,6 +20,8 @@ from .api.size import size
  from .api.wtime import wtime
  from .common import RequestType
  
+ SUCCESS = 0
+
  try:
      __version__ = version(__name__)
  except PackageNotFoundError:
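
The newly exported `SUCCESS` constant gives callers a named value to compare the integer status codes returned by the wrappers against. A minimal sketch, assuming the `send()`/`recv()` signatures (keyword names `dest`, `source`, `tag`) used elsewhere in the package README, and that both return an integer status code:

```python
import numpy as np
import numba_mpi

src = np.array([1., 2., 3.])
dst = np.empty_like(src)

status = numba_mpi.SUCCESS  # SUCCESS == 0 == MPI_SUCCESS
if numba_mpi.rank() == 0:
    status = numba_mpi.send(src, dest=1, tag=11)
elif numba_mpi.rank() == 1:
    status = numba_mpi.recv(dst, source=0, tag=11)
assert status == numba_mpi.SUCCESS
```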
numba_mpi/api/initialized.py CHANGED
@@ -17,5 +17,6 @@ def initialized():
      """wrapper for MPI_Initialized()"""
      flag = np.empty((1,), dtype=np.intc)
      status = _MPI_Initialized(flag.ctypes.data)
-     assert status == 0
+     if status != 0:
+         return False
      return bool(flag[0])
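
With this change `initialized()` reports a failing `MPI_Initialized()` call as `False` instead of raising an `AssertionError`, so callers can guard MPI-dependent code paths with a plain check; a minimal sketch:

```python
import numba_mpi

if numba_mpi.initialized():
    print("running as MPI rank", numba_mpi.rank(), "of", numba_mpi.size())
else:
    print("MPI not initialized - running serially")
```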
numba_mpi/api/rank.py CHANGED
@@ -15,8 +15,9 @@ _MPI_Comm_rank.argtypes = [_MpiComm, ctypes.c_void_p]
  
  @numba.njit()
  def rank():
-     """wrapper for MPI_Comm_rank()"""
+     """wrapper for MPI_Comm_rank(), in case of failure returns 0"""
      value = np.empty(1, dtype=np.intc)
      status = _MPI_Comm_rank(_mpi_addr(_MPI_Comm_World_ptr), value.ctypes.data)
-     assert status == 0
+     if status != 0:
+         value[0] = 0
      return value[0]
numba_mpi/api/requests.py CHANGED
@@ -34,6 +34,9 @@ def wait(request):
      """Wrapper for MPI_Wait. Returns integer status code (0 == MPI_SUCCESS).
      Status is currently not handled. Requires 'request' parameter to be a
      c-style pointer to MPI_Request (such as returned by 'isend'/'irecv').
+
+     Uninitialized contents of 'request' (e.g., from numpy.empty()) may
+     cause invalid pointer dereference and segmentation faults.
      """
  
      status_buffer = create_status_buffer()
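
The added docstring note warns against passing uninitialized request buffers (e.g., created with `numpy.empty()`) to `wait()`. A hypothetical two-process sketch obtaining the request from `isend()`/`irecv()` as the docstring describes; the exact `isend`/`irecv` signatures (keyword names `dest`, `source`, `tag`) are assumed here and should be checked against the package documentation:

```python
import numpy as np
import numba_mpi

# sketch assumes exactly two MPI processes (e.g. mpiexec -n 2)
src = np.array([1., 2., 3.])
dst = np.empty_like(src)

# a valid request handle comes from isend()/irecv(), never from numpy.empty()
if numba_mpi.rank() == 0:
    req = numba_mpi.isend(src, dest=1, tag=0)
else:
    req = numba_mpi.irecv(dst, source=0, tag=0)

status = numba_mpi.wait(req)  # an uninitialized 'req' here could segfault
assert status == numba_mpi.SUCCESS
```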
@@ -64,6 +67,9 @@ def waitall(requests):
      """Wrapper for MPI_Waitall. Returns integer status code (0 == MPI_SUCCESS).
      Status is currently not handled. Requires 'requests' parameter to be an
      array or tuple of MPI_Request objects.
+
+     Uninitialized contents of 'requests' (e.g., from numpy.empty()) may
+     cause invalid pointer dereference and segmentation faults.
      """
      if isinstance(requests, np.ndarray):
          return _waitall_array_impl(requests)
@@ -123,6 +129,9 @@ def waitany(requests):
      status; second - the index of request that was completed. Status is
      currently not handled. Requires 'requests' parameter to be an array
      or tuple of MPI_Request objects.
+
+     Uninitialized contents of 'requests' (e.g., from numpy.empty()) may
+     cause invalid pointer dereference and segmentation faults.
      """
  
      if isinstance(requests, np.ndarray):
@@ -167,6 +176,9 @@ def test(request):
      flag that indicates whether given request is completed. Status is currently
      not handled. Requires 'request' parameter to be a c-style pointer to
      MPI_Request (such as returned by 'isend'/'irecv').
+
+     Uninitialized contents of 'request' (e.g., from numpy.empty()) may
+     cause invalid pointer dereference and segmentation faults.
      """
  
      status_buffer = create_status_buffer()
@@ -203,6 +215,9 @@ def testall(requests):
      flag that indicates whether given request is completed. Status is currently
      not handled. Requires 'requests' parameter to be an array or tuple of
      MPI_Request objects.
+
+     Uninitialized contents of 'requests' (e.g., from numpy.empty()) may
+     cause invalid pointer dereference and segmentation faults.
      """
      if isinstance(requests, np.ndarray):
          return _testall_array_impl(requests)
@@ -269,6 +284,9 @@ def testany(requests):
      that indicates whether any of requests is completed, and index of request
      that is guaranteed to be completed. Requires 'requests' parameter to be an
      array or tuple of MPI_Request objects.
+
+     Uninitialized contents of 'requests' (e.g., from numpy.empty()) may
+     cause invalid pointer dereference and segmentation faults.
      """
  
      if isinstance(requests, np.ndarray):
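
The same caveat applies to the multi-request wrappers, which per the docstrings accept an array or tuple of request handles. A hypothetical ring-exchange sketch under the same assumed `isend`/`irecv` signatures:

```python
import numpy as np
import numba_mpi

# each process sends to its right neighbour and receives from its left one
right = (numba_mpi.rank() + 1) % numba_mpi.size()
left = (numba_mpi.rank() - 1) % numba_mpi.size()
outbox = np.array([float(numba_mpi.rank())])
inbox = np.empty(1)

requests = (
    numba_mpi.isend(outbox, dest=right, tag=0),
    numba_mpi.irecv(inbox, source=left, tag=0),
)
status = numba_mpi.waitall(requests)  # handles built from isend/irecv, not numpy.empty()
assert status == numba_mpi.SUCCESS
```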
numba_mpi/api/size.py CHANGED
@@ -15,8 +15,9 @@ _MPI_Comm_size.argtypes = [_MpiComm, ctypes.c_void_p]
  
  @numba.njit()
  def size():
-     """wrapper for MPI_Comm_size()"""
+     """wrapper for MPI_Comm_size(), in case of failure returns 0"""
      value = np.empty(1, dtype=np.intc)
      status = _MPI_Comm_size(_mpi_addr(_MPI_Comm_World_ptr), value.ctypes.data)
-     assert status == 0
+     if status != 0:
+         value[0] = 0
      return value[0]
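
Both `rank()` and `size()` now report failure by returning 0 instead of asserting. A valid communicator always has at least one process, so a zero result from `size()` unambiguously signals failure and can be checked inside JIT-compiled code; a minimal sketch:

```python
import numba
import numba_mpi

@numba.njit()
def my_rank_or_error():
    if numba_mpi.size() == 0:  # 0 means MPI_Comm_size() failed
        return -1
    return numba_mpi.rank()
```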
numba_mpi-0.41.dist-info/METADATA → numba_mpi-1.0.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: numba-mpi
- Version: 0.41
+ Version: 1.0.0
  Summary: Numba @njittable MPI wrappers tested on Linux, macOS and Windows
  Home-page: https://github.com/numba-mpi/numba-mpi
  Author: https://github.com/numba-mpi/numba-mpi/graphs/contributors
@@ -8,6 +8,7 @@ License: GPL v3
  Project-URL: Tracker, https://github.com/numba-mpi/numba-mpi/issues
  Project-URL: Documentation, https://numba-mpi.github.io/numba-mpi
  Project-URL: Source, https://github.com/numba-mpi/numba-mpi
+ Requires-Python: >=3.8
  Description-Content-Type: text/markdown
  License-File: LICENSE
  Requires-Dist: numba
@@ -16,6 +17,7 @@ Requires-Dist: mpi4py
  Requires-Dist: psutil
  Provides-Extra: tests
  Requires-Dist: pytest <8.0.0 ; extra == 'tests'
+ Requires-Dist: py-pde ; extra == 'tests'
  
  # <img src="https://raw.githubusercontent.com/numba-mpi/numba-mpi/main/.github/numba_mpi_logo.svg" style="height:50pt" alt="numba-mpi logo"> numba-mpi
  
@@ -79,40 +81,46 @@ hello()
  
  ### Example comparing numba-mpi vs. mpi4py performance:
  
- The example below compares Numba + mpi4py vs. Numba + numba-mpi performance.
- The sample code estimates $\pi$ by integration of $4/(1+x^2)$ between 0 and 1
+ The example below compares `Numba`+`mpi4py` vs. `Numba`+`numba-mpi` performance.
+ The sample code estimates $\pi$ by numerical integration of $\int_0^1 (4/(1+x^2))dx=\pi$
  dividing the workload into `n_intervals` handled by separate MPI processes
- and then obtaining a sum using `allreduce`.
- The computation is carried out in a JIT-compiled function and is repeated
- `N_TIMES`, the repetitions and the MPI-handled reduction are done outside or
- inside of the JIT-compiled block for mpi4py and numba-mpi, respectively.
+ and then obtaining a sum using `allreduce` (see, e.g., analogous [Matlab docs example](https://www.mathworks.com/help/parallel-computing/numerical-estimation-of-pi-using-message-passing.html)).
+ The computation is carried out in a JIT-compiled function `get_pi_part()` and is repeated
+ `N_TIMES`. The repetitions and the MPI-handled reduction are done outside or
+ inside of the JIT-compiled block for `mpi4py` and `numba-mpi`, respectively.
  Timing is repeated `N_REPEAT` times and the minimum time is reported.
- The generated plot shown below depicts the speedup obtained by replacing mpi4py
- with numba_mpi as a function of `n_intervals` - the more often communication
- is needed (smaller `n_intervals`), the larger the expected speedup.
-
+ The generated plot shown below depicts the speedup obtained by replacing `mpi4py`
+ with `numba_mpi`, plotted as a function of `N_TIMES / n_intervals` - the number of MPI calls per
+ interval. The speedup, which stems from avoiding roundtrips between JIT-compiled
+ and Python code is significant (150%-300%) in all cases. The more often communication
+ is needed (smaller `n_intervals`), the larger the measured speedup. Note that nothing
+ in the actual number crunching (within the `get_pi_part()` function) or in the employed communication logic
+ (handled by the same MPI library) differs between the `mpi4py` or `numba-mpi` solutions.
+ These are the overhead of `mpi4py` higher-level abstractions and the overhead of
+ repeatedly entering and leaving the JIT-compiled block if using `mpi4py`, which can be
+ eliminated by using `numba-mpi`, and which the measured differences in execution time
+ stem from.
  ```python
  import timeit, mpi4py, numba, numpy as np, numba_mpi
  
  N_TIMES = 10000
- N_REPEAT = 10
  RTOL = 1e-3
  
- @numba.njit
- def get_pi_part(out, n_intervals, rank, size):
+ @numba.jit
+ def get_pi_part(n_intervals=1000000, rank=0, size=1):
      h = 1 / n_intervals
      partial_sum = 0.0
      for i in range(rank + 1, n_intervals, size):
          x = h * (i - 0.5)
          partial_sum += 4 / (1 + x**2)
-     out[0] = h * partial_sum
+     return h * partial_sum
  
- @numba.njit
+ @numba.jit
  def pi_numba_mpi(n_intervals):
      pi = np.array([0.])
      part = np.empty_like(pi)
      for _ in range(N_TIMES):
-         get_pi_part(part, n_intervals, numba_mpi.rank(), numba_mpi.size())
+         part[0] = get_pi_part(n_intervals, numba_mpi.rank(), numba_mpi.size())
          numba_mpi.allreduce(part, pi, numba_mpi.Operator.SUM)
          assert abs(pi[0] - np.pi) / np.pi < RTOL
  
@@ -120,30 +128,30 @@ def pi_mpi4py(n_intervals):
      pi = np.array([0.])
      part = np.empty_like(pi)
      for _ in range(N_TIMES):
-         get_pi_part(part, n_intervals, mpi4py.MPI.COMM_WORLD.rank, mpi4py.MPI.COMM_WORLD.size)
+         part[0] = get_pi_part(n_intervals, mpi4py.MPI.COMM_WORLD.rank, mpi4py.MPI.COMM_WORLD.size)
          mpi4py.MPI.COMM_WORLD.Allreduce(part, (pi, mpi4py.MPI.DOUBLE), op=mpi4py.MPI.SUM)
          assert abs(pi[0] - np.pi) / np.pi < RTOL
  
- plot_x = [1000 * k for k in range(1, 11)]
+ plot_x = [x for x in range(1, 11)]
  plot_y = {'numba_mpi': [], 'mpi4py': []}
- for n_intervals in plot_x:
+ for x in plot_x:
      for impl in plot_y:
          plot_y[impl].append(min(timeit.repeat(
-             f"pi_{impl}({n_intervals})",
+             f"pi_{impl}(n_intervals={N_TIMES // x})",
              globals=locals(),
              number=1,
-             repeat=N_REPEAT
+             repeat=10
          )))
  
  if numba_mpi.rank() == 0:
      from matplotlib import pyplot
      pyplot.figure(figsize=(8.3, 3.5), tight_layout=True)
      pyplot.plot(plot_x, np.array(plot_y['mpi4py'])/np.array(plot_y['numba_mpi']), marker='o')
-     pyplot.xlabel('n_intervals (workload in between communication)')
-     pyplot.ylabel('wall time ratio (mpi4py / numba_mpi)')
+     pyplot.xlabel('number of MPI calls per interval')
+     pyplot.ylabel('mpi4py/numba-mpi wall-time ratio')
      pyplot.title(f'mpiexec -np {numba_mpi.size()}')
      pyplot.grid()
-     pyplot.savefig('readme_plot.png')
+     pyplot.savefig('readme_plot.svg')
  ```
  
  ![plot](https://github.com/numba-mpi/numba-mpi/releases/download/tip/readme_plot.png)
@@ -161,6 +169,7 @@ if numba_mpi.rank() == 0:
  - Intel MPI: https://intel.com/content/www/us/en/developer/tools/oneapi/mpi-library-documentation.html
  - MPI bindings:
    - Python: https://mpi4py.readthedocs.io
+   - Python/JAX: https://mpi4jax.readthedocs.io
    - Julia: https://juliaparallel.org/MPI.jl
    - Rust: https://docs.rs/mpi
    - C++: https://boost.org/doc/html/mpi.html
@@ -168,5 +177,11 @@ if numba_mpi.rank() == 0:
  
  ### Acknowledgements:
  
- Development of numba-mpi has been supported by the [Polish National Science Centre](https://ncn.gov.pl/en) (grant no. 2020/39/D/ST10/01220).
+ We thank [all contributors](https://github.com/numba-mpi/numba-mpi/graphs/contributors) and users who reported feedback to the project
+ through [GitHub issues](https://github.com/numba-mpi/numba-mpi/issues).
+
+ Development of numba-mpi has been supported by the [Polish National Science Centre](https://ncn.gov.pl/en) (grant no. 2020/39/D/ST10/01220),
+ the [Max Planck Society](https://www.mpg.de/en) and the [European Union](https://erc.europa.eu/) (ERC, EmulSim, 101044662).
+ We further acknowledge Poland’s high-performance computing infrastructure [PLGrid](https://plgrid.pl) (HPC Centers: [ACK Cyfronet AGH](https://www.cyfronet.pl/en))
+ for providing computer facilities and support within computational grant no. PLG/2023/016369.
  
numba_mpi-0.41.dist-info/RECORD → numba_mpi-1.0.0.dist-info/RECORD CHANGED
@@ -1,23 +1,23 @@
- numba_mpi/__init__.py,sha256=mpW16BzokTCNGHClW6K4qGvMIRqPrw0K2OHNSCSml5Y,781
+ numba_mpi/__init__.py,sha256=_DsPxgrR80KiJTLqzZRNMVsK_TUJt7EfNFy_MWvBOWk,754
  numba_mpi/common.py,sha256=2JJoUrd3Qa6GIFk6Zlt2NudS7ZurPxpVwBLRGSkCg5E,2266
  numba_mpi/utils.py,sha256=gfGFuzmGgs4FnBqzPI91ftAq4UHgXb_HFkvxrVWkcIo,1866
  numba_mpi/api/__init__.py,sha256=Zj5df4lWeGpxAXV8jKGFnmtLBQ50HwNU8dPf-os06X8,51
  numba_mpi/api/allreduce.py,sha256=szS7YzrQ5a90LlKDiefyxVEiAXnsHbni5g2M1of0TmE,3261
  numba_mpi/api/barrier.py,sha256=9VSJPBC4V0H-xo47uzlT8Hp4xmQhTNLxg5bAcX3Y03g,461
  numba_mpi/api/bcast.py,sha256=8SsYFj9qRjx4l3Q9367JMZd469izlf4if1qusuYILqU,1843
- numba_mpi/api/initialized.py,sha256=fFmhOGl2GkwS9UvPetXy4YrgE4xhMVsyx7Ac8QP0530,479
+ numba_mpi/api/initialized.py,sha256=oKXpZzHeips0VU1U9wEF_578kOrfKb_IEXxD_aQ2c2E,497
  numba_mpi/api/irecv.py,sha256=r4JvE7JJPN_hFpS79-idYL3dtp8tR0y0VoIRuHJ29lM,1120
  numba_mpi/api/isend.py,sha256=2mpP4FhMk0GrikjDluKwRnpVywdLj9RD4HVVEMSj9A8,1080
  numba_mpi/api/operator.py,sha256=3VTPZAdOP05bxdqt3lA0hRDICM-iaBMa4m-krEdO91s,342
- numba_mpi/api/rank.py,sha256=pqayxw-5QDJ7VJ3gKrvuu1G0sBlYEZt1juhnaDi_JD8,549
+ numba_mpi/api/rank.py,sha256=1xZvHUclsK20aMtK07JzXYxW5F4Er8HZgOmcf495sjo,597
  numba_mpi/api/recv.py,sha256=YsYK-q7PNfi3zt0ftVddM363VsnJ4XFfmgMq8aeCr-o,1260
- numba_mpi/api/requests.py,sha256=oDe85ZQ4xFbHWlNdrDhqVLvCkcQHs_9upUf3ms8x58k,8300
+ numba_mpi/api/requests.py,sha256=5EhgFyeQCGP8YclSPwxP95c2AhBo19CLlShK0TxCR2U,9114
  numba_mpi/api/scatter_gather.py,sha256=goZn4BxMKakWQHjfXIOdjzK3DJ-lTeaiQQwgnyQeZ_s,2410
  numba_mpi/api/send.py,sha256=jn1hPw0YHBHOaeJop_ZbjaBChaqgfw3nM1xGhW9sabI,909
- numba_mpi/api/size.py,sha256=fYLeUrygvz_XcxIDsLiZlMtS-aiWfp58Zi7aIOAgaj8,549
+ numba_mpi/api/size.py,sha256=-RX-FtcIH4qDxCoGOhZjjgEWXpytt79vsH0YX9dtZuY,597
  numba_mpi/api/wtime.py,sha256=qrTqlefW7K7hqnAQKkGYm8kgdiRGuSAGiHmPcTrhLzE,279
- numba_mpi-0.41.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
- numba_mpi-0.41.dist-info/METADATA,sha256=4POxiWzQaU7S3NeU8b_GKcog4H6OBjvvV62h-nPHj5I,8147
- numba_mpi-0.41.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
- numba_mpi-0.41.dist-info/top_level.txt,sha256=yb_ktLmrfuhOZS0rjS81FFNC-gK_4c19WbLG2ViP73g,10
- numba_mpi-0.41.dist-info/RECORD,,
+ numba_mpi-1.0.0.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+ numba_mpi-1.0.0.dist-info/METADATA,sha256=X9KbPWSmXB953BPIUua1e9ZVeXP2XeDM9Ppp0ailOKI,9687
+ numba_mpi-1.0.0.dist-info/WHEEL,sha256=y4mX-SOX4fYIkonsAGA5N0Oy-8_gI4FXw5HNI1xqvWg,91
+ numba_mpi-1.0.0.dist-info/top_level.txt,sha256=yb_ktLmrfuhOZS0rjS81FFNC-gK_4c19WbLG2ViP73g,10
+ numba_mpi-1.0.0.dist-info/RECORD,,
numba_mpi-0.41.dist-info/WHEEL → numba_mpi-1.0.0.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: bdist_wheel (0.43.0)
+ Generator: setuptools (70.2.0)
  Root-Is-Purelib: true
  Tag: py3-none-any
  