numba-mpi 0.41__tar.gz → 0.43__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. {numba_mpi-0.41 → numba_mpi-0.43}/.github/workflows/readme_snippets.yml +3 -1
  2. {numba_mpi-0.41 → numba_mpi-0.43}/.github/workflows/tests+pypi.yml +2 -1
  3. numba_mpi-0.43/CODE_OF_CONDUCT.md +47 -0
  4. {numba_mpi-0.41 → numba_mpi-0.43}/PKG-INFO +32 -25
  5. {numba_mpi-0.41 → numba_mpi-0.43}/README.md +30 -24
  6. {numba_mpi-0.41 → numba_mpi-0.43}/numba_mpi/__init__.py +3 -1
  7. {numba_mpi-0.41 → numba_mpi-0.43}/numba_mpi/api/requests.py +18 -0
  8. {numba_mpi-0.41 → numba_mpi-0.43}/numba_mpi.egg-info/PKG-INFO +32 -25
  9. {numba_mpi-0.41 → numba_mpi-0.43}/numba_mpi.egg-info/SOURCES.txt +1 -0
  10. {numba_mpi-0.41 → numba_mpi-0.43}/setup.py +1 -0
  11. {numba_mpi-0.41 → numba_mpi-0.43}/tests/api/test_isend_irecv.py +23 -5
  12. {numba_mpi-0.41 → numba_mpi-0.43}/.github/numba_mpi_logo.svg +0 -0
  13. {numba_mpi-0.41 → numba_mpi-0.43}/.github/workflows/stale.yml +0 -0
  14. {numba_mpi-0.41 → numba_mpi-0.43}/.gitignore +0 -0
  15. {numba_mpi-0.41 → numba_mpi-0.43}/.pre-commit-config.yaml +0 -0
  16. {numba_mpi-0.41 → numba_mpi-0.43}/.vscode/settings.json +0 -0
  17. {numba_mpi-0.41 → numba_mpi-0.43}/.zenodo.json +0 -0
  18. {numba_mpi-0.41 → numba_mpi-0.43}/LICENSE +0 -0
  19. {numba_mpi-0.41 → numba_mpi-0.43}/numba_mpi/api/__init__.py +0 -0
  20. {numba_mpi-0.41 → numba_mpi-0.43}/numba_mpi/api/allreduce.py +0 -0
  21. {numba_mpi-0.41 → numba_mpi-0.43}/numba_mpi/api/barrier.py +0 -0
  22. {numba_mpi-0.41 → numba_mpi-0.43}/numba_mpi/api/bcast.py +0 -0
  23. {numba_mpi-0.41 → numba_mpi-0.43}/numba_mpi/api/initialized.py +0 -0
  24. {numba_mpi-0.41 → numba_mpi-0.43}/numba_mpi/api/irecv.py +0 -0
  25. {numba_mpi-0.41 → numba_mpi-0.43}/numba_mpi/api/isend.py +0 -0
  26. {numba_mpi-0.41 → numba_mpi-0.43}/numba_mpi/api/operator.py +0 -0
  27. {numba_mpi-0.41 → numba_mpi-0.43}/numba_mpi/api/rank.py +0 -0
  28. {numba_mpi-0.41 → numba_mpi-0.43}/numba_mpi/api/recv.py +0 -0
  29. {numba_mpi-0.41 → numba_mpi-0.43}/numba_mpi/api/scatter_gather.py +0 -0
  30. {numba_mpi-0.41 → numba_mpi-0.43}/numba_mpi/api/send.py +0 -0
  31. {numba_mpi-0.41 → numba_mpi-0.43}/numba_mpi/api/size.py +0 -0
  32. {numba_mpi-0.41 → numba_mpi-0.43}/numba_mpi/api/wtime.py +0 -0
  33. {numba_mpi-0.41 → numba_mpi-0.43}/numba_mpi/common.py +0 -0
  34. {numba_mpi-0.41 → numba_mpi-0.43}/numba_mpi/utils.py +0 -0
  35. {numba_mpi-0.41 → numba_mpi-0.43}/numba_mpi.egg-info/dependency_links.txt +0 -0
  36. {numba_mpi-0.41 → numba_mpi-0.43}/numba_mpi.egg-info/requires.txt +0 -0
  37. {numba_mpi-0.41 → numba_mpi-0.43}/numba_mpi.egg-info/top_level.txt +0 -0
  38. {numba_mpi-0.41 → numba_mpi-0.43}/setup.cfg +0 -0
  39. {numba_mpi-0.41 → numba_mpi-0.43}/tests/__init__.py +0 -0
  40. {numba_mpi-0.41 → numba_mpi-0.43}/tests/api/test_allreduce.py +0 -0
  41. {numba_mpi-0.41 → numba_mpi-0.43}/tests/api/test_barrier.py +0 -0
  42. {numba_mpi-0.41 → numba_mpi-0.43}/tests/api/test_bcast.py +0 -0
  43. {numba_mpi-0.41 → numba_mpi-0.43}/tests/api/test_init.py +0 -0
  44. {numba_mpi-0.41 → numba_mpi-0.43}/tests/api/test_rank.py +0 -0
  45. {numba_mpi-0.41 → numba_mpi-0.43}/tests/api/test_scatter_gather.py +0 -0
  46. {numba_mpi-0.41 → numba_mpi-0.43}/tests/api/test_send_recv.py +0 -0
  47. {numba_mpi-0.41 → numba_mpi-0.43}/tests/api/test_size.py +0 -0
  48. {numba_mpi-0.41 → numba_mpi-0.43}/tests/api/test_wtime.py +0 -0
  49. {numba_mpi-0.41 → numba_mpi-0.43}/tests/common.py +0 -0
  50. {numba_mpi-0.41 → numba_mpi-0.43}/tests/test_version.py +0 -0
  51. {numba_mpi-0.41 → numba_mpi-0.43}/tests/utils.py +0 -0
@@ -23,12 +23,14 @@ jobs:
23
23
  - uses: mpi4py/setup-mpi@v1
24
24
  - run: pip install -e .
25
25
  - run: pip install pytest-codeblocks pytest matplotlib
26
+ - run: sudo apt-get install librsvg2-bin
26
27
  - run: python -c "import pytest_codeblocks; code=pytest_codeblocks.extract_from_file('README.md'); f=open('readme.py', 'w'); f.writelines(block.code for block in code if block.syntax=='python'); f.close()"
27
28
  - run: cat -n readme.py
28
29
  - run: mpiexec -n 4 python -We readme.py
30
+ - run: rsvg-convert readme_plot.svg > readme_plot.png
29
31
  - uses: actions/upload-artifact@v3
30
32
  with:
31
- path: readme_plot.png
33
+ path: readme_plot.*
32
34
  name: readme_plot
33
35
  - if: github.ref == 'refs/heads/main'
34
36
  uses: eine/tip@master
@@ -88,6 +88,7 @@ jobs:
88
88
  needs: [pylint, precommit, pdoc, zenodo_json]
89
89
  strategy:
90
90
  matrix:
91
+ mpi-np: [2, 3]
91
92
  platform: [ubuntu-latest, macos-12, windows-latest]
92
93
  python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
93
94
  mpi: [ 'mpich', 'openmpi', 'msmpi', 'intelmpi']
@@ -130,7 +131,7 @@ jobs:
130
131
  - run: pip install -e .[tests]
131
132
  - run: python -We -c "import mpi4py"
132
133
  - run: python -We -c "import numba_mpi"
133
- - run: mpiexec -n 2 pytest --durations=10 -p no:unraisableexception -We
134
+ - run: mpiexec -n ${{ matrix.mpi-np }} pytest --durations=10 -p no:unraisableexception -We
134
135
 
135
136
  dist:
136
137
  runs-on: ubuntu-latest
@@ -0,0 +1,47 @@
1
+ # Code of conduct for the numba-mpi open-source project
2
+
3
+ As [contributors and maintainers of this project](https://github.com/orgs/numba-mpi/people),
4
+ and in the interest of fostering an open and welcoming community, we pledge to respect all
5
+ people who contribute through reporting issues, posting feature requests, updating
6
+ documentation, submitting pull requests or patches, and other activities.
7
+
8
+ We are committed to making participation in this project a harassment-free experience for
9
+ everyone, regardless of level of technical experience and regardless of any non-technical
10
+ personal characteristic or identity trait (gender, religion, physicality, age, ethnicity,
11
+ essentially **any**).
12
+
13
+ Nurturing open and unemotional code reviews and community discussions, we aim at ensuring
14
+ fruitful and enriching collaboration experience and maintaining quality engineering
15
+ standards. This Code of Conduct applies both within project spaces and in public spaces
16
+ when an individual is representing the project or its community.
17
+
18
+ Examples of unacceptable behavior by participants include:
19
+
20
+ * Breaching project security (e.g., granting GitHub access rights without ensuring
21
+ collaborators' consent);
22
+ * Breaching collaborators' privacy (e.g., publishing other's private information without
23
+ permission);
24
+ * Force developments (e.g., merging or releasing against expressed collaborators' comments
25
+ against doing it);
26
+ * Any form of harassing language or exclusion;
27
+ * Other unethical or unprofessional conduct (if in doubt, ask!).
28
+
29
+ Project maintainers have the right and responsibility to remove, edit, or reject comments,
30
+ commits, code, issues, and other contributions that are not aligned to this Code of Conduct,
31
+ or to ban temporarily or permanently any contributor for other behaviors that they deem
32
+ inappropriate, threatening, offensive, or harmful. By adopting this Code of Conduct, project
33
+ maintainers commit themselves to fairly, consistently and collaboratively applying these
34
+ principles to every aspect of managing this project.
35
+
36
+ Reporting actions that are violating the hereby Code of Conduct can be done publicly on the
37
+ project GitHub space, or if needed, can be reported directly to any of the
38
+ [project maintainers listed on GitHub](https://github.com/orgs/numba-mpi/people)
39
+ (as of the time of writing: Sylwester Arabas, David Zwicker, Kacper Derlatka, et al.).
40
+ Maintainers are obligated to maintain confidentiality with regard to the reporter of a
41
+ privately reported incident.
42
+
43
+ Please note that, as of time of writing, the entirety of the project team is engaged in the
44
+ development purely on voluntary basis.
45
+
46
+ --
47
+ This Code of Conduct was inspired by [Numba Code of Conduct](https://github.com/numba/numba-governance/blob/accepted/code-of-conduct.md).
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: numba-mpi
3
- Version: 0.41
3
+ Version: 0.43
4
4
  Summary: Numba @njittable MPI wrappers tested on Linux, macOS and Windows
5
5
  Home-page: https://github.com/numba-mpi/numba-mpi
6
6
  Author: https://github.com/numba-mpi/numba-mpi/graphs/contributors
@@ -8,6 +8,7 @@ License: GPL v3
8
8
  Project-URL: Tracker, https://github.com/numba-mpi/numba-mpi/issues
9
9
  Project-URL: Documentation, https://numba-mpi.github.io/numba-mpi
10
10
  Project-URL: Source, https://github.com/numba-mpi/numba-mpi
11
+ Requires-Python: >=3.8
11
12
  Description-Content-Type: text/markdown
12
13
  License-File: LICENSE
13
14
  Requires-Dist: numba
@@ -79,40 +80,46 @@ hello()
79
80
 
80
81
  ### Example comparing numba-mpi vs. mpi4py performance:
81
82
 
82
- The example below compares Numba + mpi4py vs. Numba + numba-mpi performance.
83
- The sample code estimates $\pi$ by integration of $4/(1+x^2)$ between 0 and 1
83
+ The example below compares `Numba`+`mpi4py` vs. `Numba`+`numba-mpi` performance.
84
+ The sample code estimates $\pi$ by numerical integration of $\int_0^1 (4/(1+x^2))dx=\pi$
84
85
  dividing the workload into `n_intervals` handled by separate MPI processes
85
- and then obtaining a sum using `allreduce`.
86
- The computation is carried out in a JIT-compiled function and is repeated
87
- `N_TIMES`, the repetitions and the MPI-handled reduction are done outside or
88
- inside of the JIT-compiled block for mpi4py and numba-mpi, respectively.
86
+ and then obtaining a sum using `allreduce` (see, e.g., analogous [Matlab docs example](https://www.mathworks.com/help/parallel-computing/numerical-estimation-of-pi-using-message-passing.html)).
87
+ The computation is carried out in a JIT-compiled function `get_pi_part()` and is repeated
88
+ `N_TIMES`. The repetitions and the MPI-handled reduction are done outside or
89
+ inside of the JIT-compiled block for `mpi4py` and `numba-mpi`, respectively.
89
90
  Timing is repeated `N_REPEAT` times and the minimum time is reported.
90
- The generated plot shown below depicts the speedup obtained by replacing mpi4py
91
- with numba_mpi as a function of `n_intervals` - the more often communication
92
- is needed (smaller `n_intervals`), the larger the expected speedup.
93
-
91
+ The generated plot shown below depicts the speedup obtained by replacing `mpi4py`
92
+ with `numba_mpi`, plotted as a function of `N_TIMES / n_intervals` - the number of MPI calls per
93
+ interval. The speedup, which stems from avoiding roundtrips between JIT-compiled
94
+ and Python code is significant (150%-300%) in all cases. The more often communication
95
+ is needed (smaller `n_intervals`), the larger the measured speedup. Note that nothing
96
+ in the actual number crunching (within the `get_pi_part()` function) or in the employed communication logic
97
+ (handled by the same MPI library) differs between the `mpi4py` or `numba-mpi` solutions.
98
+ These are the overhead of `mpi4py` higher-level abstractions and the overhead of
99
+ repeatedly entering and leaving the JIT-compiled block if using `mpi4py`, which can be
100
+ eliminated by using `numba-mpi`, and which the measured differences in execution time
101
+ stem from.
94
102
  ```python
95
103
  import timeit, mpi4py, numba, numpy as np, numba_mpi
96
104
 
97
105
  N_TIMES = 10000
98
- N_REPEAT = 10
99
106
  RTOL = 1e-3
100
107
 
101
- @numba.njit
102
- def get_pi_part(out, n_intervals, rank, size):
108
+ @numba.jit
109
+ def get_pi_part(n_intervals=1000000, rank=0, size=1):
103
110
  h = 1 / n_intervals
104
111
  partial_sum = 0.0
105
112
  for i in range(rank + 1, n_intervals, size):
106
113
  x = h * (i - 0.5)
107
114
  partial_sum += 4 / (1 + x**2)
108
- out[0] = h * partial_sum
115
+ return h * partial_sum
109
116
 
110
- @numba.njit
117
+ @numba.jit
111
118
  def pi_numba_mpi(n_intervals):
112
119
  pi = np.array([0.])
113
120
  part = np.empty_like(pi)
114
121
  for _ in range(N_TIMES):
115
- get_pi_part(part, n_intervals, numba_mpi.rank(), numba_mpi.size())
122
+ part[0] = get_pi_part(n_intervals, numba_mpi.rank(), numba_mpi.size())
116
123
  numba_mpi.allreduce(part, pi, numba_mpi.Operator.SUM)
117
124
  assert abs(pi[0] - np.pi) / np.pi < RTOL
118
125
 
@@ -120,30 +127,30 @@ def pi_mpi4py(n_intervals):
120
127
  pi = np.array([0.])
121
128
  part = np.empty_like(pi)
122
129
  for _ in range(N_TIMES):
123
- get_pi_part(part, n_intervals, mpi4py.MPI.COMM_WORLD.rank, mpi4py.MPI.COMM_WORLD.size)
130
+ part[0] = get_pi_part(n_intervals, mpi4py.MPI.COMM_WORLD.rank, mpi4py.MPI.COMM_WORLD.size)
124
131
  mpi4py.MPI.COMM_WORLD.Allreduce(part, (pi, mpi4py.MPI.DOUBLE), op=mpi4py.MPI.SUM)
125
132
  assert abs(pi[0] - np.pi) / np.pi < RTOL
126
133
 
127
- plot_x = [1000 * k for k in range(1, 11)]
134
+ plot_x = [x for x in range(1, 11)]
128
135
  plot_y = {'numba_mpi': [], 'mpi4py': []}
129
- for n_intervals in plot_x:
136
+ for x in plot_x:
130
137
  for impl in plot_y:
131
138
  plot_y[impl].append(min(timeit.repeat(
132
- f"pi_{impl}({n_intervals})",
139
+ f"pi_{impl}(n_intervals={N_TIMES // x})",
133
140
  globals=locals(),
134
141
  number=1,
135
- repeat=N_REPEAT
142
+ repeat=10
136
143
  )))
137
144
 
138
145
  if numba_mpi.rank() == 0:
139
146
  from matplotlib import pyplot
140
147
  pyplot.figure(figsize=(8.3, 3.5), tight_layout=True)
141
148
  pyplot.plot(plot_x, np.array(plot_y['mpi4py'])/np.array(plot_y['numba_mpi']), marker='o')
142
- pyplot.xlabel('n_intervals (workload in between communication)')
143
- pyplot.ylabel('wall time ratio (mpi4py / numba_mpi)')
149
+ pyplot.xlabel('number of MPI calls per interval')
150
+ pyplot.ylabel('mpi4py/numba-mpi wall-time ratio')
144
151
  pyplot.title(f'mpiexec -np {numba_mpi.size()}')
145
152
  pyplot.grid()
146
- pyplot.savefig('readme_plot.png')
153
+ pyplot.savefig('readme_plot.svg')
147
154
  ```
148
155
 
149
156
  ![plot](https://github.com/numba-mpi/numba-mpi/releases/download/tip/readme_plot.png)
@@ -60,40 +60,46 @@ hello()
60
60
 
61
61
  ### Example comparing numba-mpi vs. mpi4py performance:
62
62
 
63
- The example below compares Numba + mpi4py vs. Numba + numba-mpi performance.
64
- The sample code estimates $\pi$ by integration of $4/(1+x^2)$ between 0 and 1
63
+ The example below compares `Numba`+`mpi4py` vs. `Numba`+`numba-mpi` performance.
64
+ The sample code estimates $\pi$ by numerical integration of $\int_0^1 (4/(1+x^2))dx=\pi$
65
65
  dividing the workload into `n_intervals` handled by separate MPI processes
66
- and then obtaining a sum using `allreduce`.
67
- The computation is carried out in a JIT-compiled function and is repeated
68
- `N_TIMES`, the repetitions and the MPI-handled reduction are done outside or
69
- inside of the JIT-compiled block for mpi4py and numba-mpi, respectively.
66
+ and then obtaining a sum using `allreduce` (see, e.g., analogous [Matlab docs example](https://www.mathworks.com/help/parallel-computing/numerical-estimation-of-pi-using-message-passing.html)).
67
+ The computation is carried out in a JIT-compiled function `get_pi_part()` and is repeated
68
+ `N_TIMES`. The repetitions and the MPI-handled reduction are done outside or
69
+ inside of the JIT-compiled block for `mpi4py` and `numba-mpi`, respectively.
70
70
  Timing is repeated `N_REPEAT` times and the minimum time is reported.
71
- The generated plot shown below depicts the speedup obtained by replacing mpi4py
72
- with numba_mpi as a function of `n_intervals` - the more often communication
73
- is needed (smaller `n_intervals`), the larger the expected speedup.
74
-
71
+ The generated plot shown below depicts the speedup obtained by replacing `mpi4py`
72
+ with `numba_mpi`, plotted as a function of `N_TIMES / n_intervals` - the number of MPI calls per
73
+ interval. The speedup, which stems from avoiding roundtrips between JIT-compiled
74
+ and Python code is significant (150%-300%) in all cases. The more often communication
75
+ is needed (smaller `n_intervals`), the larger the measured speedup. Note that nothing
76
+ in the actual number crunching (within the `get_pi_part()` function) or in the employed communication logic
77
+ (handled by the same MPI library) differs between the `mpi4py` or `numba-mpi` solutions.
78
+ These are the overhead of `mpi4py` higher-level abstractions and the overhead of
79
+ repeatedly entering and leaving the JIT-compiled block if using `mpi4py`, which can be
80
+ eliminated by using `numba-mpi`, and which the measured differences in execution time
81
+ stem from.
75
82
  ```python
76
83
  import timeit, mpi4py, numba, numpy as np, numba_mpi
77
84
 
78
85
  N_TIMES = 10000
79
- N_REPEAT = 10
80
86
  RTOL = 1e-3
81
87
 
82
- @numba.njit
83
- def get_pi_part(out, n_intervals, rank, size):
88
+ @numba.jit
89
+ def get_pi_part(n_intervals=1000000, rank=0, size=1):
84
90
  h = 1 / n_intervals
85
91
  partial_sum = 0.0
86
92
  for i in range(rank + 1, n_intervals, size):
87
93
  x = h * (i - 0.5)
88
94
  partial_sum += 4 / (1 + x**2)
89
- out[0] = h * partial_sum
95
+ return h * partial_sum
90
96
 
91
- @numba.njit
97
+ @numba.jit
92
98
  def pi_numba_mpi(n_intervals):
93
99
  pi = np.array([0.])
94
100
  part = np.empty_like(pi)
95
101
  for _ in range(N_TIMES):
96
- get_pi_part(part, n_intervals, numba_mpi.rank(), numba_mpi.size())
102
+ part[0] = get_pi_part(n_intervals, numba_mpi.rank(), numba_mpi.size())
97
103
  numba_mpi.allreduce(part, pi, numba_mpi.Operator.SUM)
98
104
  assert abs(pi[0] - np.pi) / np.pi < RTOL
99
105
 
@@ -101,30 +107,30 @@ def pi_mpi4py(n_intervals):
101
107
  pi = np.array([0.])
102
108
  part = np.empty_like(pi)
103
109
  for _ in range(N_TIMES):
104
- get_pi_part(part, n_intervals, mpi4py.MPI.COMM_WORLD.rank, mpi4py.MPI.COMM_WORLD.size)
110
+ part[0] = get_pi_part(n_intervals, mpi4py.MPI.COMM_WORLD.rank, mpi4py.MPI.COMM_WORLD.size)
105
111
  mpi4py.MPI.COMM_WORLD.Allreduce(part, (pi, mpi4py.MPI.DOUBLE), op=mpi4py.MPI.SUM)
106
112
  assert abs(pi[0] - np.pi) / np.pi < RTOL
107
113
 
108
- plot_x = [1000 * k for k in range(1, 11)]
114
+ plot_x = [x for x in range(1, 11)]
109
115
  plot_y = {'numba_mpi': [], 'mpi4py': []}
110
- for n_intervals in plot_x:
116
+ for x in plot_x:
111
117
  for impl in plot_y:
112
118
  plot_y[impl].append(min(timeit.repeat(
113
- f"pi_{impl}({n_intervals})",
119
+ f"pi_{impl}(n_intervals={N_TIMES // x})",
114
120
  globals=locals(),
115
121
  number=1,
116
- repeat=N_REPEAT
122
+ repeat=10
117
123
  )))
118
124
 
119
125
  if numba_mpi.rank() == 0:
120
126
  from matplotlib import pyplot
121
127
  pyplot.figure(figsize=(8.3, 3.5), tight_layout=True)
122
128
  pyplot.plot(plot_x, np.array(plot_y['mpi4py'])/np.array(plot_y['numba_mpi']), marker='o')
123
- pyplot.xlabel('n_intervals (workload in between communication)')
124
- pyplot.ylabel('wall time ratio (mpi4py / numba_mpi)')
129
+ pyplot.xlabel('number of MPI calls per interval')
130
+ pyplot.ylabel('mpi4py/numba-mpi wall-time ratio')
125
131
  pyplot.title(f'mpiexec -np {numba_mpi.size()}')
126
132
  pyplot.grid()
127
- pyplot.savefig('readme_plot.png')
133
+ pyplot.savefig('readme_plot.svg')
128
134
  ```
129
135
 
130
136
  ![plot](https://github.com/numba-mpi/numba-mpi/releases/download/tip/readme_plot.png)
@@ -1,4 +1,6 @@
1
- """ Numba @njittable MPI wrappers tested on Linux, macOS and Windows """
1
+ """
2
+ .. include::../README.md
3
+ """
2
4
 
3
5
  from importlib.metadata import PackageNotFoundError, version
4
6
 
@@ -34,6 +34,9 @@ def wait(request):
34
34
  """Wrapper for MPI_Wait. Returns integer status code (0 == MPI_SUCCESS).
35
35
  Status is currently not handled. Requires 'request' parameter to be a
36
36
  c-style pointer to MPI_Request (such as returned by 'isend'/'irecv').
37
+
38
+ Uninitialized contents of 'request' (e.g., from numpy.empty()) may
39
+ cause invalid pointer dereference and segmentation faults.
37
40
  """
38
41
 
39
42
  status_buffer = create_status_buffer()
@@ -64,6 +67,9 @@ def waitall(requests):
64
67
  """Wrapper for MPI_Waitall. Returns integer status code (0 == MPI_SUCCESS).
65
68
  Status is currently not handled. Requires 'requests' parameter to be an
66
69
  array or tuple of MPI_Request objects.
70
+
71
+ Uninitialized contents of 'requests' (e.g., from numpy.empty()) may
72
+ cause invalid pointer dereference and segmentation faults.
67
73
  """
68
74
  if isinstance(requests, np.ndarray):
69
75
  return _waitall_array_impl(requests)
@@ -123,6 +129,9 @@ def waitany(requests):
123
129
  status; second - the index of request that was completed. Status is
124
130
  currently not handled. Requires 'requests' parameter to be an array
125
131
  or tuple of MPI_Request objects.
132
+
133
+ Uninitialized contents of 'requests' (e.g., from numpy.empty()) may
134
+ cause invalid pointer dereference and segmentation faults.
126
135
  """
127
136
 
128
137
  if isinstance(requests, np.ndarray):
@@ -167,6 +176,9 @@ def test(request):
167
176
  flag that indicates whether given request is completed. Status is currently
168
177
  not handled. Requires 'request' parameter to be a c-style pointer to
169
178
  MPI_Request (such as returned by 'isend'/'irecv').
179
+
180
+ Uninitialized contents of 'request' (e.g., from numpy.empty()) may
181
+ cause invalid pointer dereference and segmentation faults.
170
182
  """
171
183
 
172
184
  status_buffer = create_status_buffer()
@@ -203,6 +215,9 @@ def testall(requests):
203
215
  flag that indicates whether given request is completed. Status is currently
204
216
  not handled. Requires 'requests' parameter to be an array or tuple of
205
217
  MPI_Request objects.
218
+
219
+ Uninitialized contents of 'requests' (e.g., from numpy.empty()) may
220
+ cause invalid pointer dereference and segmentation faults.
206
221
  """
207
222
  if isinstance(requests, np.ndarray):
208
223
  return _testall_array_impl(requests)
@@ -269,6 +284,9 @@ def testany(requests):
269
284
  that indicates whether any of requests is completed, and index of request
270
285
  that is guaranteed to be completed. Requires 'requests' parameter to be an
271
286
  array or tuple of MPI_Request objects.
287
+
288
+ Uninitialized contents of 'requests' (e.g., from numpy.empty()) may
289
+ cause invalid pointer dereference and segmentation faults.
272
290
  """
273
291
 
274
292
  if isinstance(requests, np.ndarray):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: numba-mpi
3
- Version: 0.41
3
+ Version: 0.43
4
4
  Summary: Numba @njittable MPI wrappers tested on Linux, macOS and Windows
5
5
  Home-page: https://github.com/numba-mpi/numba-mpi
6
6
  Author: https://github.com/numba-mpi/numba-mpi/graphs/contributors
@@ -8,6 +8,7 @@ License: GPL v3
8
8
  Project-URL: Tracker, https://github.com/numba-mpi/numba-mpi/issues
9
9
  Project-URL: Documentation, https://numba-mpi.github.io/numba-mpi
10
10
  Project-URL: Source, https://github.com/numba-mpi/numba-mpi
11
+ Requires-Python: >=3.8
11
12
  Description-Content-Type: text/markdown
12
13
  License-File: LICENSE
13
14
  Requires-Dist: numba
@@ -79,40 +80,46 @@ hello()
79
80
 
80
81
  ### Example comparing numba-mpi vs. mpi4py performance:
81
82
 
82
- The example below compares Numba + mpi4py vs. Numba + numba-mpi performance.
83
- The sample code estimates $\pi$ by integration of $4/(1+x^2)$ between 0 and 1
83
+ The example below compares `Numba`+`mpi4py` vs. `Numba`+`numba-mpi` performance.
84
+ The sample code estimates $\pi$ by numerical integration of $\int_0^1 (4/(1+x^2))dx=\pi$
84
85
  dividing the workload into `n_intervals` handled by separate MPI processes
85
- and then obtaining a sum using `allreduce`.
86
- The computation is carried out in a JIT-compiled function and is repeated
87
- `N_TIMES`, the repetitions and the MPI-handled reduction are done outside or
88
- inside of the JIT-compiled block for mpi4py and numba-mpi, respectively.
86
+ and then obtaining a sum using `allreduce` (see, e.g., analogous [Matlab docs example](https://www.mathworks.com/help/parallel-computing/numerical-estimation-of-pi-using-message-passing.html)).
87
+ The computation is carried out in a JIT-compiled function `get_pi_part()` and is repeated
88
+ `N_TIMES`. The repetitions and the MPI-handled reduction are done outside or
89
+ inside of the JIT-compiled block for `mpi4py` and `numba-mpi`, respectively.
89
90
  Timing is repeated `N_REPEAT` times and the minimum time is reported.
90
- The generated plot shown below depicts the speedup obtained by replacing mpi4py
91
- with numba_mpi as a function of `n_intervals` - the more often communication
92
- is needed (smaller `n_intervals`), the larger the expected speedup.
93
-
91
+ The generated plot shown below depicts the speedup obtained by replacing `mpi4py`
92
+ with `numba_mpi`, plotted as a function of `N_TIMES / n_intervals` - the number of MPI calls per
93
+ interval. The speedup, which stems from avoiding roundtrips between JIT-compiled
94
+ and Python code is significant (150%-300%) in all cases. The more often communication
95
+ is needed (smaller `n_intervals`), the larger the measured speedup. Note that nothing
96
+ in the actual number crunching (within the `get_pi_part()` function) or in the employed communication logic
97
+ (handled by the same MPI library) differs between the `mpi4py` or `numba-mpi` solutions.
98
+ These are the overhead of `mpi4py` higher-level abstractions and the overhead of
99
+ repeatedly entering and leaving the JIT-compiled block if using `mpi4py`, which can be
100
+ eliminated by using `numba-mpi`, and which the measured differences in execution time
101
+ stem from.
94
102
  ```python
95
103
  import timeit, mpi4py, numba, numpy as np, numba_mpi
96
104
 
97
105
  N_TIMES = 10000
98
- N_REPEAT = 10
99
106
  RTOL = 1e-3
100
107
 
101
- @numba.njit
102
- def get_pi_part(out, n_intervals, rank, size):
108
+ @numba.jit
109
+ def get_pi_part(n_intervals=1000000, rank=0, size=1):
103
110
  h = 1 / n_intervals
104
111
  partial_sum = 0.0
105
112
  for i in range(rank + 1, n_intervals, size):
106
113
  x = h * (i - 0.5)
107
114
  partial_sum += 4 / (1 + x**2)
108
- out[0] = h * partial_sum
115
+ return h * partial_sum
109
116
 
110
- @numba.njit
117
+ @numba.jit
111
118
  def pi_numba_mpi(n_intervals):
112
119
  pi = np.array([0.])
113
120
  part = np.empty_like(pi)
114
121
  for _ in range(N_TIMES):
115
- get_pi_part(part, n_intervals, numba_mpi.rank(), numba_mpi.size())
122
+ part[0] = get_pi_part(n_intervals, numba_mpi.rank(), numba_mpi.size())
116
123
  numba_mpi.allreduce(part, pi, numba_mpi.Operator.SUM)
117
124
  assert abs(pi[0] - np.pi) / np.pi < RTOL
118
125
 
@@ -120,30 +127,30 @@ def pi_mpi4py(n_intervals):
120
127
  pi = np.array([0.])
121
128
  part = np.empty_like(pi)
122
129
  for _ in range(N_TIMES):
123
- get_pi_part(part, n_intervals, mpi4py.MPI.COMM_WORLD.rank, mpi4py.MPI.COMM_WORLD.size)
130
+ part[0] = get_pi_part(n_intervals, mpi4py.MPI.COMM_WORLD.rank, mpi4py.MPI.COMM_WORLD.size)
124
131
  mpi4py.MPI.COMM_WORLD.Allreduce(part, (pi, mpi4py.MPI.DOUBLE), op=mpi4py.MPI.SUM)
125
132
  assert abs(pi[0] - np.pi) / np.pi < RTOL
126
133
 
127
- plot_x = [1000 * k for k in range(1, 11)]
134
+ plot_x = [x for x in range(1, 11)]
128
135
  plot_y = {'numba_mpi': [], 'mpi4py': []}
129
- for n_intervals in plot_x:
136
+ for x in plot_x:
130
137
  for impl in plot_y:
131
138
  plot_y[impl].append(min(timeit.repeat(
132
- f"pi_{impl}({n_intervals})",
139
+ f"pi_{impl}(n_intervals={N_TIMES // x})",
133
140
  globals=locals(),
134
141
  number=1,
135
- repeat=N_REPEAT
142
+ repeat=10
136
143
  )))
137
144
 
138
145
  if numba_mpi.rank() == 0:
139
146
  from matplotlib import pyplot
140
147
  pyplot.figure(figsize=(8.3, 3.5), tight_layout=True)
141
148
  pyplot.plot(plot_x, np.array(plot_y['mpi4py'])/np.array(plot_y['numba_mpi']), marker='o')
142
- pyplot.xlabel('n_intervals (workload in between communication)')
143
- pyplot.ylabel('wall time ratio (mpi4py / numba_mpi)')
149
+ pyplot.xlabel('number of MPI calls per interval')
150
+ pyplot.ylabel('mpi4py/numba-mpi wall-time ratio')
144
151
  pyplot.title(f'mpiexec -np {numba_mpi.size()}')
145
152
  pyplot.grid()
146
- pyplot.savefig('readme_plot.png')
153
+ pyplot.savefig('readme_plot.svg')
147
154
  ```
148
155
 
149
156
  ![plot](https://github.com/numba-mpi/numba-mpi/releases/download/tip/readme_plot.png)
@@ -1,6 +1,7 @@
1
1
  .gitignore
2
2
  .pre-commit-config.yaml
3
3
  .zenodo.json
4
+ CODE_OF_CONDUCT.md
4
5
  LICENSE
5
6
  README.md
6
7
  setup.py
@@ -18,6 +18,7 @@ setup(
18
18
  "local_scheme": lambda _: "",
19
19
  "version_scheme": "post-release",
20
20
  },
21
+ python_requires=">=3.8",
21
22
  setup_requires=["setuptools_scm"],
22
23
  license="GPL v3",
23
24
  description="Numba @njittable MPI wrappers tested on Linux, macOS and Windows",
@@ -176,7 +176,7 @@ def test_isend_irecv_waitall(isnd, ircv, wall, data_type):
176
176
  dst1 = np.empty_like(src1)
177
177
  dst2 = np.empty_like(src2)
178
178
 
179
- reqs = np.empty((2,), dtype=mpi.RequestType)
179
+ reqs = np.zeros((2,), dtype=mpi.RequestType)
180
180
  if mpi.rank() == 0:
181
181
  status, reqs[0:1] = isnd(src1, dest=1, tag=11)
182
182
  assert status == MPI_SUCCESS
@@ -245,7 +245,7 @@ def test_isend_irecv_waitall_exchange(isnd, ircv, wall):
245
245
  src = get_random_array((5,))
246
246
  dst = np.empty_like(src)
247
247
 
248
- reqs = np.empty((2,), dtype=mpi.RequestType)
248
+ reqs = np.zeros((2,), dtype=mpi.RequestType)
249
249
  if mpi.rank() == 0:
250
250
  status, reqs[0:1] = isnd(src, dest=1, tag=11)
251
251
  assert status == MPI_SUCCESS
@@ -263,6 +263,24 @@ def test_isend_irecv_waitall_exchange(isnd, ircv, wall):
263
263
  np.testing.assert_equal(dst, src)
264
264
 
265
265
 
266
+ @pytest.mark.parametrize(
267
+ "fun",
268
+ (
269
+ jit_waitany.py_func,
270
+ jit_waitall.py_func,
271
+ jit_testany.py_func,
272
+ jit_testall.py_func,
273
+ jit_waitany,
274
+ jit_waitall,
275
+ jit_testany,
276
+ jit_testall,
277
+ ),
278
+ )
279
+ def test_wall_segfault(fun):
280
+ reqs = np.zeros((2,), dtype=mpi.RequestType)
281
+ fun(reqs)
282
+
283
+
266
284
  @pytest.mark.parametrize(
267
285
  "isnd, ircv, wany, wall",
268
286
  [
@@ -282,7 +300,7 @@ def test_isend_irecv_waitany(isnd, ircv, wany, wall, data_type):
282
300
  dst1 = np.empty_like(src1)
283
301
  dst2 = np.empty_like(src2)
284
302
 
285
- reqs = np.empty((2,), dtype=mpi.RequestType)
303
+ reqs = np.zeros((2,), dtype=mpi.RequestType)
286
304
  if mpi.rank() == 0:
287
305
  status, reqs[0:1] = isnd(src1, dest=1, tag=11)
288
306
  assert status == MPI_SUCCESS
@@ -356,7 +374,7 @@ def test_isend_irecv_testall(isnd, ircv, tall, wall):
356
374
  dst1 = np.empty_like(src1)
357
375
  dst2 = np.empty_like(src2)
358
376
 
359
- reqs = np.empty((2,), dtype=mpi.RequestType)
377
+ reqs = np.zeros((2,), dtype=mpi.RequestType)
360
378
  if mpi.rank() == 0:
361
379
  time.sleep(TEST_WAIT_FULL_IN_SECONDS)
362
380
 
@@ -402,7 +420,7 @@ def test_isend_irecv_testany(isnd, ircv, tany, wall):
402
420
  dst1 = np.empty_like(src1)
403
421
  dst2 = np.empty_like(src2)
404
422
 
405
- reqs = np.empty((2,), dtype=mpi.RequestType)
423
+ reqs = np.zeros((2,), dtype=mpi.RequestType)
406
424
  if mpi.rank() == 0:
407
425
  time.sleep(TEST_WAIT_FULL_IN_SECONDS)
408
426
 
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes