numba-cuda 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff compares publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Files changed (30)
  1. numba_cuda/VERSION +1 -1
  2. numba_cuda/numba/cuda/compiler.py +7 -6
  3. numba_cuda/numba/cuda/cudadecl.py +6 -2
  4. numba_cuda/numba/cuda/cudadrv/devicearray.py +4 -1
  5. numba_cuda/numba/cuda/cudadrv/driver.py +1 -20
  6. numba_cuda/numba/cuda/cudadrv/linkable_code.py +13 -9
  7. numba_cuda/numba/cuda/cudadrv/nvrtc.py +5 -1
  8. numba_cuda/numba/cuda/cudadrv/nvvm.py +6 -1
  9. numba_cuda/numba/cuda/decorators.py +9 -2
  10. numba_cuda/numba/cuda/dispatcher.py +22 -3
  11. numba_cuda/numba/cuda/runtime/__init__.py +1 -0
  12. numba_cuda/numba/cuda/runtime/memsys.cu +94 -0
  13. numba_cuda/numba/cuda/runtime/memsys.cuh +17 -0
  14. numba_cuda/numba/cuda/runtime/nrt.cu +19 -22
  15. numba_cuda/numba/cuda/runtime/nrt.py +318 -0
  16. numba_cuda/numba/cuda/testing.py +11 -1
  17. numba_cuda/numba/cuda/tests/__init__.py +1 -0
  18. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +31 -0
  19. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +145 -11
  20. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +10 -7
  21. numba_cuda/numba/cuda/tests/nrt/mock_numpy.py +105 -1
  22. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +162 -40
  23. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +114 -0
  24. numba_cuda/numba/cuda/tests/support.py +11 -0
  25. numba_cuda/numba/cuda/utils.py +22 -0
  26. {numba_cuda-0.3.0.dist-info → numba_cuda-0.5.0.dist-info}/METADATA +21 -3
  27. {numba_cuda-0.3.0.dist-info → numba_cuda-0.5.0.dist-info}/RECORD +30 -23
  28. {numba_cuda-0.3.0.dist-info → numba_cuda-0.5.0.dist-info}/WHEEL +1 -1
  29. {numba_cuda-0.3.0.dist-info → numba_cuda-0.5.0.dist-info}/LICENSE +0 -0
  30. {numba_cuda-0.3.0.dist-info → numba_cuda-0.5.0.dist-info}/top_level.txt +0 -0
numba_cuda/numba/cuda/tests/nrt/mock_numpy.py:

@@ -1,8 +1,12 @@
+import math
+
+import numpy as np
 
 from numba.core import errors, types
 from numba.core.extending import overload
 from numba.np.arrayobj import (_check_const_str_dtype, is_nonelike,
-                               ty_parse_dtype, ty_parse_shape, numpy_empty_nd)
+                               ty_parse_dtype, ty_parse_shape, numpy_empty_nd,
+                               numpy_empty_like_nd)
 
 
 # Typical tests for allocation use array construction (e.g. np.zeros, np.empty,
@@ -20,6 +24,18 @@ def cuda_empty(shape, dtype):
     pass
 
 
+def cuda_empty_like(arr):
+    pass
+
+
+def cuda_arange(start):
+    pass
+
+
+def cuda_ones(shape):
+    pass
+
+
 @overload(cuda_empty)
 def ol_cuda_empty(shape, dtype):
     _check_const_str_dtype("empty", dtype)
@@ -40,3 +56,91 @@ def ol_cuda_empty(shape, dtype):
     else:
         msg = f"Cannot parse input types to function np.empty({shape}, {dtype})"
         raise errors.TypingError(msg)
+
+
+@overload(cuda_empty_like)
+def ol_cuda_empty_like(arr):
+
+    if isinstance(arr, types.Array):
+        nb_dtype = arr.dtype
+    else:
+        nb_dtype = arr
+
+    if isinstance(arr, types.Array):
+        layout = arr.layout if arr.layout != 'A' else 'C'
+        retty = arr.copy(dtype=nb_dtype, layout=layout, readonly=False)
+    else:
+        retty = types.Array(nb_dtype, 0, 'C')
+
+    def impl(arr):
+        dtype = None
+        return numpy_empty_like_nd(arr, dtype, retty)
+    return impl
+
+
+def _arange_dtype(*args):
+    bounds = [a for a in args if not isinstance(a, types.NoneType)]
+
+    if any(isinstance(a, types.Complex) for a in bounds):
+        dtype = types.complex128
+    elif any(isinstance(a, types.Float) for a in bounds):
+        dtype = types.float64
+    else:
+        # `np.arange(10).dtype` is always `np.dtype(int)`, aka `np.int_`, which
+        # in all released versions of numpy corresponds to the C `long` type.
+        # Windows 64 is broken by default here because Numba (as of 0.47) does
+        # not differentiate between Python and NumPy integers, so a `typeof(1)`
+        # on w64 is `int64`, i.e. `intp`. This means an arange(<some int>) will
+        # be typed as arange(int64) and the following will yield int64 opposed
+        # to int32. Example: without a load of analysis to work out if the args
+        # were wrapped in NumPy int*() calls it's not possible to detect the
+        # difference between `np.arange(10)` and `np.arange(np.int64(10))`.
+        NPY_TY = getattr(types, "int%s" % (8 * np.dtype(int).itemsize))
+
+        # unliteral these types such that `max` works.
+        unliteral_bounds = [types.unliteral(x) for x in bounds]
+        dtype = max(unliteral_bounds + [NPY_TY,])
+
+    return dtype
+
+
+@overload(cuda_arange)
+def ol_cuda_arange(start):
+    """Simplified arange with just 1 argument."""
+    if (not isinstance(start, types.Number)):
+        return
+
+    start_value = getattr(start, "literal_value", None)
+
+    def impl(start):
+        # Allow for improved performance if given literal arguments.
+        lit_start = start_value if start_value is not None else start
+
+        _step = 1
+        _start, _stop = 0, lit_start
+
+        nitems_c = (_stop - _start) / _step
+        nitems_r = int(math.ceil(nitems_c.real))
+
+        # Binary operator needed for compiler branch pruning.
+        nitems = max(nitems_r, 0)
+
+        arr = cuda_empty(nitems, np.int64)
+        val = _start
+        for i in range(nitems):
+            arr[i] = val + (i * _step)
+        return arr
+
+    return impl
+
+
+@overload(cuda_ones)
+def ol_cuda_ones(shape):
+
+    def impl(shape):
+        arr = cuda_empty(shape, np.float64)
+        arr_flat = arr.flat
+        for idx in range(len(arr_flat)):
+            arr_flat[idx] = 1
+        return arr
+    return impl
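Taken together, these overloads give test kernels NumPy-like allocators that route through the CUDA NRT. A minimal sketch of how a kernel might exercise them (hypothetical usage, assuming a CUDA device and NRT enabled, e.g. via `NUMBA_CUDA_ENABLE_NRT=1`):

```python
import numpy as np
from numba import cuda
from numba.cuda.tests.nrt.mock_numpy import cuda_arange, cuda_ones

@cuda.jit
def demo(out):
    ones = cuda_ones(4)       # device-side float64 allocation
    ramp = cuda_arange(8)     # device-side int64 allocation
    # Both arrays are reference-counted by the NRT and freed when dead.
    out[0] = ones[0] + ramp[3]

out = np.zeros(1, dtype=np.float64)
demo[1, 1](out)
cuda.synchronize()
```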
numba_cuda/numba/cuda/tests/nrt/test_nrt.py:

@@ -1,47 +1,22 @@
 import re
-import gc
+import os
+
 import numpy as np
 import unittest
-from unittest.mock import patch
-from numba.core.runtime import rtsys
-from numba.tests.support import EnableNRTStatsMixin
 from numba.cuda.testing import CUDATestCase
 
-from .mock_numpy import cuda_empty
+from numba.cuda.tests.nrt.mock_numpy import cuda_empty, cuda_ones, cuda_arange
+from numba.tests.support import run_in_subprocess, override_config
 
 from numba import cuda
-
-
-class TestNrtRefCt(EnableNRTStatsMixin, CUDATestCase):
-
-    def setUp(self):
-        # Clean up any NRT-backed objects hanging in a dead reference cycle
-        gc.collect()
-        super(TestNrtRefCt, self).setUp()
-
-    @unittest.expectedFailure
-    def test_no_return(self):
-        """
-        Test issue #1291
-        """
-        n = 10
-
-        @cuda.jit
-        def kernel():
-            for i in range(n):
-                temp = cuda_empty(2, np.float64)  # noqa: F841
-            return None
-
-        init_stats = rtsys.get_allocation_stats()
-
-        with patch('numba.config.CUDA_ENABLE_NRT', True, create=True):
-            kernel[1,1]()
-        cur_stats = rtsys.get_allocation_stats()
-        self.assertEqual(cur_stats.alloc - init_stats.alloc, n)
-        self.assertEqual(cur_stats.free - init_stats.free, n)
+from numba.cuda.runtime.nrt import rtsys
 
 
 class TestNrtBasic(CUDATestCase):
+    def run(self, result=None):
+        with override_config("CUDA_ENABLE_NRT", True):
+            super(TestNrtBasic, self).run(result)
+
     def test_nrt_launches(self):
         @cuda.jit
         def f(x):
@@ -52,8 +27,7 @@ class TestNrtBasic(CUDATestCase):
             x = cuda_empty(10, np.int64)
             f(x)
 
-        with patch('numba.config.CUDA_ENABLE_NRT', True, create=True):
-            g[1,1]()
+        g[1,1]()
         cuda.synchronize()
 
     def test_nrt_ptx_contains_refcount(self):
@@ -66,8 +40,7 @@ class TestNrtBasic(CUDATestCase):
             x = cuda_empty(10, np.int64)
             f(x)
 
-        with patch('numba.config.CUDA_ENABLE_NRT', True, create=True):
-            g[1,1]()
+        g[1,1]()
 
         ptx = next(iter(g.inspect_asm().values()))
 
@@ -100,11 +73,160 @@
 
         out_ary = np.zeros(1, dtype=np.int64)
 
-        with patch('numba.config.CUDA_ENABLE_NRT', True, create=True):
-            g[1,1](out_ary)
+        g[1,1](out_ary)
 
         self.assertEqual(out_ary[0], 1)
 
 
+class TestNrtStatistics(CUDATestCase):
+
+    def setUp(self):
+        self._stream = cuda.default_stream()
+        # Store the current stats state
+        self.__stats_state = rtsys.memsys_stats_enabled(self._stream)
+
+    def tearDown(self):
+        # Set stats state back to whatever it was before the test ran
+        if self.__stats_state:
+            rtsys.memsys_enable_stats(self._stream)
+        else:
+            rtsys.memsys_disable_stats(self._stream)
+
+    def test_stats_env_var_explicit_on(self):
+        # Checks that explicitly turning the stats on via the env var works.
+        src = """if 1:
+        from numba import cuda
+        from numba.cuda.runtime import rtsys
+        from numba.cuda.tests.nrt.mock_numpy import cuda_arange
+
+        @cuda.jit
+        def foo():
+            x = cuda_arange(10)[0]
+
+        # initialize the NRT before use
+        rtsys.initialize()
+        assert rtsys.memsys_stats_enabled(), "Stats not enabled"
+        orig_stats = rtsys.get_allocation_stats()
+        foo[1, 1]()
+        new_stats = rtsys.get_allocation_stats()
+        total_alloc = new_stats.alloc - orig_stats.alloc
+        total_free = new_stats.free - orig_stats.free
+        total_mi_alloc = new_stats.mi_alloc - orig_stats.mi_alloc
+        total_mi_free = new_stats.mi_free - orig_stats.mi_free
+
+        expected = 1
+        assert total_alloc == expected, \\
+            f"total_alloc != expected, {total_alloc} != {expected}"
+        assert total_free == expected, \\
+            f"total_free != expected, {total_free} != {expected}"
+        assert total_mi_alloc == expected, \\
+            f"total_mi_alloc != expected, {total_mi_alloc} != {expected}"
+        assert total_mi_free == expected, \\
+            f"total_mi_free != expected, {total_mi_free} != {expected}"
+        """
+
+        # Check env var explicitly being set works
+        env = os.environ.copy()
+        env['NUMBA_CUDA_NRT_STATS'] = "1"
+        env['NUMBA_CUDA_ENABLE_NRT'] = "1"
+        run_in_subprocess(src, env=env)
+
+    def check_env_var_off(self, env):
+
+        src = """if 1:
+        from numba import cuda
+        import numpy as np
+        from numba.cuda.runtime import rtsys
+
+        @cuda.jit
+        def foo():
+            arr = np.arange(10)[0]
+
+        assert rtsys.memsys_stats_enabled() == False
+        try:
+            rtsys.get_allocation_stats()
+        except RuntimeError as e:
+            assert "NRT stats are disabled." in str(e)
+        """
+        run_in_subprocess(src, env=env)
+
+    def test_stats_env_var_explicit_off(self):
+        # Checks that explicitly turning the stats off via the env var works.
+        env = os.environ.copy()
+        env['NUMBA_CUDA_NRT_STATS'] = "0"
+        self.check_env_var_off(env)
+
+    def test_stats_env_var_default_off(self):
+        # Checks that the env var not being set is the same as "off", i.e.
+        # default for Numba is off.
+        env = os.environ.copy()
+        env.pop('NUMBA_CUDA_NRT_STATS', None)
+        self.check_env_var_off(env)
+
+    def test_stats_status_toggle(self):
+
+        @cuda.jit
+        def foo():
+            tmp = cuda_ones(3)
+            arr = cuda_arange(5 * tmp[0])  # noqa: F841
+            return None
+
+        with override_config('CUDA_ENABLE_NRT', True):
+            # Switch on stats
+            rtsys.memsys_enable_stats()
+            # check the stats are on
+            self.assertTrue(rtsys.memsys_stats_enabled())
+
+            for i in range(2):
+                # capture the stats state
+                stats_1 = rtsys.get_allocation_stats()
+                # Switch off stats
+                rtsys.memsys_disable_stats()
+                # check the stats are off
+                self.assertFalse(rtsys.memsys_stats_enabled())
+                # run something that would move the counters were they enabled
+                foo[1, 1]()
+                # Switch on stats
+                rtsys.memsys_enable_stats()
+                # check the stats are on
+                self.assertTrue(rtsys.memsys_stats_enabled())
+                # capture the stats state (should not have changed)
+                stats_2 = rtsys.get_allocation_stats()
+                # run something that will move the counters
+                foo[1, 1]()
+                # capture the stats state (should have changed)
+                stats_3 = rtsys.get_allocation_stats()
+                # check stats_1 == stats_2
+                self.assertEqual(stats_1, stats_2)
+                # check stats_2 < stats_3
+                self.assertLess(stats_2, stats_3)
+
+    def test_rtsys_stats_query_raises_exception_when_disabled(self):
+        # Checks that the standard rtsys.get_allocation_stats() query raises
+        # when stats counters are turned off.
+
+        rtsys.memsys_disable_stats()
+        self.assertFalse(rtsys.memsys_stats_enabled())
+
+        with self.assertRaises(RuntimeError) as raises:
+            rtsys.get_allocation_stats()
+
+        self.assertIn("NRT stats are disabled.", str(raises.exception))
+
+    def test_nrt_explicit_stats_query_raises_exception_when_disabled(self):
+        # Checks the various memsys_get_stats functions raise if queried when
+        # the stats counters are disabled.
+        method_variations = ('alloc', 'free', 'mi_alloc', 'mi_free')
+        for meth in method_variations:
+            stats_func = getattr(rtsys, f'memsys_get_stats_{meth}')
+            with self.subTest(stats_func=stats_func):
+                # Turn stats off
+                rtsys.memsys_disable_stats()
+                self.assertFalse(rtsys.memsys_stats_enabled())
+                with self.assertRaises(RuntimeError) as raises:
+                    stats_func()
+                self.assertIn("NRT stats are disabled.", str(raises.exception))
+
+
 if __name__ == '__main__':
     unittest.main()
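The stats-oriented tests above all follow the same pattern: turn NRT on for the process, enable the memsys counters, and compare snapshots of `rtsys.get_allocation_stats()` around a kernel launch. A condensed sketch of that workflow (assuming a CUDA device with NRT support):

```python
import numpy as np
from numba import cuda
from numba.cuda.runtime.nrt import rtsys
from numba.cuda.tests.nrt.mock_numpy import cuda_empty
from numba.tests.support import override_config

@cuda.jit
def alloc_once():
    tmp = cuda_empty(2, np.float64)  # noqa: F841

with override_config("CUDA_ENABLE_NRT", True):
    rtsys.memsys_enable_stats()            # counters are off by default
    before = rtsys.get_allocation_stats()
    alloc_once[1, 1]()
    after = rtsys.get_allocation_stats()
    # Every allocation made in the kernel should have been freed again.
    assert after.alloc - before.alloc == after.free - before.free
```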
numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py (new file):

@@ -0,0 +1,114 @@
+import numpy as np
+import unittest
+from numba.tests.support import override_config
+from numba.cuda.runtime import rtsys
+from numba.cuda.tests.support import EnableNRTStatsMixin
+from numba.cuda.testing import CUDATestCase
+from numba.cuda.tests.nrt.mock_numpy import cuda_empty, cuda_empty_like
+
+from numba import cuda
+
+
+class TestNrtRefCt(EnableNRTStatsMixin, CUDATestCase):
+
+    def setUp(self):
+        super(TestNrtRefCt, self).setUp()
+
+    def tearDown(self):
+        super(TestNrtRefCt, self).tearDown()
+
+    def run(self, result=None):
+        with override_config("CUDA_ENABLE_NRT", True):
+            super(TestNrtRefCt, self).run(result)
+
+    def test_no_return(self):
+        """
+        Test issue #1291
+        """
+
+        n = 10
+
+        @cuda.jit
+        def kernel():
+            for i in range(n):
+                temp = cuda_empty(2, np.float64)  # noqa: F841
+            return None
+
+        init_stats = rtsys.get_allocation_stats()
+        kernel[1, 1]()
+        cur_stats = rtsys.get_allocation_stats()
+        self.assertEqual(cur_stats.alloc - init_stats.alloc, n)
+        self.assertEqual(cur_stats.free - init_stats.free, n)
+
+    def test_escaping_var_init_in_loop(self):
+        """
+        Test issue #1297
+        """
+
+        @cuda.jit
+        def g(n):
+
+            x = cuda_empty((n, 2), np.float64)
+
+            for i in range(n):
+                y = x[i]
+
+            for i in range(n):
+                y = x[i]  # noqa: F841
+
+            return None
+
+        init_stats = rtsys.get_allocation_stats()
+        g[1, 1](10)
+        cur_stats = rtsys.get_allocation_stats()
+        self.assertEqual(cur_stats.alloc - init_stats.alloc, 1)
+        self.assertEqual(cur_stats.free - init_stats.free, 1)
+
+    def test_invalid_computation_of_lifetime(self):
+        """
+        Test issue #1573
+        """
+        @cuda.jit
+        def if_with_allocation_and_initialization(arr1, test1):
+            tmp_arr = cuda_empty_like(arr1)
+
+            for i in range(tmp_arr.shape[0]):
+                pass
+
+            if test1:
+                cuda_empty_like(arr1)
+
+        arr = np.random.random((5, 5))  # the values are not consumed
+
+        init_stats = rtsys.get_allocation_stats()
+        if_with_allocation_and_initialization[1, 1](arr, False)
+        cur_stats = rtsys.get_allocation_stats()
+        self.assertEqual(cur_stats.alloc - init_stats.alloc,
+                         cur_stats.free - init_stats.free)
+
+    def test_del_at_beginning_of_loop(self):
+        """
+        Test issue #1734
+        """
+        @cuda.jit
+        def f(arr):
+            res = 0
+
+            for i in (0, 1):
+                # `del t` is issued here before defining t. It must be
+                # correctly handled by the lowering phase.
+                t = arr[i]
+                if t[i] > 1:
+                    res += t[i]
+
+        arr = np.ones((2, 2))
+
+        init_stats = rtsys.get_allocation_stats()
+        f[1, 1](arr)
+        cur_stats = rtsys.get_allocation_stats()
+        self.assertEqual(cur_stats.alloc - init_stats.alloc,
+                         cur_stats.free - init_stats.free)
+
+
+if __name__ == '__main__':
+    unittest.main()
numba_cuda/numba/cuda/tests/support.py (new file):

@@ -0,0 +1,11 @@
+from numba.cuda.runtime.nrt import rtsys
+
+
+class EnableNRTStatsMixin(object):
+    """Mixin to enable the NRT statistics counters."""
+
+    def setUp(self):
+        rtsys.memsys_enable_stats()
+
+    def tearDown(self):
+        rtsys.memsys_disable_stats()
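As `TestNrtRefCt` above shows, a test case opts in by listing the mixin before `CUDATestCase`, so that `super().setUp()` resolves to the mixin via the MRO. A minimal sketch:

```python
from numba.cuda.testing import CUDATestCase
from numba.cuda.tests.support import EnableNRTStatsMixin

class MyStatsTest(EnableNRTStatsMixin, CUDATestCase):
    def setUp(self):
        # Resolves to EnableNRTStatsMixin.setUp(), enabling the counters.
        super().setUp()
```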
numba_cuda/numba/cuda/utils.py (new file):

@@ -0,0 +1,22 @@
+import os
+import warnings
+import traceback
+
+
+def _readenv(name, ctor, default):
+    value = os.environ.get(name)
+    if value is None:
+        return default() if callable(default) else default
+    try:
+        if ctor is bool:
+            return value.lower() in {'1', "true"}
+        return ctor(value)
+    except Exception:
+        warnings.warn(
+            f"Environment variable '{name}' is defined but its associated "
+            f"value '{value}' could not be parsed.\n"
+            "The parse failed with exception:\n"
+            f"{traceback.format_exc()}",
+            RuntimeWarning
+        )
+        return default
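`_readenv` is a small helper for parsing typed environment variables, falling back to a default (and warning) when the value cannot be parsed. A hypothetical use, mirroring the NRT variables exercised in the tests above (the actual call sites and flag names inside numba-cuda may differ):

```python
from numba.cuda.utils import _readenv

# Hypothetical module-level flags; the names mirror the env vars used in
# the tests above, not necessarily the real call sites in numba-cuda.
ENABLE_NRT = _readenv("NUMBA_CUDA_ENABLE_NRT", bool, False)
NRT_STATS = _readenv("NUMBA_CUDA_NRT_STATS", bool, False)
```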
{numba_cuda-0.3.0.dist-info → numba_cuda-0.5.0.dist-info}/METADATA:

@@ -1,6 +1,6 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.2
 Name: numba-cuda
-Version: 0.3.0
+Version: 0.5.0
 Summary: CUDA target for Numba
 Author: Anaconda Inc., NVIDIA Corporation
 License: BSD 2-clause
@@ -27,7 +27,19 @@ tracker](https://github.com/NVIDIA/numba-cuda/issues).
 To raise questions or initiate discussions, please use the [Numba Discourse
 forum](https://numba.discourse.group).
 
-## Building from source
+## Installation with pip
+
+```shell
+pip install numba-cuda
+```
+
+## Installation with Conda
+
+```shell
+conda install -c conda-forge numba-cuda
+```
+
+## Installation from source
 
 Install as an editable install:
 
@@ -53,3 +65,9 @@ which will show a path like:
 ```
 <path to numba-cuda repo>/numba_cuda/numba/cuda/__init__.py
 ```
+
+## Contributing Guide
+
+Review the
+[CONTRIBUTING.md](https://github.com/NVIDIA/numba-cuda/blob/main/CONTRIBUTING.md)
+file for information on how to contribute code and issues to the project.