numba-cuda 0.6.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
numba_cuda/VERSION CHANGED
@@ -1 +1 @@
-0.6.0
+0.8.0

numba_cuda/numba/cuda/compiler.py CHANGED
@@ -427,6 +427,8 @@ def kernel_fixup(kernel, debug):
         if tm_name == 'types':
             types = tm_value
             types.operands = types.operands[1:]
+            if config.DUMP_LLVM:
+                types._clear_string_cache()
 
     # Mark as a kernel for NVVM
 

numba_cuda/numba/cuda/cudadrv/nvvm.py CHANGED
@@ -199,12 +199,52 @@ class NVVM(object):
 
 
 class CompilationUnit(object):
-    def __init__(self):
+    """
+    A CompilationUnit is a set of LLVM modules that are compiled to PTX or
+    LTO-IR with NVVM.
+
+    Compilation options are accepted as a dict mapping option names to values,
+    with the following considerations:
+
+    - Underscores (`_`) in option names are converted to dashes (`-`), to match
+      NVVM's option name format.
+    - Options that take a value will be emitted in the form "-<name>=<value>".
+    - Booleans passed as option values will be converted to integers.
+    - Options which take no value (such as `-gen-lto`) should have a value of
+      `None` and will be emitted in the form "-<name>".
+
+    For documentation on NVVM compilation options, see the CUDA Toolkit
+    Documentation:
+
+    https://docs.nvidia.com/cuda/libnvvm-api/index.html#_CPPv418nvvmCompileProgram11nvvmProgramiPPKc
+    """
+
+    def __init__(self, options):
         self.driver = NVVM()
         self._handle = nvvm_program()
         err = self.driver.nvvmCreateProgram(byref(self._handle))
         self.driver.check_error(err, 'Failed to create CU')
 
+        def stringify_option(k, v):
+            k = k.replace('_', '-')
+
+            if v is None:
+                return f'-{k}'.encode('utf-8')
+
+            if isinstance(v, bool):
+                v = int(v)
+
+            return f'-{k}={v}'.encode('utf-8')
+
+        options = [stringify_option(k, v) for k, v in options.items()]
+        option_ptrs = (c_char_p * len(options))(*[c_char_p(x) for x in options])
+
+        # We keep both the options and the pointers to them so that options are
+        # not destroyed before we've used their values
+        self.options = options
+        self.option_ptrs = option_ptrs
+        self.n_options = len(options)
+
     def __del__(self):
         driver = NVVM()
         err = driver.nvvmDestroyProgram(byref(self._handle))
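
Not part of the diff: the option-stringification rules documented above are easy to check in isolation. A minimal, runnable sketch — `stringify_option` is copied from `__init__` above, and the sample options dict is invented purely for illustration:

    def stringify_option(k, v):
        # Copied from CompilationUnit.__init__ above
        k = k.replace('_', '-')

        if v is None:
            return f'-{k}'.encode('utf-8')

        if isinstance(v, bool):
            v = int(v)

        return f'-{k}={v}'.encode('utf-8')

    # Hypothetical options, for illustration only
    opts = {'opt': 3, 'fma': True, 'prec_div': False, 'gen_lto': None}
    print([stringify_option(k, v) for k, v in opts.items()])
    # Prints: [b'-opt=3', b'-fma=1', b'-prec-div=0', b'-gen-lto']
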
@@ -230,60 +270,35 @@ class CompilationUnit(object):
                                          len(buffer), None)
         self.driver.check_error(err, 'Failed to add module')
 
-    def compile(self, **options):
-        """Perform Compilation.
-
-        Compilation options are accepted as keyword arguments, with the
-        following considerations:
-
-        - Underscores (`_`) in option names are converted to dashes (`-`), to
-          match NVVM's option name format.
-        - Options that take a value will be emitted in the form
-          "-<name>=<value>".
-        - Booleans passed as option values will be converted to integers.
-        - Options which take no value (such as `-gen-lto`) should have a value
-          of `None` passed in and will be emitted in the form "-<name>".
-
-        For documentation on NVVM compilation options, see the CUDA Toolkit
-        Documentation:
-
-        https://docs.nvidia.com/cuda/libnvvm-api/index.html#_CPPv418nvvmCompileProgram11nvvmProgramiPPKc
+    def verify(self):
         """
-
-        def stringify_option(k, v):
-            k = k.replace('_', '-')
-
-            if v is None:
-                return f'-{k}'
-
-            if isinstance(v, bool):
-                v = int(v)
-
-            return f'-{k}={v}'
-
-        options = [stringify_option(k, v) for k, v in options.items()]
-
-        c_opts = (c_char_p * len(options))(*[c_char_p(x.encode('utf8'))
-                                             for x in options])
-        # verify
-        err = self.driver.nvvmVerifyProgram(self._handle, len(options), c_opts)
+        Run the NVVM verifier on all code added to the compilation unit.
+        """
+        err = self.driver.nvvmVerifyProgram(self._handle, self.n_options,
+                                            self.option_ptrs)
        self._try_error(err, 'Failed to verify\n')
 
-        # compile
-        err = self.driver.nvvmCompileProgram(self._handle, len(options), c_opts)
+    def compile(self):
+        """
+        Compile all modules added to the compilation unit and return the
+        resulting PTX or LTO-IR (depending on the options).
+        """
+        err = self.driver.nvvmCompileProgram(self._handle, self.n_options,
+                                             self.option_ptrs)
         self._try_error(err, 'Failed to compile\n')
 
-        # get result
-        reslen = c_size_t()
-        err = self.driver.nvvmGetCompiledResultSize(self._handle, byref(reslen))
+        # Get result
+        result_size = c_size_t()
+        err = self.driver.nvvmGetCompiledResultSize(self._handle,
+                                                    byref(result_size))
 
         self._try_error(err, 'Failed to get size of compiled result.')
 
-        output_buffer = (c_char * reslen.value)()
+        output_buffer = (c_char * result_size.value)()
         err = self.driver.nvvmGetCompiledResult(self._handle, output_buffer)
         self._try_error(err, 'Failed to get compiled result.')
 
-        # get log
+        # Get log
         self.log = self.get_log()
         if self.log:
             warnings.warn(self.log, category=NvvmWarning)
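
Not part of the diff: with options now supplied at construction time, verification and compilation become separate, argument-free steps. A usage sketch (running it requires NVVM from a CUDA Toolkit; `llvm_ir` is an assumed string holding an LLVM module, and the options shown are hypothetical):

    cu = CompilationUnit({'opt': 3, 'gen_lto': None})
    cu.add_module(llvm_ir.encode('utf8'))
    cu.verify()            # nvvmVerifyProgram over everything added so far
    result = cu.compile()  # PTX, or LTO-IR when -gen-lto is given
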
@@ -615,40 +630,44 @@ def llvm_replace(llvmir):
     for decl, fn in replacements:
         llvmir = llvmir.replace(decl, fn)
 
-    llvmir = llvm140_to_70_ir(llvmir)
+    llvmir = llvm150_to_70_ir(llvmir)
 
     return llvmir
 
 
-def compile_ir(llvmir, **opts):
+def compile_ir(llvmir, **options):
     if isinstance(llvmir, str):
         llvmir = [llvmir]
 
-    if opts.pop('fastmath', False):
-        opts.update({
+    if options.pop('fastmath', False):
+        options.update({
             'ftz': True,
             'fma': True,
             'prec_div': False,
             'prec_sqrt': False,
         })
 
-    cu = CompilationUnit()
-    libdevice = LibDevice()
+    cu = CompilationUnit(options)
 
     for mod in llvmir:
         mod = llvm_replace(mod)
         cu.add_module(mod.encode('utf8'))
+    cu.verify()
+
+    # We add libdevice following verification so that it is not subject to the
+    # verifier's requirements
+    libdevice = LibDevice()
     cu.lazy_add_module(libdevice.get())
 
-    return cu.compile(**opts)
+    return cu.compile()
 
 
 re_attributes_def = re.compile(r"^attributes #\d+ = \{ ([\w\s]+)\ }")
 
 
-def llvm140_to_70_ir(ir):
+def llvm150_to_70_ir(ir):
     """
-    Convert LLVM 14.0 IR for LLVM 7.0.
+    Convert LLVM 15.0 IR for LLVM 7.0.
     """
     buf = []
     for line in ir.splitlines():
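
Not part of the diff: in `compile_ir` above, the `fastmath` shorthand simply expands into four NVVM options before the `CompilationUnit` is constructed. A standalone, runnable sketch of just that expansion (the extra 'opt' entry is invented for illustration):

    options = {'fastmath': True, 'opt': 3}
    if options.pop('fastmath', False):
        options.update({
            'ftz': True,
            'fma': True,
            'prec_div': False,
            'prec_sqrt': False,
        })
    print(options)
    # {'opt': 3, 'ftz': True, 'fma': True, 'prec_div': False, 'prec_sqrt': False}
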

numba_cuda/numba/cuda/dispatcher.py CHANGED
@@ -968,6 +968,10 @@ class CUDADispatcher(Dispatcher, serialize.ReduceMixin):
 
         A (template, pysig, args, kws) tuple is returned.
         """
+        # Fold keyword arguments and resolve default values
+        pysig, args = self._compiler.fold_argument_types(args, kws)
+        kws = {}
+
         # Ensure an exactly-matching overload is available if we can
         # compile. We proceed with the typing even if we can't compile
         # because we may be able to force a cast on the caller side.
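
Not part of the diff: conceptually, folding rewrites a call's keyword arguments and defaults into a purely positional argument list before typing. The real work happens in `self._compiler.fold_argument_types`, which operates on Numba types; the following runnable sketch only mimics the observable behaviour with `inspect.signature` and is an illustration, not the actual implementation:

    import inspect

    def fold(pyfunc, args, kws):
        # Bind positional + keyword arguments, fill in defaults, and
        # return the equivalent purely positional argument tuple.
        bound = inspect.signature(pyfunc).bind(*args, **kws)
        bound.apply_defaults()
        return tuple(bound.arguments.values())

    # Hypothetical kernel-like signature, mirroring the new test_overload case
    def kernel_like(out, x, y=5, z=6):
        pass

    print(fold(kernel_like, ([0, 0], 1), {'z': 2}))
    # ([0, 0], 1, 5, 2) -- keyword arguments and defaults folded to positional
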

numba_cuda/numba/cuda/runtime/nrt.py CHANGED
@@ -5,7 +5,8 @@ import numpy as np
 
 from numba import cuda, config
 from numba.core.runtime.nrt import _nrt_mstats
-from numba.cuda.cudadrv.driver import Linker, driver, launch_kernel
+from numba.cuda.cudadrv.driver import (Linker, driver, launch_kernel,
+                                       USE_NV_BINDING)
 from numba.cuda.cudadrv import devices
 from numba.cuda.api import get_current_device
 from numba.cuda.utils import _readenv
@@ -128,6 +129,18 @@ class _Runtime:
             cooperative=False
         )
 
+    def _ctypes_pointer(self, array):
+        """
+        Given an array, return a ctypes pointer to the data suitable for
+        passing to ``launch_kernel``.
+        """
+        ptr = array.device_ctypes_pointer
+
+        if USE_NV_BINDING:
+            ptr = ctypes.c_void_p(int(ptr))
+
+        return ptr
+
     def ensure_initialized(self, stream=None):
         """
         If memsys is not initialized, initialize memsys
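
Not part of the diff: the branch exists because the two driver bindings represent device pointers differently — the ctypes binding already yields a ctypes-compatible pointer, while the NVIDIA binding yields a CUdeviceptr-style object supporting int(). A tiny runnable sketch of the conversion, with a stand-in class in place of the real binding type:

    import ctypes

    class FakeCUdeviceptr:
        # Stand-in for the NVIDIA binding's pointer type (assumption,
        # consistent with the code above: it is convertible with int())
        def __init__(self, value):
            self._value = value

        def __int__(self):
            return self._value

    ptr = FakeCUdeviceptr(0xdeadbeef)  # hypothetical device address
    c_ptr = ctypes.c_void_p(int(ptr))  # what _ctypes_pointer produces
    print(hex(c_ptr.value))            # 0xdeadbeef
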
@@ -174,12 +187,13 @@ class _Runtime:
         context
         """
         enabled_ar = cuda.managed_array(1, np.uint8)
+        enabled_ptr = self._ctypes_pointer(enabled_ar)
 
         self._single_thread_launch(
             self._memsys_module,
             stream,
             "NRT_MemSys_stats_enabled",
-            (enabled_ar.device_ctypes_pointer,)
+            (enabled_ptr,)
         )
 
         cuda.synchronize()
@@ -198,12 +212,13 @@ class _Runtime:
         ])
 
         stats_for_read = cuda.managed_array(1, dt)
+        stats_ptr = self._ctypes_pointer(stats_for_read)
 
         self._single_thread_launch(
             self._memsys_module,
             stream,
             "NRT_MemSys_read",
-            [stats_for_read.device_ctypes_pointer]
+            [stats_ptr]
         )
         cuda.synchronize()
 
@@ -231,11 +246,13 @@ class _Runtime:
         Get a single stat from the memsys
         """
         got = cuda.managed_array(1, np.uint64)
+        got_ptr = self._ctypes_pointer(got)
+
         self._single_thread_launch(
             self._memsys_module,
             stream,
             f"NRT_MemSys_read_{stat}",
-            [got.device_ctypes_pointer]
+            [got_ptr]
         )
 
         cuda.synchronize()
@@ -294,11 +311,13 @@ class _Runtime:
             raise RuntimeError(
                 "Please allocate NRT Memsys first before setting to module.")
 
+        memsys_ptr = self._ctypes_pointer(self._memsys)
+
         self._single_thread_launch(
             module,
             stream,
             "NRT_MemSys_set",
-            [self._memsys.device_ctypes_pointer,]
+            [memsys_ptr]
         )
 
     @_alloc_init_guard

numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py CHANGED
@@ -261,7 +261,8 @@ class TestLinker(CUDATestCase):
 
 
 @unittest.skipIf(
-    not PYNVJITLINK_INSTALLED, reason="Pynvjitlink is not installed"
+    not PYNVJITLINK_INSTALLED or not TEST_BIN_DIR,
+    reason="pynvjitlink not enabled"
 )
 class TestLinkerUsage(CUDATestCase):
     """Test that whether pynvjitlink can be enabled by both environment variable

numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py CHANGED
@@ -1,4 +1,4 @@
-from numba.tests.support import override_config
+from numba.tests.support import (override_config, captured_stdout)
 from numba.cuda.testing import skip_on_cudasim
 from numba import cuda
 from numba.core import types
@@ -268,7 +268,7 @@ class TestCudaDebugInfo(CUDATestCase):
         three_device_fns(kernel_debug=False, leaf_debug=True)
         three_device_fns(kernel_debug=False, leaf_debug=False)
 
-    def test_kernel_args_types(self):
+    def _test_kernel_args_types(self):
         sig = (types.int32, types.int32)
 
         @cuda.jit("void(int32, int32)", debug=True, opt=False)
@@ -298,6 +298,15 @@ class TestCudaDebugInfo(CUDATestCase):
         match = re.compile(pat).search(llvm_ir)
         self.assertIsNotNone(match, msg=llvm_ir)
 
+    def test_kernel_args_types(self):
+        self._test_kernel_args_types()
+
+    def test_kernel_args_types_dump(self):
+        # see issue#135
+        with override_config('DUMP_LLVM', 1):
+            with captured_stdout():
+                self._test_kernel_args_types()
+
 
 if __name__ == '__main__':
     unittest.main()

numba_cuda/numba/cuda/tests/cudapy/test_overload.py CHANGED
@@ -56,6 +56,10 @@ def target_overloaded_calls_target_overloaded():
     pass
 
 
+def default_values_and_kwargs():
+    pass
+
+
 # To recognise which functions are resolved for a call, we identify each with a
 # prime number. Each function called multiplies a value by its prime (starting
 # with the value 1), and we can check that the result is as expected based on
@@ -185,6 +189,13 @@ def ol_generic_calls_target_overloaded_cuda(x):
     return impl
 
 
+@overload(default_values_and_kwargs)
+def ol_default_values_and_kwargs(out, x, y=5, z=6):
+    def impl(out, x, y=5, z=6):
+        out[0], out[1] = x + y, z
+    return impl
+
+
 @skip_on_cudasim('Overloading not supported in cudasim')
 class TestOverload(CUDATestCase):
     def check_overload(self, kernel, expected):
@@ -330,6 +341,18 @@ class TestOverload(CUDATestCase):
         def cuda_target_attr_use(res, dummy):
             res[0] = dummy.cuda_only
 
+    def test_default_values_and_kwargs(self):
+        """
+        Test default values and kwargs.
+        """
+        @cuda.jit()
+        def kernel(a, b, out):
+            default_values_and_kwargs(out, a, z=b)
+
+        out = np.empty(2, dtype=np.int64)
+        kernel[1,1](1, 2, out)
+        self.assertEqual(tuple(out), (6, 2))
+
 
 if __name__ == '__main__':
     unittest.main()

numba_cuda/numba/cuda/tests/nrt/test_nrt.py CHANGED
@@ -5,7 +5,6 @@ import numpy as np
 import unittest
 from numba.cuda.testing import CUDATestCase
 
-from numba.cuda.tests.nrt.mock_numpy import cuda_empty, cuda_ones, cuda_arange
 from numba.tests.support import run_in_subprocess, override_config
 
 from numba import cuda
@@ -24,7 +23,7 @@ class TestNrtBasic(CUDATestCase):
 
         @cuda.jit
         def g():
-            x = cuda_empty(10, np.int64)
+            x = np.empty(10, np.int64)
             f(x)
 
         g[1,1]()
@@ -37,7 +36,7 @@ class TestNrtBasic(CUDATestCase):
 
         @cuda.jit
         def g():
-            x = cuda_empty(10, np.int64)
+            x = np.empty(10, np.int64)
             f(x)
 
         g[1,1]()
@@ -66,7 +65,7 @@ class TestNrtBasic(CUDATestCase):
 
         @cuda.jit
         def g(out_ary):
-            x = cuda_empty(10, np.int64)
+            x = np.empty(10, np.int64)
             x[5] = 1
             y = f(x)
             out_ary[0] = y[0]
@@ -97,11 +96,11 @@ class TestNrtStatistics(CUDATestCase):
         src = """if 1:
         from numba import cuda
         from numba.cuda.runtime import rtsys
-        from numba.cuda.tests.nrt.mock_numpy import cuda_arange
+        import numpy as np
 
         @cuda.jit
         def foo():
-            x = cuda_arange(10)[0]
+            x = np.arange(10)[0]
 
         # initialize the NRT before use
         rtsys.initialize()
@@ -167,8 +166,8 @@ class TestNrtStatistics(CUDATestCase):
 
         @cuda.jit
         def foo():
-            tmp = cuda_ones(3)
-            arr = cuda_arange(5 * tmp[0])  # noqa: F841
+            tmp = np.ones(3)
+            arr = np.arange(5 * tmp[0])  # noqa: F841
             return None
 
         with (
@@ -230,6 +229,38 @@ class TestNrtStatistics(CUDATestCase):
                 stats_func()
             self.assertIn("NRT stats are disabled.", str(raises.exception))
 
+    def test_read_one_stat(self):
+        @cuda.jit
+        def foo():
+            tmp = np.ones(3)
+            arr = np.arange(5 * tmp[0])  # noqa: F841
+            return None
+
+        with (
+            override_config('CUDA_ENABLE_NRT', True),
+            override_config('CUDA_NRT_STATS', True)
+        ):
+
+            # Switch on stats
+            rtsys.memsys_enable_stats()
+
+            # Launch the kernel a couple of times to increase stats
+            foo[1, 1]()
+            foo[1, 1]()
+
+            # Get stats struct and individual stats
+            stats = rtsys.get_allocation_stats()
+            stats_alloc = rtsys.memsys_get_stats_alloc()
+            stats_mi_alloc = rtsys.memsys_get_stats_mi_alloc()
+            stats_free = rtsys.memsys_get_stats_free()
+            stats_mi_free = rtsys.memsys_get_stats_mi_free()
+
+            # Check individual stats match stats struct
+            self.assertEqual(stats.alloc, stats_alloc)
+            self.assertEqual(stats.mi_alloc, stats_mi_alloc)
+            self.assertEqual(stats.free, stats_free)
+            self.assertEqual(stats.mi_free, stats_mi_free)
+
 
 if __name__ == '__main__':
     unittest.main()

numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py CHANGED
@@ -4,7 +4,6 @@ from numba.tests.support import override_config
 from numba.cuda.runtime import rtsys
 from numba.cuda.tests.support import EnableNRTStatsMixin
 from numba.cuda.testing import CUDATestCase
-from numba.cuda.tests.nrt.mock_numpy import cuda_empty, cuda_empty_like
 
 from numba import cuda
 
@@ -34,7 +33,7 @@ class TestNrtRefCt(EnableNRTStatsMixin, CUDATestCase):
         @cuda.jit
         def kernel():
             for i in range(n):
-                temp = cuda_empty(2, np.float64)  # noqa: F841
+                temp = np.empty(2)  # noqa: F841
             return None
 
         init_stats = rtsys.get_allocation_stats()
@@ -51,7 +50,7 @@ class TestNrtRefCt(EnableNRTStatsMixin, CUDATestCase):
         @cuda.jit
         def g(n):
 
-            x = cuda_empty((n, 2), np.float64)
+            x = np.empty((n, 2))
 
             for i in range(n):
                 y = x[i]
@@ -73,13 +72,13 @@ class TestNrtRefCt(EnableNRTStatsMixin, CUDATestCase):
         """
         @cuda.jit
         def if_with_allocation_and_initialization(arr1, test1):
-            tmp_arr = cuda_empty_like(arr1)
+            tmp_arr = np.empty_like(arr1)
 
             for i in range(tmp_arr.shape[0]):
                 pass
 
             if test1:
-                cuda_empty_like(arr1)
+                np.empty_like(arr1)
 
         arr = np.random.random((5, 5))  # the values are not consumed
 

numba_cuda-0.8.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
-Metadata-Version: 2.2
+Metadata-Version: 2.4
 Name: numba-cuda
-Version: 0.6.0
+Version: 0.8.0
 Summary: CUDA target for Numba
 Author: Anaconda Inc., NVIDIA Corporation
 License: BSD 2-clause
@@ -12,6 +12,7 @@ Requires-Python: >=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: numba>=0.59.1
+Dynamic: license-file
 
 <div align="center"><img src="docs/source/_static/numba-green-icon-rgb.svg" width="200"/></div>
 
@@ -1,6 +1,6 @@
1
1
  _numba_cuda_redirector.pth,sha256=cmfMMmV0JPh3yEpl4bGeM9AuXiVVMSo6Z_b7RaQL3XE,30
2
2
  _numba_cuda_redirector.py,sha256=QKJmYICSQvjvph0Zw9OW015MsuKxIF28GPFjR35AXLM,2681
3
- numba_cuda/VERSION,sha256=l6XW5UCmEg0Jw53bZn4Ojiusf8wv_vgTuC4I_WA2W84,6
3
+ numba_cuda/VERSION,sha256=pmeA2iMQO-rxJDK0GFCJZrMaKjp4f5RopbbNqoZnwe8,6
4
4
  numba_cuda/__init__.py,sha256=atXeUvJKR3JHcAiCFbXCVOJQUHgB1TulmsqSL_9RT3Q,114
5
5
  numba_cuda/_version.py,sha256=jbdUsbR7sVllw0KxQNB0-FMd929CGg3kH2fhHdrlkuc,719
6
6
  numba_cuda/numba/cuda/__init__.py,sha256=idyVHOObC9lTYnp62v7rVprSacRM4d5F6vhXfG5ElTI,621
@@ -9,7 +9,7 @@ numba_cuda/numba/cuda/api_util.py,sha256=aQfUV2-4RM_oGVvckMjbMr5e3effOQNX04v1T0O
9
9
  numba_cuda/numba/cuda/args.py,sha256=HloHkw_PQal2DT-I70Xf_XbnGObS1jiUgcRrQ85Gq28,1978
10
10
  numba_cuda/numba/cuda/cg.py,sha256=9V1uZqyGOJX1aFd9c6GAPbLSqq83lE8LoP-vxxrKENY,1490
11
11
  numba_cuda/numba/cuda/codegen.py,sha256=ghdYBKZ3Mzk2UlLE64HkrAjb60PN9fibSNkWFRQuj4M,13184
12
- numba_cuda/numba/cuda/compiler.py,sha256=aWP_aunOOw8RZsTKf-S3YdH5MDkY6kLN5Xr5B2XgOfk,24214
12
+ numba_cuda/numba/cuda/compiler.py,sha256=zwTPJ7JkW3dj8rkAuYFh3jBSpT4qNow97YcXluhPatI,24323
13
13
  numba_cuda/numba/cuda/cpp_function_wrappers.cu,sha256=iv84_F6Q9kFjV_kclrQz1msh6Dud8mI3qNkswTid7Qc,953
14
14
  numba_cuda/numba/cuda/cuda_fp16.h,sha256=1IC0mdNdkvKbvAe0-f4uYVS7WFrVqOyI1nRUbBiqr6A,126844
15
15
  numba_cuda/numba/cuda/cuda_fp16.hpp,sha256=vJ7NUr2X2tKhAP7ojydAiCoOjVO6n4QGoXD6m9Srrlw,89130
@@ -22,7 +22,7 @@ numba_cuda/numba/cuda/decorators.py,sha256=MqmbEXVVgIV1G_feYtccKBRTDL0VALWf0Ljbr
22
22
  numba_cuda/numba/cuda/descriptor.py,sha256=rNMaurJkjNjIBmHPozDoLC35DMURE0fn_LtnXRmaG_w,985
23
23
  numba_cuda/numba/cuda/device_init.py,sha256=lP79tCsQ0Np9xcbjv_lXcH4JOiVZvV8nwg3INdETxsc,3586
24
24
  numba_cuda/numba/cuda/deviceufunc.py,sha256=yxAH71dpgJWK8okmCJm0FUV6z2AqdThCYOTZspT7z0M,30775
25
- numba_cuda/numba/cuda/dispatcher.py,sha256=j2nAjlqNAIAoQVCQ4ZQD--hQDsnFLXedlvaXdCMNKEc,44354
25
+ numba_cuda/numba/cuda/dispatcher.py,sha256=HxAlWeMHcipagMdGsfKOhggffUJBl2JB12h_MmWbPZ4,44500
26
26
  numba_cuda/numba/cuda/errors.py,sha256=XwWHzCllx0DXU6BQdoRH0m3pznGxnTFOBTVYXMmCfqg,1724
27
27
  numba_cuda/numba/cuda/extending.py,sha256=URsyBYls2te-mgE0yvDY6akvawYCA0blBFfD7Lf9DO4,142
28
28
  numba_cuda/numba/cuda/initialize.py,sha256=TQGHGLQoq4ch4J6CLDcJdGsZzXM-g2kDgdyO1u-Rbhg,546
@@ -60,7 +60,7 @@ numba_cuda/numba/cuda/cudadrv/linkable_code.py,sha256=bWBvnndrzWu24SXm7cilCwNFXS
60
60
  numba_cuda/numba/cuda/cudadrv/mappings.py,sha256=-dTPHvAkDjdH6vS5OjgrB71AFuqKO6CRgf7hpOk2wiw,802
61
61
  numba_cuda/numba/cuda/cudadrv/ndarray.py,sha256=HtULWWFyDlgqvrH5459yyPTvU4UbUo2DSdtcNfvbH00,473
62
62
  numba_cuda/numba/cuda/cudadrv/nvrtc.py,sha256=XM9_Vllv7HzH5wZIR2lwFictyX68XDtNbyLkXlL6NTI,11003
63
- numba_cuda/numba/cuda/cudadrv/nvvm.py,sha256=cAoQmZ0bO8i3wPTQq5D0UeMtfnXdGebqYpU4W0kUIEY,24237
63
+ numba_cuda/numba/cuda/cudadrv/nvvm.py,sha256=AgrWDNnGfIjvnTsQcEix60EnhFSI8Nbg7oOf5VWk87g,25038
64
64
  numba_cuda/numba/cuda/cudadrv/rtapi.py,sha256=WdeUoWzsYNYodx8kMRLVIjnNs0QzwpCihd2Q0AaqItE,226
65
65
  numba_cuda/numba/cuda/cudadrv/runtime.py,sha256=Tj9ACrzQqNmDSO6xfpzw12EsQknSywQ-ZGuWMbDdHnQ,4255
66
66
  numba_cuda/numba/cuda/kernels/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -70,7 +70,7 @@ numba_cuda/numba/cuda/runtime/__init__.py,sha256=rDi_pA5HnwpuwT8wwy0hparfO7HWgfj
70
70
  numba_cuda/numba/cuda/runtime/memsys.cu,sha256=5nTXrstrUBVLeLvnDUReyhRGvVILK--VdM1u3oUCa2o,2386
71
71
  numba_cuda/numba/cuda/runtime/memsys.cuh,sha256=4oDvs7LvcMmdkN58b8e0nBqPka_sdagoULSKRut74DY,503
72
72
  numba_cuda/numba/cuda/runtime/nrt.cu,sha256=WB7jQxT1bLdkY6Tm7-_ytVLjJxK4iU0OFifbPIpLwvw,5403
73
- numba_cuda/numba/cuda/runtime/nrt.py,sha256=pmacryGZn25IHjdRMwT2vZipdtu0xsjpPDic_hlRxkA,9195
73
+ numba_cuda/numba/cuda/runtime/nrt.py,sha256=j_LK8kNa3mla-Bkhoupmy4fgbM8ws0wqhiek0tbf3FQ,9683
74
74
  numba_cuda/numba/cuda/simulator/__init__.py,sha256=crW0VQ_8e7DMRSHKoAIziZ37ea5mpbh_49tR9M3d5YY,1610
75
75
  numba_cuda/numba/cuda/simulator/api.py,sha256=K_fX-w9X4grGx2IAp0XlBW9rth5l7wibMwinQvkE7Jc,3237
76
76
  numba_cuda/numba/cuda/simulator/compiler.py,sha256=eXnvmzSKzIZZzBz6ZFJ-vMNyRAgqbCiB-AO5IJXuUyM,232
@@ -111,7 +111,7 @@ numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py,sha256=0KPe4E9wOZsSV_0QI0Lmj
111
111
  numba_cuda/numba/cuda/tests/cudadrv/test_linker.py,sha256=_l2_EQEko2Jet5ooj4XMT0L4BjOuqLjbONGj1_MVI50,10161
112
112
  numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py,sha256=kYXYMkx_3GPAITKp4reLeM8KSzKkpxiC8nxnBvXpaTA,4979
113
113
  numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py,sha256=984jATSa01SRoSrVqxPeO6ujJ7w2jsnZa39ABInFLVI,1529
114
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py,sha256=oZywLDuX-l1LJvIIU4QCsE7UCwtIKbBP7u6GxVDpD_g,11316
114
+ numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py,sha256=EPS4-U2tnqFCG-QF-9j2POMKaYiWogHIbpknMwdYGD8,11335
115
115
  numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py,sha256=DF7KV5uh-yMztks0f47NhpalV64dvsNy-f8HY6GhAhE,7373
116
116
  numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py,sha256=u_TthSS2N-2J4eBIuF4PGg33AjD-wxly7MKpz0vRAKc,944
117
117
  numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py,sha256=MQWZx1j3lbEpWmIpQ1bV9szrGOV3VHN0QrEnJRjAhW4,508
@@ -146,7 +146,7 @@ numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py,sha256=73FCQbNaA
146
146
  numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py,sha256=y7cNQZOZJo5Sv16ql3E5QaRksw-U3RkXss9YDcNeiTk,2137
147
147
  numba_cuda/numba/cuda/tests/cudapy/test_datetime.py,sha256=2in1Cq8y9zAFoka7H72wF1D0awEd3n7bv56sUPgoNAQ,3508
148
148
  numba_cuda/numba/cuda/tests/cudapy/test_debug.py,sha256=3MYNiMe75rgBF1T0vsJ7r-nkW5jPvov_tDms9KXo2UU,3449
149
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py,sha256=jI43jMbPS9Rbr3YI2mZBrDwH9MGjmyVlczv7QxxPoAs,10993
149
+ numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py,sha256=SbeIIASsv5eZapp6i9KZlztN3-OPFiXg6YmtbYIh0eY,11288
150
150
  numba_cuda/numba/cuda/tests/cudapy/test_device_func.py,sha256=eDVymTQXTzW0WeAgTMDKYtOi1YAM310IUxGp3Y1ICjs,13162
151
151
  numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py,sha256=oX-l_L4H8rME1IolwhAyordSGJ152nnuqGAFdWjfgas,26587
152
152
  numba_cuda/numba/cuda/tests/cudapy/test_enums.py,sha256=0GWiwvZ1FTzSl1FfMxttkWaWrowASfXrSDT8XAR4ZHw,3560
@@ -182,7 +182,7 @@ numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py,sha256=MfCbyJZu1XsCJOCSw
182
182
  numba_cuda/numba/cuda/tests/cudapy/test_nondet.py,sha256=mYMX0R1tmBLRe5ZAwiDVFFuSyMuPav5guuqL3WHWGPY,1378
183
183
  numba_cuda/numba/cuda/tests/cudapy/test_operator.py,sha256=0nJej4D898_JU-jhlif44fR2yu42keK4GoCLP810l3U,13295
184
184
  numba_cuda/numba/cuda/tests/cudapy/test_optimization.py,sha256=IRTI-b7hwMaJxtxFRzoTjpzzeqWGzNyCJPT6C4GugX4,2925
185
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py,sha256=u4yUDVFcV9E3NWMlNjM81e3IW4KaIkcDtXig8JYevsw,8538
185
+ numba_cuda/numba/cuda/tests/cudapy/test_overload.py,sha256=27olU7CNkKD0vf4BgBHhEhWPbGm1Y83wfSsQtZMJvXg,9087
186
186
  numba_cuda/numba/cuda/tests/cudapy/test_powi.py,sha256=TI82rYRnkSnwv9VN6PMpBnr9JqMJ_F3HhH4cKY6O8tw,3276
187
187
  numba_cuda/numba/cuda/tests/cudapy/test_print.py,sha256=r2xmMNx80_ANi3uFB3CQt3AHAXG_JdhStY1S796hlK0,4466
188
188
  numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py,sha256=R88Vfgg3mSAZ0Jy6WT6dJNmkFTsxnVnEmO7XqpqyxuU,986
@@ -238,15 +238,14 @@ numba_cuda/numba/cuda/tests/nocuda/test_import.py,sha256=teiL8rpFGQOh41kyBSSNHHF
238
238
  numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py,sha256=7kJOPHEcrjy_kTA9Ym-iT_B972bgFRu3UkRtwIgWtuI,7948
239
239
  numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py,sha256=n0_-xFaw6QqiZbhe55oy7lnEeOwqTvA55p5EUFiTpNw,2006
240
240
  numba_cuda/numba/cuda/tests/nrt/__init__.py,sha256=43EXdiXXRBd6yIcVGMrU9F_EJCD9Uw3mzOP3SB53AEE,260
241
- numba_cuda/numba/cuda/tests/nrt/mock_numpy.py,sha256=Cx2DGhm2bJheShP2Ja1w9YLlRTeAMM7u1UYHsPnTzA8,4552
242
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py,sha256=wByXeagVoxsAu_pmfuYQ7vmeJt82h4VXwCBsDYQfsps,7727
243
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py,sha256=SnVvTis8YyaqsElRaGQ-34dnWgGavvc2Ovm2xZ_PD3Q,3240
241
+ numba_cuda/numba/cuda/tests/nrt/test_nrt.py,sha256=KYDhlWJRqtIonAEy0-bUvLwPTEVMCDbfFoAXrPWtvqA,8721
242
+ numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py,sha256=Xbxf07LNiiXzCA0rrmCSe_B-3Oi_8LwjNVVR3Lbc0Hc,3136
244
243
  numba_cuda/numba/cuda/tests/test_binary_generation/Makefile,sha256=P2WzCc5d64JGq6pJwHEwmKVmJOJxPBtsMTbnuzqYkik,2679
245
244
  numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py,sha256=V0raLZLGSiWbE_K-JluI0CnmNkXbhlMVj-TH7P1OV8E,5014
246
245
  numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu,sha256=cUf-t6ZM9MK_x7X_aKwsrKW1LdR97XcpR-qnYr5faOE,453
247
246
  numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu,sha256=q3oxZziT8KDodeNcEBiWULH6vMrHCWucmJmtrg8C0d0,128
248
- numba_cuda-0.6.0.dist-info/LICENSE,sha256=eHeYE-XjASmwbxfsP5AImgfzRwZurZGqH1f6OFwJ4io,1326
249
- numba_cuda-0.6.0.dist-info/METADATA,sha256=iNU56EXHsnAcAcwgNXglPh6H47Quz31_-6r9RevpJ_Q,1836
250
- numba_cuda-0.6.0.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
251
- numba_cuda-0.6.0.dist-info/top_level.txt,sha256=C50SsH-8tXDmt7I0Y3nlJYhS5s6pqWflCPdobe9vx2M,11
252
- numba_cuda-0.6.0.dist-info/RECORD,,
247
+ numba_cuda-0.8.0.dist-info/licenses/LICENSE,sha256=eHeYE-XjASmwbxfsP5AImgfzRwZurZGqH1f6OFwJ4io,1326
248
+ numba_cuda-0.8.0.dist-info/METADATA,sha256=-DMmEoMsrK2h_Xk6hZEovTXhUURYzw9W4KDrl13ehRM,1858
249
+ numba_cuda-0.8.0.dist-info/WHEEL,sha256=tTnHoFhvKQHCh4jz3yCn0WPTYIy7wXx3CJtJ7SJGV7c,91
250
+ numba_cuda-0.8.0.dist-info/top_level.txt,sha256=C50SsH-8tXDmt7I0Y3nlJYhS5s6pqWflCPdobe9vx2M,11
251
+ numba_cuda-0.8.0.dist-info/RECORD,,

numba_cuda-0.8.0.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (75.8.2)
+Generator: setuptools (77.0.1)
 Root-Is-Purelib: true
 Tag: py3-none-any
 

numba_cuda/numba/cuda/tests/nrt/mock_numpy.py REMOVED
@@ -1,146 +0,0 @@
-import math
-
-import numpy as np
-
-from numba.core import errors, types
-from numba.core.extending import overload
-from numba.np.arrayobj import (_check_const_str_dtype, is_nonelike,
-                               ty_parse_dtype, ty_parse_shape, numpy_empty_nd,
-                               numpy_empty_like_nd)
-
-
-# Typical tests for allocation use array construction (e.g. np.zeros, np.empty,
-# etc.) to induce allocations. These don't work in the CUDA target because they
-# need keyword arguments, which are presently not supported properly in the
-# CUDA target.
-#
-# To work around this, we can define our own function, that works like
-# the desired one, except that it uses only positional arguments.
-#
-# Once the CUDA target supports keyword arguments, this workaround will no
-# longer be necessary and the tests in this module should be switched to use
-# the relevant NumPy functions instead.
-def cuda_empty(shape, dtype):
-    pass
-
-
-def cuda_empty_like(arr):
-    pass
-
-
-def cuda_arange(start):
-    pass
-
-
-def cuda_ones(shape):
-    pass
-
-
-@overload(cuda_empty)
-def ol_cuda_empty(shape, dtype):
-    _check_const_str_dtype("empty", dtype)
-    if (dtype is float or
-            (isinstance(dtype, types.Function) and dtype.typing_key is float) or
-            is_nonelike(dtype)): #default
-        nb_dtype = types.double
-    else:
-        nb_dtype = ty_parse_dtype(dtype)
-
-    ndim = ty_parse_shape(shape)
-    if nb_dtype is not None and ndim is not None:
-        retty = types.Array(dtype=nb_dtype, ndim=ndim, layout='C')
-
-        def impl(shape, dtype):
-            return numpy_empty_nd(shape, dtype, retty)
-        return impl
-    else:
-        msg = f"Cannot parse input types to function np.empty({shape}, {dtype})"
-        raise errors.TypingError(msg)
-
-
-@overload(cuda_empty_like)
-def ol_cuda_empty_like(arr):
-
-    if isinstance(arr, types.Array):
-        nb_dtype = arr.dtype
-    else:
-        nb_dtype = arr
-
-    if isinstance(arr, types.Array):
-        layout = arr.layout if arr.layout != 'A' else 'C'
-        retty = arr.copy(dtype=nb_dtype, layout=layout, readonly=False)
-    else:
-        retty = types.Array(nb_dtype, 0, 'C')
-
-    def impl(arr):
-        dtype = None
-        return numpy_empty_like_nd(arr, dtype, retty)
-    return impl
-
-
-def _arange_dtype(*args):
-    bounds = [a for a in args if not isinstance(a, types.NoneType)]
-
-    if any(isinstance(a, types.Complex) for a in bounds):
-        dtype = types.complex128
-    elif any(isinstance(a, types.Float) for a in bounds):
-        dtype = types.float64
-    else:
-        # `np.arange(10).dtype` is always `np.dtype(int)`, aka `np.int_`, which
-        # in all released versions of numpy corresponds to the C `long` type.
-        # Windows 64 is broken by default here because Numba (as of 0.47) does
-        # not differentiate between Python and NumPy integers, so a `typeof(1)`
-        # on w64 is `int64`, i.e. `intp`. This means an arange(<some int>) will
-        # be typed as arange(int64) and the following will yield int64 opposed
-        # to int32. Example: without a load of analysis to work out of the args
-        # were wrapped in NumPy int*() calls it's not possible to detect the
-        # difference between `np.arange(10)` and `np.arange(np.int64(10)`.
-        NPY_TY = getattr(types, "int%s" % (8 * np.dtype(int).itemsize))
-
-        # unliteral these types such that `max` works.
-        unliteral_bounds = [types.unliteral(x) for x in bounds]
-        dtype = max(unliteral_bounds + [NPY_TY,])
-
-    return dtype
-
-
-@overload(cuda_arange)
-def ol_cuda_arange(start):
-    """Simplified arange with just 1 argument."""
-    if (not isinstance(start, types.Number)):
-        return
-
-    start_value = getattr(start, "literal_value", None)
-
-    def impl(start):
-        # Allow for improved performance if given literal arguments.
-        lit_start = start_value if start_value is not None else start
-
-        _step = 1
-        _start, _stop = 0, lit_start
-
-        nitems_c = (_stop - _start) / _step
-        nitems_r = int(math.ceil(nitems_c.real))
-
-        # Binary operator needed for compiler branch pruning.
-        nitems = max(nitems_r, 0)
-
-        arr = cuda_empty(nitems, np.int64)
-        val = _start
-        for i in range(nitems):
-            arr[i] = val + (i * _step)
-        return arr
-
-    return impl
-
-
-@overload(cuda_ones)
-def ol_cuda_ones(shape):
-
-    def impl(shape):
-        arr = cuda_empty(shape, np.float64)
-        arr_flat = arr.flat
-        for idx in range(len(arr_flat)):
-            arr_flat[idx] = 1
-        return arr
-    return impl