numba-cuda 0.18.1__py3-none-any.whl → 0.19.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of numba-cuda might be problematic. Click here for more details.

Files changed (88) hide show
  1. numba_cuda/VERSION +1 -1
  2. numba_cuda/numba/cuda/__init__.py +1 -1
  3. numba_cuda/numba/cuda/_internal/cuda_bf16.py +2 -2
  4. numba_cuda/numba/cuda/_internal/cuda_fp16.py +1 -1
  5. numba_cuda/numba/cuda/api.py +2 -7
  6. numba_cuda/numba/cuda/compiler.py +7 -4
  7. numba_cuda/numba/cuda/core/interpreter.py +3592 -0
  8. numba_cuda/numba/cuda/core/ir_utils.py +2645 -0
  9. numba_cuda/numba/cuda/core/sigutils.py +55 -0
  10. numba_cuda/numba/cuda/cuda_paths.py +9 -17
  11. numba_cuda/numba/cuda/cudadecl.py +1 -1
  12. numba_cuda/numba/cuda/cudadrv/driver.py +4 -19
  13. numba_cuda/numba/cuda/cudadrv/libs.py +1 -2
  14. numba_cuda/numba/cuda/cudadrv/nvrtc.py +44 -44
  15. numba_cuda/numba/cuda/cudadrv/nvvm.py +3 -18
  16. numba_cuda/numba/cuda/cudadrv/runtime.py +12 -1
  17. numba_cuda/numba/cuda/cudamath.py +1 -1
  18. numba_cuda/numba/cuda/decorators.py +4 -3
  19. numba_cuda/numba/cuda/deviceufunc.py +2 -1
  20. numba_cuda/numba/cuda/dispatcher.py +3 -2
  21. numba_cuda/numba/cuda/extending.py +1 -1
  22. numba_cuda/numba/cuda/itanium_mangler.py +211 -0
  23. numba_cuda/numba/cuda/libdevicedecl.py +1 -1
  24. numba_cuda/numba/cuda/libdevicefuncs.py +1 -1
  25. numba_cuda/numba/cuda/lowering.py +1 -1
  26. numba_cuda/numba/cuda/simulator/api.py +1 -1
  27. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +0 -7
  28. numba_cuda/numba/cuda/target.py +1 -2
  29. numba_cuda/numba/cuda/testing.py +4 -6
  30. numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +80 -0
  31. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +1 -1
  32. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +1 -1
  33. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +1 -1
  34. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +1 -1
  35. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +1 -1
  36. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +1 -1
  37. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +1 -1
  38. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +4 -6
  39. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +0 -4
  40. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +1 -1
  41. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +1 -3
  42. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +1 -3
  43. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +146 -3
  44. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +1 -1
  45. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +0 -4
  46. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +1 -1
  47. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +1 -1
  48. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +1 -1
  49. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +1 -284
  50. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +473 -0
  51. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +1 -1
  52. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +1 -1
  53. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1 -6
  54. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +1 -1
  55. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +1 -1
  56. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +295 -0
  57. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +1 -1
  58. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +1 -1
  59. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +1 -1
  60. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +5 -1
  61. numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +1 -1
  62. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +1 -1
  63. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +1 -1
  64. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +1 -1
  65. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +1 -1
  66. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +1 -1
  67. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +1 -1
  68. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +1 -1
  69. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +1 -1
  70. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +1 -1
  71. numba_cuda/numba/cuda/tests/nocuda/test_import.py +1 -1
  72. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +2 -2
  73. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +1 -1
  74. numba_cuda/numba/cuda/tests/support.py +752 -0
  75. numba_cuda/numba/cuda/tests/test_binary_generation/Makefile +3 -3
  76. numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +4 -1
  77. numba_cuda/numba/cuda/typing/__init__.py +8 -0
  78. numba_cuda/numba/cuda/typing/templates.py +1453 -0
  79. numba_cuda/numba/cuda/vector_types.py +3 -3
  80. {numba_cuda-0.18.1.dist-info → numba_cuda-0.19.0.dist-info}/METADATA +21 -28
  81. {numba_cuda-0.18.1.dist-info → numba_cuda-0.19.0.dist-info}/RECORD +84 -79
  82. numba_cuda/numba/cuda/include/11/cuda_bf16.h +0 -3749
  83. numba_cuda/numba/cuda/include/11/cuda_bf16.hpp +0 -2683
  84. numba_cuda/numba/cuda/include/11/cuda_fp16.h +0 -3794
  85. numba_cuda/numba/cuda/include/11/cuda_fp16.hpp +0 -2614
  86. {numba_cuda-0.18.1.dist-info → numba_cuda-0.19.0.dist-info}/WHEEL +0 -0
  87. {numba_cuda-0.18.1.dist-info → numba_cuda-0.19.0.dist-info}/licenses/LICENSE +0 -0
  88. {numba_cuda-0.18.1.dist-info → numba_cuda-0.19.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,55 @@
1
+ from numba.core import types, typing
2
+
3
+
4
def is_signature(sig):
    """
    Tell whether *sig* could be a signature specification accepted by
    user-facing APIs.

    Only the kind of object is inspected: strings, tuples and
    ``typing.Signature`` instances qualify. The contents are not validated.
    """
    accepted_kinds = (str, tuple, typing.Signature)
    return isinstance(sig, accepted_kinds)
10
+
11
+
12
+ def _parse_signature_string(signature_str):
13
+ """
14
+ Parameters
15
+ ----------
16
+ signature_str : str
17
+ """
18
+ # Just eval signature_str using the types submodules as globals
19
+ return eval(signature_str, {}, types.__dict__)
20
+
21
+
22
def normalize_signature(sig):
    """
    Turn a signature specification into an ``(args, return_type)`` pair.

    *sig* may be a string (evaluated by ``_parse_signature_string``), a tuple
    of argument types (no return type, so ``return_type`` is None), or a
    ``typing.Signature`` (whose ``args`` and ``return_type`` are used).

    Raises TypeError if the specification is neither a tuple nor a Signature
    once parsed, or if the return type or any argument is not a
    ``types.Type`` instance.
    """
    spec = _parse_signature_string(sig) if isinstance(sig, str) else sig

    if isinstance(spec, tuple):
        arg_types = spec
        ret_type = None
    elif isinstance(spec, typing.Signature):
        arg_types = spec.args
        ret_type = spec.return_type
    else:
        raise TypeError(
            "invalid signature: %r (type: %r) evaluates to %r "
            "instead of tuple or Signature"
            % (sig, sig.__class__.__name__, spec.__class__.__name__)
        )

    def _require_type_instance(candidate):
        if isinstance(candidate, types.Type):
            return
        raise TypeError(
            "invalid type in signature: expected a type "
            "instance, got %r" % (candidate,)
        )

    # The return type (when given) is validated before the arguments.
    if ret_type is not None:
        _require_type_instance(ret_type)
    for arg_type in arg_types:
        _require_type_instance(arg_type)

    return arg_types, ret_type
@@ -148,7 +148,6 @@ def get_nvrtc_dso_path():
148
148
  # Check for each version of the NVRTC DLL, preferring the most
149
149
  # recent.
150
150
  versions = (
151
- "112" if IS_WIN32 else "11.2",
152
151
  "120" if IS_WIN32 else "12",
153
152
  "130" if IS_WIN32 else "13",
154
153
  )
@@ -303,16 +302,16 @@ def get_nvidia_nvvm_ctk():
303
302
 
304
303
  # Assume the existence of NVVM in the conda env implies that a CUDA toolkit
305
304
  # conda package is installed.
305
+ if IS_WIN32:
306
+ # The path used on Windows
307
+ libdir = os.path.join(sys.prefix, "Library", "nvvm", _cudalib_path())
308
+ else:
309
+ # The path used on Linux is different to that on Windows
310
+ libdir = os.path.join(sys.prefix, "nvvm", _cudalib_path())
306
311
 
307
- # First, try the location used on Linux and the Windows 11.x packages
308
- libdir = os.path.join(sys.prefix, "nvvm", _cudalib_path())
309
312
  if not os.path.exists(libdir) or not os.path.isdir(libdir):
310
- # If that fails, try the location used for Windows 12.x packages
311
- libdir = os.path.join(sys.prefix, "Library", "nvvm", _cudalib_path())
312
- if not os.path.exists(libdir) or not os.path.isdir(libdir):
313
- # If that doesn't exist either, assume we don't have the NVIDIA
314
- # conda package
315
- return
313
+ # If the path doesn't exist, we didn't find the NVIDIA conda package
314
+ return
316
315
 
317
316
  paths = find_lib("nvvm", libdir=libdir)
318
317
  if not paths:
@@ -346,15 +345,8 @@ def get_nvidia_static_cudalib_ctk():
346
345
  if not nvvm_ctk:
347
346
  return
348
347
 
349
- if IS_WIN32 and ("Library" not in nvvm_ctk):
350
- # Location specific to CUDA 11.x packages on Windows
351
- dirs = ("Lib", "x64")
352
- else:
353
- # Linux, or Windows with CUDA 12.x packages
354
- dirs = ("lib",)
355
-
356
348
  env_dir = os.path.dirname(os.path.dirname(nvvm_ctk))
357
- return os.path.join(env_dir, *dirs)
349
+ return os.path.join(env_dir, "lib")
358
350
 
359
351
 
360
352
  def get_cuda_home(*subdirs):
@@ -9,7 +9,7 @@ from numba.core.typing.npydecl import (
9
9
  math_operations,
10
10
  bit_twiddling_functions,
11
11
  )
12
- from numba.core.typing.templates import (
12
+ from numba.cuda.typing.templates import (
13
13
  AttributeTemplate,
14
14
  ConcreteTemplate,
15
15
  AbstractTemplate,
@@ -54,12 +54,6 @@ from .linkable_code import LinkableCode, LTOIR, Fatbin, Object
54
54
  from numba.cuda.utils import cached_file_read
55
55
  from numba.cuda.cudadrv import enums, drvapi, nvrtc
56
56
 
57
- try:
58
- from pynvjitlink.api import NvJitLinker, NvJitLinkError
59
- except ImportError:
60
- NvJitLinker, NvJitLinkError = None, None
61
-
62
-
63
57
  USE_NV_BINDING = config.CUDA_USE_NVIDIA_BINDING
64
58
 
65
59
  if USE_NV_BINDING:
@@ -640,7 +634,7 @@ class Device(object):
640
634
 
641
635
  if USE_NV_BINDING:
642
636
  buf = driver.cuDeviceGetName(bufsz, self.id)
643
- name = buf.decode("utf-8").rstrip("\0")
637
+ name = buf.split(b"\x00")[0]
644
638
  else:
645
639
  buf = (c_char * bufsz)()
646
640
  driver.cuDeviceGetName(buf, bufsz, self.id)
@@ -2808,19 +2802,10 @@ class _LinkerBase(metaclass=ABCMeta):
2808
2802
  lto=None,
2809
2803
  additional_flags=None,
2810
2804
  ):
2811
- driver_ver = driver.get_version()
2812
- if driver_ver < (12, 0):
2813
- if config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY:
2814
- linker = MVCLinker
2815
- elif USE_NV_BINDING:
2816
- linker = _Linker
2817
- else:
2818
- linker = CtypesLinker
2805
+ if USE_NV_BINDING:
2806
+ linker = _Linker
2819
2807
  else:
2820
- if USE_NV_BINDING:
2821
- linker = _Linker
2822
- else:
2823
- linker = CtypesLinker
2808
+ linker = CtypesLinker
2824
2809
 
2825
2810
  params = (max_registers, lineinfo, cc)
2826
2811
  if linker is _Linker:
@@ -2,8 +2,7 @@
2
2
 
3
3
  CUDA Toolkit libraries can be available via either:
4
4
 
5
- - the `cuda-nvcc` and `cuda-nvrtc` conda packages for CUDA 12,
6
- - the `cudatoolkit` conda package for CUDA 11,
5
+ - the `cuda-nvcc` and `cuda-nvrtc` conda packages,
7
6
  - a user supplied location from CUDA_HOME,
8
7
  - a system wide location,
9
8
  - package-specific locations (e.g. the Debian NVIDIA packages),
@@ -29,6 +29,7 @@ nvrtc_program = c_void_p
29
29
  nvrtc_result = c_int
30
30
 
31
31
  if config.CUDA_USE_NVIDIA_BINDING:
32
+ from cuda.bindings import nvrtc as bindings_nvrtc
32
33
  from cuda.core.experimental import Program, ProgramOptions
33
34
 
34
35
 
@@ -142,6 +143,10 @@ class NVRTC:
142
143
 
143
144
  def __new__(cls):
144
145
  with _nvrtc_lock:
146
+ if config.CUDA_USE_NVIDIA_BINDING:
147
+ raise RuntimeError(
148
+ "NVRTC objects should not be used with cuda-python bindings"
149
+ )
145
150
  if cls.__INSTANCE is None:
146
151
  from numba.cuda.cudadrv.libs import open_cudalib
147
152
 
@@ -154,16 +159,9 @@ class NVRTC:
154
159
 
155
160
  # Find & populate functions
156
161
  for name, proto in inst._PROTOTYPES.items():
157
- try:
158
- func = getattr(lib, name)
159
- func.restype = proto[0]
160
- func.argtypes = proto[1:]
161
- except AttributeError:
162
- if "LTOIR" in name:
163
- # CUDA 11 does not have LTOIR functions; ignore
164
- continue
165
- else:
166
- raise
162
+ func = getattr(lib, name)
163
+ func.restype = proto[0]
164
+ func.argtypes = proto[1:]
167
165
 
168
166
  @functools.wraps(func)
169
167
  def checked_call(*args, func=func, name=name):
@@ -303,32 +301,35 @@ def compile(src, name, cc, ltoir=False):
303
301
  :return: The compiled PTX and compilation log
304
302
  :rtype: tuple
305
303
  """
306
- nvrtc = NVRTC()
307
- program = nvrtc.create_program(src, name)
308
304
 
309
- version = nvrtc.get_version()
310
- ver_str = lambda v: ".".join(v)
311
- if version < (11, 2):
312
- raise RuntimeError(
313
- "Unsupported CUDA version. CUDA 11.2 or higher is required."
314
- )
315
- else:
316
- supported_arch = nvrtc.get_supported_archs()
317
- try:
318
- found = max(filter(lambda v: v <= cc, [v for v in supported_arch]))
319
- except ValueError:
305
+ if config.CUDA_USE_NVIDIA_BINDING:
306
+ retcode, *version = bindings_nvrtc.nvrtcVersion()
307
+ if retcode != bindings_nvrtc.nvrtcResult.NVRTC_SUCCESS:
320
308
  raise RuntimeError(
321
- f"Device compute capability {ver_str(cc)} is less than the "
322
- f"minimum supported by NVRTC {ver_str(version)}. Supported "
323
- "compute capabilities are "
324
- f"{', '.join([ver_str(v) for v in supported_arch])}."
309
+ f"{retcode.name} when calling nvrtcGetSupportedArchs()"
325
310
  )
311
+ version = tuple(version)
312
+ else:
313
+ nvrtc = NVRTC()
314
+ version = nvrtc.get_version()
326
315
 
327
- if found != cc:
328
- warnings.warn(
329
- f"Device compute capability {ver_str(cc)} is not supported by "
330
- f"NVRTC {ver_str(version)}. Using {ver_str(found)} instead."
331
- )
316
+ ver_str = lambda version: ".".join(str(v) for v in version)
317
+ supported_ccs = get_supported_ccs()
318
+ try:
319
+ found = max(filter(lambda v: v <= cc, [v for v in supported_ccs]))
320
+ except ValueError:
321
+ raise RuntimeError(
322
+ f"Device compute capability {ver_str(cc)} is less than the "
323
+ f"minimum supported by NVRTC {ver_str(version)}. Supported "
324
+ "compute capabilities are "
325
+ f"{', '.join([ver_str(v) for v in supported_ccs])}."
326
+ )
327
+
328
+ if found != cc:
329
+ warnings.warn(
330
+ f"Device compute capability {ver_str(cc)} is not supported by "
331
+ f"NVRTC {ver_str(version)}. Using {ver_str(found)} instead."
332
+ )
332
333
 
333
334
  # Compilation options:
334
335
  # - Compile for the current device's compute capability.
@@ -348,16 +349,10 @@ def compile(src, name, cc, ltoir=False):
348
349
  f"{os.path.join(cuda_include_dir, 'cccl')}",
349
350
  ]
350
351
 
351
- nvrtc_version = nvrtc.get_version()
352
- nvrtc_ver_major = nvrtc_version[0]
353
-
354
352
  cudadrv_path = os.path.dirname(os.path.abspath(__file__))
355
353
  numba_cuda_path = os.path.dirname(cudadrv_path)
356
354
 
357
- if nvrtc_ver_major == 11:
358
- numba_include = f"{os.path.join(numba_cuda_path, 'include', '11')}"
359
- else:
360
- numba_include = f"{os.path.join(numba_cuda_path, 'include', '12')}"
355
+ numba_include = f"{os.path.join(numba_cuda_path, 'include', '12')}"
361
356
 
362
357
  if config.CUDA_NVRTC_EXTRA_SEARCH_PATHS:
363
358
  extra_includes = config.CUDA_NVRTC_EXTRA_SEARCH_PATHS.split(":")
@@ -373,7 +368,6 @@ def compile(src, name, cc, ltoir=False):
373
368
  arch=arch,
374
369
  include_path=includes,
375
370
  relocatable_device_code=True,
376
- std="c++17" if nvrtc_version < (12, 0) else None,
377
371
  link_time_optimization=ltoir,
378
372
  name=name,
379
373
  )
@@ -399,6 +393,7 @@ def compile(src, name, cc, ltoir=False):
399
393
  return result, log
400
394
 
401
395
  else:
396
+ program = nvrtc.create_program(src, name)
402
397
  includes = [f"-I{path}" for path in includes]
403
398
  options = [
404
399
  arch,
@@ -410,9 +405,6 @@ def compile(src, name, cc, ltoir=False):
410
405
  if ltoir:
411
406
  options.append("-dlto")
412
407
 
413
- if nvrtc_version < (12, 0):
414
- options.append("-std=c++17")
415
-
416
408
  # Compile the program
417
409
  compile_error = nvrtc.compile_program(program, options)
418
410
 
@@ -482,4 +474,12 @@ def get_lowest_supported_cc():
482
474
 
483
475
 
484
476
def get_supported_ccs():
    """
    Return the compute capabilities supported by NVRTC.

    Without the NVIDIA bindings, the result of
    ``NVRTC().get_supported_archs()`` is returned unchanged. With the
    bindings, each integer reported by ``nvrtcGetSupportedArchs()`` is split
    into a ``(value // 10, value % 10)`` tuple.

    Raises RuntimeError if the bindings call does not report NVRTC_SUCCESS.
    """
    if not config.CUDA_USE_NVIDIA_BINDING:
        return NVRTC().get_supported_archs()

    status, arch_codes = bindings_nvrtc.nvrtcGetSupportedArchs()
    if status != bindings_nvrtc.nvrtcResult.NVRTC_SUCCESS:
        raise RuntimeError(
            f"{status.name} when calling nvrtcGetSupportedArchs()"
        )
    # divmod(code, 10) gives the same (code // 10, code % 10) pair.
    return [divmod(code, 10) for code in arch_codes]
@@ -47,14 +47,7 @@ NVVM_ERROR_COMPILATION
47
47
  for i, k in enumerate(RESULT_CODE_NAMES):
48
48
  setattr(sys.modules[__name__], k, i)
49
49
 
50
- # Data layouts. NVVM IR 1.8 (CUDA 11.6) introduced 128-bit integer support.
51
-
52
- _datalayout_original = (
53
- "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-"
54
- "i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-"
55
- "v64:64:64-v128:128:128-n16:32:64"
56
- )
57
- _datalayout_i128 = (
50
+ _datalayout = (
58
51
  "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-"
59
52
  "i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-"
60
53
  "v64:64:64-v128:128:128-n16:32:64"
@@ -182,10 +175,7 @@ class NVVM(object):
182
175
 
183
176
  @property
184
177
  def data_layout(self):
185
- if (self._majorIR, self._minorIR) < (1, 8):
186
- return _datalayout_original
187
- else:
188
- return _datalayout_i128
178
+ return _datalayout
189
179
 
190
180
  def get_version(self):
191
181
  major = c_int()
@@ -346,14 +336,9 @@ class CompilationUnit(object):
346
336
 
347
337
 
348
338
  MISSING_LIBDEVICE_FILE_MSG = """Missing libdevice file.
349
- Please ensure you have a CUDA Toolkit 11.2 or higher.
350
- For CUDA 12, ``cuda-nvcc`` and ``cuda-nvrtc`` are required:
339
+ ``cuda-nvcc`` and ``cuda-nvrtc`` are required:
351
340
 
352
341
  $ conda install -c conda-forge cuda-nvcc cuda-nvrtc "cuda-version>=12.0"
353
-
354
- For CUDA 11, ``cudatoolkit`` is required:
355
-
356
- $ conda install -c conda-forge cudatoolkit "cuda-version>=11.2,<12.0"
357
342
  """
358
343
 
359
344
 
@@ -5,12 +5,23 @@ The toolkit version can now be obtained from NVRTC, so we don't use a binding
5
5
  to the runtime anymore. This file is provided to maintain the existing API.
6
6
  """
7
7
 
8
+ from numba import config
8
9
  from numba.cuda.cudadrv.nvrtc import NVRTC
9
10
 
10
11
 
11
12
class Runtime:
    """Provides ``get_version()`` so that the existing runtime API is kept."""

    def get_version(self):
        """
        Return the version reported by NVRTC.

        Without the NVIDIA bindings, this is whatever
        ``NVRTC().get_version()`` returns. With the bindings, it is a tuple
        of the values that follow the status code in the result of
        ``nvrtcVersion()`` (presumably ``(major, minor)`` -- confirm against
        the bindings documentation).

        Raises RuntimeError if the bindings call does not report
        NVRTC_SUCCESS.
        """
        if not config.CUDA_USE_NVIDIA_BINDING:
            return NVRTC().get_version()

        # Imported here so the bindings are only needed when they are in use.
        from cuda.bindings import nvrtc

        retcode, *version = nvrtc.nvrtcVersion()
        if retcode != nvrtc.nvrtcResult.NVRTC_SUCCESS:
            # Name the function that was actually called (nvrtcVersion).
            raise RuntimeError(f"{retcode.name} when calling nvrtcVersion()")
        return tuple(version)
14
25
 
15
26
 
16
27
  runtime = Runtime()
@@ -1,6 +1,6 @@
1
1
  import math
2
2
  from numba.core import types
3
- from numba.core.typing.templates import ConcreteTemplate, signature, Registry
3
+ from numba.cuda.typing.templates import ConcreteTemplate, signature, Registry
4
4
 
5
5
 
6
6
  registry = Registry()
@@ -1,7 +1,8 @@
1
1
  from warnings import warn
2
- from numba.core import types, config, sigutils
2
+ from numba.core import types, config
3
3
  from numba.core.errors import DeprecationError, NumbaInvalidConfigWarning
4
4
  from numba.cuda.compiler import declare_device_function
5
+ from numba.cuda.core import sigutils
5
6
  from numba.cuda.dispatcher import CUDADispatcher
6
7
  from numba.cuda.simulator.kernel import FakeCUDAKernel
7
8
  from numba.cuda.cudadrv.driver import _have_nvjitlink
@@ -86,7 +87,7 @@ def jit(
86
87
  number of threads per block.
87
88
  :type launch_bounds: int | tuple[int]
88
89
  :param lto: Whether to enable LTO. If unspecified, LTO is enabled by
89
- default when pynvjitlink is available, except for kernels where
90
+ default when nvjitlink is available, except for kernels where
90
91
  ``debug=True``.
91
92
  :type lto: bool
92
93
  """
@@ -143,7 +144,7 @@ def jit(
143
144
  raise ValueError("link keyword invalid for device function")
144
145
 
145
146
  if lto is None:
146
- # Default to using LTO if pynvjitlink is available and we're not debugging
147
+ # Default to using LTO if nvjitlink is available and we're not debugging
147
148
  lto = _have_nvjitlink() and not debug
148
149
  else:
149
150
  if lto and not _have_nvjitlink():
@@ -11,8 +11,9 @@ from functools import reduce
11
11
  import numpy as np
12
12
 
13
13
  from numba.np.ufunc.ufuncbuilder import _BaseUFuncBuilder, parse_identity
14
- from numba.core import types, sigutils
14
+ from numba.core import types
15
15
  from numba.core.typing import signature
16
+ from numba.cuda.core import sigutils
16
17
  from numba.np.ufunc.sigparse import parse_signature
17
18
 
18
19
 
@@ -8,13 +8,13 @@ import types as pytypes
8
8
  import weakref
9
9
  import uuid
10
10
 
11
- from numba.core import compiler, sigutils, types, typing, config
11
+ from numba.core import compiler, types, typing, config
12
12
  from numba.cuda import serialize, utils
13
13
  from numba.cuda.core.caching import Cache, CacheImpl, NullCache
14
14
  from numba.core.compiler_lock import global_compiler_lock
15
15
  from numba.core.dispatcher import _DispatcherBase
16
16
  from numba.core.errors import NumbaPerformanceWarning, TypingError
17
- from numba.core.typing.templates import fold_arguments
17
+ from numba.cuda.typing.templates import fold_arguments
18
18
  from numba.core.typing.typeof import Purpose, typeof
19
19
  from numba.cuda.api import get_current_device
20
20
  from numba.cuda.args import wrap_arg
@@ -23,6 +23,7 @@ from numba.cuda.compiler import (
23
23
  CUDACompiler,
24
24
  kernel_fixup,
25
25
  )
26
+ from numba.cuda.core import sigutils
26
27
  import re
27
28
  from numba.cuda.cudadrv import driver, nvvm
28
29
  from numba.cuda.cudadrv.linkable_code import LinkableCode
@@ -18,7 +18,7 @@ def make_attribute_wrapper(typeclass, struct_attr, python_attr):
18
18
  Vendored from numba.core.extending with a change to consider the CUDA data
19
19
  model manager.
20
20
  """
21
- from numba.core.typing.templates import AttributeTemplate
21
+ from numba.cuda.typing.templates import AttributeTemplate
22
22
 
23
23
  from numba.core.datamodel import default_manager
24
24
  from numba.core.datamodel.models import StructModel