numba-cuda 0.18.1__py3-none-any.whl → 0.19.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of numba-cuda might be problematic. Click here for more details.
- numba_cuda/VERSION +1 -1
- numba_cuda/numba/cuda/__init__.py +1 -1
- numba_cuda/numba/cuda/_internal/cuda_bf16.py +2 -2
- numba_cuda/numba/cuda/_internal/cuda_fp16.py +1 -1
- numba_cuda/numba/cuda/api.py +2 -7
- numba_cuda/numba/cuda/compiler.py +7 -4
- numba_cuda/numba/cuda/core/interpreter.py +3592 -0
- numba_cuda/numba/cuda/core/ir_utils.py +2645 -0
- numba_cuda/numba/cuda/core/sigutils.py +55 -0
- numba_cuda/numba/cuda/cuda_paths.py +9 -17
- numba_cuda/numba/cuda/cudadecl.py +1 -1
- numba_cuda/numba/cuda/cudadrv/driver.py +4 -19
- numba_cuda/numba/cuda/cudadrv/libs.py +1 -2
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +44 -44
- numba_cuda/numba/cuda/cudadrv/nvvm.py +3 -18
- numba_cuda/numba/cuda/cudadrv/runtime.py +12 -1
- numba_cuda/numba/cuda/cudamath.py +1 -1
- numba_cuda/numba/cuda/decorators.py +4 -3
- numba_cuda/numba/cuda/deviceufunc.py +2 -1
- numba_cuda/numba/cuda/dispatcher.py +3 -2
- numba_cuda/numba/cuda/extending.py +1 -1
- numba_cuda/numba/cuda/itanium_mangler.py +211 -0
- numba_cuda/numba/cuda/libdevicedecl.py +1 -1
- numba_cuda/numba/cuda/libdevicefuncs.py +1 -1
- numba_cuda/numba/cuda/lowering.py +1 -1
- numba_cuda/numba/cuda/simulator/api.py +1 -1
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +0 -7
- numba_cuda/numba/cuda/target.py +1 -2
- numba_cuda/numba/cuda/testing.py +4 -6
- numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +80 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +4 -6
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +0 -4
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +146 -3
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +0 -4
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +1 -284
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +473 -0
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1 -6
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +295 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +5 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +1 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +1 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +1 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +1 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +1 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +1 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +1 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +1 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +1 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +1 -1
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +1 -1
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +2 -2
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +1 -1
- numba_cuda/numba/cuda/tests/support.py +752 -0
- numba_cuda/numba/cuda/tests/test_binary_generation/Makefile +3 -3
- numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +4 -1
- numba_cuda/numba/cuda/typing/__init__.py +8 -0
- numba_cuda/numba/cuda/typing/templates.py +1453 -0
- numba_cuda/numba/cuda/vector_types.py +3 -3
- {numba_cuda-0.18.1.dist-info → numba_cuda-0.19.0.dist-info}/METADATA +21 -28
- {numba_cuda-0.18.1.dist-info → numba_cuda-0.19.0.dist-info}/RECORD +84 -79
- numba_cuda/numba/cuda/include/11/cuda_bf16.h +0 -3749
- numba_cuda/numba/cuda/include/11/cuda_bf16.hpp +0 -2683
- numba_cuda/numba/cuda/include/11/cuda_fp16.h +0 -3794
- numba_cuda/numba/cuda/include/11/cuda_fp16.hpp +0 -2614
- {numba_cuda-0.18.1.dist-info → numba_cuda-0.19.0.dist-info}/WHEEL +0 -0
- {numba_cuda-0.18.1.dist-info → numba_cuda-0.19.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.18.1.dist-info → numba_cuda-0.19.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
from numba.core import types, typing
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def is_signature(sig):
|
|
5
|
+
"""
|
|
6
|
+
Return whether *sig* is a potentially valid signature
|
|
7
|
+
specification (for user-facing APIs).
|
|
8
|
+
"""
|
|
9
|
+
return isinstance(sig, (str, tuple, typing.Signature))
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _parse_signature_string(signature_str):
|
|
13
|
+
"""
|
|
14
|
+
Parameters
|
|
15
|
+
----------
|
|
16
|
+
signature_str : str
|
|
17
|
+
"""
|
|
18
|
+
# Just eval signature_str using the types submodules as globals
|
|
19
|
+
return eval(signature_str, {}, types.__dict__)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def normalize_signature(sig):
|
|
23
|
+
"""
|
|
24
|
+
From *sig* (a signature specification), return a ``(args, return_type)``
|
|
25
|
+
tuple, where ``args`` itself is a tuple of types, and ``return_type``
|
|
26
|
+
can be None if not specified.
|
|
27
|
+
"""
|
|
28
|
+
if isinstance(sig, str):
|
|
29
|
+
parsed = _parse_signature_string(sig)
|
|
30
|
+
else:
|
|
31
|
+
parsed = sig
|
|
32
|
+
if isinstance(parsed, tuple):
|
|
33
|
+
args, return_type = parsed, None
|
|
34
|
+
elif isinstance(parsed, typing.Signature):
|
|
35
|
+
args, return_type = parsed.args, parsed.return_type
|
|
36
|
+
else:
|
|
37
|
+
raise TypeError(
|
|
38
|
+
"invalid signature: %r (type: %r) evaluates to %r "
|
|
39
|
+
"instead of tuple or Signature"
|
|
40
|
+
% (sig, sig.__class__.__name__, parsed.__class__.__name__)
|
|
41
|
+
)
|
|
42
|
+
|
|
43
|
+
def check_type(ty):
|
|
44
|
+
if not isinstance(ty, types.Type):
|
|
45
|
+
raise TypeError(
|
|
46
|
+
"invalid type in signature: expected a type "
|
|
47
|
+
"instance, got %r" % (ty,)
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
if return_type is not None:
|
|
51
|
+
check_type(return_type)
|
|
52
|
+
for ty in args:
|
|
53
|
+
check_type(ty)
|
|
54
|
+
|
|
55
|
+
return args, return_type
|
|
@@ -148,7 +148,6 @@ def get_nvrtc_dso_path():
|
|
|
148
148
|
# Check for each version of the NVRTC DLL, preferring the most
|
|
149
149
|
# recent.
|
|
150
150
|
versions = (
|
|
151
|
-
"112" if IS_WIN32 else "11.2",
|
|
152
151
|
"120" if IS_WIN32 else "12",
|
|
153
152
|
"130" if IS_WIN32 else "13",
|
|
154
153
|
)
|
|
@@ -303,16 +302,16 @@ def get_nvidia_nvvm_ctk():
|
|
|
303
302
|
|
|
304
303
|
# Assume the existence of NVVM in the conda env implies that a CUDA toolkit
|
|
305
304
|
# conda package is installed.
|
|
305
|
+
if IS_WIN32:
|
|
306
|
+
# The path used on Windows
|
|
307
|
+
libdir = os.path.join(sys.prefix, "Library", "nvvm", _cudalib_path())
|
|
308
|
+
else:
|
|
309
|
+
# The path used on Linux is different to that on Windows
|
|
310
|
+
libdir = os.path.join(sys.prefix, "nvvm", _cudalib_path())
|
|
306
311
|
|
|
307
|
-
# First, try the location used on Linux and the Windows 11.x packages
|
|
308
|
-
libdir = os.path.join(sys.prefix, "nvvm", _cudalib_path())
|
|
309
312
|
if not os.path.exists(libdir) or not os.path.isdir(libdir):
|
|
310
|
-
# If
|
|
311
|
-
|
|
312
|
-
if not os.path.exists(libdir) or not os.path.isdir(libdir):
|
|
313
|
-
# If that doesn't exist either, assume we don't have the NVIDIA
|
|
314
|
-
# conda package
|
|
315
|
-
return
|
|
313
|
+
# If the path doesn't exist, we didn't find the NVIDIA conda package
|
|
314
|
+
return
|
|
316
315
|
|
|
317
316
|
paths = find_lib("nvvm", libdir=libdir)
|
|
318
317
|
if not paths:
|
|
@@ -346,15 +345,8 @@ def get_nvidia_static_cudalib_ctk():
|
|
|
346
345
|
if not nvvm_ctk:
|
|
347
346
|
return
|
|
348
347
|
|
|
349
|
-
if IS_WIN32 and ("Library" not in nvvm_ctk):
|
|
350
|
-
# Location specific to CUDA 11.x packages on Windows
|
|
351
|
-
dirs = ("Lib", "x64")
|
|
352
|
-
else:
|
|
353
|
-
# Linux, or Windows with CUDA 12.x packages
|
|
354
|
-
dirs = ("lib",)
|
|
355
|
-
|
|
356
348
|
env_dir = os.path.dirname(os.path.dirname(nvvm_ctk))
|
|
357
|
-
return os.path.join(env_dir,
|
|
349
|
+
return os.path.join(env_dir, "lib")
|
|
358
350
|
|
|
359
351
|
|
|
360
352
|
def get_cuda_home(*subdirs):
|
|
@@ -54,12 +54,6 @@ from .linkable_code import LinkableCode, LTOIR, Fatbin, Object
|
|
|
54
54
|
from numba.cuda.utils import cached_file_read
|
|
55
55
|
from numba.cuda.cudadrv import enums, drvapi, nvrtc
|
|
56
56
|
|
|
57
|
-
try:
|
|
58
|
-
from pynvjitlink.api import NvJitLinker, NvJitLinkError
|
|
59
|
-
except ImportError:
|
|
60
|
-
NvJitLinker, NvJitLinkError = None, None
|
|
61
|
-
|
|
62
|
-
|
|
63
57
|
USE_NV_BINDING = config.CUDA_USE_NVIDIA_BINDING
|
|
64
58
|
|
|
65
59
|
if USE_NV_BINDING:
|
|
@@ -640,7 +634,7 @@ class Device(object):
|
|
|
640
634
|
|
|
641
635
|
if USE_NV_BINDING:
|
|
642
636
|
buf = driver.cuDeviceGetName(bufsz, self.id)
|
|
643
|
-
name = buf.
|
|
637
|
+
name = buf.split(b"\x00")[0]
|
|
644
638
|
else:
|
|
645
639
|
buf = (c_char * bufsz)()
|
|
646
640
|
driver.cuDeviceGetName(buf, bufsz, self.id)
|
|
@@ -2808,19 +2802,10 @@ class _LinkerBase(metaclass=ABCMeta):
|
|
|
2808
2802
|
lto=None,
|
|
2809
2803
|
additional_flags=None,
|
|
2810
2804
|
):
|
|
2811
|
-
|
|
2812
|
-
|
|
2813
|
-
if config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY:
|
|
2814
|
-
linker = MVCLinker
|
|
2815
|
-
elif USE_NV_BINDING:
|
|
2816
|
-
linker = _Linker
|
|
2817
|
-
else:
|
|
2818
|
-
linker = CtypesLinker
|
|
2805
|
+
if USE_NV_BINDING:
|
|
2806
|
+
linker = _Linker
|
|
2819
2807
|
else:
|
|
2820
|
-
|
|
2821
|
-
linker = _Linker
|
|
2822
|
-
else:
|
|
2823
|
-
linker = CtypesLinker
|
|
2808
|
+
linker = CtypesLinker
|
|
2824
2809
|
|
|
2825
2810
|
params = (max_registers, lineinfo, cc)
|
|
2826
2811
|
if linker is _Linker:
|
|
@@ -2,8 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
CUDA Toolkit libraries can be available via either:
|
|
4
4
|
|
|
5
|
-
- the `cuda-nvcc` and `cuda-nvrtc` conda packages
|
|
6
|
-
- the `cudatoolkit` conda package for CUDA 11,
|
|
5
|
+
- the `cuda-nvcc` and `cuda-nvrtc` conda packages,
|
|
7
6
|
- a user supplied location from CUDA_HOME,
|
|
8
7
|
- a system wide location,
|
|
9
8
|
- package-specific locations (e.g. the Debian NVIDIA packages),
|
|
@@ -29,6 +29,7 @@ nvrtc_program = c_void_p
|
|
|
29
29
|
nvrtc_result = c_int
|
|
30
30
|
|
|
31
31
|
if config.CUDA_USE_NVIDIA_BINDING:
|
|
32
|
+
from cuda.bindings import nvrtc as bindings_nvrtc
|
|
32
33
|
from cuda.core.experimental import Program, ProgramOptions
|
|
33
34
|
|
|
34
35
|
|
|
@@ -142,6 +143,10 @@ class NVRTC:
|
|
|
142
143
|
|
|
143
144
|
def __new__(cls):
|
|
144
145
|
with _nvrtc_lock:
|
|
146
|
+
if config.CUDA_USE_NVIDIA_BINDING:
|
|
147
|
+
raise RuntimeError(
|
|
148
|
+
"NVRTC objects should not be used with cuda-python bindings"
|
|
149
|
+
)
|
|
145
150
|
if cls.__INSTANCE is None:
|
|
146
151
|
from numba.cuda.cudadrv.libs import open_cudalib
|
|
147
152
|
|
|
@@ -154,16 +159,9 @@ class NVRTC:
|
|
|
154
159
|
|
|
155
160
|
# Find & populate functions
|
|
156
161
|
for name, proto in inst._PROTOTYPES.items():
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
func.argtypes = proto[1:]
|
|
161
|
-
except AttributeError:
|
|
162
|
-
if "LTOIR" in name:
|
|
163
|
-
# CUDA 11 does not have LTOIR functions; ignore
|
|
164
|
-
continue
|
|
165
|
-
else:
|
|
166
|
-
raise
|
|
162
|
+
func = getattr(lib, name)
|
|
163
|
+
func.restype = proto[0]
|
|
164
|
+
func.argtypes = proto[1:]
|
|
167
165
|
|
|
168
166
|
@functools.wraps(func)
|
|
169
167
|
def checked_call(*args, func=func, name=name):
|
|
@@ -303,32 +301,35 @@ def compile(src, name, cc, ltoir=False):
|
|
|
303
301
|
:return: The compiled PTX and compilation log
|
|
304
302
|
:rtype: tuple
|
|
305
303
|
"""
|
|
306
|
-
nvrtc = NVRTC()
|
|
307
|
-
program = nvrtc.create_program(src, name)
|
|
308
304
|
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
raise RuntimeError(
|
|
313
|
-
"Unsupported CUDA version. CUDA 11.2 or higher is required."
|
|
314
|
-
)
|
|
315
|
-
else:
|
|
316
|
-
supported_arch = nvrtc.get_supported_archs()
|
|
317
|
-
try:
|
|
318
|
-
found = max(filter(lambda v: v <= cc, [v for v in supported_arch]))
|
|
319
|
-
except ValueError:
|
|
305
|
+
if config.CUDA_USE_NVIDIA_BINDING:
|
|
306
|
+
retcode, *version = bindings_nvrtc.nvrtcVersion()
|
|
307
|
+
if retcode != bindings_nvrtc.nvrtcResult.NVRTC_SUCCESS:
|
|
320
308
|
raise RuntimeError(
|
|
321
|
-
f"
|
|
322
|
-
f"minimum supported by NVRTC {ver_str(version)}. Supported "
|
|
323
|
-
"compute capabilities are "
|
|
324
|
-
f"{', '.join([ver_str(v) for v in supported_arch])}."
|
|
309
|
+
f"{retcode.name} when calling nvrtcGetSupportedArchs()"
|
|
325
310
|
)
|
|
311
|
+
version = tuple(version)
|
|
312
|
+
else:
|
|
313
|
+
nvrtc = NVRTC()
|
|
314
|
+
version = nvrtc.get_version()
|
|
326
315
|
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
316
|
+
ver_str = lambda version: ".".join(str(v) for v in version)
|
|
317
|
+
supported_ccs = get_supported_ccs()
|
|
318
|
+
try:
|
|
319
|
+
found = max(filter(lambda v: v <= cc, [v for v in supported_ccs]))
|
|
320
|
+
except ValueError:
|
|
321
|
+
raise RuntimeError(
|
|
322
|
+
f"Device compute capability {ver_str(cc)} is less than the "
|
|
323
|
+
f"minimum supported by NVRTC {ver_str(version)}. Supported "
|
|
324
|
+
"compute capabilities are "
|
|
325
|
+
f"{', '.join([ver_str(v) for v in supported_ccs])}."
|
|
326
|
+
)
|
|
327
|
+
|
|
328
|
+
if found != cc:
|
|
329
|
+
warnings.warn(
|
|
330
|
+
f"Device compute capability {ver_str(cc)} is not supported by "
|
|
331
|
+
f"NVRTC {ver_str(version)}. Using {ver_str(found)} instead."
|
|
332
|
+
)
|
|
332
333
|
|
|
333
334
|
# Compilation options:
|
|
334
335
|
# - Compile for the current device's compute capability.
|
|
@@ -348,16 +349,10 @@ def compile(src, name, cc, ltoir=False):
|
|
|
348
349
|
f"{os.path.join(cuda_include_dir, 'cccl')}",
|
|
349
350
|
]
|
|
350
351
|
|
|
351
|
-
nvrtc_version = nvrtc.get_version()
|
|
352
|
-
nvrtc_ver_major = nvrtc_version[0]
|
|
353
|
-
|
|
354
352
|
cudadrv_path = os.path.dirname(os.path.abspath(__file__))
|
|
355
353
|
numba_cuda_path = os.path.dirname(cudadrv_path)
|
|
356
354
|
|
|
357
|
-
|
|
358
|
-
numba_include = f"{os.path.join(numba_cuda_path, 'include', '11')}"
|
|
359
|
-
else:
|
|
360
|
-
numba_include = f"{os.path.join(numba_cuda_path, 'include', '12')}"
|
|
355
|
+
numba_include = f"{os.path.join(numba_cuda_path, 'include', '12')}"
|
|
361
356
|
|
|
362
357
|
if config.CUDA_NVRTC_EXTRA_SEARCH_PATHS:
|
|
363
358
|
extra_includes = config.CUDA_NVRTC_EXTRA_SEARCH_PATHS.split(":")
|
|
@@ -373,7 +368,6 @@ def compile(src, name, cc, ltoir=False):
|
|
|
373
368
|
arch=arch,
|
|
374
369
|
include_path=includes,
|
|
375
370
|
relocatable_device_code=True,
|
|
376
|
-
std="c++17" if nvrtc_version < (12, 0) else None,
|
|
377
371
|
link_time_optimization=ltoir,
|
|
378
372
|
name=name,
|
|
379
373
|
)
|
|
@@ -399,6 +393,7 @@ def compile(src, name, cc, ltoir=False):
|
|
|
399
393
|
return result, log
|
|
400
394
|
|
|
401
395
|
else:
|
|
396
|
+
program = nvrtc.create_program(src, name)
|
|
402
397
|
includes = [f"-I{path}" for path in includes]
|
|
403
398
|
options = [
|
|
404
399
|
arch,
|
|
@@ -410,9 +405,6 @@ def compile(src, name, cc, ltoir=False):
|
|
|
410
405
|
if ltoir:
|
|
411
406
|
options.append("-dlto")
|
|
412
407
|
|
|
413
|
-
if nvrtc_version < (12, 0):
|
|
414
|
-
options.append("-std=c++17")
|
|
415
|
-
|
|
416
408
|
# Compile the program
|
|
417
409
|
compile_error = nvrtc.compile_program(program, options)
|
|
418
410
|
|
|
@@ -482,4 +474,12 @@ def get_lowest_supported_cc():
|
|
|
482
474
|
|
|
483
475
|
|
|
484
476
|
def get_supported_ccs():
|
|
485
|
-
|
|
477
|
+
if config.CUDA_USE_NVIDIA_BINDING:
|
|
478
|
+
retcode, archs = bindings_nvrtc.nvrtcGetSupportedArchs()
|
|
479
|
+
if retcode != bindings_nvrtc.nvrtcResult.NVRTC_SUCCESS:
|
|
480
|
+
raise RuntimeError(
|
|
481
|
+
f"{retcode.name} when calling nvrtcGetSupportedArchs()"
|
|
482
|
+
)
|
|
483
|
+
return [(arch // 10, arch % 10) for arch in archs]
|
|
484
|
+
else:
|
|
485
|
+
return NVRTC().get_supported_archs()
|
|
@@ -47,14 +47,7 @@ NVVM_ERROR_COMPILATION
|
|
|
47
47
|
for i, k in enumerate(RESULT_CODE_NAMES):
|
|
48
48
|
setattr(sys.modules[__name__], k, i)
|
|
49
49
|
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
_datalayout_original = (
|
|
53
|
-
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-"
|
|
54
|
-
"i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-"
|
|
55
|
-
"v64:64:64-v128:128:128-n16:32:64"
|
|
56
|
-
)
|
|
57
|
-
_datalayout_i128 = (
|
|
50
|
+
_datalayout = (
|
|
58
51
|
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-"
|
|
59
52
|
"i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-"
|
|
60
53
|
"v64:64:64-v128:128:128-n16:32:64"
|
|
@@ -182,10 +175,7 @@ class NVVM(object):
|
|
|
182
175
|
|
|
183
176
|
@property
|
|
184
177
|
def data_layout(self):
|
|
185
|
-
|
|
186
|
-
return _datalayout_original
|
|
187
|
-
else:
|
|
188
|
-
return _datalayout_i128
|
|
178
|
+
return _datalayout
|
|
189
179
|
|
|
190
180
|
def get_version(self):
|
|
191
181
|
major = c_int()
|
|
@@ -346,14 +336,9 @@ class CompilationUnit(object):
|
|
|
346
336
|
|
|
347
337
|
|
|
348
338
|
MISSING_LIBDEVICE_FILE_MSG = """Missing libdevice file.
|
|
349
|
-
|
|
350
|
-
For CUDA 12, ``cuda-nvcc`` and ``cuda-nvrtc`` are required:
|
|
339
|
+
``cuda-nvcc`` and ``cuda-nvrtc`` are required:
|
|
351
340
|
|
|
352
341
|
$ conda install -c conda-forge cuda-nvcc cuda-nvrtc "cuda-version>=12.0"
|
|
353
|
-
|
|
354
|
-
For CUDA 11, ``cudatoolkit`` is required:
|
|
355
|
-
|
|
356
|
-
$ conda install -c conda-forge cudatoolkit "cuda-version>=11.2,<12.0"
|
|
357
342
|
"""
|
|
358
343
|
|
|
359
344
|
|
|
@@ -5,12 +5,23 @@ The toolkit version can now be obtained from NVRTC, so we don't use a binding
|
|
|
5
5
|
to the runtime anymore. This file is provided to maintain the existing API.
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
+
from numba import config
|
|
8
9
|
from numba.cuda.cudadrv.nvrtc import NVRTC
|
|
9
10
|
|
|
10
11
|
|
|
11
12
|
class Runtime:
|
|
12
13
|
def get_version(self):
|
|
13
|
-
|
|
14
|
+
if config.CUDA_USE_NVIDIA_BINDING:
|
|
15
|
+
from cuda.bindings import nvrtc
|
|
16
|
+
|
|
17
|
+
retcode, *version = nvrtc.nvrtcVersion()
|
|
18
|
+
if retcode != nvrtc.nvrtcResult.NVRTC_SUCCESS:
|
|
19
|
+
raise RuntimeError(
|
|
20
|
+
f"{retcode.name} when calling nvrtcGetVersion()"
|
|
21
|
+
)
|
|
22
|
+
return tuple(version)
|
|
23
|
+
else:
|
|
24
|
+
return NVRTC().get_version()
|
|
14
25
|
|
|
15
26
|
|
|
16
27
|
runtime = Runtime()
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
from warnings import warn
|
|
2
|
-
from numba.core import types, config
|
|
2
|
+
from numba.core import types, config
|
|
3
3
|
from numba.core.errors import DeprecationError, NumbaInvalidConfigWarning
|
|
4
4
|
from numba.cuda.compiler import declare_device_function
|
|
5
|
+
from numba.cuda.core import sigutils
|
|
5
6
|
from numba.cuda.dispatcher import CUDADispatcher
|
|
6
7
|
from numba.cuda.simulator.kernel import FakeCUDAKernel
|
|
7
8
|
from numba.cuda.cudadrv.driver import _have_nvjitlink
|
|
@@ -86,7 +87,7 @@ def jit(
|
|
|
86
87
|
number of threads per block.
|
|
87
88
|
:type launch_bounds: int | tuple[int]
|
|
88
89
|
:param lto: Whether to enable LTO. If unspecified, LTO is enabled by
|
|
89
|
-
default when
|
|
90
|
+
default when nvjitlink is available, except for kernels where
|
|
90
91
|
``debug=True``.
|
|
91
92
|
:type lto: bool
|
|
92
93
|
"""
|
|
@@ -143,7 +144,7 @@ def jit(
|
|
|
143
144
|
raise ValueError("link keyword invalid for device function")
|
|
144
145
|
|
|
145
146
|
if lto is None:
|
|
146
|
-
# Default to using LTO if
|
|
147
|
+
# Default to using LTO if nvjitlink is available and we're not debugging
|
|
147
148
|
lto = _have_nvjitlink() and not debug
|
|
148
149
|
else:
|
|
149
150
|
if lto and not _have_nvjitlink():
|
|
@@ -11,8 +11,9 @@ from functools import reduce
|
|
|
11
11
|
import numpy as np
|
|
12
12
|
|
|
13
13
|
from numba.np.ufunc.ufuncbuilder import _BaseUFuncBuilder, parse_identity
|
|
14
|
-
from numba.core import types
|
|
14
|
+
from numba.core import types
|
|
15
15
|
from numba.core.typing import signature
|
|
16
|
+
from numba.cuda.core import sigutils
|
|
16
17
|
from numba.np.ufunc.sigparse import parse_signature
|
|
17
18
|
|
|
18
19
|
|
|
@@ -8,13 +8,13 @@ import types as pytypes
|
|
|
8
8
|
import weakref
|
|
9
9
|
import uuid
|
|
10
10
|
|
|
11
|
-
from numba.core import compiler,
|
|
11
|
+
from numba.core import compiler, types, typing, config
|
|
12
12
|
from numba.cuda import serialize, utils
|
|
13
13
|
from numba.cuda.core.caching import Cache, CacheImpl, NullCache
|
|
14
14
|
from numba.core.compiler_lock import global_compiler_lock
|
|
15
15
|
from numba.core.dispatcher import _DispatcherBase
|
|
16
16
|
from numba.core.errors import NumbaPerformanceWarning, TypingError
|
|
17
|
-
from numba.
|
|
17
|
+
from numba.cuda.typing.templates import fold_arguments
|
|
18
18
|
from numba.core.typing.typeof import Purpose, typeof
|
|
19
19
|
from numba.cuda.api import get_current_device
|
|
20
20
|
from numba.cuda.args import wrap_arg
|
|
@@ -23,6 +23,7 @@ from numba.cuda.compiler import (
|
|
|
23
23
|
CUDACompiler,
|
|
24
24
|
kernel_fixup,
|
|
25
25
|
)
|
|
26
|
+
from numba.cuda.core import sigutils
|
|
26
27
|
import re
|
|
27
28
|
from numba.cuda.cudadrv import driver, nvvm
|
|
28
29
|
from numba.cuda.cudadrv.linkable_code import LinkableCode
|
|
@@ -18,7 +18,7 @@ def make_attribute_wrapper(typeclass, struct_attr, python_attr):
|
|
|
18
18
|
Vendored from numba.core.extending with a change to consider the CUDA data
|
|
19
19
|
model manager.
|
|
20
20
|
"""
|
|
21
|
-
from numba.
|
|
21
|
+
from numba.cuda.typing.templates import AttributeTemplate
|
|
22
22
|
|
|
23
23
|
from numba.core.datamodel import default_manager
|
|
24
24
|
from numba.core.datamodel.models import StructModel
|