PyPI - numba-cuda - Versions diffs - 0.15.1__py3-none-any.whl → 0.16.0__py3-none-any.whl - Mend

numba-cuda 0.15.1__py3-none-any.whl → 0.16.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44) hide show

numba_cuda/VERSION +1 -1
numba_cuda/numba/cuda/__init__.py +51 -16
numba_cuda/numba/cuda/codegen.py +11 -9
numba_cuda/numba/cuda/compiler.py +3 -39
numba_cuda/numba/cuda/cuda_paths.py +20 -22
numba_cuda/numba/cuda/cudadrv/driver.py +197 -286
numba_cuda/numba/cuda/cudadrv/error.py +4 -0
numba_cuda/numba/cuda/cudadrv/libs.py +1 -1
numba_cuda/numba/cuda/cudadrv/mappings.py +8 -9
numba_cuda/numba/cuda/cudadrv/nvrtc.py +153 -108
numba_cuda/numba/cuda/cudadrv/nvvm.py +1 -197
numba_cuda/numba/cuda/cudadrv/runtime.py +5 -136
numba_cuda/numba/cuda/decorators.py +18 -0
numba_cuda/numba/cuda/dispatcher.py +1 -0
numba_cuda/numba/cuda/flags.py +36 -0
numba_cuda/numba/cuda/memory_management/nrt.py +2 -2
numba_cuda/numba/cuda/simulator/cudadrv/driver.py +6 -2
numba_cuda/numba/cuda/target.py +55 -2
numba_cuda/numba/cuda/testing.py +0 -22
numba_cuda/numba/cuda/tests/__init__.py +0 -2
numba_cuda/numba/cuda/tests/cudadrv/__init__.py +0 -2
numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +15 -1
numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +17 -6
numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +9 -167
numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +27 -0
numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +3 -19
numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +1 -37
numba_cuda/numba/cuda/tests/cudapy/__init__.py +0 -2
numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +1 -1
numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +0 -9
numba_cuda/numba/cuda/tests/cudapy/test_errors.py +14 -0
numba_cuda/numba/cuda/tests/cudapy/test_exception.py +0 -6
numba_cuda/numba/cuda/tests/cudapy/test_extending.py +2 -1
numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +0 -4
numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +18 -0
numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +0 -7
numba_cuda/numba/cuda/tests/nocuda/__init__.py +0 -2
numba_cuda/numba/cuda/tests/nrt/__init__.py +0 -2
numba_cuda/numba/cuda/tests/nrt/test_nrt.py +10 -1
{numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/METADATA +8 -10
{numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/RECORD +44 -42
{numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/WHEEL +0 -0
{numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/licenses/LICENSE +0 -0
{numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/top_level.txt +0 -0

numba_cuda/numba/cuda/cudadrv/nvrtc.py CHANGED Viewed

@@ -1,6 +1,7 @@
 from ctypes import byref, c_char, c_char_p, c_int, c_size_t, c_void_p, POINTER
 from enum import IntEnum
 from numba.cuda.cudadrv.error import (
+    CCSupportError,
     NvrtcError,
     NvrtcBuiltinOperationFailure,
     NvrtcCompilationError,
@@ -27,6 +28,9 @@ nvrtc_program = c_void_p
 # Result code
 nvrtc_result = c_int
+if config.CUDA_USE_NVIDIA_BINDING:
+    from cuda.core.experimental import Program, ProgramOptions
 class NvrtcResult(IntEnum):
     NVRTC_SUCCESS = 0
@@ -76,20 +80,6 @@ class NVRTC:
     (for Numba) open_cudalib function to load the NVRTC library.
     """
-    _CU11_2ONLY_PROTOTYPES = {
-        # nvrtcResult nvrtcGetNumSupportedArchs(int *numArchs);
-        "nvrtcGetNumSupportedArchs": (nvrtc_result, POINTER(c_int)),
-        # nvrtcResult nvrtcGetSupportedArchs(int *supportedArchs);
-        "nvrtcGetSupportedArchs": (nvrtc_result, POINTER(c_int)),
-    }
-    _CU12ONLY_PROTOTYPES = {
-        # nvrtcResult nvrtcGetLTOIRSize(nvrtcProgram prog, size_t *ltoSizeRet);
-        "nvrtcGetLTOIRSize": (nvrtc_result, nvrtc_program, POINTER(c_size_t)),
-        # nvrtcResult nvrtcGetLTOIR(nvrtcProgram prog, char *lto);
-        "nvrtcGetLTOIR": (nvrtc_result, nvrtc_program, c_char_p),
-    }
     _PROTOTYPES = {
         # nvrtcResult nvrtcVersion(int *major, int *minor)
         "nvrtcVersion": (nvrtc_result, POINTER(c_int), POINTER(c_int)),
@@ -137,6 +127,14 @@ class NVRTC:
         ),
         # nvrtcResult nvrtcGetProgramLog(nvrtcProgram prog, char *log);
         "nvrtcGetProgramLog": (nvrtc_result, nvrtc_program, c_char_p),
+        # nvrtcResult nvrtcGetNumSupportedArchs(int *numArchs);
+        "nvrtcGetNumSupportedArchs": (nvrtc_result, POINTER(c_int)),
+        # nvrtcResult nvrtcGetSupportedArchs(int *supportedArchs);
+        "nvrtcGetSupportedArchs": (nvrtc_result, POINTER(c_int)),
+        # nvrtcResult nvrtcGetLTOIRSize(nvrtcProgram prog, size_t *ltoSizeRet);
+        "nvrtcGetLTOIRSize": (nvrtc_result, nvrtc_program, POINTER(c_size_t)),
+        # nvrtcResult nvrtcGetLTOIR(nvrtcProgram prog, char *lto);
+        "nvrtcGetLTOIR": (nvrtc_result, nvrtc_program, c_char_p),
     }
     # Singleton reference
@@ -154,18 +152,18 @@ class NVRTC:
                     cls.__INSTANCE = None
                     raise NvrtcSupportError("NVRTC cannot be loaded") from e
-                from numba.cuda.cudadrv.runtime import get_version
-                if get_version() >= (11, 2):
-                    inst._PROTOTYPES |= inst._CU11_2ONLY_PROTOTYPES
-                if get_version() >= (12, 0):
-                    inst._PROTOTYPES |= inst._CU12ONLY_PROTOTYPES
                 # Find & populate functions
                 for name, proto in inst._PROTOTYPES.items():
-                    func = getattr(lib, name)
-                    func.restype = proto[0]
-                    func.argtypes = proto[1:]
+                    try:
+                        func = getattr(lib, name)
+                        func.restype = proto[0]
+                        func.argtypes = proto[1:]
+                    except AttributeError:
+                        if "LTOIR" in name:
+                            # CUDA 11 does not have LTOIR functions; ignore
+                            continue
+                        else:
+                            raise
                     @functools.wraps(func)
                     def checked_call(*args, func=func, name=name):
@@ -192,52 +190,16 @@ class NVRTC:
         return cls.__INSTANCE
+    @functools.cache
     def get_supported_archs(self):
         """
         Get Supported Architectures by NVRTC as list of arch tuples.
         """
-        ver = self.get_version()
-        if ver < (11, 0):
-            raise RuntimeError(
-                "Unsupported CUDA version. CUDA 11.0 or higher is required."
-            )
-        elif ver == (11, 0):
-            return [
-                (3, 0),
-                (3, 2),
-                (3, 5),
-                (3, 7),
-                (5, 0),
-                (5, 2),
-                (5, 3),
-                (6, 0),
-                (6, 1),
-                (6, 2),
-                (7, 0),
-                (7, 2),
-                (7, 5),
-            ]
-        elif ver == (11, 1):
-            return [
-                (3, 5),
-                (3, 7),
-                (5, 0),
-                (5, 2),
-                (5, 3),
-                (6, 0),
-                (6, 1),
-                (6, 2),
-                (7, 0),
-                (7, 2),
-                (7, 5),
-                (8, 0),
-            ]
-        else:
-            num = c_int()
-            self.nvrtcGetNumSupportedArchs(byref(num))
-            archs = (c_int * num.value)()
-            self.nvrtcGetSupportedArchs(archs)
-            return [(archs[i] // 10, archs[i] % 10) for i in range(num.value)]
+        num = c_int()
+        self.nvrtcGetNumSupportedArchs(byref(num))
+        archs = (c_int * num.value)()
+        self.nvrtcGetSupportedArchs(archs)
+        return [(archs[i] // 10, archs[i] % 10) for i in range(num.value)]
     def get_version(self):
         """
@@ -346,9 +308,9 @@ def compile(src, name, cc, ltoir=False):
     version = nvrtc.get_version()
     ver_str = lambda v: ".".join(v)
-    if version < (11, 0):
+    if version < (11, 2):
         raise RuntimeError(
-            "Unsupported CUDA version. CUDA 11.0 or higher is required."
+            "Unsupported CUDA version. CUDA 11.2 or higher is required."
         )
     else:
         supported_arch = nvrtc.get_supported_archs()
@@ -374,10 +336,16 @@ def compile(src, name, cc, ltoir=False):
     # - Relocatable Device Code (rdc) is needed to prevent device functions
     #   being optimized away.
     major, minor = found
-    arch = f"--gpu-architecture=compute_{major}{minor}"
-    cuda_include = [
-        f"-I{get_cuda_paths()['include_dir'].info}",
+    if config.CUDA_USE_NVIDIA_BINDING:
+        arch = f"sm_{major}{minor}"
+    else:
+        arch = f"--gpu-architecture=compute_{major}{minor}"
+    cuda_include_dir = get_cuda_paths()["include_dir"].info
+    cuda_includes = [
+        f"{cuda_include_dir}",
+        f"{os.path.join(cuda_include_dir, 'cccl')}",
     ]
     nvrtc_version = nvrtc.get_version()
@@ -387,54 +355,131 @@ def compile(src, name, cc, ltoir=False):
     numba_cuda_path = os.path.dirname(cudadrv_path)
     if nvrtc_ver_major == 11:
-        numba_include = f"-I{os.path.join(numba_cuda_path, 'include', '11')}"
+        numba_include = f"{os.path.join(numba_cuda_path, 'include', '11')}"
     else:
-        numba_include = f"-I{os.path.join(numba_cuda_path, 'include', '12')}"
+        numba_include = f"{os.path.join(numba_cuda_path, 'include', '12')}"
     if config.CUDA_NVRTC_EXTRA_SEARCH_PATHS:
-        extra_search_paths = config.CUDA_NVRTC_EXTRA_SEARCH_PATHS.split(":")
-        extra_includes = [f"-I{p}" for p in extra_search_paths]
+        extra_includes = config.CUDA_NVRTC_EXTRA_SEARCH_PATHS.split(":")
     else:
         extra_includes = []
-    nrt_path = os.path.join(numba_cuda_path, "memory_management")
-    nrt_include = f"-I{nrt_path}"
-    options = [
-        arch,
-        numba_include,
-        *cuda_include,
-        nrt_include,
-        *extra_includes,
-        "-rdc",
-        "true",
-    ]
+    nrt_include = os.path.join(numba_cuda_path, "memory_management")
+    includes = [numba_include, *cuda_includes, nrt_include, *extra_includes]
+    if config.CUDA_USE_NVIDIA_BINDING:
+        options = ProgramOptions(
+            arch=arch,
+            include_path=includes,
+            relocatable_device_code=True,
+            std="c++17" if nvrtc_version < (12, 0) else None,
+            link_time_optimization=ltoir,
+            name=name,
+        )
-    if ltoir:
-        options.append("-dlto")
+        class Logger:
+            def __init__(self):
+                self.log = []
-    if nvrtc_version < (12, 0):
-        options += ["-std=c++17"]
+            def write(self, msg):
+                self.log.append(msg)
-    # Compile the program
-    compile_error = nvrtc.compile_program(program, options)
+        logger = Logger()
+        if isinstance(src, bytes):
+            src = src.decode("utf8")
-    # Get log from compilation
-    log = nvrtc.get_compile_log(program)
+        prog = Program(src, "c++", options=options)
+        result = prog.compile("ltoir" if ltoir else "ptx", logs=logger)
+        log = ""
+        if logger.log:
+            log = logger.log
+            joined_logs = "\n".join(log)
+            warnings.warn(f"NVRTC log messages: {joined_logs}")
+        return result, log
-    # If the compile failed, provide the log in an exception
-    if compile_error:
-        msg = f"NVRTC Compilation failure whilst compiling {name}:\n\n{log}"
-        raise NvrtcError(msg)
+    else:
+        includes = [f"-I{path}" for path in includes]
+        options = [
+            arch,
+            *includes,
+            "-rdc",
+            "true",
+        ]
+        if ltoir:
+            options.append("-dlto")
+        if nvrtc_version < (12, 0):
+            options.append("-std=c++17")
+        # Compile the program
+        compile_error = nvrtc.compile_program(program, options)
+        # Get log from compilation
+        log = nvrtc.get_compile_log(program)
+        # If the compile failed, provide the log in an exception
+        if compile_error:
+            msg = f"NVRTC Compilation failure whilst compiling {name}:\n\n{log}"
+            raise NvrtcError(msg)
+        # Otherwise, if there's any content in the log, present it as a warning
+        if log:
+            msg = f"NVRTC log messages whilst compiling {name}:\n\n{log}"
+            warnings.warn(msg)
+        if ltoir:
+            ltoir = nvrtc.get_lto(program)
+            return ltoir, log
+        else:
+            ptx = nvrtc.get_ptx(program)
+            return ptx, log
-    # Otherwise, if there's any content in the log, present it as a warning
-    if log:
-        msg = f"NVRTC log messages whilst compiling {name}:\n\n{log}"
-        warnings.warn(msg)
-    if ltoir:
-        ltoir = nvrtc.get_lto(program)
-        return ltoir, log
+def find_closest_arch(mycc):
+    """
+    Given a compute capability, return the closest compute capability supported
+    by the CUDA toolkit.
+    :param mycc: Compute capability as a tuple ``(MAJOR, MINOR)``
+    :return: Closest supported CC as a tuple ``(MAJOR, MINOR)``
+    """
+    supported_ccs = get_supported_ccs()
+    for i, cc in enumerate(supported_ccs):
+        if cc == mycc:
+            # Matches
+            return cc
+        elif cc > mycc:
+            # Exceeded
+            if i == 0:
+                # CC lower than supported
+                msg = (
+                    "GPU compute capability %d.%d is not supported"
+                    "(requires >=%d.%d)" % (mycc + cc)
+                )
+                raise CCSupportError(msg)
+            else:
+                # return the previous CC
+                return supported_ccs[i - 1]
+    # CC higher than supported
+    return supported_ccs[-1]  # Choose the highest
+def get_arch_option(major, minor):
+    """Matches with the closest architecture option"""
+    if config.FORCE_CUDA_CC:
+        arch = config.FORCE_CUDA_CC
     else:
-        ptx = nvrtc.get_ptx(program)
-        return ptx, log
+        arch = find_closest_arch((major, minor))
+    return "compute_%d%d" % arch
+def get_lowest_supported_cc():
+    return min(get_supported_ccs())
+def get_supported_ccs():
+    return NVRTC().get_supported_archs()

numba_cuda/numba/cuda/cudadrv/nvvm.py CHANGED Viewed

@@ -14,7 +14,7 @@ from llvmlite import ir
 from .error import NvvmError, NvvmSupportError, NvvmWarning
 from .libs import get_libdevice, open_libdevice, open_cudalib
-from numba.core import cgutils, config
+from numba.core import cgutils
 logger = logging.getLogger(__name__)
@@ -179,7 +179,6 @@ class NVVM(object):
         self._minorIR = ir_versions[1]
         self._majorDbg = ir_versions[2]
         self._minorDbg = ir_versions[3]
-        self._supported_ccs = get_supported_ccs()
     @property
     def data_layout(self):
@@ -188,10 +187,6 @@ class NVVM(object):
         else:
             return _datalayout_i128
-    @property
-    def supported_ccs(self):
-        return self._supported_ccs
     def get_version(self):
         major = c_int()
         minor = c_int()
@@ -350,197 +345,6 @@ class CompilationUnit(object):
         return ""
-COMPUTE_CAPABILITIES = (
-    (3, 5),
-    (3, 7),
-    (5, 0),
-    (5, 2),
-    (5, 3),
-    (6, 0),
-    (6, 1),
-    (6, 2),
-    (7, 0),
-    (7, 2),
-    (7, 5),
-    (8, 0),
-    (8, 6),
-    (8, 7),
-    (8, 9),
-    (9, 0),
-    (10, 0),
-    (10, 1),
-    (10, 3),
-    (12, 0),
-    (12, 1),
-)
-# Maps CTK version -> (min supported cc, max supported cc) ranges, bounds inclusive
-_CUDA_CC_MIN_MAX_SUPPORT = {
-    (11, 2): [
-        ((3, 5), (8, 6)),
-    ],
-    (11, 3): [
-        ((3, 5), (8, 6)),
-    ],
-    (11, 4): [
-        ((3, 5), (8, 7)),
-    ],
-    (11, 5): [
-        ((3, 5), (8, 7)),
-    ],
-    (11, 6): [
-        ((3, 5), (8, 7)),
-    ],
-    (11, 7): [
-        ((3, 5), (8, 7)),
-    ],
-    (11, 8): [
-        ((3, 5), (9, 0)),
-    ],
-    (12, 0): [
-        ((5, 0), (9, 0)),
-    ],
-    (12, 1): [
-        ((5, 0), (9, 0)),
-    ],
-    (12, 2): [
-        ((5, 0), (9, 0)),
-    ],
-    (12, 3): [
-        ((5, 0), (9, 0)),
-    ],
-    (12, 4): [
-        ((5, 0), (9, 0)),
-    ],
-    (12, 5): [
-        ((5, 0), (9, 0)),
-    ],
-    (12, 6): [
-        ((5, 0), (9, 0)),
-    ],
-    (12, 8): [
-        ((5, 0), (10, 1)),
-        ((12, 0), (12, 0)),
-    ],
-    (12, 9): [
-        ((5, 0), (12, 1)),
-    ],
-}
-# From CUDA 12.9 Release notes, Section 1.5.4, "Deprecated Architectures"
-# https://docs.nvidia.com/cuda/archive/12.9.0/cuda-toolkit-release-notes/index.html#deprecated-architectures
-#
-#   "Maxwell, Pascal, and Volta architectures are now feature-complete with no
-#   further enhancements planned. While CUDA Toolkit 12.x series will continue
-#   to support building applications for these architectures, offline
-#   compilation and library support will be removed in the next major CUDA
-#   Toolkit version release. Users should plan migration to newer
-#   architectures, as future toolkits will be unable to target Maxwell, Pascal,
-#   and Volta GPUs."
-#
-# In order to maintain compatibility with future toolkits, we use Turing (7.5)
-# as the default CC if it is not otherwise specified.
-LOWEST_CURRENT_CC = (7, 5)
-def ccs_supported_by_ctk(ctk_version):
-    try:
-        # For supported versions, we look up the range of supported CCs
-        return tuple(
-            [
-                cc
-                for min_cc, max_cc in _CUDA_CC_MIN_MAX_SUPPORT[ctk_version]
-                for cc in COMPUTE_CAPABILITIES
-                if min_cc <= cc <= max_cc
-            ]
-        )
-    except KeyError:
-        # For unsupported CUDA toolkit versions, all we can do is assume all
-        # non-deprecated versions we are aware of are supported.
-        #
-        # If the user has specified a non-default CC that is greater than the
-        # lowest non-deprecated one, then we should assume that instead.
-        MIN_CC = max(config.CUDA_DEFAULT_PTX_CC, LOWEST_CURRENT_CC)
-        return tuple([cc for cc in COMPUTE_CAPABILITIES if cc >= MIN_CC])
-def get_supported_ccs():
-    try:
-        from numba.cuda.cudadrv.runtime import runtime
-        cudart_version = runtime.get_version()
-    except:  # noqa: E722
-        # We can't support anything if there's an error getting the runtime
-        # version (e.g. if it's not present or there's another issue)
-        _supported_cc = ()
-        return _supported_cc
-    # Ensure the minimum CTK version requirement is met
-    min_cudart = min(_CUDA_CC_MIN_MAX_SUPPORT)
-    if cudart_version < min_cudart:
-        _supported_cc = ()
-        ctk_ver = f"{cudart_version[0]}.{cudart_version[1]}"
-        unsupported_ver = (
-            f"CUDA Toolkit {ctk_ver} is unsupported by Numba - "
-            f"{min_cudart[0]}.{min_cudart[1]} is the minimum "
-            "required version."
-        )
-        warnings.warn(unsupported_ver)
-        return _supported_cc
-    _supported_cc = ccs_supported_by_ctk(cudart_version)
-    return _supported_cc
-def find_closest_arch(mycc):
-    """
-    Given a compute capability, return the closest compute capability supported
-    by the CUDA toolkit.
-    :param mycc: Compute capability as a tuple ``(MAJOR, MINOR)``
-    :return: Closest supported CC as a tuple ``(MAJOR, MINOR)``
-    """
-    supported_ccs = NVVM().supported_ccs
-    if not supported_ccs:
-        msg = (
-            "No supported GPU compute capabilities found. "
-            "Please check your cudatoolkit version matches your CUDA version."
-        )
-        raise NvvmSupportError(msg)
-    for i, cc in enumerate(supported_ccs):
-        if cc == mycc:
-            # Matches
-            return cc
-        elif cc > mycc:
-            # Exceeded
-            if i == 0:
-                # CC lower than supported
-                msg = (
-                    "GPU compute capability %d.%d is not supported"
-                    "(requires >=%d.%d)" % (mycc + cc)
-                )
-                raise NvvmSupportError(msg)
-            else:
-                # return the previous CC
-                return supported_ccs[i - 1]
-    # CC higher than supported
-    return supported_ccs[-1]  # Choose the highest
-def get_arch_option(major, minor):
-    """Matches with the closest architecture option"""
-    if config.FORCE_CUDA_CC:
-        arch = config.FORCE_CUDA_CC
-    else:
-        arch = find_closest_arch((major, minor))
-    return "compute_%d%d" % arch
 MISSING_LIBDEVICE_FILE_MSG = """Missing libdevice file.
 Please ensure you have a CUDA Toolkit 11.2 or higher.
 For CUDA 12, ``cuda-nvcc`` and ``cuda-nvrtc`` are required:

numba-cuda 0.15.1__py3-none-any.whl → 0.16.0__py3-none-any.whl

numba-cuda 0.15.1__py3-none-any.whl → 0.16.0__py3-none-any.whl