PyPI - numba-cuda - Versions diffs - 0.0.17__py3-none-any.whl → 0.0.19__py3-none-any.whl - Mend

numba-cuda 0.0.17__py3-none-any.whl → 0.0.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

numba_cuda/VERSION +1 -1
numba_cuda/numba/cuda/codegen.py +15 -3
numba_cuda/numba/cuda/cuda_paths.py +68 -0
numba_cuda/numba/cuda/cudadrv/devicearray.py +4 -1
numba_cuda/numba/cuda/cudadrv/driver.py +209 -47
numba_cuda/numba/cuda/cudadrv/enums.py +3 -0
numba_cuda/numba/cuda/cudadrv/libs.py +38 -0
numba_cuda/numba/cuda/cudadrv/linkable_code.py +63 -0
numba_cuda/numba/cuda/cudadrv/mappings.py +24 -0
numba_cuda/numba/cuda/cudadrv/nvrtc.py +9 -4
numba_cuda/numba/cuda/device_init.py +3 -0
numba_cuda/numba/cuda/dispatcher.py +48 -8
numba_cuda/numba/cuda/intrinsics.py +6 -1
numba_cuda/numba/cuda/runtime/nrt.cu +190 -0
numba_cuda/numba/cuda/simulator/api.py +14 -0
numba_cuda/numba/cuda/target.py +8 -2
numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +199 -0
numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +44 -4
numba_cuda/numba/cuda/tests/cudapy/test_print.py +2 -2
numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +48 -0
numba_cuda/numba/cuda/tests/nrt/__init__.py +8 -0
numba_cuda/numba/cuda/tests/nrt/mock_numpy.py +42 -0
numba_cuda/numba/cuda/tests/nrt/test_nrt.py +110 -0
numba_cuda/numba/cuda/tests/test_binary_generation/Makefile +51 -0
numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +170 -0
numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu +19 -0
numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu +3 -0
{numba_cuda-0.0.17.dist-info → numba_cuda-0.0.19.dist-info}/METADATA +1 -1
{numba_cuda-0.0.17.dist-info → numba_cuda-0.0.19.dist-info}/RECORD +32 -20
{numba_cuda-0.0.17.dist-info → numba_cuda-0.0.19.dist-info}/WHEEL +1 -1
{numba_cuda-0.0.17.dist-info → numba_cuda-0.0.19.dist-info}/LICENSE +0 -0
{numba_cuda-0.0.17.dist-info → numba_cuda-0.0.19.dist-info}/top_level.txt +0 -0

numba_cuda/VERSION CHANGED Viewed

	@@ -1 +1 @@
1	- 0.0.17
1	+ 0.0.19

numba_cuda/numba/cuda/codegen.py CHANGED Viewed

@@ -59,8 +59,15 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
     get_cufunc), which may be of different compute capabilities.
     """
-    def __init__(self, codegen, name, entry_name=None, max_registers=None,
-                 nvvm_options=None):
+    def __init__(
+        self,
+        codegen,
+        name,
+        entry_name=None,
+        max_registers=None,
+        lto=False,
+        nvvm_options=None
+    ):
         """
         codegen:
             Codegen object.
@@ -71,6 +78,8 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
             kernel and not a device function.
         max_registers:
             The maximum register usage to aim for when linking.
+        lto:
+            Whether to enable link-time optimization.
         nvvm_options:
                 Dict of options to pass to NVVM.
         """
@@ -103,6 +112,7 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
         self._cufunc_cache = {}
         self._max_registers = max_registers
+        self._lto = lto
         if nvvm_options is None:
             nvvm_options = {}
         self._nvvm_options = nvvm_options
@@ -178,7 +188,9 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
         if cubin:
             return cubin
-        linker = driver.Linker.new(max_registers=self._max_registers, cc=cc)
+        linker = driver.Linker.new(
+            max_registers=self._max_registers, cc=cc, lto=self._lto
+        )
         if linker.lto:
             ltoir = self.get_ltoir(cc=cc)

numba_cuda/numba/cuda/cuda_paths.py CHANGED Viewed

@@ -2,9 +2,11 @@ import sys
 import re
 import os
 from collections import namedtuple
+import platform
 from numba.core.config import IS_WIN32
 from numba.misc.findlib import find_lib, find_file
+from numba import config
 _env_path_tuple = namedtuple('_env_path_tuple', ['by', 'info'])
@@ -241,6 +243,7 @@ def get_cuda_paths():
             'libdevice': _get_libdevice_paths(),
             'cudalib_dir': _get_cudalib_dir(),
             'static_cudalib_dir': _get_static_cudalib_dir(),
+            'include_dir': _get_include_dir(),
         }
         # Cache result
         get_cuda_paths._cached_result = d
@@ -256,3 +259,68 @@ def get_debian_pkg_libdevice():
     if not os.path.exists(pkg_libdevice_location):
         return None
     return pkg_libdevice_location
+def get_current_cuda_target_name():
+    """Determine conda's CTK target folder based on system and machine arch.
+    CTK's conda package delivers headers based on its architecture type. For example,
+    `x86_64` machine places header under `$CONDA_PREFIX/targets/x86_64-linux`, and
+    `aarch64` places under `$CONDA_PREFIX/targets/sbsa-linux`. Read more about the
+    nuances at cudart's conda feedstock:
+    https://github.com/conda-forge/cuda-cudart-feedstock/blob/main/recipe/meta.yaml#L8-L11  # noqa: E501
+    """
+    system = platform.system()
+    machine = platform.machine()
+    if system == "Linux":
+        arch_to_targets = {
+            'x86_64': 'x86_64-linux',
+            'aarch64': 'sbsa-linux'
+        }
+    elif system == "Windows":
+        arch_to_targets = {
+            'AMD64': 'x64',
+        }
+    else:
+        arch_to_targets = {}
+    return arch_to_targets.get(machine, None)
+def get_conda_include_dir():
+    """
+    Return the include directory in the current conda environment, if one
+    is active and it exists.
+    """
+    is_conda_env = os.path.exists(os.path.join(sys.prefix, 'conda-meta'))
+    if not is_conda_env:
+        return
+    if platform.system() == "Windows":
+        include_dir = os.path.join(
+            sys.prefix, 'Library', 'include'
+        )
+    elif target_name := get_current_cuda_target_name():
+        include_dir = os.path.join(
+            sys.prefix, 'targets', target_name, 'include'
+        )
+    else:
+        # A fallback when target cannot determined
+        # though usually it shouldn't.
+        include_dir = os.path.join(sys.prefix, 'include')
+    if os.path.exists(include_dir):
+        return include_dir
+    return
+def _get_include_dir():
+    """Find the root include directory."""
+    options = [
+        ('Conda environment (NVIDIA package)', get_conda_include_dir()),
+        ('CUDA_INCLUDE_PATH Config Entry', config.CUDA_INCLUDE_PATH),
+        # TODO: add others
+    ]
+    by, include_dir = _find_valid_path(options)
+    return _env_path_tuple(by, include_dir)

numba_cuda/numba/cuda/cudadrv/devicearray.py CHANGED Viewed

@@ -876,7 +876,10 @@ def auto_device(obj, stream=0, copy=True, user_explicit=False):
             sentry_contiguous(obj)
             devobj = from_array_like(obj, stream=stream)
         if copy:
-            if config.CUDA_WARN_ON_IMPLICIT_COPY:
+            if (
+                config.CUDA_WARN_ON_IMPLICIT_COPY
+                and not config.DISABLE_PERFORMANCE_WARNINGS
+            ):
                 if (
                     not user_explicit and
                     (not isinstance(obj, DeviceNDArray)

numba_cuda/numba/cuda/cudadrv/driver.py CHANGED Viewed

@@ -10,7 +10,6 @@ subsequent deallocation could further corrupt the CUDA context and causes the
 system to freeze in some cases.
 """
 import sys
 import os
 import ctypes
@@ -19,6 +18,7 @@ import functools
 import warnings
 import logging
 import threading
+import traceback
 import asyncio
 import pathlib
 from itertools import product
@@ -35,6 +35,8 @@ from numba.core import utils, serialize, config
 from .error import CudaSupportError, CudaDriverError
 from .drvapi import API_PROTOTYPES
 from .drvapi import cu_occupancy_b2d_size, cu_stream_callback_pyobj, cu_uuid
+from .mappings import FILE_EXTENSION_MAP
+from .linkable_code import LinkableCode
 from numba.cuda.cudadrv import enums, drvapi, nvrtc
 USE_NV_BINDING = config.CUDA_USE_NVIDIA_BINDING
@@ -56,6 +58,52 @@ _py_decref.argtypes = [ctypes.py_object]
 _py_incref.argtypes = [ctypes.py_object]
+def _readenv(name, ctor, default):
+    value = os.environ.get(name)
+    if value is None:
+        return default() if callable(default) else default
+    try:
+        if ctor is bool:
+            return value.lower() in {'1', "true"}
+        return ctor(value)
+    except Exception:
+        warnings.warn(
+            f"Environment variable '{name}' is defined but its associated "
+            f"value '{value}' could not be parsed.\n"
+            "The parse failed with exception:\n"
+            f"{traceback.format_exc()}",
+            RuntimeWarning
+        )
+        return default
+_MVC_ERROR_MESSAGE = (
+    "Minor version compatibility requires ptxcompiler and cubinlinker packages "
+    "to be available"
+)
+ENABLE_PYNVJITLINK = (
+    _readenv("NUMBA_CUDA_ENABLE_PYNVJITLINK", bool, False)
+    or getattr(config, "CUDA_ENABLE_PYNVJITLINK", False)
+)
+if not hasattr(config, "CUDA_ENABLE_PYNVJITLINK"):
+    config.CUDA_ENABLE_PYNVJITLINK = ENABLE_PYNVJITLINK
+if ENABLE_PYNVJITLINK:
+    try:
+        from pynvjitlink.api import NvJitLinker, NvJitLinkError
+    except ImportError:
+        raise ImportError(
+            "Using pynvjitlink requires the pynvjitlink package to be available"
+        )
+    if config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY:
+        raise ValueError(
+            "Can't set CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY and "
+            "CUDA_ENABLE_PYNVJITLINK at the same time"
+        )
 def make_logger():
     logger = logging.getLogger(__name__)
     # is logging configured?
@@ -432,7 +480,7 @@ class Driver(object):
     def get_version(self):
         """
-        Returns the CUDA Runtime version as a tuple (major, minor).
+        Returns the CUDA Driver version as a tuple (major, minor).
         """
         if USE_NV_BINDING:
             version = driver.cuDriverGetVersion()
@@ -2546,38 +2594,47 @@ def launch_kernel(cufunc_handle,
                               extra)
-if USE_NV_BINDING:
-    jitty = binding.CUjitInputType
-    FILE_EXTENSION_MAP = {
-        'o': jitty.CU_JIT_INPUT_OBJECT,
-        'ptx': jitty.CU_JIT_INPUT_PTX,
-        'a': jitty.CU_JIT_INPUT_LIBRARY,
-        'lib': jitty.CU_JIT_INPUT_LIBRARY,
-        'cubin': jitty.CU_JIT_INPUT_CUBIN,
-        'fatbin': jitty.CU_JIT_INPUT_FATBINARY,
-    }
-else:
-    FILE_EXTENSION_MAP = {
-        'o': enums.CU_JIT_INPUT_OBJECT,
-        'ptx': enums.CU_JIT_INPUT_PTX,
-        'a': enums.CU_JIT_INPUT_LIBRARY,
-        'lib': enums.CU_JIT_INPUT_LIBRARY,
-        'cubin': enums.CU_JIT_INPUT_CUBIN,
-        'fatbin': enums.CU_JIT_INPUT_FATBINARY,
-    }
 class Linker(metaclass=ABCMeta):
     """Abstract base class for linkers"""
     @classmethod
-    def new(cls, max_registers=0, lineinfo=False, cc=None):
-        if config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY:
-            return MVCLinker(max_registers, lineinfo, cc)
-        elif USE_NV_BINDING:
-            return CudaPythonLinker(max_registers, lineinfo, cc)
+    def new(cls,
+            max_registers=0,
+            lineinfo=False,
+            cc=None,
+            lto=None,
+            additional_flags=None
+            ):
+        driver_ver = driver.get_version()
+        if (
+            config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY
+            and driver_ver >= (12, 0)
+        ):
+            raise ValueError(
+                "Use CUDA_ENABLE_PYNVJITLINK for CUDA >= 12.0 MVC"
+            )
+        if config.CUDA_ENABLE_PYNVJITLINK and driver_ver < (12, 0):
+            raise ValueError(
+                "Enabling pynvjitlink requires CUDA 12."
+            )
+        if config.CUDA_ENABLE_PYNVJITLINK:
+            linker = PyNvJitLinker
+        elif config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY:
+            linker = MVCLinker
+        else:
+            if USE_NV_BINDING:
+                linker = CudaPythonLinker
+            else:
+                linker = CtypesLinker
+        if linker is PyNvJitLinker:
+            return linker(max_registers, lineinfo, cc, lto, additional_flags)
+        elif additional_flags or lto:
+            raise ValueError("LTO and additional flags require PyNvJitLinker")
         else:
-            return CtypesLinker(max_registers, lineinfo, cc)
+            return linker(max_registers, lineinfo, cc)
     @abstractmethod
     def __init__(self, max_registers, lineinfo, cc):
@@ -2626,19 +2683,42 @@ class Linker(metaclass=ABCMeta):
             cu = f.read()
         self.add_cu(cu, os.path.basename(path))
-    def add_file_guess_ext(self, path):
-        """Add a file to the link, guessing its type from its extension."""
-        ext = os.path.splitext(path)[1][1:]
-        if ext == '':
-            raise RuntimeError("Don't know how to link file with no extension")
-        elif ext == 'cu':
-            self.add_cu_file(path)
+    def add_file_guess_ext(self, path_or_code):
+        """
+        Add a file or LinkableCode object to the link. If a file is
+        passed, the type will be inferred from the extension. A LinkableCode
+        object represents a file already in memory.
+        """
+        if isinstance(path_or_code, str):
+            ext = pathlib.Path(path_or_code).suffix
+            if ext == '':
+                raise RuntimeError(
+                    "Don't know how to link file with no extension"
+                )
+            elif ext == '.cu':
+                self.add_cu_file(path_or_code)
+            else:
+                kind = FILE_EXTENSION_MAP.get(ext.lstrip('.'), None)
+                if kind is None:
+                    raise RuntimeError(
+                        "Don't know how to link file with extension "
+                        f"{ext}"
+                    )
+                self.add_file(path_or_code, kind)
+            return
         else:
-            kind = FILE_EXTENSION_MAP.get(ext, None)
-            if kind is None:
-                raise RuntimeError("Don't know how to link file with extension "
-                                   f".{ext}")
-            self.add_file(path, kind)
+            # Otherwise, we should have been given a LinkableCode object
+            if not isinstance(path_or_code, LinkableCode):
+                raise TypeError(
+                    "Expected path to file or a LinkableCode object"
+                )
+            if path_or_code.kind == "cu":
+                self.add_cu(path_or_code.data, path_or_code.name)
+            else:
+                self.add_data(
+                    path_or_code.data, path_or_code.kind, path_or_code.name
+                )
     @abstractmethod
     def complete(self):
@@ -2649,12 +2729,6 @@ class Linker(metaclass=ABCMeta):
         """
-_MVC_ERROR_MESSAGE = (
-    "Minor version compatibility requires ptxcompiler and cubinlinker packages "
-    "to be available"
-)
 class MVCLinker(Linker):
     """
     Linker supporting Minor Version Compatibility, backed by the cubinlinker
@@ -2930,6 +3004,94 @@ class CudaPythonLinker(Linker):
         return bytes(np.ctypeslib.as_array(cubin_ptr, shape=(size,)))
+class PyNvJitLinker(Linker):
+    def __init__(
+        self,
+        max_registers=None,
+        lineinfo=False,
+        cc=None,
+        lto=False,
+        additional_flags=None,
+    ):
+        if cc is None:
+            raise RuntimeError("PyNvJitLinker requires CC to be specified")
+        if not any(isinstance(cc, t) for t in [list, tuple]):
+            raise TypeError("`cc` must be a list or tuple of length 2")
+        sm_ver = f"{cc[0] * 10 + cc[1]}"
+        arch = f"-arch=sm_{sm_ver}"
+        options = [arch]
+        if max_registers:
+            options.append(f"-maxrregcount={max_registers}")
+        if lineinfo:
+            options.append("-lineinfo")
+        if lto:
+            options.append("-lto")
+        if additional_flags is not None:
+            options.extend(additional_flags)
+        self._linker = NvJitLinker(*options)
+        self.lto = lto
+        self.options = options
+    @property
+    def info_log(self):
+        return self._linker.info_log
+    @property
+    def error_log(self):
+        return self._linker.error_log
+    def add_ptx(self, ptx, name="<cudapy-ptx>"):
+        self._linker.add_ptx(ptx, name)
+    def add_fatbin(self, fatbin, name="<external-fatbin>"):
+        self._linker.add_fatbin(fatbin, name)
+    def add_ltoir(self, ltoir, name="<external-ltoir>"):
+        self._linker.add_ltoir(ltoir, name)
+    def add_object(self, obj, name="<external-object>"):
+        self._linker.add_object(obj, name)
+    def add_file(self, path, kind):
+        try:
+            with open(path, "rb") as f:
+                data = f.read()
+        except FileNotFoundError:
+            raise LinkerError(f"{path} not found")
+        name = pathlib.Path(path).name
+        self.add_data(data, kind, name)
+    def add_data(self, data, kind, name):
+        if kind == FILE_EXTENSION_MAP["cubin"]:
+            fn = self._linker.add_cubin
+        elif kind == FILE_EXTENSION_MAP["fatbin"]:
+            fn = self._linker.add_fatbin
+        elif kind == FILE_EXTENSION_MAP["a"]:
+            fn = self._linker.add_library
+        elif kind == FILE_EXTENSION_MAP["ptx"]:
+            return self.add_ptx(data, name)
+        elif kind == FILE_EXTENSION_MAP["o"]:
+            fn = self._linker.add_object
+        elif kind == FILE_EXTENSION_MAP["ltoir"]:
+            fn = self._linker.add_ltoir
+        else:
+            raise LinkerError(f"Don't know how to link {kind}")
+        try:
+            fn(data, name)
+        except NvJitLinkError as e:
+            raise LinkerError from e
+    def complete(self):
+        try:
+            return self._linker.get_linked_cubin()
+        except NvJitLinkError as e:
+            raise LinkerError from e
 # -----------------------------------------------------------------------------

numba_cuda/numba/cuda/cudadrv/enums.py CHANGED Viewed

@@ -309,6 +309,9 @@ CU_JIT_INPUT_OBJECT = 3
 # Applicable options: PTX compiler options, ::CU_JIT_FALLBACK_STRATEGY
 CU_JIT_INPUT_LIBRARY = 4
+# LTO IR
+CU_JIT_INPUT_NVVM = 5
 CU_JIT_NUM_INPUT_TYPES = 6

numba_cuda/numba/cuda/cudadrv/libs.py CHANGED Viewed

@@ -18,6 +18,7 @@ from numba.misc.findlib import find_lib
 from numba.cuda.cuda_paths import get_cuda_paths
 from numba.cuda.cudadrv.driver import locate_driver_and_loader, load_driver
 from numba.cuda.cudadrv.error import CudaSupportError
+from numba.core import config
 if sys.platform == 'win32':
@@ -60,6 +61,24 @@ def get_cudalib(lib, static=False):
     return max(candidates) if candidates else namepattern % lib
+def get_cuda_include_dir():
+    """
+    Find the path to cuda include dir based on a list of default locations.
+    Note that this does not list the `CUDA_INCLUDE_PATH` entry in user
+    configuration.
+    """
+    return get_cuda_paths()['include_dir'].info
+def check_cuda_include_dir(path):
+    if path is None or not os.path.exists(path):
+        raise FileNotFoundError(f"{path} not found")
+    if not os.path.exists(os.path.join(path, "cuda_runtime.h")):
+        raise FileNotFoundError(f"Unable to find cuda_runtime.h from {path}")
 def open_cudalib(lib):
     path = get_cudalib(lib)
     return ctypes.CDLL(path)
@@ -75,6 +94,8 @@ def _get_source_variable(lib, static=False):
         return get_cuda_paths()['nvvm'].by
     elif lib == 'libdevice':
         return get_cuda_paths()['libdevice'].by
+    elif lib == 'include_dir':
+        return get_cuda_paths()['include_dir'].by
     else:
         dir_type = 'static_cudalib_dir' if static else 'cudalib_dir'
         return get_cuda_paths()[dir_type].by
@@ -173,4 +194,21 @@ def test():
         print('\tERROR: failed to find %s:\n%s' % (lib, e))
         failed = True
+    # Check cuda include paths
+    print("Include directory configuration variable:")
+    print(f"\tCUDA_INCLUDE_PATH={config.CUDA_INCLUDE_PATH}")
+    where = _get_source_variable('include_dir')
+    print(f'Finding include directory from {where}')
+    include = get_cuda_include_dir()
+    print('\tLocated at', include)
+    try:
+        print('\tChecking include directory', end='...')
+        check_cuda_include_dir(include)
+        print('\tok')
+    except FileNotFoundError as e:
+        print('\tERROR: failed to find cuda include directory:\n%s' % e)
+        failed = True
     return not failed

numba_cuda/numba/cuda/cudadrv/linkable_code.py ADDED Viewed

@@ -0,0 +1,63 @@
+from .mappings import FILE_EXTENSION_MAP
+class LinkableCode:
+    """An object that can be passed in the `link` list argument to `@cuda.jit`
+    kernels to supply code to be linked from memory."""
+    def __init__(self, data, name=None):
+        self.data = data
+        self._name = name
+    @property
+    def name(self):
+        return self._name or self.default_name
+class PTXSource(LinkableCode):
+    """PTX Source code in memory"""
+    kind = FILE_EXTENSION_MAP["ptx"]
+    default_name = "<unnamed-ptx>"
+class CUSource(LinkableCode):
+    """CUDA C/C++ Source code in memory"""
+    kind = "cu"
+    default_name = "<unnamed-cu>"
+class Fatbin(LinkableCode):
+    """A fatbin ELF in memory"""
+    kind = FILE_EXTENSION_MAP["fatbin"]
+    default_name = "<unnamed-fatbin>"
+class Cubin(LinkableCode):
+    """A cubin ELF in memory"""
+    kind = FILE_EXTENSION_MAP["cubin"]
+    default_name = "<unnamed-cubin>"
+class Archive(LinkableCode):
+    """An archive of objects in memory"""
+    kind = FILE_EXTENSION_MAP["a"]
+    default_name = "<unnamed-archive>"
+class Object(LinkableCode):
+    """An object file in memory"""
+    kind = FILE_EXTENSION_MAP["o"]
+    default_name = "<unnamed-object>"
+class LTOIR(LinkableCode):
+    """An LTOIR file in memory"""
+    kind = "ltoir"
+    default_name = "<unnamed-ltoir>"

numba_cuda/numba/cuda/cudadrv/mappings.py ADDED Viewed

@@ -0,0 +1,24 @@
+from numba import config
+from . import enums
+if config.CUDA_USE_NVIDIA_BINDING:
+    from cuda import cuda
+    jitty = cuda.CUjitInputType
+    FILE_EXTENSION_MAP = {
+        'o': jitty.CU_JIT_INPUT_OBJECT,
+        'ptx': jitty.CU_JIT_INPUT_PTX,
+        'a': jitty.CU_JIT_INPUT_LIBRARY,
+        'lib': jitty.CU_JIT_INPUT_LIBRARY,
+        'cubin': jitty.CU_JIT_INPUT_CUBIN,
+        'fatbin': jitty.CU_JIT_INPUT_FATBINARY,
+        'ltoir': jitty.CU_JIT_INPUT_NVVM,
+    }
+else:
+    FILE_EXTENSION_MAP = {
+        'o': enums.CU_JIT_INPUT_OBJECT,
+        'ptx': enums.CU_JIT_INPUT_PTX,
+        'a': enums.CU_JIT_INPUT_LIBRARY,
+        'lib': enums.CU_JIT_INPUT_LIBRARY,
+        'cubin': enums.CU_JIT_INPUT_CUBIN,
+        'fatbin': enums.CU_JIT_INPUT_FATBINARY,
+        'ltoir': enums.CU_JIT_INPUT_NVVM,
+    }

numba_cuda/numba/cuda/cudadrv/nvrtc.py CHANGED Viewed

@@ -1,9 +1,8 @@
 from ctypes import byref, c_char, c_char_p, c_int, c_size_t, c_void_p, POINTER
 from enum import IntEnum
-from numba.core import config
 from numba.cuda.cudadrv.error import (NvrtcError, NvrtcCompilationError,
                                       NvrtcSupportError)
+from numba.cuda.cuda_paths import get_cuda_paths
 import functools
 import os
 import threading
@@ -233,12 +232,18 @@ def compile(src, name, cc):
     #   being optimized away.
     major, minor = cc
     arch = f'--gpu-architecture=compute_{major}{minor}'
-    include = f'-I{config.CUDA_INCLUDE_PATH}'
+    cuda_include = [
+        f"-I{get_cuda_paths()['include_dir'].info}",
+    ]
     cudadrv_path = os.path.dirname(os.path.abspath(__file__))
     numba_cuda_path = os.path.dirname(cudadrv_path)
     numba_include = f'-I{numba_cuda_path}'
-    options = [arch, include, numba_include, '-rdc', 'true']
+    options = [arch, *cuda_include, numba_include, '-rdc', 'true']
+    if nvrtc.get_version() < (12, 0):
+        options += ["-std=c++17"]
     # Compile the program
     compile_error = nvrtc.compile_program(program, options)

numba_cuda/numba/cuda/device_init.py CHANGED Viewed

@@ -31,6 +31,9 @@ from .intrinsic_wrapper import (all_sync, any_sync, eq_sync, ballot_sync,
                                 shfl_xor_sync)
 from .kernels import reduction
+from numba.cuda.cudadrv.linkable_code import (
+    Archive, CUSource, Cubin, Fatbin, LinkableCode, LTOIR, Object, PTXSource
+)
 reduce = Reduce = reduction.Reduce

numba-cuda 0.0.17__py3-none-any.whl → 0.0.19__py3-none-any.whl

numba-cuda 0.0.17__py3-none-any.whl → 0.0.19__py3-none-any.whl