numba-cuda 0.0.17__py3-none-any.whl → 0.0.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. numba_cuda/VERSION +1 -1
  2. numba_cuda/numba/cuda/codegen.py +15 -3
  3. numba_cuda/numba/cuda/cuda_paths.py +68 -0
  4. numba_cuda/numba/cuda/cudadrv/devicearray.py +4 -1
  5. numba_cuda/numba/cuda/cudadrv/driver.py +209 -47
  6. numba_cuda/numba/cuda/cudadrv/enums.py +3 -0
  7. numba_cuda/numba/cuda/cudadrv/libs.py +38 -0
  8. numba_cuda/numba/cuda/cudadrv/linkable_code.py +63 -0
  9. numba_cuda/numba/cuda/cudadrv/mappings.py +24 -0
  10. numba_cuda/numba/cuda/cudadrv/nvrtc.py +9 -4
  11. numba_cuda/numba/cuda/device_init.py +3 -0
  12. numba_cuda/numba/cuda/dispatcher.py +48 -8
  13. numba_cuda/numba/cuda/intrinsics.py +6 -1
  14. numba_cuda/numba/cuda/runtime/nrt.cu +190 -0
  15. numba_cuda/numba/cuda/simulator/api.py +14 -0
  16. numba_cuda/numba/cuda/target.py +8 -2
  17. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +199 -0
  18. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +44 -4
  19. numba_cuda/numba/cuda/tests/cudapy/test_print.py +2 -2
  20. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +48 -0
  21. numba_cuda/numba/cuda/tests/nrt/__init__.py +8 -0
  22. numba_cuda/numba/cuda/tests/nrt/mock_numpy.py +42 -0
  23. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +110 -0
  24. numba_cuda/numba/cuda/tests/test_binary_generation/Makefile +51 -0
  25. numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +170 -0
  26. numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu +19 -0
  27. numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu +3 -0
  28. {numba_cuda-0.0.17.dist-info → numba_cuda-0.0.19.dist-info}/METADATA +1 -1
  29. {numba_cuda-0.0.17.dist-info → numba_cuda-0.0.19.dist-info}/RECORD +32 -20
  30. {numba_cuda-0.0.17.dist-info → numba_cuda-0.0.19.dist-info}/WHEEL +1 -1
  31. {numba_cuda-0.0.17.dist-info → numba_cuda-0.0.19.dist-info}/LICENSE +0 -0
  32. {numba_cuda-0.0.17.dist-info → numba_cuda-0.0.19.dist-info}/top_level.txt +0 -0
numba_cuda/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.17
1
+ 0.0.19
@@ -59,8 +59,15 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
59
59
  get_cufunc), which may be of different compute capabilities.
60
60
  """
61
61
 
62
- def __init__(self, codegen, name, entry_name=None, max_registers=None,
63
- nvvm_options=None):
62
+ def __init__(
63
+ self,
64
+ codegen,
65
+ name,
66
+ entry_name=None,
67
+ max_registers=None,
68
+ lto=False,
69
+ nvvm_options=None
70
+ ):
64
71
  """
65
72
  codegen:
66
73
  Codegen object.
@@ -71,6 +78,8 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
71
78
  kernel and not a device function.
72
79
  max_registers:
73
80
  The maximum register usage to aim for when linking.
81
+ lto:
82
+ Whether to enable link-time optimization.
74
83
  nvvm_options:
75
84
  Dict of options to pass to NVVM.
76
85
  """
@@ -103,6 +112,7 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
103
112
  self._cufunc_cache = {}
104
113
 
105
114
  self._max_registers = max_registers
115
+ self._lto = lto
106
116
  if nvvm_options is None:
107
117
  nvvm_options = {}
108
118
  self._nvvm_options = nvvm_options
@@ -178,7 +188,9 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
178
188
  if cubin:
179
189
  return cubin
180
190
 
181
- linker = driver.Linker.new(max_registers=self._max_registers, cc=cc)
191
+ linker = driver.Linker.new(
192
+ max_registers=self._max_registers, cc=cc, lto=self._lto
193
+ )
182
194
 
183
195
  if linker.lto:
184
196
  ltoir = self.get_ltoir(cc=cc)
@@ -2,9 +2,11 @@ import sys
2
2
  import re
3
3
  import os
4
4
  from collections import namedtuple
5
+ import platform
5
6
 
6
7
  from numba.core.config import IS_WIN32
7
8
  from numba.misc.findlib import find_lib, find_file
9
+ from numba import config
8
10
 
9
11
 
10
12
  _env_path_tuple = namedtuple('_env_path_tuple', ['by', 'info'])
@@ -241,6 +243,7 @@ def get_cuda_paths():
241
243
  'libdevice': _get_libdevice_paths(),
242
244
  'cudalib_dir': _get_cudalib_dir(),
243
245
  'static_cudalib_dir': _get_static_cudalib_dir(),
246
+ 'include_dir': _get_include_dir(),
244
247
  }
245
248
  # Cache result
246
249
  get_cuda_paths._cached_result = d
@@ -256,3 +259,68 @@ def get_debian_pkg_libdevice():
256
259
  if not os.path.exists(pkg_libdevice_location):
257
260
  return None
258
261
  return pkg_libdevice_location
262
+
263
+
264
def get_current_cuda_target_name():
    """Return conda's CTK ``targets`` folder name for the running host.

    The CTK conda package installs its headers in an architecture-specific
    folder: an `x86_64` Linux machine uses
    `$CONDA_PREFIX/targets/x86_64-linux`, while an `aarch64` Linux machine
    uses `$CONDA_PREFIX/targets/sbsa-linux`. See cudart's conda feedstock for
    the details:
    https://github.com/conda-forge/cuda-cudart-feedstock/blob/main/recipe/meta.yaml#L8-L11 # noqa: E501

    Returns None when the system / machine combination is not in the table.
    """
    # Outer key: platform.system(); inner key: platform.machine().
    targets_by_system = {
        "Linux": {
            'x86_64': 'x86_64-linux',
            'aarch64': 'sbsa-linux',
        },
        "Windows": {
            'AMD64': 'x64',
        },
    }

    host_system = platform.system()
    host_machine = platform.machine()

    return targets_by_system.get(host_system, {}).get(host_machine)
289
+
290
+
291
def get_conda_include_dir():
    """
    Return the include directory in the current conda environment, if one
    is active and it exists.

    The environment is treated as a conda environment when a ``conda-meta``
    entry exists under ``sys.prefix``. Returns None when that entry is
    missing or when the candidate include path is not a directory.
    """
    is_conda_env = os.path.exists(os.path.join(sys.prefix, 'conda-meta'))
    if not is_conda_env:
        return None

    if platform.system() == "Windows":
        include_dir = os.path.join(
            sys.prefix, 'Library', 'include'
        )
    elif target_name := get_current_cuda_target_name():
        include_dir = os.path.join(
            sys.prefix, 'targets', target_name, 'include'
        )
    else:
        # Fallback for when the target cannot be determined, which should
        # not usually happen.
        include_dir = os.path.join(sys.prefix, 'include')

    # Require a directory: a regular file with this name is not a usable
    # include path.
    if os.path.isdir(include_dir):
        return include_dir
    return None
316
+
317
+
318
def _get_include_dir():
    """Find the root include directory.

    Builds the list of (description, path) candidates, hands it to
    `_find_valid_path`, and wraps the selected pair in an `_env_path_tuple`.
    """
    candidates = []
    candidates.append(
        ('Conda environment (NVIDIA package)', get_conda_include_dir())
    )
    candidates.append(
        ('CUDA_INCLUDE_PATH Config Entry', config.CUDA_INCLUDE_PATH)
    )
    # TODO: add others

    found_by, found_path = _find_valid_path(candidates)
    return _env_path_tuple(found_by, found_path)
@@ -876,7 +876,10 @@ def auto_device(obj, stream=0, copy=True, user_explicit=False):
876
876
  sentry_contiguous(obj)
877
877
  devobj = from_array_like(obj, stream=stream)
878
878
  if copy:
879
- if config.CUDA_WARN_ON_IMPLICIT_COPY:
879
+ if (
880
+ config.CUDA_WARN_ON_IMPLICIT_COPY
881
+ and not config.DISABLE_PERFORMANCE_WARNINGS
882
+ ):
880
883
  if (
881
884
  not user_explicit and
882
885
  (not isinstance(obj, DeviceNDArray)
@@ -10,7 +10,6 @@ subsequent deallocation could further corrupt the CUDA context and causes the
10
10
  system to freeze in some cases.
11
11
 
12
12
  """
13
-
14
13
  import sys
15
14
  import os
16
15
  import ctypes
@@ -19,6 +18,7 @@ import functools
19
18
  import warnings
20
19
  import logging
21
20
  import threading
21
+ import traceback
22
22
  import asyncio
23
23
  import pathlib
24
24
  from itertools import product
@@ -35,6 +35,8 @@ from numba.core import utils, serialize, config
35
35
  from .error import CudaSupportError, CudaDriverError
36
36
  from .drvapi import API_PROTOTYPES
37
37
  from .drvapi import cu_occupancy_b2d_size, cu_stream_callback_pyobj, cu_uuid
38
+ from .mappings import FILE_EXTENSION_MAP
39
+ from .linkable_code import LinkableCode
38
40
  from numba.cuda.cudadrv import enums, drvapi, nvrtc
39
41
 
40
42
  USE_NV_BINDING = config.CUDA_USE_NVIDIA_BINDING
@@ -56,6 +58,52 @@ _py_decref.argtypes = [ctypes.py_object]
56
58
  _py_incref.argtypes = [ctypes.py_object]
57
59
 
58
60
 
61
def _readenv(name, ctor, default):
    """Read environment variable `name` and convert it with `ctor`.

    Parameters
    ----------
    name:
        Name of the environment variable.
    ctor:
        Callable applied to the raw string value. When it is `bool`, the
        value is true only if it equals '1' or 'true' (case-insensitive).
    default:
        Fallback value, or a zero-argument callable producing it.

    Returns
    -------
    The converted value. The fallback is returned when the variable is not
    set, and also when conversion raises; in the latter case a
    `RuntimeWarning` describing the failure is emitted.
    """
    def _fallback():
        # Resolve the default the same way on every path, so a callable
        # default is never handed back uninvoked.
        return default() if callable(default) else default

    value = os.environ.get(name)
    if value is None:
        return _fallback()
    try:
        if ctor is bool:
            return value.lower() in {'1', "true"}
        return ctor(value)
    except Exception:
        warnings.warn(
            f"Environment variable '{name}' is defined but its associated "
            f"value '{value}' could not be parsed.\n"
            "The parse failed with exception:\n"
            f"{traceback.format_exc()}",
            RuntimeWarning
        )
        return _fallback()
78
+
79
+
80
_MVC_ERROR_MESSAGE = (
    "Minor version compatibility requires ptxcompiler and cubinlinker packages "
    "to be available"
)

# pynvjitlink is enabled when either the NUMBA_CUDA_ENABLE_PYNVJITLINK
# environment variable is truthy or config already carries a truthy
# CUDA_ENABLE_PYNVJITLINK attribute.
ENABLE_PYNVJITLINK = (
    _readenv("NUMBA_CUDA_ENABLE_PYNVJITLINK", bool, False)
    or getattr(config, "CUDA_ENABLE_PYNVJITLINK", False)
)
# Publish the setting on config only when config does not define it yet.
if not hasattr(config, "CUDA_ENABLE_PYNVJITLINK"):
    config.CUDA_ENABLE_PYNVJITLINK = ENABLE_PYNVJITLINK

if ENABLE_PYNVJITLINK:
    try:
        from pynvjitlink.api import NvJitLinker, NvJitLinkError
    except ImportError as err:
        # Chain explicitly so the underlying import failure stays attached
        # as the cause of the friendlier message.
        raise ImportError(
            "Using pynvjitlink requires the pynvjitlink package to be available"
        ) from err

    # The two linking modes are mutually exclusive.
    if config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY:
        raise ValueError(
            "Can't set CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY and "
            "CUDA_ENABLE_PYNVJITLINK at the same time"
        )
105
+
106
+
59
107
  def make_logger():
60
108
  logger = logging.getLogger(__name__)
61
109
  # is logging configured?
@@ -432,7 +480,7 @@ class Driver(object):
432
480
 
433
481
  def get_version(self):
434
482
  """
435
- Returns the CUDA Runtime version as a tuple (major, minor).
483
+ Returns the CUDA Driver version as a tuple (major, minor).
436
484
  """
437
485
  if USE_NV_BINDING:
438
486
  version = driver.cuDriverGetVersion()
@@ -2546,38 +2594,47 @@ def launch_kernel(cufunc_handle,
2546
2594
  extra)
2547
2595
 
2548
2596
 
2549
- if USE_NV_BINDING:
2550
- jitty = binding.CUjitInputType
2551
- FILE_EXTENSION_MAP = {
2552
- 'o': jitty.CU_JIT_INPUT_OBJECT,
2553
- 'ptx': jitty.CU_JIT_INPUT_PTX,
2554
- 'a': jitty.CU_JIT_INPUT_LIBRARY,
2555
- 'lib': jitty.CU_JIT_INPUT_LIBRARY,
2556
- 'cubin': jitty.CU_JIT_INPUT_CUBIN,
2557
- 'fatbin': jitty.CU_JIT_INPUT_FATBINARY,
2558
- }
2559
- else:
2560
- FILE_EXTENSION_MAP = {
2561
- 'o': enums.CU_JIT_INPUT_OBJECT,
2562
- 'ptx': enums.CU_JIT_INPUT_PTX,
2563
- 'a': enums.CU_JIT_INPUT_LIBRARY,
2564
- 'lib': enums.CU_JIT_INPUT_LIBRARY,
2565
- 'cubin': enums.CU_JIT_INPUT_CUBIN,
2566
- 'fatbin': enums.CU_JIT_INPUT_FATBINARY,
2567
- }
2568
-
2569
-
2570
2597
  class Linker(metaclass=ABCMeta):
2571
2598
  """Abstract base class for linkers"""
2572
2599
 
2573
2600
  @classmethod
2574
- def new(cls, max_registers=0, lineinfo=False, cc=None):
2575
- if config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY:
2576
- return MVCLinker(max_registers, lineinfo, cc)
2577
- elif USE_NV_BINDING:
2578
- return CudaPythonLinker(max_registers, lineinfo, cc)
2601
+ def new(cls,
2602
+ max_registers=0,
2603
+ lineinfo=False,
2604
+ cc=None,
2605
+ lto=None,
2606
+ additional_flags=None
2607
+ ):
2608
+
2609
+ driver_ver = driver.get_version()
2610
+ if (
2611
+ config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY
2612
+ and driver_ver >= (12, 0)
2613
+ ):
2614
+ raise ValueError(
2615
+ "Use CUDA_ENABLE_PYNVJITLINK for CUDA >= 12.0 MVC"
2616
+ )
2617
+ if config.CUDA_ENABLE_PYNVJITLINK and driver_ver < (12, 0):
2618
+ raise ValueError(
2619
+ "Enabling pynvjitlink requires CUDA 12."
2620
+ )
2621
+ if config.CUDA_ENABLE_PYNVJITLINK:
2622
+ linker = PyNvJitLinker
2623
+
2624
+ elif config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY:
2625
+ linker = MVCLinker
2626
+ else:
2627
+ if USE_NV_BINDING:
2628
+ linker = CudaPythonLinker
2629
+ else:
2630
+ linker = CtypesLinker
2631
+
2632
+ if linker is PyNvJitLinker:
2633
+ return linker(max_registers, lineinfo, cc, lto, additional_flags)
2634
+ elif additional_flags or lto:
2635
+ raise ValueError("LTO and additional flags require PyNvJitLinker")
2579
2636
  else:
2580
- return CtypesLinker(max_registers, lineinfo, cc)
2637
+ return linker(max_registers, lineinfo, cc)
2581
2638
 
2582
2639
  @abstractmethod
2583
2640
  def __init__(self, max_registers, lineinfo, cc):
@@ -2626,19 +2683,42 @@ class Linker(metaclass=ABCMeta):
2626
2683
  cu = f.read()
2627
2684
  self.add_cu(cu, os.path.basename(path))
2628
2685
 
2629
- def add_file_guess_ext(self, path):
2630
- """Add a file to the link, guessing its type from its extension."""
2631
- ext = os.path.splitext(path)[1][1:]
2632
- if ext == '':
2633
- raise RuntimeError("Don't know how to link file with no extension")
2634
- elif ext == 'cu':
2635
- self.add_cu_file(path)
2686
+ def add_file_guess_ext(self, path_or_code):
2687
+ """
2688
+ Add a file or LinkableCode object to the link. If a file is
2689
+ passed, the type will be inferred from the extension. A LinkableCode
2690
+ object represents a file already in memory.
2691
+ """
2692
+ if isinstance(path_or_code, str):
2693
+ ext = pathlib.Path(path_or_code).suffix
2694
+ if ext == '':
2695
+ raise RuntimeError(
2696
+ "Don't know how to link file with no extension"
2697
+ )
2698
+ elif ext == '.cu':
2699
+ self.add_cu_file(path_or_code)
2700
+ else:
2701
+ kind = FILE_EXTENSION_MAP.get(ext.lstrip('.'), None)
2702
+ if kind is None:
2703
+ raise RuntimeError(
2704
+ "Don't know how to link file with extension "
2705
+ f"{ext}"
2706
+ )
2707
+ self.add_file(path_or_code, kind)
2708
+ return
2636
2709
  else:
2637
- kind = FILE_EXTENSION_MAP.get(ext, None)
2638
- if kind is None:
2639
- raise RuntimeError("Don't know how to link file with extension "
2640
- f".{ext}")
2641
- self.add_file(path, kind)
2710
+ # Otherwise, we should have been given a LinkableCode object
2711
+ if not isinstance(path_or_code, LinkableCode):
2712
+ raise TypeError(
2713
+ "Expected path to file or a LinkableCode object"
2714
+ )
2715
+
2716
+ if path_or_code.kind == "cu":
2717
+ self.add_cu(path_or_code.data, path_or_code.name)
2718
+ else:
2719
+ self.add_data(
2720
+ path_or_code.data, path_or_code.kind, path_or_code.name
2721
+ )
2642
2722
 
2643
2723
  @abstractmethod
2644
2724
  def complete(self):
@@ -2649,12 +2729,6 @@ class Linker(metaclass=ABCMeta):
2649
2729
  """
2650
2730
 
2651
2731
 
2652
- _MVC_ERROR_MESSAGE = (
2653
- "Minor version compatibility requires ptxcompiler and cubinlinker packages "
2654
- "to be available"
2655
- )
2656
-
2657
-
2658
2732
  class MVCLinker(Linker):
2659
2733
  """
2660
2734
  Linker supporting Minor Version Compatibility, backed by the cubinlinker
@@ -2930,6 +3004,94 @@ class CudaPythonLinker(Linker):
2930
3004
  return bytes(np.ctypeslib.as_array(cubin_ptr, shape=(size,)))
2931
3005
 
2932
3006
 
3007
class PyNvJitLinker(Linker):
    """Linker that delegates to pynvjitlink's `NvJitLinker`.

    Parameters
    ----------
    max_registers:
        If truthy, passed to the linker as ``-maxrregcount``.
    lineinfo:
        If truthy, adds ``-lineinfo``.
    cc:
        Compute capability as a ``(major, minor)`` list or tuple. Required.
    lto:
        If truthy, adds ``-lto``. The value is also stored on ``self.lto``.
    additional_flags:
        Optional iterable of extra option strings appended verbatim.

    Raises
    ------
    RuntimeError
        If `cc` is None.
    TypeError
        If `cc` is not a list or tuple of length 2.
    """

    def __init__(
        self,
        max_registers=None,
        lineinfo=False,
        cc=None,
        lto=False,
        additional_flags=None,
    ):

        if cc is None:
            raise RuntimeError("PyNvJitLinker requires CC to be specified")
        # Enforce the length as well as the type, as the message promises.
        if not isinstance(cc, (list, tuple)) or len(cc) != 2:
            raise TypeError("`cc` must be a list or tuple of length 2")

        sm_ver = f"{cc[0] * 10 + cc[1]}"
        arch = f"-arch=sm_{sm_ver}"
        options = [arch]
        if max_registers:
            options.append(f"-maxrregcount={max_registers}")
        if lineinfo:
            options.append("-lineinfo")
        if lto:
            options.append("-lto")
        if additional_flags is not None:
            options.extend(additional_flags)

        self._linker = NvJitLinker(*options)
        self.lto = lto
        self.options = options

    @property
    def info_log(self):
        """Info log of the wrapped linker."""
        return self._linker.info_log

    @property
    def error_log(self):
        """Error log of the wrapped linker."""
        return self._linker.error_log

    def add_ptx(self, ptx, name="<cudapy-ptx>"):
        """Add PTX to the link under `name`."""
        self._linker.add_ptx(ptx, name)

    def add_fatbin(self, fatbin, name="<external-fatbin>"):
        """Add a fatbin to the link under `name`."""
        self._linker.add_fatbin(fatbin, name)

    def add_ltoir(self, ltoir, name="<external-ltoir>"):
        """Add LTO-IR to the link under `name`."""
        self._linker.add_ltoir(ltoir, name)

    def add_object(self, obj, name="<external-object>"):
        """Add an object file to the link under `name`."""
        self._linker.add_object(obj, name)

    def add_file(self, path, kind):
        """Read `path` as bytes and add its contents as input of type `kind`.

        Raises `LinkerError` if the file does not exist.
        """
        try:
            with open(path, "rb") as f:
                data = f.read()
        except FileNotFoundError as err:
            raise LinkerError(f"{path} not found") from err

        name = pathlib.Path(path).name
        self.add_data(data, kind, name)

    def add_data(self, data, kind, name):
        """Add in-memory `data` of input type `kind` to the link as `name`.

        `kind` is compared against the values in `FILE_EXTENSION_MAP`. The
        plain string ``"ltoir"`` is accepted as well, because that is the
        `kind` the `LTOIR` linkable-code class carries.

        Raises `LinkerError` for an unrecognised kind or when the wrapped
        linker reports an `NvJitLinkError`.
        """
        if kind == FILE_EXTENSION_MAP["cubin"]:
            fn = self._linker.add_cubin
        elif kind == FILE_EXTENSION_MAP["fatbin"]:
            fn = self._linker.add_fatbin
        elif kind == FILE_EXTENSION_MAP["a"]:
            fn = self._linker.add_library
        elif kind == FILE_EXTENSION_MAP["ptx"]:
            return self.add_ptx(data, name)
        elif kind == FILE_EXTENSION_MAP["o"]:
            fn = self._linker.add_object
        elif kind == FILE_EXTENSION_MAP["ltoir"] or kind == "ltoir":
            fn = self._linker.add_ltoir
        else:
            raise LinkerError(f"Don't know how to link {kind}")

        try:
            fn(data, name)
        except NvJitLinkError as e:
            # Carry the linker's message so the LinkerError is not empty.
            raise LinkerError(str(e)) from e

    def complete(self):
        """Finish the link and return the linked cubin.

        Raises `LinkerError` when the wrapped linker reports an
        `NvJitLinkError`.
        """
        try:
            return self._linker.get_linked_cubin()
        except NvJitLinkError as e:
            raise LinkerError(str(e)) from e
3094
+
2933
3095
  # -----------------------------------------------------------------------------
2934
3096
 
2935
3097
 
@@ -309,6 +309,9 @@ CU_JIT_INPUT_OBJECT = 3
309
309
  # Applicable options: PTX compiler options, ::CU_JIT_FALLBACK_STRATEGY
310
310
  CU_JIT_INPUT_LIBRARY = 4
311
311
 
312
+ # LTO IR
313
+ CU_JIT_INPUT_NVVM = 5
314
+
312
315
  CU_JIT_NUM_INPUT_TYPES = 6
313
316
 
314
317
 
@@ -18,6 +18,7 @@ from numba.misc.findlib import find_lib
18
18
  from numba.cuda.cuda_paths import get_cuda_paths
19
19
  from numba.cuda.cudadrv.driver import locate_driver_and_loader, load_driver
20
20
  from numba.cuda.cudadrv.error import CudaSupportError
21
+ from numba.core import config
21
22
 
22
23
 
23
24
  if sys.platform == 'win32':
@@ -60,6 +61,24 @@ def get_cudalib(lib, static=False):
60
61
  return max(candidates) if candidates else namepattern % lib
61
62
 
62
63
 
64
def get_cuda_include_dir():
    """
    Return the CUDA include directory stored under the 'include_dir' key of
    `get_cuda_paths()`.

    Only the path itself (the entry's `info` field) is returned; the label
    describing which search location supplied it (the `by` field) is not.

    NOTE(review): an earlier version of this docstring said the
    `CUDA_INCLUDE_PATH` configuration entry is not consulted. Confirm against
    `cuda_paths._get_include_dir`, which builds the list of candidate
    locations.
    """

    return get_cuda_paths()['include_dir'].info
72
+
73
+
74
def check_cuda_include_dir(path):
    """Validate that `path` is an existing location containing cuda_runtime.h.

    Returns None on success. Raises FileNotFoundError when `path` is None or
    does not exist, or when it holds no ``cuda_runtime.h``.
    """
    path_is_present = path is not None and os.path.exists(path)
    if not path_is_present:
        raise FileNotFoundError(f"{path} not found")

    runtime_header = os.path.join(path, "cuda_runtime.h")
    if os.path.exists(runtime_header):
        return
    raise FileNotFoundError(f"Unable to find cuda_runtime.h from {path}")
80
+
81
+
63
82
  def open_cudalib(lib):
64
83
  path = get_cudalib(lib)
65
84
  return ctypes.CDLL(path)
@@ -75,6 +94,8 @@ def _get_source_variable(lib, static=False):
75
94
  return get_cuda_paths()['nvvm'].by
76
95
  elif lib == 'libdevice':
77
96
  return get_cuda_paths()['libdevice'].by
97
+ elif lib == 'include_dir':
98
+ return get_cuda_paths()['include_dir'].by
78
99
  else:
79
100
  dir_type = 'static_cudalib_dir' if static else 'cudalib_dir'
80
101
  return get_cuda_paths()[dir_type].by
@@ -173,4 +194,21 @@ def test():
173
194
  print('\tERROR: failed to find %s:\n%s' % (lib, e))
174
195
  failed = True
175
196
 
197
+ # Check cuda include paths
198
+
199
+ print("Include directory configuration variable:")
200
+ print(f"\tCUDA_INCLUDE_PATH={config.CUDA_INCLUDE_PATH}")
201
+
202
+ where = _get_source_variable('include_dir')
203
+ print(f'Finding include directory from {where}')
204
+ include = get_cuda_include_dir()
205
+ print('\tLocated at', include)
206
+ try:
207
+ print('\tChecking include directory', end='...')
208
+ check_cuda_include_dir(include)
209
+ print('\tok')
210
+ except FileNotFoundError as e:
211
+ print('\tERROR: failed to find cuda include directory:\n%s' % e)
212
+ failed = True
213
+
176
214
  return not failed
@@ -0,0 +1,63 @@
1
+ from .mappings import FILE_EXTENSION_MAP
2
+
3
+
4
class LinkableCode:
    """In-memory code that may appear in the `link` list given to `@cuda.jit`
    kernels, as an alternative to a path on disk.

    The `name` property reads `default_name`, which this class does not
    define itself.
    """

    def __init__(self, data, name=None):
        self._name = name
        self.data = data

    @property
    def name(self):
        """The name given at construction, or `default_name` if it is falsy."""
        explicit = self._name
        if explicit:
            return explicit
        return self.default_name
15
+
16
+
17
# Concrete LinkableCode kinds. Each one sets `kind` (the input type used when
# the code is added to a link) and `default_name` (the name used when none is
# given at construction).

class PTXSource(LinkableCode):
    """PTX Source code in memory"""

    kind = FILE_EXTENSION_MAP["ptx"]
    default_name = "<unnamed-ptx>"


class CUSource(LinkableCode):
    """CUDA C/C++ Source code in memory"""

    # A plain string rather than a FILE_EXTENSION_MAP value; that map has no
    # 'cu' entry.
    kind = "cu"
    default_name = "<unnamed-cu>"


class Fatbin(LinkableCode):
    """A fatbin ELF in memory"""

    kind = FILE_EXTENSION_MAP["fatbin"]
    default_name = "<unnamed-fatbin>"


class Cubin(LinkableCode):
    """A cubin ELF in memory"""

    kind = FILE_EXTENSION_MAP["cubin"]
    default_name = "<unnamed-cubin>"


class Archive(LinkableCode):
    """An archive of objects in memory"""

    kind = FILE_EXTENSION_MAP["a"]
    default_name = "<unnamed-archive>"


class Object(LinkableCode):
    """An object file in memory"""

    kind = FILE_EXTENSION_MAP["o"]
    default_name = "<unnamed-object>"


class LTOIR(LinkableCode):
    """An LTOIR file in memory"""

    # NOTE(review): this is the literal string "ltoir", not
    # FILE_EXTENSION_MAP["ltoir"] as the sibling classes would suggest.
    # Confirm that every consumer comparing `kind` against FILE_EXTENSION_MAP
    # values also accepts this string.
    kind = "ltoir"
    default_name = "<unnamed-ltoir>"
@@ -0,0 +1,24 @@
1
from numba import config
from . import enums

# File extension -> name of the CU_JIT_INPUT_* constant for that input type.
# The constant is looked up on whichever provider is selected below.
_EXTENSION_TO_INPUT_NAME = (
    ('o', 'CU_JIT_INPUT_OBJECT'),
    ('ptx', 'CU_JIT_INPUT_PTX'),
    ('a', 'CU_JIT_INPUT_LIBRARY'),
    ('lib', 'CU_JIT_INPUT_LIBRARY'),
    ('cubin', 'CU_JIT_INPUT_CUBIN'),
    ('fatbin', 'CU_JIT_INPUT_FATBINARY'),
    ('ltoir', 'CU_JIT_INPUT_NVVM'),
)

if config.CUDA_USE_NVIDIA_BINDING:
    # With the NVIDIA binding, the constants come from its CUjitInputType.
    from cuda import cuda
    jitty = cuda.CUjitInputType
    _input_kinds = jitty
else:
    # Otherwise use the constants defined in the local enums module.
    _input_kinds = enums

FILE_EXTENSION_MAP = {
    extension: getattr(_input_kinds, constant_name)
    for extension, constant_name in _EXTENSION_TO_INPUT_NAME
}
@@ -1,9 +1,8 @@
1
1
  from ctypes import byref, c_char, c_char_p, c_int, c_size_t, c_void_p, POINTER
2
2
  from enum import IntEnum
3
- from numba.core import config
4
3
  from numba.cuda.cudadrv.error import (NvrtcError, NvrtcCompilationError,
5
4
  NvrtcSupportError)
6
-
5
+ from numba.cuda.cuda_paths import get_cuda_paths
7
6
  import functools
8
7
  import os
9
8
  import threading
@@ -233,12 +232,18 @@ def compile(src, name, cc):
233
232
  # being optimized away.
234
233
  major, minor = cc
235
234
  arch = f'--gpu-architecture=compute_{major}{minor}'
236
- include = f'-I{config.CUDA_INCLUDE_PATH}'
235
+
236
+ cuda_include = [
237
+ f"-I{get_cuda_paths()['include_dir'].info}",
238
+ ]
237
239
 
238
240
  cudadrv_path = os.path.dirname(os.path.abspath(__file__))
239
241
  numba_cuda_path = os.path.dirname(cudadrv_path)
240
242
  numba_include = f'-I{numba_cuda_path}'
241
- options = [arch, include, numba_include, '-rdc', 'true']
243
+ options = [arch, *cuda_include, numba_include, '-rdc', 'true']
244
+
245
+ if nvrtc.get_version() < (12, 0):
246
+ options += ["-std=c++17"]
242
247
 
243
248
  # Compile the program
244
249
  compile_error = nvrtc.compile_program(program, options)
@@ -31,6 +31,9 @@ from .intrinsic_wrapper import (all_sync, any_sync, eq_sync, ballot_sync,
31
31
  shfl_xor_sync)
32
32
 
33
33
  from .kernels import reduction
34
+ from numba.cuda.cudadrv.linkable_code import (
35
+ Archive, CUSource, Cubin, Fatbin, LinkableCode, LTOIR, Object, PTXSource
36
+ )
34
37
 
35
38
  reduce = Reduce = reduction.Reduce
36
39