numba-cuda 0.15.1__py3-none-any.whl → 0.16.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. numba_cuda/VERSION +1 -1
  2. numba_cuda/numba/cuda/__init__.py +51 -16
  3. numba_cuda/numba/cuda/codegen.py +11 -9
  4. numba_cuda/numba/cuda/compiler.py +3 -39
  5. numba_cuda/numba/cuda/cuda_paths.py +20 -22
  6. numba_cuda/numba/cuda/cudadrv/driver.py +197 -286
  7. numba_cuda/numba/cuda/cudadrv/error.py +4 -0
  8. numba_cuda/numba/cuda/cudadrv/libs.py +1 -1
  9. numba_cuda/numba/cuda/cudadrv/mappings.py +8 -9
  10. numba_cuda/numba/cuda/cudadrv/nvrtc.py +153 -108
  11. numba_cuda/numba/cuda/cudadrv/nvvm.py +1 -197
  12. numba_cuda/numba/cuda/cudadrv/runtime.py +5 -136
  13. numba_cuda/numba/cuda/decorators.py +18 -0
  14. numba_cuda/numba/cuda/dispatcher.py +1 -0
  15. numba_cuda/numba/cuda/flags.py +36 -0
  16. numba_cuda/numba/cuda/memory_management/nrt.py +2 -2
  17. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +6 -2
  18. numba_cuda/numba/cuda/target.py +55 -2
  19. numba_cuda/numba/cuda/testing.py +0 -22
  20. numba_cuda/numba/cuda/tests/__init__.py +0 -2
  21. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +0 -2
  22. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +15 -1
  23. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +17 -6
  24. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +9 -167
  25. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +27 -0
  26. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +3 -19
  27. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +1 -37
  28. numba_cuda/numba/cuda/tests/cudapy/__init__.py +0 -2
  29. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +1 -1
  30. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +0 -9
  31. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +14 -0
  32. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +0 -6
  33. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +2 -1
  34. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +0 -4
  35. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +18 -0
  36. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +0 -7
  37. numba_cuda/numba/cuda/tests/nocuda/__init__.py +0 -2
  38. numba_cuda/numba/cuda/tests/nrt/__init__.py +0 -2
  39. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +10 -1
  40. {numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/METADATA +8 -10
  41. {numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/RECORD +44 -42
  42. {numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/WHEEL +0 -0
  43. {numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/licenses/LICENSE +0 -0
  44. {numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/top_level.txt +0 -0
@@ -42,6 +42,7 @@ import importlib
42
42
  import numpy as np
43
43
  from collections import namedtuple, deque
44
44
 
45
+
45
46
  from numba import mviewbuf
46
47
  from numba.core import utils, serialize, config
47
48
  from .error import CudaSupportError, CudaDriverError
@@ -58,6 +59,22 @@ except ImportError:
58
59
  NvJitLinker, NvJitLinkError = None, None
59
60
 
60
61
 
62
+ USE_NV_BINDING = config.CUDA_USE_NVIDIA_BINDING
63
+
64
+ if USE_NV_BINDING:
65
+ from cuda.bindings import driver as binding
66
+ from cuda.core.experimental import (
67
+ Linker,
68
+ LinkerOptions,
69
+ ObjectCode,
70
+ )
71
+
72
+ # There is no definition of the default stream in the Nvidia bindings (nor
73
+ # is there at the C/C++ level), so we define it here so we don't need to
74
+ # use a magic number 0 in places where we want the default stream.
75
+ CU_STREAM_DEFAULT = 0
76
+
77
+
61
78
  MIN_REQUIRED_CC = (3, 5)
62
79
  SUPPORTS_IPC = sys.platform.startswith("linux")
63
80
 
@@ -108,6 +125,25 @@ def make_logger():
108
125
  return logger
109
126
 
110
127
 
128
+ @functools.cache
129
+ def _have_nvjitlink():
130
+ if not USE_NV_BINDING:
131
+ return False
132
+ try:
133
+ from cuda.bindings._internal import nvjitlink as nvjitlink_internal
134
+ from cuda.bindings._internal.utils import NotSupportedError
135
+ except ImportError:
136
+ return False
137
+ try:
138
+ return (
139
+ nvjitlink_internal._inspect_function_pointer("__nvJitLinkVersion")
140
+ != 0
141
+ )
142
+ except NotSupportedError:
143
+ # no driver
144
+ return False
145
+
146
+
111
147
  class DeadMemoryError(RuntimeError):
112
148
  pass
113
149
 
@@ -1472,7 +1508,7 @@ class Context(object):
1472
1508
  if isinstance(ptx, str):
1473
1509
  ptx = ptx.encode("utf8")
1474
1510
  if USE_NV_BINDING:
1475
- image = ptx
1511
+ image = ObjectCode.from_ptx(ptx)
1476
1512
  else:
1477
1513
  image = c_char_p(ptx)
1478
1514
  return self.create_module_image(image)
@@ -1615,7 +1651,6 @@ def load_module_image_ctypes(
1615
1651
 
1616
1652
  option_keys = (drvapi.cu_jit_option * len(options))(*options.keys())
1617
1653
  option_vals = (c_void_p * len(options))(*options.values())
1618
-
1619
1654
  handle = drvapi.cu_module()
1620
1655
  try:
1621
1656
  driver.cuModuleLoadDataEx(
@@ -1662,7 +1697,7 @@ def load_module_image_cuda_python(
1662
1697
 
1663
1698
  try:
1664
1699
  handle = driver.cuModuleLoadDataEx(
1665
- image, len(options), option_keys, option_vals
1700
+ image.code, len(options), option_keys, option_vals
1666
1701
  )
1667
1702
  except CudaAPIError as e:
1668
1703
  err_string = jiterrors.decode("utf-8")
@@ -2722,7 +2757,7 @@ def launch_kernel(
2722
2757
  )
2723
2758
 
2724
2759
 
2725
- class Linker(metaclass=ABCMeta):
2760
+ class _LinkerBase(metaclass=ABCMeta):
2726
2761
  """Abstract base class for linkers"""
2727
2762
 
2728
2763
  @classmethod
@@ -2735,30 +2770,27 @@ class Linker(metaclass=ABCMeta):
2735
2770
  additional_flags=None,
2736
2771
  ):
2737
2772
  driver_ver = driver.get_version()
2738
- if config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY and driver_ver >= (
2739
- 12,
2740
- 0,
2741
- ):
2742
- raise ValueError("Use CUDA_ENABLE_PYNVJITLINK for CUDA >= 12.0 MVC")
2743
- if config.CUDA_ENABLE_PYNVJITLINK and driver_ver < (12, 0):
2744
- raise ValueError("Enabling pynvjitlink requires CUDA 12.")
2745
- if config.CUDA_ENABLE_PYNVJITLINK:
2746
- linker = PyNvJitLinker
2747
-
2748
- elif config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY:
2749
- linker = MVCLinker
2773
+ if driver_ver < (12, 0):
2774
+ if config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY:
2775
+ linker = MVCLinker
2776
+ elif USE_NV_BINDING:
2777
+ linker = _Linker
2778
+ else:
2779
+ linker = CtypesLinker
2750
2780
  else:
2751
2781
  if USE_NV_BINDING:
2752
- linker = CudaPythonLinker
2782
+ linker = _Linker
2753
2783
  else:
2754
2784
  linker = CtypesLinker
2755
2785
 
2756
- if linker is PyNvJitLinker:
2757
- return linker(max_registers, lineinfo, cc, lto, additional_flags)
2758
- elif additional_flags or lto:
2759
- raise ValueError("LTO and additional flags require PyNvJitLinker")
2786
+ params = (max_registers, lineinfo, cc)
2787
+ if linker is _Linker:
2788
+ params = (*params, lto, additional_flags)
2760
2789
  else:
2761
- return linker(max_registers, lineinfo, cc)
2790
+ if lto or additional_flags:
2791
+ raise ValueError("LTO and additional flags require nvjitlink")
2792
+
2793
+ return linker(*params)
2762
2794
 
2763
2795
  @abstractmethod
2764
2796
  def __init__(self, max_registers, lineinfo, cc):
@@ -2786,7 +2818,6 @@ class Linker(metaclass=ABCMeta):
2786
2818
  with driver.get_active_context() as ac:
2787
2819
  dev = driver.get_device(ac.devnum)
2788
2820
  cc = dev.compute_capability
2789
-
2790
2821
  ptx, log = nvrtc.compile(cu, name, cc)
2791
2822
 
2792
2823
  if config.DUMP_ASSEMBLY:
@@ -2821,7 +2852,6 @@ class Linker(metaclass=ABCMeta):
2821
2852
  LTO-ed portion of the PTX when linker is added with objects that can be
2822
2853
  both LTO-ed and not LTO-ed.
2823
2854
  """
2824
-
2825
2855
  if isinstance(path_or_code, str):
2826
2856
  ext = pathlib.Path(path_or_code).suffix
2827
2857
  if ext == "":
@@ -2901,7 +2931,148 @@ class Linker(metaclass=ABCMeta):
2901
2931
  """
2902
2932
 
2903
2933
 
2904
- class MVCLinker(Linker):
2934
+ class _Linker(_LinkerBase):
2935
+ def __init__(
2936
+ self,
2937
+ max_registers=None,
2938
+ lineinfo=False,
2939
+ cc=None,
2940
+ lto=None,
2941
+ additional_flags=None,
2942
+ ):
2943
+ arch = f"sm_{cc[0]}{cc[1]}"
2944
+ self.max_registers = max_registers if max_registers else None
2945
+ self.lineinfo = lineinfo
2946
+ self.cc = cc
2947
+ self.arch = arch
2948
+ if lto is False:
2949
+ # WAR for apparent nvjitlink issue
2950
+ lto = None
2951
+ self.lto = lto
2952
+ self.additional_flags = additional_flags
2953
+
2954
+ self.options = LinkerOptions(
2955
+ max_register_count=self.max_registers,
2956
+ lineinfo=lineinfo,
2957
+ arch=arch,
2958
+ link_time_optimization=lto,
2959
+ )
2960
+ self._complete = False
2961
+ self._object_codes = []
2962
+ self.linker = None # need at least one program
2963
+
2964
+ @property
2965
+ def info_log(self):
2966
+ if not self.linker:
2967
+ raise ValueError("Not Initialized")
2968
+ if self._complete:
2969
+ return self._info_log
2970
+ raise RuntimeError("Link not yet complete.")
2971
+
2972
+ @property
2973
+ def error_log(self):
2974
+ if not self.linker:
2975
+ raise ValueError("Not Initialized")
2976
+ if self._complete:
2977
+ return self._error_log
2978
+ raise RuntimeError("Link not yet complete.")
2979
+
2980
+ def add_ptx(self, ptx, name="<cudapy-ptx>"):
2981
+ obj = ObjectCode.from_ptx(ptx, name=name)
2982
+ self._object_codes.append(obj)
2983
+
2984
+ def add_cu(self, cu, name="<cudapy-cu>"):
2985
+ with driver.get_active_context() as ac:
2986
+ dev = driver.get_device(ac.devnum)
2987
+ cc = dev.compute_capability
2988
+ obj, log = nvrtc.compile(cu, name, cc, ltoir=self.lto)
2989
+
2990
+ if not self.lto and config.DUMP_ASSEMBLY:
2991
+ print(("ASSEMBLY %s" % name).center(80, "-"))
2992
+ print(obj.code)
2993
+
2994
+ self._object_codes.append(obj)
2995
+
2996
+ def add_cubin(self, cubin, name="<cudapy-cubin>"):
2997
+ obj = ObjectCode.from_cubin(cubin, name=name)
2998
+ self._object_codes.append(obj)
2999
+
3000
+ def add_ltoir(self, ltoir, name="<cudapy-ltoir>"):
3001
+ obj = ObjectCode.from_ltoir(ltoir, name=name)
3002
+ self._object_codes.append(obj)
3003
+
3004
+ def add_fatbin(self, fatbin, name="<cudapy-fatbin>"):
3005
+ obj = ObjectCode.from_fatbin(fatbin, name=name)
3006
+ self._object_codes.append(obj)
3007
+
3008
+ def add_object(self, obj, name="<cudapy-object>"):
3009
+ obj = ObjectCode.from_object(obj, name=name)
3010
+ self._object_codes.append(obj)
3011
+
3012
+ def add_library(self, lib, name="<cudapy-lib>"):
3013
+ obj = ObjectCode.from_library(lib, name=name)
3014
+ self._object_codes.append(obj)
3015
+
3016
+ def add_file(self, path, kind):
3017
+ try:
3018
+ data = cached_file_read(path, how="rb")
3019
+ except FileNotFoundError:
3020
+ raise LinkerError(f"{path} not found")
3021
+ name = pathlib.Path(path).name
3022
+ self.add_data(data, kind, name)
3023
+
3024
+ def add_data(self, data, kind, name):
3025
+ if kind == FILE_EXTENSION_MAP["ptx"]:
3026
+ fn = self.add_ptx
3027
+ elif kind == FILE_EXTENSION_MAP["cubin"]:
3028
+ fn = self.add_cubin
3029
+ elif kind == "cu":
3030
+ fn = self.add_cu
3031
+ elif (
3032
+ kind == FILE_EXTENSION_MAP["lib"] or kind == FILE_EXTENSION_MAP["a"]
3033
+ ):
3034
+ fn = self.add_library
3035
+ elif kind == FILE_EXTENSION_MAP["fatbin"]:
3036
+ fn = self.add_fatbin
3037
+ elif kind == FILE_EXTENSION_MAP["o"]:
3038
+ fn = self.add_object
3039
+ elif kind == FILE_EXTENSION_MAP["ltoir"]:
3040
+ fn = self.add_ltoir
3041
+ else:
3042
+ raise LinkerError(f"Don't know how to link {kind}")
3043
+
3044
+ fn(data, name)
3045
+
3046
+ def get_linked_ptx(self):
3047
+ options = LinkerOptions(
3048
+ max_register_count=self.max_registers,
3049
+ lineinfo=self.lineinfo,
3050
+ arch=self.arch,
3051
+ link_time_optimization=True,
3052
+ ptx=True,
3053
+ )
3054
+
3055
+ self.linker = Linker(*self._object_codes, options=options)
3056
+
3057
+ result = self.linker.link("ptx")
3058
+ self.close()
3059
+ self._complete = True
3060
+ return result.code
3061
+
3062
+ def close(self):
3063
+ self._info_log = self.linker.get_info_log()
3064
+ self._error_log = self.linker.get_error_log()
3065
+ self.linker.close()
3066
+
3067
+ def complete(self):
3068
+ self.linker = Linker(*self._object_codes, options=self.options)
3069
+ result = self.linker.link("cubin")
3070
+ self.close()
3071
+ self._complete = True
3072
+ return result
3073
+
3074
+
3075
+ class MVCLinker(_LinkerBase):
2905
3076
  """
2906
3077
  Linker supporting Minor Version Compatibility, backed by the cubinlinker
2907
3078
  package.
@@ -2996,7 +3167,7 @@ class MVCLinker(Linker):
2996
3167
  raise LinkerError from e
2997
3168
 
2998
3169
 
2999
- class CtypesLinker(Linker):
3170
+ class CtypesLinker(_LinkerBase):
3000
3171
  """
3001
3172
  Links for current device if no CC given
3002
3173
  """
@@ -3139,266 +3310,6 @@ class CtypesLinker(Linker):
3139
3310
  return bytes(np.ctypeslib.as_array(cubin_ptr, shape=(size,)))
3140
3311
 
3141
3312
 
3142
- class CudaPythonLinker(Linker):
3143
- """
3144
- Links for current device if no CC given
3145
- """
3146
-
3147
- def __init__(self, max_registers=0, lineinfo=False, cc=None):
3148
- super().__init__(max_registers, lineinfo, cc)
3149
-
3150
- logsz = config.CUDA_LOG_SIZE
3151
- linkerinfo = bytearray(logsz)
3152
- linkererrors = bytearray(logsz)
3153
-
3154
- jit_option = binding.CUjit_option
3155
-
3156
- options = {
3157
- jit_option.CU_JIT_INFO_LOG_BUFFER: linkerinfo,
3158
- jit_option.CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES: logsz,
3159
- jit_option.CU_JIT_ERROR_LOG_BUFFER: linkererrors,
3160
- jit_option.CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES: logsz,
3161
- jit_option.CU_JIT_LOG_VERBOSE: 1,
3162
- }
3163
- if max_registers:
3164
- options[jit_option.CU_JIT_MAX_REGISTERS] = max_registers
3165
- if lineinfo:
3166
- options[jit_option.CU_JIT_GENERATE_LINE_INFO] = 1
3167
-
3168
- if cc is None:
3169
- # No option value is needed, but we need something as a placeholder
3170
- options[jit_option.CU_JIT_TARGET_FROM_CUCONTEXT] = 1
3171
- else:
3172
- cc_val = cc[0] * 10 + cc[1]
3173
- cc_enum = getattr(
3174
- binding.CUjit_target, f"CU_TARGET_COMPUTE_{cc_val}"
3175
- )
3176
- options[jit_option.CU_JIT_TARGET] = cc_enum
3177
-
3178
- raw_keys = list(options.keys())
3179
- raw_values = list(options.values())
3180
- self.handle = driver.cuLinkCreate(len(raw_keys), raw_keys, raw_values)
3181
-
3182
- weakref.finalize(self, driver.cuLinkDestroy, self.handle)
3183
-
3184
- self.linker_info_buf = linkerinfo
3185
- self.linker_errors_buf = linkererrors
3186
-
3187
- self._keep_alive = [linkerinfo, linkererrors, raw_keys, raw_values]
3188
-
3189
- @property
3190
- def info_log(self):
3191
- return self.linker_info_buf.decode("utf8")
3192
-
3193
- @property
3194
- def error_log(self):
3195
- return self.linker_errors_buf.decode("utf8")
3196
-
3197
- def add_cubin(self, cubin, name="<unnamed-cubin>"):
3198
- input_type = binding.CUjitInputType.CU_JIT_INPUT_CUBIN
3199
- return self._add_data(input_type, cubin, name)
3200
-
3201
- def add_ptx(self, ptx, name="<unnamed-ptx>"):
3202
- input_type = binding.CUjitInputType.CU_JIT_INPUT_PTX
3203
- return self._add_data(input_type, ptx, name)
3204
-
3205
- def add_object(self, object_, name="<unnamed-object>"):
3206
- input_type = binding.CUjitInputType.CU_JIT_INPUT_OBJECT
3207
- return self._add_data(input_type, object_, name)
3208
-
3209
- def add_fatbin(self, fatbin, name="<unnamed-fatbin>"):
3210
- input_type = binding.CUjitInputType.CU_JIT_INPUT_FATBINARY
3211
- return self._add_data(input_type, fatbin, name)
3212
-
3213
- def add_library(self, library, name="<unnamed-library>"):
3214
- input_type = binding.CUjitInputType.CU_JIT_INPUT_LIBRARY
3215
- return self._add_data(input_type, library, name)
3216
-
3217
- def _add_data(self, input_type, data, name):
3218
- name_buffer = name.encode("utf8")
3219
- self._keep_alive += [data, name_buffer]
3220
- try:
3221
- driver.cuLinkAddData(
3222
- self.handle, input_type, data, len(data), name_buffer, 0, [], []
3223
- )
3224
- except CudaAPIError as e:
3225
- raise LinkerError("%s\n%s" % (e, self.error_log))
3226
-
3227
- def add_data(self, data, kind, name=None):
3228
- # We pass the name as **kwargs to ensure the default name for the input
3229
- # type is used if none is supplied
3230
- kws = {}
3231
- if name is not None:
3232
- kws["name"] = name
3233
-
3234
- if kind == FILE_EXTENSION_MAP["cubin"]:
3235
- self.add_cubin(data, **kws)
3236
- elif kind == FILE_EXTENSION_MAP["fatbin"]:
3237
- self.add_fatbin(data, **kws)
3238
- elif kind == FILE_EXTENSION_MAP["a"]:
3239
- self.add_library(data, **kws)
3240
- elif kind == FILE_EXTENSION_MAP["ptx"]:
3241
- self.add_ptx(data, **kws)
3242
- elif kind == FILE_EXTENSION_MAP["o"]:
3243
- self.add_object(data, **kws)
3244
- elif kind == FILE_EXTENSION_MAP["ltoir"]:
3245
- raise LinkerError("CudaPythonLinker cannot link LTO-IR")
3246
- else:
3247
- raise LinkerError(f"Don't know how to link {kind}")
3248
-
3249
- def add_file(self, path, kind):
3250
- pathbuf = path.encode("utf8")
3251
- self._keep_alive.append(pathbuf)
3252
-
3253
- try:
3254
- driver.cuLinkAddFile(self.handle, kind, pathbuf, 0, [], [])
3255
- except CudaAPIError as e:
3256
- if e.code == binding.CUresult.CUDA_ERROR_FILE_NOT_FOUND:
3257
- msg = f"{path} not found"
3258
- else:
3259
- msg = "%s\n%s" % (e, self.error_log)
3260
- raise LinkerError(msg)
3261
-
3262
- def complete(self):
3263
- try:
3264
- cubin_buf, size = driver.cuLinkComplete(self.handle)
3265
- except CudaAPIError as e:
3266
- raise LinkerError("%s\n%s" % (e, self.error_log))
3267
-
3268
- assert size > 0, "linker returned a zero sized cubin"
3269
- del self._keep_alive[:]
3270
- # We return a copy of the cubin because it's owned by the linker
3271
- cubin_ptr = ctypes.cast(cubin_buf, ctypes.POINTER(ctypes.c_char))
3272
- return bytes(np.ctypeslib.as_array(cubin_ptr, shape=(size,)))
3273
-
3274
-
3275
- class PyNvJitLinker(Linker):
3276
- def __init__(
3277
- self,
3278
- max_registers=None,
3279
- lineinfo=False,
3280
- cc=None,
3281
- lto=False,
3282
- additional_flags=None,
3283
- ):
3284
- if NvJitLinker is None:
3285
- raise ImportError(
3286
- "Using pynvjitlink requires the pynvjitlink package to be "
3287
- "available"
3288
- )
3289
-
3290
- if config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY:
3291
- raise ValueError(
3292
- "Can't set CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY and "
3293
- "CUDA_ENABLE_PYNVJITLINK at the same time"
3294
- )
3295
-
3296
- if cc is None:
3297
- raise RuntimeError("PyNvJitLinker requires CC to be specified")
3298
- if not any(isinstance(cc, t) for t in [list, tuple]):
3299
- raise TypeError("`cc` must be a list or tuple of length 2")
3300
-
3301
- sm_ver = f"{cc[0] * 10 + cc[1]}"
3302
- arch = f"-arch=sm_{sm_ver}"
3303
- options = [arch]
3304
- if max_registers:
3305
- options.append(f"-maxrregcount={max_registers}")
3306
- if lineinfo:
3307
- options.append("-lineinfo")
3308
- if lto:
3309
- options.append("-lto")
3310
- if additional_flags is not None:
3311
- options.extend(additional_flags)
3312
-
3313
- self._linker = NvJitLinker(*options)
3314
- self.lto = lto
3315
- self.options = options
3316
-
3317
- @property
3318
- def info_log(self):
3319
- return self._linker.info_log
3320
-
3321
- @property
3322
- def error_log(self):
3323
- return self._linker.error_log
3324
-
3325
- def add_ptx(self, ptx, name="<cudapy-ptx>"):
3326
- self._linker.add_ptx(ptx, name)
3327
-
3328
- def add_fatbin(self, fatbin, name="<external-fatbin>"):
3329
- self._linker.add_fatbin(fatbin, name)
3330
-
3331
- def add_ltoir(self, ltoir, name="<external-ltoir>"):
3332
- self._linker.add_ltoir(ltoir, name)
3333
-
3334
- def add_object(self, obj, name="<external-object>"):
3335
- self._linker.add_object(obj, name)
3336
-
3337
- def add_file(self, path, kind):
3338
- try:
3339
- data = cached_file_read(path, "rb")
3340
- except FileNotFoundError:
3341
- raise LinkerError(f"{path} not found")
3342
-
3343
- name = pathlib.Path(path).name
3344
- self.add_data(data, kind, name)
3345
-
3346
- def add_cu(self, cu, name):
3347
- """Add CUDA source in a string to the link. The name of the source
3348
- file should be specified in `name`."""
3349
- with driver.get_active_context() as ac:
3350
- dev = driver.get_device(ac.devnum)
3351
- cc = dev.compute_capability
3352
-
3353
- program, log = nvrtc.compile(cu, name, cc, ltoir=self.lto)
3354
-
3355
- if not self.lto and config.DUMP_ASSEMBLY:
3356
- print(("ASSEMBLY %s" % name).center(80, "-"))
3357
- print(program)
3358
- print("=" * 80)
3359
-
3360
- suffix = ".ltoir" if self.lto else ".ptx"
3361
- program_name = os.path.splitext(name)[0] + suffix
3362
- # Link the program's PTX or LTOIR using the normal linker mechanism
3363
- if self.lto:
3364
- self.add_ltoir(program, program_name)
3365
- else:
3366
- self.add_ptx(program.encode(), program_name)
3367
-
3368
- def add_data(self, data, kind, name):
3369
- if kind == FILE_EXTENSION_MAP["cubin"]:
3370
- fn = self._linker.add_cubin
3371
- elif kind == FILE_EXTENSION_MAP["fatbin"]:
3372
- fn = self._linker.add_fatbin
3373
- elif kind == FILE_EXTENSION_MAP["a"]:
3374
- fn = self._linker.add_library
3375
- elif kind == FILE_EXTENSION_MAP["ptx"]:
3376
- return self.add_ptx(data, name)
3377
- elif kind == FILE_EXTENSION_MAP["o"]:
3378
- fn = self._linker.add_object
3379
- elif kind == FILE_EXTENSION_MAP["ltoir"]:
3380
- fn = self._linker.add_ltoir
3381
- else:
3382
- raise LinkerError(f"Don't know how to link {kind}")
3383
-
3384
- try:
3385
- fn(data, name)
3386
- except NvJitLinkError as e:
3387
- raise LinkerError from e
3388
-
3389
- def get_linked_ptx(self):
3390
- try:
3391
- return self._linker.get_linked_ptx()
3392
- except NvJitLinkError as e:
3393
- raise LinkerError from e
3394
-
3395
- def complete(self):
3396
- try:
3397
- return self._linker.get_linked_cubin()
3398
- except NvJitLinkError as e:
3399
- raise LinkerError from e
3400
-
3401
-
3402
3313
  # -----------------------------------------------------------------------------
3403
3314
 
3404
3315
 
@@ -38,3 +38,7 @@ class NvrtcBuiltinOperationFailure(NvrtcError):
38
38
 
39
39
  class NvrtcSupportError(ImportError):
40
40
  pass
41
+
42
+
43
+ class CCSupportError(RuntimeError):
44
+ pass
@@ -154,7 +154,7 @@ def test():
154
154
  print(f"\t\t{location}")
155
155
 
156
156
  # Checks for dynamic libraries
157
- libs = "nvvm nvrtc cudart".split()
157
+ libs = "nvvm nvrtc".split()
158
158
  for lib in libs:
159
159
  path = get_cudalib(lib)
160
160
  print("Finding {} from {}".format(lib, _get_source_variable(lib)))
@@ -2,17 +2,16 @@ from numba import config
2
2
  from . import enums
3
3
 
4
4
  if config.CUDA_USE_NVIDIA_BINDING:
5
- from cuda.bindings import driver
5
+ from cuda.bindings.driver import CUjitInputType
6
6
 
7
- jitty = driver.CUjitInputType
8
7
  FILE_EXTENSION_MAP = {
9
- "o": jitty.CU_JIT_INPUT_OBJECT,
10
- "ptx": jitty.CU_JIT_INPUT_PTX,
11
- "a": jitty.CU_JIT_INPUT_LIBRARY,
12
- "lib": jitty.CU_JIT_INPUT_LIBRARY,
13
- "cubin": jitty.CU_JIT_INPUT_CUBIN,
14
- "fatbin": jitty.CU_JIT_INPUT_FATBINARY,
15
- "ltoir": jitty.CU_JIT_INPUT_NVVM,
8
+ "o": CUjitInputType.CU_JIT_INPUT_OBJECT,
9
+ "ptx": CUjitInputType.CU_JIT_INPUT_PTX,
10
+ "a": CUjitInputType.CU_JIT_INPUT_LIBRARY,
11
+ "lib": CUjitInputType.CU_JIT_INPUT_LIBRARY,
12
+ "cubin": CUjitInputType.CU_JIT_INPUT_CUBIN,
13
+ "fatbin": CUjitInputType.CU_JIT_INPUT_FATBINARY,
14
+ "ltoir": CUjitInputType.CU_JIT_INPUT_NVVM,
16
15
  }
17
16
  else:
18
17
  FILE_EXTENSION_MAP = {