numba-cuda 0.15.1__py3-none-any.whl → 0.16.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- numba_cuda/VERSION +1 -1
- numba_cuda/numba/cuda/__init__.py +51 -16
- numba_cuda/numba/cuda/codegen.py +11 -9
- numba_cuda/numba/cuda/compiler.py +3 -39
- numba_cuda/numba/cuda/cuda_paths.py +20 -22
- numba_cuda/numba/cuda/cudadrv/driver.py +197 -286
- numba_cuda/numba/cuda/cudadrv/error.py +4 -0
- numba_cuda/numba/cuda/cudadrv/libs.py +1 -1
- numba_cuda/numba/cuda/cudadrv/mappings.py +8 -9
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +153 -108
- numba_cuda/numba/cuda/cudadrv/nvvm.py +1 -197
- numba_cuda/numba/cuda/cudadrv/runtime.py +5 -136
- numba_cuda/numba/cuda/decorators.py +18 -0
- numba_cuda/numba/cuda/dispatcher.py +1 -0
- numba_cuda/numba/cuda/flags.py +36 -0
- numba_cuda/numba/cuda/memory_management/nrt.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +6 -2
- numba_cuda/numba/cuda/target.py +55 -2
- numba_cuda/numba/cuda/testing.py +0 -22
- numba_cuda/numba/cuda/tests/__init__.py +0 -2
- numba_cuda/numba/cuda/tests/cudadrv/__init__.py +0 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +15 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +17 -6
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +9 -167
- numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +27 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +3 -19
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +1 -37
- numba_cuda/numba/cuda/tests/cudapy/__init__.py +0 -2
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +0 -9
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +14 -0
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +0 -6
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +2 -1
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +0 -4
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +18 -0
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +0 -7
- numba_cuda/numba/cuda/tests/nocuda/__init__.py +0 -2
- numba_cuda/numba/cuda/tests/nrt/__init__.py +0 -2
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +10 -1
- {numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/METADATA +8 -10
- {numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/RECORD +44 -42
- {numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/WHEEL +0 -0
- {numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.15.1.dist-info → numba_cuda-0.16.0.dist-info}/top_level.txt +0 -0
@@ -42,6 +42,7 @@ import importlib
|
|
42
42
|
import numpy as np
|
43
43
|
from collections import namedtuple, deque
|
44
44
|
|
45
|
+
|
45
46
|
from numba import mviewbuf
|
46
47
|
from numba.core import utils, serialize, config
|
47
48
|
from .error import CudaSupportError, CudaDriverError
|
@@ -58,6 +59,22 @@ except ImportError:
|
|
58
59
|
NvJitLinker, NvJitLinkError = None, None
|
59
60
|
|
60
61
|
|
62
|
+
USE_NV_BINDING = config.CUDA_USE_NVIDIA_BINDING
|
63
|
+
|
64
|
+
if USE_NV_BINDING:
|
65
|
+
from cuda.bindings import driver as binding
|
66
|
+
from cuda.core.experimental import (
|
67
|
+
Linker,
|
68
|
+
LinkerOptions,
|
69
|
+
ObjectCode,
|
70
|
+
)
|
71
|
+
|
72
|
+
# There is no definition of the default stream in the Nvidia bindings (nor
|
73
|
+
# is there at the C/C++ level), so we define it here so we don't need to
|
74
|
+
# use a magic number 0 in places where we want the default stream.
|
75
|
+
CU_STREAM_DEFAULT = 0
|
76
|
+
|
77
|
+
|
61
78
|
MIN_REQUIRED_CC = (3, 5)
|
62
79
|
SUPPORTS_IPC = sys.platform.startswith("linux")
|
63
80
|
|
@@ -108,6 +125,25 @@ def make_logger():
|
|
108
125
|
return logger
|
109
126
|
|
110
127
|
|
128
|
+
@functools.cache
|
129
|
+
def _have_nvjitlink():
|
130
|
+
if not USE_NV_BINDING:
|
131
|
+
return False
|
132
|
+
try:
|
133
|
+
from cuda.bindings._internal import nvjitlink as nvjitlink_internal
|
134
|
+
from cuda.bindings._internal.utils import NotSupportedError
|
135
|
+
except ImportError:
|
136
|
+
return False
|
137
|
+
try:
|
138
|
+
return (
|
139
|
+
nvjitlink_internal._inspect_function_pointer("__nvJitLinkVersion")
|
140
|
+
!= 0
|
141
|
+
)
|
142
|
+
except NotSupportedError:
|
143
|
+
# no driver
|
144
|
+
return False
|
145
|
+
|
146
|
+
|
111
147
|
class DeadMemoryError(RuntimeError):
|
112
148
|
pass
|
113
149
|
|
@@ -1472,7 +1508,7 @@ class Context(object):
|
|
1472
1508
|
if isinstance(ptx, str):
|
1473
1509
|
ptx = ptx.encode("utf8")
|
1474
1510
|
if USE_NV_BINDING:
|
1475
|
-
image = ptx
|
1511
|
+
image = ObjectCode.from_ptx(ptx)
|
1476
1512
|
else:
|
1477
1513
|
image = c_char_p(ptx)
|
1478
1514
|
return self.create_module_image(image)
|
@@ -1615,7 +1651,6 @@ def load_module_image_ctypes(
|
|
1615
1651
|
|
1616
1652
|
option_keys = (drvapi.cu_jit_option * len(options))(*options.keys())
|
1617
1653
|
option_vals = (c_void_p * len(options))(*options.values())
|
1618
|
-
|
1619
1654
|
handle = drvapi.cu_module()
|
1620
1655
|
try:
|
1621
1656
|
driver.cuModuleLoadDataEx(
|
@@ -1662,7 +1697,7 @@ def load_module_image_cuda_python(
|
|
1662
1697
|
|
1663
1698
|
try:
|
1664
1699
|
handle = driver.cuModuleLoadDataEx(
|
1665
|
-
image, len(options), option_keys, option_vals
|
1700
|
+
image.code, len(options), option_keys, option_vals
|
1666
1701
|
)
|
1667
1702
|
except CudaAPIError as e:
|
1668
1703
|
err_string = jiterrors.decode("utf-8")
|
@@ -2722,7 +2757,7 @@ def launch_kernel(
|
|
2722
2757
|
)
|
2723
2758
|
|
2724
2759
|
|
2725
|
-
class Linker(metaclass=ABCMeta):
|
2760
|
+
class _LinkerBase(metaclass=ABCMeta):
|
2726
2761
|
"""Abstract base class for linkers"""
|
2727
2762
|
|
2728
2763
|
@classmethod
|
@@ -2735,30 +2770,27 @@ class Linker(metaclass=ABCMeta):
|
|
2735
2770
|
additional_flags=None,
|
2736
2771
|
):
|
2737
2772
|
driver_ver = driver.get_version()
|
2738
|
-
if
|
2739
|
-
|
2740
|
-
|
2741
|
-
|
2742
|
-
|
2743
|
-
|
2744
|
-
|
2745
|
-
if config.CUDA_ENABLE_PYNVJITLINK:
|
2746
|
-
linker = PyNvJitLinker
|
2747
|
-
|
2748
|
-
elif config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY:
|
2749
|
-
linker = MVCLinker
|
2773
|
+
if driver_ver < (12, 0):
|
2774
|
+
if config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY:
|
2775
|
+
linker = MVCLinker
|
2776
|
+
elif USE_NV_BINDING:
|
2777
|
+
linker = _Linker
|
2778
|
+
else:
|
2779
|
+
linker = CtypesLinker
|
2750
2780
|
else:
|
2751
2781
|
if USE_NV_BINDING:
|
2752
|
-
linker = CudaPythonLinker
|
2782
|
+
linker = _Linker
|
2753
2783
|
else:
|
2754
2784
|
linker = CtypesLinker
|
2755
2785
|
|
2756
|
-
|
2757
|
-
|
2758
|
-
|
2759
|
-
raise ValueError("LTO and additional flags require PyNvJitLinker")
|
2786
|
+
params = (max_registers, lineinfo, cc)
|
2787
|
+
if linker is _Linker:
|
2788
|
+
params = (*params, lto, additional_flags)
|
2760
2789
|
else:
|
2761
|
-
|
2790
|
+
if lto or additional_flags:
|
2791
|
+
raise ValueError("LTO and additional flags require nvjitlink")
|
2792
|
+
|
2793
|
+
return linker(*params)
|
2762
2794
|
|
2763
2795
|
@abstractmethod
|
2764
2796
|
def __init__(self, max_registers, lineinfo, cc):
|
@@ -2786,7 +2818,6 @@ class Linker(metaclass=ABCMeta):
|
|
2786
2818
|
with driver.get_active_context() as ac:
|
2787
2819
|
dev = driver.get_device(ac.devnum)
|
2788
2820
|
cc = dev.compute_capability
|
2789
|
-
|
2790
2821
|
ptx, log = nvrtc.compile(cu, name, cc)
|
2791
2822
|
|
2792
2823
|
if config.DUMP_ASSEMBLY:
|
@@ -2821,7 +2852,6 @@ class Linker(metaclass=ABCMeta):
|
|
2821
2852
|
LTO-ed portion of the PTX when linker is added with objects that can be
|
2822
2853
|
both LTO-ed and not LTO-ed.
|
2823
2854
|
"""
|
2824
|
-
|
2825
2855
|
if isinstance(path_or_code, str):
|
2826
2856
|
ext = pathlib.Path(path_or_code).suffix
|
2827
2857
|
if ext == "":
|
@@ -2901,7 +2931,148 @@ class Linker(metaclass=ABCMeta):
|
|
2901
2931
|
"""
|
2902
2932
|
|
2903
2933
|
|
2904
|
-
class MVCLinker(Linker):
|
2934
|
+
class _Linker(_LinkerBase):
|
2935
|
+
def __init__(
|
2936
|
+
self,
|
2937
|
+
max_registers=None,
|
2938
|
+
lineinfo=False,
|
2939
|
+
cc=None,
|
2940
|
+
lto=None,
|
2941
|
+
additional_flags=None,
|
2942
|
+
):
|
2943
|
+
arch = f"sm_{cc[0]}{cc[1]}"
|
2944
|
+
self.max_registers = max_registers if max_registers else None
|
2945
|
+
self.lineinfo = lineinfo
|
2946
|
+
self.cc = cc
|
2947
|
+
self.arch = arch
|
2948
|
+
if lto is False:
|
2949
|
+
# WAR for apparent nvjitlink issue
|
2950
|
+
lto = None
|
2951
|
+
self.lto = lto
|
2952
|
+
self.additional_flags = additional_flags
|
2953
|
+
|
2954
|
+
self.options = LinkerOptions(
|
2955
|
+
max_register_count=self.max_registers,
|
2956
|
+
lineinfo=lineinfo,
|
2957
|
+
arch=arch,
|
2958
|
+
link_time_optimization=lto,
|
2959
|
+
)
|
2960
|
+
self._complete = False
|
2961
|
+
self._object_codes = []
|
2962
|
+
self.linker = None # need at least one program
|
2963
|
+
|
2964
|
+
@property
|
2965
|
+
def info_log(self):
|
2966
|
+
if not self.linker:
|
2967
|
+
raise ValueError("Not Initialized")
|
2968
|
+
if self._complete:
|
2969
|
+
return self._info_log
|
2970
|
+
raise RuntimeError("Link not yet complete.")
|
2971
|
+
|
2972
|
+
@property
|
2973
|
+
def error_log(self):
|
2974
|
+
if not self.linker:
|
2975
|
+
raise ValueError("Not Initialized")
|
2976
|
+
if self._complete:
|
2977
|
+
return self._error_log
|
2978
|
+
raise RuntimeError("Link not yet complete.")
|
2979
|
+
|
2980
|
+
def add_ptx(self, ptx, name="<cudapy-ptx>"):
|
2981
|
+
obj = ObjectCode.from_ptx(ptx, name=name)
|
2982
|
+
self._object_codes.append(obj)
|
2983
|
+
|
2984
|
+
def add_cu(self, cu, name="<cudapy-cu>"):
|
2985
|
+
with driver.get_active_context() as ac:
|
2986
|
+
dev = driver.get_device(ac.devnum)
|
2987
|
+
cc = dev.compute_capability
|
2988
|
+
obj, log = nvrtc.compile(cu, name, cc, ltoir=self.lto)
|
2989
|
+
|
2990
|
+
if not self.lto and config.DUMP_ASSEMBLY:
|
2991
|
+
print(("ASSEMBLY %s" % name).center(80, "-"))
|
2992
|
+
print(obj.code)
|
2993
|
+
|
2994
|
+
self._object_codes.append(obj)
|
2995
|
+
|
2996
|
+
def add_cubin(self, cubin, name="<cudapy-cubin>"):
|
2997
|
+
obj = ObjectCode.from_cubin(cubin, name=name)
|
2998
|
+
self._object_codes.append(obj)
|
2999
|
+
|
3000
|
+
def add_ltoir(self, ltoir, name="<cudapy-ltoir>"):
|
3001
|
+
obj = ObjectCode.from_ltoir(ltoir, name=name)
|
3002
|
+
self._object_codes.append(obj)
|
3003
|
+
|
3004
|
+
def add_fatbin(self, fatbin, name="<cudapy-fatbin>"):
|
3005
|
+
obj = ObjectCode.from_fatbin(fatbin, name=name)
|
3006
|
+
self._object_codes.append(obj)
|
3007
|
+
|
3008
|
+
def add_object(self, obj, name="<cudapy-object>"):
|
3009
|
+
obj = ObjectCode.from_object(obj, name=name)
|
3010
|
+
self._object_codes.append(obj)
|
3011
|
+
|
3012
|
+
def add_library(self, lib, name="<cudapy-lib>"):
|
3013
|
+
obj = ObjectCode.from_library(lib, name=name)
|
3014
|
+
self._object_codes.append(obj)
|
3015
|
+
|
3016
|
+
def add_file(self, path, kind):
|
3017
|
+
try:
|
3018
|
+
data = cached_file_read(path, how="rb")
|
3019
|
+
except FileNotFoundError:
|
3020
|
+
raise LinkerError(f"{path} not found")
|
3021
|
+
name = pathlib.Path(path).name
|
3022
|
+
self.add_data(data, kind, name)
|
3023
|
+
|
3024
|
+
def add_data(self, data, kind, name):
|
3025
|
+
if kind == FILE_EXTENSION_MAP["ptx"]:
|
3026
|
+
fn = self.add_ptx
|
3027
|
+
elif kind == FILE_EXTENSION_MAP["cubin"]:
|
3028
|
+
fn = self.add_cubin
|
3029
|
+
elif kind == "cu":
|
3030
|
+
fn = self.add_cu
|
3031
|
+
elif (
|
3032
|
+
kind == FILE_EXTENSION_MAP["lib"] or kind == FILE_EXTENSION_MAP["a"]
|
3033
|
+
):
|
3034
|
+
fn = self.add_library
|
3035
|
+
elif kind == FILE_EXTENSION_MAP["fatbin"]:
|
3036
|
+
fn = self.add_fatbin
|
3037
|
+
elif kind == FILE_EXTENSION_MAP["o"]:
|
3038
|
+
fn = self.add_object
|
3039
|
+
elif kind == FILE_EXTENSION_MAP["ltoir"]:
|
3040
|
+
fn = self.add_ltoir
|
3041
|
+
else:
|
3042
|
+
raise LinkerError(f"Don't know how to link {kind}")
|
3043
|
+
|
3044
|
+
fn(data, name)
|
3045
|
+
|
3046
|
+
def get_linked_ptx(self):
|
3047
|
+
options = LinkerOptions(
|
3048
|
+
max_register_count=self.max_registers,
|
3049
|
+
lineinfo=self.lineinfo,
|
3050
|
+
arch=self.arch,
|
3051
|
+
link_time_optimization=True,
|
3052
|
+
ptx=True,
|
3053
|
+
)
|
3054
|
+
|
3055
|
+
self.linker = Linker(*self._object_codes, options=options)
|
3056
|
+
|
3057
|
+
result = self.linker.link("ptx")
|
3058
|
+
self.close()
|
3059
|
+
self._complete = True
|
3060
|
+
return result.code
|
3061
|
+
|
3062
|
+
def close(self):
|
3063
|
+
self._info_log = self.linker.get_info_log()
|
3064
|
+
self._error_log = self.linker.get_error_log()
|
3065
|
+
self.linker.close()
|
3066
|
+
|
3067
|
+
def complete(self):
|
3068
|
+
self.linker = Linker(*self._object_codes, options=self.options)
|
3069
|
+
result = self.linker.link("cubin")
|
3070
|
+
self.close()
|
3071
|
+
self._complete = True
|
3072
|
+
return result
|
3073
|
+
|
3074
|
+
|
3075
|
+
class MVCLinker(_LinkerBase):
|
2905
3076
|
"""
|
2906
3077
|
Linker supporting Minor Version Compatibility, backed by the cubinlinker
|
2907
3078
|
package.
|
@@ -2996,7 +3167,7 @@ class MVCLinker(Linker):
|
|
2996
3167
|
raise LinkerError from e
|
2997
3168
|
|
2998
3169
|
|
2999
|
-
class CtypesLinker(Linker):
|
3170
|
+
class CtypesLinker(_LinkerBase):
|
3000
3171
|
"""
|
3001
3172
|
Links for current device if no CC given
|
3002
3173
|
"""
|
@@ -3139,266 +3310,6 @@ class CtypesLinker(Linker):
|
|
3139
3310
|
return bytes(np.ctypeslib.as_array(cubin_ptr, shape=(size,)))
|
3140
3311
|
|
3141
3312
|
|
3142
|
-
class CudaPythonLinker(Linker):
|
3143
|
-
"""
|
3144
|
-
Links for current device if no CC given
|
3145
|
-
"""
|
3146
|
-
|
3147
|
-
def __init__(self, max_registers=0, lineinfo=False, cc=None):
|
3148
|
-
super().__init__(max_registers, lineinfo, cc)
|
3149
|
-
|
3150
|
-
logsz = config.CUDA_LOG_SIZE
|
3151
|
-
linkerinfo = bytearray(logsz)
|
3152
|
-
linkererrors = bytearray(logsz)
|
3153
|
-
|
3154
|
-
jit_option = binding.CUjit_option
|
3155
|
-
|
3156
|
-
options = {
|
3157
|
-
jit_option.CU_JIT_INFO_LOG_BUFFER: linkerinfo,
|
3158
|
-
jit_option.CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES: logsz,
|
3159
|
-
jit_option.CU_JIT_ERROR_LOG_BUFFER: linkererrors,
|
3160
|
-
jit_option.CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES: logsz,
|
3161
|
-
jit_option.CU_JIT_LOG_VERBOSE: 1,
|
3162
|
-
}
|
3163
|
-
if max_registers:
|
3164
|
-
options[jit_option.CU_JIT_MAX_REGISTERS] = max_registers
|
3165
|
-
if lineinfo:
|
3166
|
-
options[jit_option.CU_JIT_GENERATE_LINE_INFO] = 1
|
3167
|
-
|
3168
|
-
if cc is None:
|
3169
|
-
# No option value is needed, but we need something as a placeholder
|
3170
|
-
options[jit_option.CU_JIT_TARGET_FROM_CUCONTEXT] = 1
|
3171
|
-
else:
|
3172
|
-
cc_val = cc[0] * 10 + cc[1]
|
3173
|
-
cc_enum = getattr(
|
3174
|
-
binding.CUjit_target, f"CU_TARGET_COMPUTE_{cc_val}"
|
3175
|
-
)
|
3176
|
-
options[jit_option.CU_JIT_TARGET] = cc_enum
|
3177
|
-
|
3178
|
-
raw_keys = list(options.keys())
|
3179
|
-
raw_values = list(options.values())
|
3180
|
-
self.handle = driver.cuLinkCreate(len(raw_keys), raw_keys, raw_values)
|
3181
|
-
|
3182
|
-
weakref.finalize(self, driver.cuLinkDestroy, self.handle)
|
3183
|
-
|
3184
|
-
self.linker_info_buf = linkerinfo
|
3185
|
-
self.linker_errors_buf = linkererrors
|
3186
|
-
|
3187
|
-
self._keep_alive = [linkerinfo, linkererrors, raw_keys, raw_values]
|
3188
|
-
|
3189
|
-
@property
|
3190
|
-
def info_log(self):
|
3191
|
-
return self.linker_info_buf.decode("utf8")
|
3192
|
-
|
3193
|
-
@property
|
3194
|
-
def error_log(self):
|
3195
|
-
return self.linker_errors_buf.decode("utf8")
|
3196
|
-
|
3197
|
-
def add_cubin(self, cubin, name="<unnamed-cubin>"):
|
3198
|
-
input_type = binding.CUjitInputType.CU_JIT_INPUT_CUBIN
|
3199
|
-
return self._add_data(input_type, cubin, name)
|
3200
|
-
|
3201
|
-
def add_ptx(self, ptx, name="<unnamed-ptx>"):
|
3202
|
-
input_type = binding.CUjitInputType.CU_JIT_INPUT_PTX
|
3203
|
-
return self._add_data(input_type, ptx, name)
|
3204
|
-
|
3205
|
-
def add_object(self, object_, name="<unnamed-object>"):
|
3206
|
-
input_type = binding.CUjitInputType.CU_JIT_INPUT_OBJECT
|
3207
|
-
return self._add_data(input_type, object_, name)
|
3208
|
-
|
3209
|
-
def add_fatbin(self, fatbin, name="<unnamed-fatbin>"):
|
3210
|
-
input_type = binding.CUjitInputType.CU_JIT_INPUT_FATBINARY
|
3211
|
-
return self._add_data(input_type, fatbin, name)
|
3212
|
-
|
3213
|
-
def add_library(self, library, name="<unnamed-library>"):
|
3214
|
-
input_type = binding.CUjitInputType.CU_JIT_INPUT_LIBRARY
|
3215
|
-
return self._add_data(input_type, library, name)
|
3216
|
-
|
3217
|
-
def _add_data(self, input_type, data, name):
|
3218
|
-
name_buffer = name.encode("utf8")
|
3219
|
-
self._keep_alive += [data, name_buffer]
|
3220
|
-
try:
|
3221
|
-
driver.cuLinkAddData(
|
3222
|
-
self.handle, input_type, data, len(data), name_buffer, 0, [], []
|
3223
|
-
)
|
3224
|
-
except CudaAPIError as e:
|
3225
|
-
raise LinkerError("%s\n%s" % (e, self.error_log))
|
3226
|
-
|
3227
|
-
def add_data(self, data, kind, name=None):
|
3228
|
-
# We pass the name as **kwargs to ensure the default name for the input
|
3229
|
-
# type is used if none is supplied
|
3230
|
-
kws = {}
|
3231
|
-
if name is not None:
|
3232
|
-
kws["name"] = name
|
3233
|
-
|
3234
|
-
if kind == FILE_EXTENSION_MAP["cubin"]:
|
3235
|
-
self.add_cubin(data, **kws)
|
3236
|
-
elif kind == FILE_EXTENSION_MAP["fatbin"]:
|
3237
|
-
self.add_fatbin(data, **kws)
|
3238
|
-
elif kind == FILE_EXTENSION_MAP["a"]:
|
3239
|
-
self.add_library(data, **kws)
|
3240
|
-
elif kind == FILE_EXTENSION_MAP["ptx"]:
|
3241
|
-
self.add_ptx(data, **kws)
|
3242
|
-
elif kind == FILE_EXTENSION_MAP["o"]:
|
3243
|
-
self.add_object(data, **kws)
|
3244
|
-
elif kind == FILE_EXTENSION_MAP["ltoir"]:
|
3245
|
-
raise LinkerError("CudaPythonLinker cannot link LTO-IR")
|
3246
|
-
else:
|
3247
|
-
raise LinkerError(f"Don't know how to link {kind}")
|
3248
|
-
|
3249
|
-
def add_file(self, path, kind):
|
3250
|
-
pathbuf = path.encode("utf8")
|
3251
|
-
self._keep_alive.append(pathbuf)
|
3252
|
-
|
3253
|
-
try:
|
3254
|
-
driver.cuLinkAddFile(self.handle, kind, pathbuf, 0, [], [])
|
3255
|
-
except CudaAPIError as e:
|
3256
|
-
if e.code == binding.CUresult.CUDA_ERROR_FILE_NOT_FOUND:
|
3257
|
-
msg = f"{path} not found"
|
3258
|
-
else:
|
3259
|
-
msg = "%s\n%s" % (e, self.error_log)
|
3260
|
-
raise LinkerError(msg)
|
3261
|
-
|
3262
|
-
def complete(self):
|
3263
|
-
try:
|
3264
|
-
cubin_buf, size = driver.cuLinkComplete(self.handle)
|
3265
|
-
except CudaAPIError as e:
|
3266
|
-
raise LinkerError("%s\n%s" % (e, self.error_log))
|
3267
|
-
|
3268
|
-
assert size > 0, "linker returned a zero sized cubin"
|
3269
|
-
del self._keep_alive[:]
|
3270
|
-
# We return a copy of the cubin because it's owned by the linker
|
3271
|
-
cubin_ptr = ctypes.cast(cubin_buf, ctypes.POINTER(ctypes.c_char))
|
3272
|
-
return bytes(np.ctypeslib.as_array(cubin_ptr, shape=(size,)))
|
3273
|
-
|
3274
|
-
|
3275
|
-
class PyNvJitLinker(Linker):
|
3276
|
-
def __init__(
|
3277
|
-
self,
|
3278
|
-
max_registers=None,
|
3279
|
-
lineinfo=False,
|
3280
|
-
cc=None,
|
3281
|
-
lto=False,
|
3282
|
-
additional_flags=None,
|
3283
|
-
):
|
3284
|
-
if NvJitLinker is None:
|
3285
|
-
raise ImportError(
|
3286
|
-
"Using pynvjitlink requires the pynvjitlink package to be "
|
3287
|
-
"available"
|
3288
|
-
)
|
3289
|
-
|
3290
|
-
if config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY:
|
3291
|
-
raise ValueError(
|
3292
|
-
"Can't set CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY and "
|
3293
|
-
"CUDA_ENABLE_PYNVJITLINK at the same time"
|
3294
|
-
)
|
3295
|
-
|
3296
|
-
if cc is None:
|
3297
|
-
raise RuntimeError("PyNvJitLinker requires CC to be specified")
|
3298
|
-
if not any(isinstance(cc, t) for t in [list, tuple]):
|
3299
|
-
raise TypeError("`cc` must be a list or tuple of length 2")
|
3300
|
-
|
3301
|
-
sm_ver = f"{cc[0] * 10 + cc[1]}"
|
3302
|
-
arch = f"-arch=sm_{sm_ver}"
|
3303
|
-
options = [arch]
|
3304
|
-
if max_registers:
|
3305
|
-
options.append(f"-maxrregcount={max_registers}")
|
3306
|
-
if lineinfo:
|
3307
|
-
options.append("-lineinfo")
|
3308
|
-
if lto:
|
3309
|
-
options.append("-lto")
|
3310
|
-
if additional_flags is not None:
|
3311
|
-
options.extend(additional_flags)
|
3312
|
-
|
3313
|
-
self._linker = NvJitLinker(*options)
|
3314
|
-
self.lto = lto
|
3315
|
-
self.options = options
|
3316
|
-
|
3317
|
-
@property
|
3318
|
-
def info_log(self):
|
3319
|
-
return self._linker.info_log
|
3320
|
-
|
3321
|
-
@property
|
3322
|
-
def error_log(self):
|
3323
|
-
return self._linker.error_log
|
3324
|
-
|
3325
|
-
def add_ptx(self, ptx, name="<cudapy-ptx>"):
|
3326
|
-
self._linker.add_ptx(ptx, name)
|
3327
|
-
|
3328
|
-
def add_fatbin(self, fatbin, name="<external-fatbin>"):
|
3329
|
-
self._linker.add_fatbin(fatbin, name)
|
3330
|
-
|
3331
|
-
def add_ltoir(self, ltoir, name="<external-ltoir>"):
|
3332
|
-
self._linker.add_ltoir(ltoir, name)
|
3333
|
-
|
3334
|
-
def add_object(self, obj, name="<external-object>"):
|
3335
|
-
self._linker.add_object(obj, name)
|
3336
|
-
|
3337
|
-
def add_file(self, path, kind):
|
3338
|
-
try:
|
3339
|
-
data = cached_file_read(path, "rb")
|
3340
|
-
except FileNotFoundError:
|
3341
|
-
raise LinkerError(f"{path} not found")
|
3342
|
-
|
3343
|
-
name = pathlib.Path(path).name
|
3344
|
-
self.add_data(data, kind, name)
|
3345
|
-
|
3346
|
-
def add_cu(self, cu, name):
|
3347
|
-
"""Add CUDA source in a string to the link. The name of the source
|
3348
|
-
file should be specified in `name`."""
|
3349
|
-
with driver.get_active_context() as ac:
|
3350
|
-
dev = driver.get_device(ac.devnum)
|
3351
|
-
cc = dev.compute_capability
|
3352
|
-
|
3353
|
-
program, log = nvrtc.compile(cu, name, cc, ltoir=self.lto)
|
3354
|
-
|
3355
|
-
if not self.lto and config.DUMP_ASSEMBLY:
|
3356
|
-
print(("ASSEMBLY %s" % name).center(80, "-"))
|
3357
|
-
print(program)
|
3358
|
-
print("=" * 80)
|
3359
|
-
|
3360
|
-
suffix = ".ltoir" if self.lto else ".ptx"
|
3361
|
-
program_name = os.path.splitext(name)[0] + suffix
|
3362
|
-
# Link the program's PTX or LTOIR using the normal linker mechanism
|
3363
|
-
if self.lto:
|
3364
|
-
self.add_ltoir(program, program_name)
|
3365
|
-
else:
|
3366
|
-
self.add_ptx(program.encode(), program_name)
|
3367
|
-
|
3368
|
-
def add_data(self, data, kind, name):
|
3369
|
-
if kind == FILE_EXTENSION_MAP["cubin"]:
|
3370
|
-
fn = self._linker.add_cubin
|
3371
|
-
elif kind == FILE_EXTENSION_MAP["fatbin"]:
|
3372
|
-
fn = self._linker.add_fatbin
|
3373
|
-
elif kind == FILE_EXTENSION_MAP["a"]:
|
3374
|
-
fn = self._linker.add_library
|
3375
|
-
elif kind == FILE_EXTENSION_MAP["ptx"]:
|
3376
|
-
return self.add_ptx(data, name)
|
3377
|
-
elif kind == FILE_EXTENSION_MAP["o"]:
|
3378
|
-
fn = self._linker.add_object
|
3379
|
-
elif kind == FILE_EXTENSION_MAP["ltoir"]:
|
3380
|
-
fn = self._linker.add_ltoir
|
3381
|
-
else:
|
3382
|
-
raise LinkerError(f"Don't know how to link {kind}")
|
3383
|
-
|
3384
|
-
try:
|
3385
|
-
fn(data, name)
|
3386
|
-
except NvJitLinkError as e:
|
3387
|
-
raise LinkerError from e
|
3388
|
-
|
3389
|
-
def get_linked_ptx(self):
|
3390
|
-
try:
|
3391
|
-
return self._linker.get_linked_ptx()
|
3392
|
-
except NvJitLinkError as e:
|
3393
|
-
raise LinkerError from e
|
3394
|
-
|
3395
|
-
def complete(self):
|
3396
|
-
try:
|
3397
|
-
return self._linker.get_linked_cubin()
|
3398
|
-
except NvJitLinkError as e:
|
3399
|
-
raise LinkerError from e
|
3400
|
-
|
3401
|
-
|
3402
3313
|
# -----------------------------------------------------------------------------
|
3403
3314
|
|
3404
3315
|
|
@@ -154,7 +154,7 @@ def test():
|
|
154
154
|
print(f"\t\t{location}")
|
155
155
|
|
156
156
|
# Checks for dynamic libraries
|
157
|
-
libs = "nvvm nvrtc
|
157
|
+
libs = "nvvm nvrtc".split()
|
158
158
|
for lib in libs:
|
159
159
|
path = get_cudalib(lib)
|
160
160
|
print("Finding {} from {}".format(lib, _get_source_variable(lib)))
|
@@ -2,17 +2,16 @@ from numba import config
|
|
2
2
|
from . import enums
|
3
3
|
|
4
4
|
if config.CUDA_USE_NVIDIA_BINDING:
|
5
|
-
from cuda.bindings import driver
|
5
|
+
from cuda.bindings.driver import CUjitInputType
|
6
6
|
|
7
|
-
jitty = driver.CUjitInputType
|
8
7
|
FILE_EXTENSION_MAP = {
|
9
|
-
"o": jitty.CU_JIT_INPUT_OBJECT,
|
10
|
-
"ptx": jitty.CU_JIT_INPUT_PTX,
|
11
|
-
"a": jitty.CU_JIT_INPUT_LIBRARY,
|
12
|
-
"lib": jitty.CU_JIT_INPUT_LIBRARY,
|
13
|
-
"cubin": jitty.CU_JIT_INPUT_CUBIN,
|
14
|
-
"fatbin": jitty.CU_JIT_INPUT_FATBINARY,
|
15
|
-
"ltoir": jitty.CU_JIT_INPUT_NVVM,
|
8
|
+
"o": CUjitInputType.CU_JIT_INPUT_OBJECT,
|
9
|
+
"ptx": CUjitInputType.CU_JIT_INPUT_PTX,
|
10
|
+
"a": CUjitInputType.CU_JIT_INPUT_LIBRARY,
|
11
|
+
"lib": CUjitInputType.CU_JIT_INPUT_LIBRARY,
|
12
|
+
"cubin": CUjitInputType.CU_JIT_INPUT_CUBIN,
|
13
|
+
"fatbin": CUjitInputType.CU_JIT_INPUT_FATBINARY,
|
14
|
+
"ltoir": CUjitInputType.CU_JIT_INPUT_NVVM,
|
16
15
|
}
|
17
16
|
else:
|
18
17
|
FILE_EXTENSION_MAP = {
|