numba-cuda 0.0.17__py3-none-any.whl → 0.0.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- numba_cuda/VERSION +1 -1
- numba_cuda/numba/cuda/codegen.py +15 -3
- numba_cuda/numba/cuda/cuda_paths.py +68 -0
- numba_cuda/numba/cuda/cudadrv/devicearray.py +4 -1
- numba_cuda/numba/cuda/cudadrv/driver.py +209 -47
- numba_cuda/numba/cuda/cudadrv/enums.py +3 -0
- numba_cuda/numba/cuda/cudadrv/libs.py +38 -0
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +63 -0
- numba_cuda/numba/cuda/cudadrv/mappings.py +24 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +9 -4
- numba_cuda/numba/cuda/device_init.py +3 -0
- numba_cuda/numba/cuda/dispatcher.py +48 -8
- numba_cuda/numba/cuda/intrinsics.py +6 -1
- numba_cuda/numba/cuda/runtime/nrt.cu +190 -0
- numba_cuda/numba/cuda/simulator/api.py +14 -0
- numba_cuda/numba/cuda/target.py +8 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +199 -0
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +44 -4
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +2 -2
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +48 -0
- numba_cuda/numba/cuda/tests/nrt/__init__.py +8 -0
- numba_cuda/numba/cuda/tests/nrt/mock_numpy.py +42 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +110 -0
- numba_cuda/numba/cuda/tests/test_binary_generation/Makefile +51 -0
- numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +170 -0
- numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu +19 -0
- numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu +3 -0
- {numba_cuda-0.0.17.dist-info → numba_cuda-0.0.19.dist-info}/METADATA +1 -1
- {numba_cuda-0.0.17.dist-info → numba_cuda-0.0.19.dist-info}/RECORD +32 -20
- {numba_cuda-0.0.17.dist-info → numba_cuda-0.0.19.dist-info}/WHEEL +1 -1
- {numba_cuda-0.0.17.dist-info → numba_cuda-0.0.19.dist-info}/LICENSE +0 -0
- {numba_cuda-0.0.17.dist-info → numba_cuda-0.0.19.dist-info}/top_level.txt +0 -0
numba_cuda/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.
|
1
|
+
0.0.19
|
numba_cuda/numba/cuda/codegen.py
CHANGED
@@ -59,8 +59,15 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
|
|
59
59
|
get_cufunc), which may be of different compute capabilities.
|
60
60
|
"""
|
61
61
|
|
62
|
-
def __init__(
|
63
|
-
|
62
|
+
def __init__(
|
63
|
+
self,
|
64
|
+
codegen,
|
65
|
+
name,
|
66
|
+
entry_name=None,
|
67
|
+
max_registers=None,
|
68
|
+
lto=False,
|
69
|
+
nvvm_options=None
|
70
|
+
):
|
64
71
|
"""
|
65
72
|
codegen:
|
66
73
|
Codegen object.
|
@@ -71,6 +78,8 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
|
|
71
78
|
kernel and not a device function.
|
72
79
|
max_registers:
|
73
80
|
The maximum register usage to aim for when linking.
|
81
|
+
lto:
|
82
|
+
Whether to enable link-time optimization.
|
74
83
|
nvvm_options:
|
75
84
|
Dict of options to pass to NVVM.
|
76
85
|
"""
|
@@ -103,6 +112,7 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
|
|
103
112
|
self._cufunc_cache = {}
|
104
113
|
|
105
114
|
self._max_registers = max_registers
|
115
|
+
self._lto = lto
|
106
116
|
if nvvm_options is None:
|
107
117
|
nvvm_options = {}
|
108
118
|
self._nvvm_options = nvvm_options
|
@@ -178,7 +188,9 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
|
|
178
188
|
if cubin:
|
179
189
|
return cubin
|
180
190
|
|
181
|
-
linker = driver.Linker.new(
|
191
|
+
linker = driver.Linker.new(
|
192
|
+
max_registers=self._max_registers, cc=cc, lto=self._lto
|
193
|
+
)
|
182
194
|
|
183
195
|
if linker.lto:
|
184
196
|
ltoir = self.get_ltoir(cc=cc)
|
@@ -2,9 +2,11 @@ import sys
|
|
2
2
|
import re
|
3
3
|
import os
|
4
4
|
from collections import namedtuple
|
5
|
+
import platform
|
5
6
|
|
6
7
|
from numba.core.config import IS_WIN32
|
7
8
|
from numba.misc.findlib import find_lib, find_file
|
9
|
+
from numba import config
|
8
10
|
|
9
11
|
|
10
12
|
_env_path_tuple = namedtuple('_env_path_tuple', ['by', 'info'])
|
@@ -241,6 +243,7 @@ def get_cuda_paths():
|
|
241
243
|
'libdevice': _get_libdevice_paths(),
|
242
244
|
'cudalib_dir': _get_cudalib_dir(),
|
243
245
|
'static_cudalib_dir': _get_static_cudalib_dir(),
|
246
|
+
'include_dir': _get_include_dir(),
|
244
247
|
}
|
245
248
|
# Cache result
|
246
249
|
get_cuda_paths._cached_result = d
|
@@ -256,3 +259,68 @@ def get_debian_pkg_libdevice():
|
|
256
259
|
if not os.path.exists(pkg_libdevice_location):
|
257
260
|
return None
|
258
261
|
return pkg_libdevice_location
|
262
|
+
|
263
|
+
|
264
|
+
def get_current_cuda_target_name():
|
265
|
+
"""Determine conda's CTK target folder based on system and machine arch.
|
266
|
+
|
267
|
+
CTK's conda package delivers headers based on its architecture type. For example,
|
268
|
+
`x86_64` machine places header under `$CONDA_PREFIX/targets/x86_64-linux`, and
|
269
|
+
`aarch64` places under `$CONDA_PREFIX/targets/sbsa-linux`. Read more about the
|
270
|
+
nuances at cudart's conda feedstock:
|
271
|
+
https://github.com/conda-forge/cuda-cudart-feedstock/blob/main/recipe/meta.yaml#L8-L11 # noqa: E501
|
272
|
+
"""
|
273
|
+
system = platform.system()
|
274
|
+
machine = platform.machine()
|
275
|
+
|
276
|
+
if system == "Linux":
|
277
|
+
arch_to_targets = {
|
278
|
+
'x86_64': 'x86_64-linux',
|
279
|
+
'aarch64': 'sbsa-linux'
|
280
|
+
}
|
281
|
+
elif system == "Windows":
|
282
|
+
arch_to_targets = {
|
283
|
+
'AMD64': 'x64',
|
284
|
+
}
|
285
|
+
else:
|
286
|
+
arch_to_targets = {}
|
287
|
+
|
288
|
+
return arch_to_targets.get(machine, None)
|
289
|
+
|
290
|
+
|
291
|
+
def get_conda_include_dir():
|
292
|
+
"""
|
293
|
+
Return the include directory in the current conda environment, if one
|
294
|
+
is active and it exists.
|
295
|
+
"""
|
296
|
+
is_conda_env = os.path.exists(os.path.join(sys.prefix, 'conda-meta'))
|
297
|
+
if not is_conda_env:
|
298
|
+
return
|
299
|
+
|
300
|
+
if platform.system() == "Windows":
|
301
|
+
include_dir = os.path.join(
|
302
|
+
sys.prefix, 'Library', 'include'
|
303
|
+
)
|
304
|
+
elif target_name := get_current_cuda_target_name():
|
305
|
+
include_dir = os.path.join(
|
306
|
+
sys.prefix, 'targets', target_name, 'include'
|
307
|
+
)
|
308
|
+
else:
|
309
|
+
# A fallback when target cannot determined
|
310
|
+
# though usually it shouldn't.
|
311
|
+
include_dir = os.path.join(sys.prefix, 'include')
|
312
|
+
|
313
|
+
if os.path.exists(include_dir):
|
314
|
+
return include_dir
|
315
|
+
return
|
316
|
+
|
317
|
+
|
318
|
+
def _get_include_dir():
|
319
|
+
"""Find the root include directory."""
|
320
|
+
options = [
|
321
|
+
('Conda environment (NVIDIA package)', get_conda_include_dir()),
|
322
|
+
('CUDA_INCLUDE_PATH Config Entry', config.CUDA_INCLUDE_PATH),
|
323
|
+
# TODO: add others
|
324
|
+
]
|
325
|
+
by, include_dir = _find_valid_path(options)
|
326
|
+
return _env_path_tuple(by, include_dir)
|
@@ -876,7 +876,10 @@ def auto_device(obj, stream=0, copy=True, user_explicit=False):
|
|
876
876
|
sentry_contiguous(obj)
|
877
877
|
devobj = from_array_like(obj, stream=stream)
|
878
878
|
if copy:
|
879
|
-
if
|
879
|
+
if (
|
880
|
+
config.CUDA_WARN_ON_IMPLICIT_COPY
|
881
|
+
and not config.DISABLE_PERFORMANCE_WARNINGS
|
882
|
+
):
|
880
883
|
if (
|
881
884
|
not user_explicit and
|
882
885
|
(not isinstance(obj, DeviceNDArray)
|
@@ -10,7 +10,6 @@ subsequent deallocation could further corrupt the CUDA context and causes the
|
|
10
10
|
system to freeze in some cases.
|
11
11
|
|
12
12
|
"""
|
13
|
-
|
14
13
|
import sys
|
15
14
|
import os
|
16
15
|
import ctypes
|
@@ -19,6 +18,7 @@ import functools
|
|
19
18
|
import warnings
|
20
19
|
import logging
|
21
20
|
import threading
|
21
|
+
import traceback
|
22
22
|
import asyncio
|
23
23
|
import pathlib
|
24
24
|
from itertools import product
|
@@ -35,6 +35,8 @@ from numba.core import utils, serialize, config
|
|
35
35
|
from .error import CudaSupportError, CudaDriverError
|
36
36
|
from .drvapi import API_PROTOTYPES
|
37
37
|
from .drvapi import cu_occupancy_b2d_size, cu_stream_callback_pyobj, cu_uuid
|
38
|
+
from .mappings import FILE_EXTENSION_MAP
|
39
|
+
from .linkable_code import LinkableCode
|
38
40
|
from numba.cuda.cudadrv import enums, drvapi, nvrtc
|
39
41
|
|
40
42
|
USE_NV_BINDING = config.CUDA_USE_NVIDIA_BINDING
|
@@ -56,6 +58,52 @@ _py_decref.argtypes = [ctypes.py_object]
|
|
56
58
|
_py_incref.argtypes = [ctypes.py_object]
|
57
59
|
|
58
60
|
|
61
|
+
def _readenv(name, ctor, default):
|
62
|
+
value = os.environ.get(name)
|
63
|
+
if value is None:
|
64
|
+
return default() if callable(default) else default
|
65
|
+
try:
|
66
|
+
if ctor is bool:
|
67
|
+
return value.lower() in {'1', "true"}
|
68
|
+
return ctor(value)
|
69
|
+
except Exception:
|
70
|
+
warnings.warn(
|
71
|
+
f"Environment variable '{name}' is defined but its associated "
|
72
|
+
f"value '{value}' could not be parsed.\n"
|
73
|
+
"The parse failed with exception:\n"
|
74
|
+
f"{traceback.format_exc()}",
|
75
|
+
RuntimeWarning
|
76
|
+
)
|
77
|
+
return default
|
78
|
+
|
79
|
+
|
80
|
+
_MVC_ERROR_MESSAGE = (
|
81
|
+
"Minor version compatibility requires ptxcompiler and cubinlinker packages "
|
82
|
+
"to be available"
|
83
|
+
)
|
84
|
+
|
85
|
+
ENABLE_PYNVJITLINK = (
|
86
|
+
_readenv("NUMBA_CUDA_ENABLE_PYNVJITLINK", bool, False)
|
87
|
+
or getattr(config, "CUDA_ENABLE_PYNVJITLINK", False)
|
88
|
+
)
|
89
|
+
if not hasattr(config, "CUDA_ENABLE_PYNVJITLINK"):
|
90
|
+
config.CUDA_ENABLE_PYNVJITLINK = ENABLE_PYNVJITLINK
|
91
|
+
|
92
|
+
if ENABLE_PYNVJITLINK:
|
93
|
+
try:
|
94
|
+
from pynvjitlink.api import NvJitLinker, NvJitLinkError
|
95
|
+
except ImportError:
|
96
|
+
raise ImportError(
|
97
|
+
"Using pynvjitlink requires the pynvjitlink package to be available"
|
98
|
+
)
|
99
|
+
|
100
|
+
if config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY:
|
101
|
+
raise ValueError(
|
102
|
+
"Can't set CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY and "
|
103
|
+
"CUDA_ENABLE_PYNVJITLINK at the same time"
|
104
|
+
)
|
105
|
+
|
106
|
+
|
59
107
|
def make_logger():
|
60
108
|
logger = logging.getLogger(__name__)
|
61
109
|
# is logging configured?
|
@@ -432,7 +480,7 @@ class Driver(object):
|
|
432
480
|
|
433
481
|
def get_version(self):
|
434
482
|
"""
|
435
|
-
Returns the CUDA
|
483
|
+
Returns the CUDA Driver version as a tuple (major, minor).
|
436
484
|
"""
|
437
485
|
if USE_NV_BINDING:
|
438
486
|
version = driver.cuDriverGetVersion()
|
@@ -2546,38 +2594,47 @@ def launch_kernel(cufunc_handle,
|
|
2546
2594
|
extra)
|
2547
2595
|
|
2548
2596
|
|
2549
|
-
if USE_NV_BINDING:
|
2550
|
-
jitty = binding.CUjitInputType
|
2551
|
-
FILE_EXTENSION_MAP = {
|
2552
|
-
'o': jitty.CU_JIT_INPUT_OBJECT,
|
2553
|
-
'ptx': jitty.CU_JIT_INPUT_PTX,
|
2554
|
-
'a': jitty.CU_JIT_INPUT_LIBRARY,
|
2555
|
-
'lib': jitty.CU_JIT_INPUT_LIBRARY,
|
2556
|
-
'cubin': jitty.CU_JIT_INPUT_CUBIN,
|
2557
|
-
'fatbin': jitty.CU_JIT_INPUT_FATBINARY,
|
2558
|
-
}
|
2559
|
-
else:
|
2560
|
-
FILE_EXTENSION_MAP = {
|
2561
|
-
'o': enums.CU_JIT_INPUT_OBJECT,
|
2562
|
-
'ptx': enums.CU_JIT_INPUT_PTX,
|
2563
|
-
'a': enums.CU_JIT_INPUT_LIBRARY,
|
2564
|
-
'lib': enums.CU_JIT_INPUT_LIBRARY,
|
2565
|
-
'cubin': enums.CU_JIT_INPUT_CUBIN,
|
2566
|
-
'fatbin': enums.CU_JIT_INPUT_FATBINARY,
|
2567
|
-
}
|
2568
|
-
|
2569
|
-
|
2570
2597
|
class Linker(metaclass=ABCMeta):
|
2571
2598
|
"""Abstract base class for linkers"""
|
2572
2599
|
|
2573
2600
|
@classmethod
|
2574
|
-
def new(cls,
|
2575
|
-
|
2576
|
-
|
2577
|
-
|
2578
|
-
|
2601
|
+
def new(cls,
|
2602
|
+
max_registers=0,
|
2603
|
+
lineinfo=False,
|
2604
|
+
cc=None,
|
2605
|
+
lto=None,
|
2606
|
+
additional_flags=None
|
2607
|
+
):
|
2608
|
+
|
2609
|
+
driver_ver = driver.get_version()
|
2610
|
+
if (
|
2611
|
+
config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY
|
2612
|
+
and driver_ver >= (12, 0)
|
2613
|
+
):
|
2614
|
+
raise ValueError(
|
2615
|
+
"Use CUDA_ENABLE_PYNVJITLINK for CUDA >= 12.0 MVC"
|
2616
|
+
)
|
2617
|
+
if config.CUDA_ENABLE_PYNVJITLINK and driver_ver < (12, 0):
|
2618
|
+
raise ValueError(
|
2619
|
+
"Enabling pynvjitlink requires CUDA 12."
|
2620
|
+
)
|
2621
|
+
if config.CUDA_ENABLE_PYNVJITLINK:
|
2622
|
+
linker = PyNvJitLinker
|
2623
|
+
|
2624
|
+
elif config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY:
|
2625
|
+
linker = MVCLinker
|
2626
|
+
else:
|
2627
|
+
if USE_NV_BINDING:
|
2628
|
+
linker = CudaPythonLinker
|
2629
|
+
else:
|
2630
|
+
linker = CtypesLinker
|
2631
|
+
|
2632
|
+
if linker is PyNvJitLinker:
|
2633
|
+
return linker(max_registers, lineinfo, cc, lto, additional_flags)
|
2634
|
+
elif additional_flags or lto:
|
2635
|
+
raise ValueError("LTO and additional flags require PyNvJitLinker")
|
2579
2636
|
else:
|
2580
|
-
return
|
2637
|
+
return linker(max_registers, lineinfo, cc)
|
2581
2638
|
|
2582
2639
|
@abstractmethod
|
2583
2640
|
def __init__(self, max_registers, lineinfo, cc):
|
@@ -2626,19 +2683,42 @@ class Linker(metaclass=ABCMeta):
|
|
2626
2683
|
cu = f.read()
|
2627
2684
|
self.add_cu(cu, os.path.basename(path))
|
2628
2685
|
|
2629
|
-
def add_file_guess_ext(self,
|
2630
|
-
"""
|
2631
|
-
|
2632
|
-
|
2633
|
-
|
2634
|
-
|
2635
|
-
|
2686
|
+
def add_file_guess_ext(self, path_or_code):
|
2687
|
+
"""
|
2688
|
+
Add a file or LinkableCode object to the link. If a file is
|
2689
|
+
passed, the type will be inferred from the extension. A LinkableCode
|
2690
|
+
object represents a file already in memory.
|
2691
|
+
"""
|
2692
|
+
if isinstance(path_or_code, str):
|
2693
|
+
ext = pathlib.Path(path_or_code).suffix
|
2694
|
+
if ext == '':
|
2695
|
+
raise RuntimeError(
|
2696
|
+
"Don't know how to link file with no extension"
|
2697
|
+
)
|
2698
|
+
elif ext == '.cu':
|
2699
|
+
self.add_cu_file(path_or_code)
|
2700
|
+
else:
|
2701
|
+
kind = FILE_EXTENSION_MAP.get(ext.lstrip('.'), None)
|
2702
|
+
if kind is None:
|
2703
|
+
raise RuntimeError(
|
2704
|
+
"Don't know how to link file with extension "
|
2705
|
+
f"{ext}"
|
2706
|
+
)
|
2707
|
+
self.add_file(path_or_code, kind)
|
2708
|
+
return
|
2636
2709
|
else:
|
2637
|
-
|
2638
|
-
if
|
2639
|
-
raise
|
2640
|
-
|
2641
|
-
|
2710
|
+
# Otherwise, we should have been given a LinkableCode object
|
2711
|
+
if not isinstance(path_or_code, LinkableCode):
|
2712
|
+
raise TypeError(
|
2713
|
+
"Expected path to file or a LinkableCode object"
|
2714
|
+
)
|
2715
|
+
|
2716
|
+
if path_or_code.kind == "cu":
|
2717
|
+
self.add_cu(path_or_code.data, path_or_code.name)
|
2718
|
+
else:
|
2719
|
+
self.add_data(
|
2720
|
+
path_or_code.data, path_or_code.kind, path_or_code.name
|
2721
|
+
)
|
2642
2722
|
|
2643
2723
|
@abstractmethod
|
2644
2724
|
def complete(self):
|
@@ -2649,12 +2729,6 @@ class Linker(metaclass=ABCMeta):
|
|
2649
2729
|
"""
|
2650
2730
|
|
2651
2731
|
|
2652
|
-
_MVC_ERROR_MESSAGE = (
|
2653
|
-
"Minor version compatibility requires ptxcompiler and cubinlinker packages "
|
2654
|
-
"to be available"
|
2655
|
-
)
|
2656
|
-
|
2657
|
-
|
2658
2732
|
class MVCLinker(Linker):
|
2659
2733
|
"""
|
2660
2734
|
Linker supporting Minor Version Compatibility, backed by the cubinlinker
|
@@ -2930,6 +3004,94 @@ class CudaPythonLinker(Linker):
|
|
2930
3004
|
return bytes(np.ctypeslib.as_array(cubin_ptr, shape=(size,)))
|
2931
3005
|
|
2932
3006
|
|
3007
|
+
class PyNvJitLinker(Linker):
|
3008
|
+
def __init__(
|
3009
|
+
self,
|
3010
|
+
max_registers=None,
|
3011
|
+
lineinfo=False,
|
3012
|
+
cc=None,
|
3013
|
+
lto=False,
|
3014
|
+
additional_flags=None,
|
3015
|
+
):
|
3016
|
+
|
3017
|
+
if cc is None:
|
3018
|
+
raise RuntimeError("PyNvJitLinker requires CC to be specified")
|
3019
|
+
if not any(isinstance(cc, t) for t in [list, tuple]):
|
3020
|
+
raise TypeError("`cc` must be a list or tuple of length 2")
|
3021
|
+
|
3022
|
+
sm_ver = f"{cc[0] * 10 + cc[1]}"
|
3023
|
+
arch = f"-arch=sm_{sm_ver}"
|
3024
|
+
options = [arch]
|
3025
|
+
if max_registers:
|
3026
|
+
options.append(f"-maxrregcount={max_registers}")
|
3027
|
+
if lineinfo:
|
3028
|
+
options.append("-lineinfo")
|
3029
|
+
if lto:
|
3030
|
+
options.append("-lto")
|
3031
|
+
if additional_flags is not None:
|
3032
|
+
options.extend(additional_flags)
|
3033
|
+
|
3034
|
+
self._linker = NvJitLinker(*options)
|
3035
|
+
self.lto = lto
|
3036
|
+
self.options = options
|
3037
|
+
|
3038
|
+
@property
|
3039
|
+
def info_log(self):
|
3040
|
+
return self._linker.info_log
|
3041
|
+
|
3042
|
+
@property
|
3043
|
+
def error_log(self):
|
3044
|
+
return self._linker.error_log
|
3045
|
+
|
3046
|
+
def add_ptx(self, ptx, name="<cudapy-ptx>"):
|
3047
|
+
self._linker.add_ptx(ptx, name)
|
3048
|
+
|
3049
|
+
def add_fatbin(self, fatbin, name="<external-fatbin>"):
|
3050
|
+
self._linker.add_fatbin(fatbin, name)
|
3051
|
+
|
3052
|
+
def add_ltoir(self, ltoir, name="<external-ltoir>"):
|
3053
|
+
self._linker.add_ltoir(ltoir, name)
|
3054
|
+
|
3055
|
+
def add_object(self, obj, name="<external-object>"):
|
3056
|
+
self._linker.add_object(obj, name)
|
3057
|
+
|
3058
|
+
def add_file(self, path, kind):
|
3059
|
+
try:
|
3060
|
+
with open(path, "rb") as f:
|
3061
|
+
data = f.read()
|
3062
|
+
except FileNotFoundError:
|
3063
|
+
raise LinkerError(f"{path} not found")
|
3064
|
+
|
3065
|
+
name = pathlib.Path(path).name
|
3066
|
+
self.add_data(data, kind, name)
|
3067
|
+
|
3068
|
+
def add_data(self, data, kind, name):
|
3069
|
+
if kind == FILE_EXTENSION_MAP["cubin"]:
|
3070
|
+
fn = self._linker.add_cubin
|
3071
|
+
elif kind == FILE_EXTENSION_MAP["fatbin"]:
|
3072
|
+
fn = self._linker.add_fatbin
|
3073
|
+
elif kind == FILE_EXTENSION_MAP["a"]:
|
3074
|
+
fn = self._linker.add_library
|
3075
|
+
elif kind == FILE_EXTENSION_MAP["ptx"]:
|
3076
|
+
return self.add_ptx(data, name)
|
3077
|
+
elif kind == FILE_EXTENSION_MAP["o"]:
|
3078
|
+
fn = self._linker.add_object
|
3079
|
+
elif kind == FILE_EXTENSION_MAP["ltoir"]:
|
3080
|
+
fn = self._linker.add_ltoir
|
3081
|
+
else:
|
3082
|
+
raise LinkerError(f"Don't know how to link {kind}")
|
3083
|
+
|
3084
|
+
try:
|
3085
|
+
fn(data, name)
|
3086
|
+
except NvJitLinkError as e:
|
3087
|
+
raise LinkerError from e
|
3088
|
+
|
3089
|
+
def complete(self):
|
3090
|
+
try:
|
3091
|
+
return self._linker.get_linked_cubin()
|
3092
|
+
except NvJitLinkError as e:
|
3093
|
+
raise LinkerError from e
|
3094
|
+
|
2933
3095
|
# -----------------------------------------------------------------------------
|
2934
3096
|
|
2935
3097
|
|
@@ -18,6 +18,7 @@ from numba.misc.findlib import find_lib
|
|
18
18
|
from numba.cuda.cuda_paths import get_cuda_paths
|
19
19
|
from numba.cuda.cudadrv.driver import locate_driver_and_loader, load_driver
|
20
20
|
from numba.cuda.cudadrv.error import CudaSupportError
|
21
|
+
from numba.core import config
|
21
22
|
|
22
23
|
|
23
24
|
if sys.platform == 'win32':
|
@@ -60,6 +61,24 @@ def get_cudalib(lib, static=False):
|
|
60
61
|
return max(candidates) if candidates else namepattern % lib
|
61
62
|
|
62
63
|
|
64
|
+
def get_cuda_include_dir():
|
65
|
+
"""
|
66
|
+
Find the path to cuda include dir based on a list of default locations.
|
67
|
+
Note that this does not list the `CUDA_INCLUDE_PATH` entry in user
|
68
|
+
configuration.
|
69
|
+
"""
|
70
|
+
|
71
|
+
return get_cuda_paths()['include_dir'].info
|
72
|
+
|
73
|
+
|
74
|
+
def check_cuda_include_dir(path):
|
75
|
+
if path is None or not os.path.exists(path):
|
76
|
+
raise FileNotFoundError(f"{path} not found")
|
77
|
+
|
78
|
+
if not os.path.exists(os.path.join(path, "cuda_runtime.h")):
|
79
|
+
raise FileNotFoundError(f"Unable to find cuda_runtime.h from {path}")
|
80
|
+
|
81
|
+
|
63
82
|
def open_cudalib(lib):
|
64
83
|
path = get_cudalib(lib)
|
65
84
|
return ctypes.CDLL(path)
|
@@ -75,6 +94,8 @@ def _get_source_variable(lib, static=False):
|
|
75
94
|
return get_cuda_paths()['nvvm'].by
|
76
95
|
elif lib == 'libdevice':
|
77
96
|
return get_cuda_paths()['libdevice'].by
|
97
|
+
elif lib == 'include_dir':
|
98
|
+
return get_cuda_paths()['include_dir'].by
|
78
99
|
else:
|
79
100
|
dir_type = 'static_cudalib_dir' if static else 'cudalib_dir'
|
80
101
|
return get_cuda_paths()[dir_type].by
|
@@ -173,4 +194,21 @@ def test():
|
|
173
194
|
print('\tERROR: failed to find %s:\n%s' % (lib, e))
|
174
195
|
failed = True
|
175
196
|
|
197
|
+
# Check cuda include paths
|
198
|
+
|
199
|
+
print("Include directory configuration variable:")
|
200
|
+
print(f"\tCUDA_INCLUDE_PATH={config.CUDA_INCLUDE_PATH}")
|
201
|
+
|
202
|
+
where = _get_source_variable('include_dir')
|
203
|
+
print(f'Finding include directory from {where}')
|
204
|
+
include = get_cuda_include_dir()
|
205
|
+
print('\tLocated at', include)
|
206
|
+
try:
|
207
|
+
print('\tChecking include directory', end='...')
|
208
|
+
check_cuda_include_dir(include)
|
209
|
+
print('\tok')
|
210
|
+
except FileNotFoundError as e:
|
211
|
+
print('\tERROR: failed to find cuda include directory:\n%s' % e)
|
212
|
+
failed = True
|
213
|
+
|
176
214
|
return not failed
|
@@ -0,0 +1,63 @@
|
|
1
|
+
from .mappings import FILE_EXTENSION_MAP
|
2
|
+
|
3
|
+
|
4
|
+
class LinkableCode:
|
5
|
+
"""An object that can be passed in the `link` list argument to `@cuda.jit`
|
6
|
+
kernels to supply code to be linked from memory."""
|
7
|
+
|
8
|
+
def __init__(self, data, name=None):
|
9
|
+
self.data = data
|
10
|
+
self._name = name
|
11
|
+
|
12
|
+
@property
|
13
|
+
def name(self):
|
14
|
+
return self._name or self.default_name
|
15
|
+
|
16
|
+
|
17
|
+
class PTXSource(LinkableCode):
|
18
|
+
"""PTX Source code in memory"""
|
19
|
+
|
20
|
+
kind = FILE_EXTENSION_MAP["ptx"]
|
21
|
+
default_name = "<unnamed-ptx>"
|
22
|
+
|
23
|
+
|
24
|
+
class CUSource(LinkableCode):
|
25
|
+
"""CUDA C/C++ Source code in memory"""
|
26
|
+
|
27
|
+
kind = "cu"
|
28
|
+
default_name = "<unnamed-cu>"
|
29
|
+
|
30
|
+
|
31
|
+
class Fatbin(LinkableCode):
|
32
|
+
"""A fatbin ELF in memory"""
|
33
|
+
|
34
|
+
kind = FILE_EXTENSION_MAP["fatbin"]
|
35
|
+
default_name = "<unnamed-fatbin>"
|
36
|
+
|
37
|
+
|
38
|
+
class Cubin(LinkableCode):
|
39
|
+
"""A cubin ELF in memory"""
|
40
|
+
|
41
|
+
kind = FILE_EXTENSION_MAP["cubin"]
|
42
|
+
default_name = "<unnamed-cubin>"
|
43
|
+
|
44
|
+
|
45
|
+
class Archive(LinkableCode):
|
46
|
+
"""An archive of objects in memory"""
|
47
|
+
|
48
|
+
kind = FILE_EXTENSION_MAP["a"]
|
49
|
+
default_name = "<unnamed-archive>"
|
50
|
+
|
51
|
+
|
52
|
+
class Object(LinkableCode):
|
53
|
+
"""An object file in memory"""
|
54
|
+
|
55
|
+
kind = FILE_EXTENSION_MAP["o"]
|
56
|
+
default_name = "<unnamed-object>"
|
57
|
+
|
58
|
+
|
59
|
+
class LTOIR(LinkableCode):
|
60
|
+
"""An LTOIR file in memory"""
|
61
|
+
|
62
|
+
kind = "ltoir"
|
63
|
+
default_name = "<unnamed-ltoir>"
|
@@ -0,0 +1,24 @@
|
|
1
|
+
from numba import config
|
2
|
+
from . import enums
|
3
|
+
if config.CUDA_USE_NVIDIA_BINDING:
|
4
|
+
from cuda import cuda
|
5
|
+
jitty = cuda.CUjitInputType
|
6
|
+
FILE_EXTENSION_MAP = {
|
7
|
+
'o': jitty.CU_JIT_INPUT_OBJECT,
|
8
|
+
'ptx': jitty.CU_JIT_INPUT_PTX,
|
9
|
+
'a': jitty.CU_JIT_INPUT_LIBRARY,
|
10
|
+
'lib': jitty.CU_JIT_INPUT_LIBRARY,
|
11
|
+
'cubin': jitty.CU_JIT_INPUT_CUBIN,
|
12
|
+
'fatbin': jitty.CU_JIT_INPUT_FATBINARY,
|
13
|
+
'ltoir': jitty.CU_JIT_INPUT_NVVM,
|
14
|
+
}
|
15
|
+
else:
|
16
|
+
FILE_EXTENSION_MAP = {
|
17
|
+
'o': enums.CU_JIT_INPUT_OBJECT,
|
18
|
+
'ptx': enums.CU_JIT_INPUT_PTX,
|
19
|
+
'a': enums.CU_JIT_INPUT_LIBRARY,
|
20
|
+
'lib': enums.CU_JIT_INPUT_LIBRARY,
|
21
|
+
'cubin': enums.CU_JIT_INPUT_CUBIN,
|
22
|
+
'fatbin': enums.CU_JIT_INPUT_FATBINARY,
|
23
|
+
'ltoir': enums.CU_JIT_INPUT_NVVM,
|
24
|
+
}
|
@@ -1,9 +1,8 @@
|
|
1
1
|
from ctypes import byref, c_char, c_char_p, c_int, c_size_t, c_void_p, POINTER
|
2
2
|
from enum import IntEnum
|
3
|
-
from numba.core import config
|
4
3
|
from numba.cuda.cudadrv.error import (NvrtcError, NvrtcCompilationError,
|
5
4
|
NvrtcSupportError)
|
6
|
-
|
5
|
+
from numba.cuda.cuda_paths import get_cuda_paths
|
7
6
|
import functools
|
8
7
|
import os
|
9
8
|
import threading
|
@@ -233,12 +232,18 @@ def compile(src, name, cc):
|
|
233
232
|
# being optimized away.
|
234
233
|
major, minor = cc
|
235
234
|
arch = f'--gpu-architecture=compute_{major}{minor}'
|
236
|
-
|
235
|
+
|
236
|
+
cuda_include = [
|
237
|
+
f"-I{get_cuda_paths()['include_dir'].info}",
|
238
|
+
]
|
237
239
|
|
238
240
|
cudadrv_path = os.path.dirname(os.path.abspath(__file__))
|
239
241
|
numba_cuda_path = os.path.dirname(cudadrv_path)
|
240
242
|
numba_include = f'-I{numba_cuda_path}'
|
241
|
-
options = [arch,
|
243
|
+
options = [arch, *cuda_include, numba_include, '-rdc', 'true']
|
244
|
+
|
245
|
+
if nvrtc.get_version() < (12, 0):
|
246
|
+
options += ["-std=c++17"]
|
242
247
|
|
243
248
|
# Compile the program
|
244
249
|
compile_error = nvrtc.compile_program(program, options)
|
@@ -31,6 +31,9 @@ from .intrinsic_wrapper import (all_sync, any_sync, eq_sync, ballot_sync,
|
|
31
31
|
shfl_xor_sync)
|
32
32
|
|
33
33
|
from .kernels import reduction
|
34
|
+
from numba.cuda.cudadrv.linkable_code import (
|
35
|
+
Archive, CUSource, Cubin, Fatbin, LinkableCode, LTOIR, Object, PTXSource
|
36
|
+
)
|
34
37
|
|
35
38
|
reduce = Reduce = reduction.Reduce
|
36
39
|
|