numba-cuda 0.0.16__py3-none-any.whl → 0.0.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- numba_cuda/VERSION +1 -1
- numba_cuda/numba/cuda/codegen.py +15 -3
- numba_cuda/numba/cuda/cudadrv/driver.py +209 -47
- numba_cuda/numba/cuda/cudadrv/enums.py +3 -0
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +63 -0
- numba_cuda/numba/cuda/cudadrv/mappings.py +24 -0
- numba_cuda/numba/cuda/device_init.py +3 -0
- numba_cuda/numba/cuda/dispatcher.py +2 -2
- numba_cuda/numba/cuda/intrinsics.py +6 -1
- numba_cuda/numba/cuda/printimpl.py +11 -0
- numba_cuda/numba/cuda/target.py +4 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +199 -0
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +44 -4
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +20 -0
- numba_cuda/numba/cuda/tests/test_binary_generation/Makefile +51 -0
- numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +163 -0
- numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu +19 -0
- numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu +3 -0
- {numba_cuda-0.0.16.dist-info → numba_cuda-0.0.18.dist-info}/METADATA +1 -1
- {numba_cuda-0.0.16.dist-info → numba_cuda-0.0.18.dist-info}/RECORD +23 -16
- {numba_cuda-0.0.16.dist-info → numba_cuda-0.0.18.dist-info}/WHEEL +1 -1
- {numba_cuda-0.0.16.dist-info → numba_cuda-0.0.18.dist-info}/LICENSE +0 -0
- {numba_cuda-0.0.16.dist-info → numba_cuda-0.0.18.dist-info}/top_level.txt +0 -0
numba_cuda/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.16
|
1
|
+
0.0.18
|
numba_cuda/numba/cuda/codegen.py
CHANGED
@@ -59,8 +59,15 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
|
|
59
59
|
get_cufunc), which may be of different compute capabilities.
|
60
60
|
"""
|
61
61
|
|
62
|
-
def __init__(
|
63
|
-
|
62
|
+
def __init__(
|
63
|
+
self,
|
64
|
+
codegen,
|
65
|
+
name,
|
66
|
+
entry_name=None,
|
67
|
+
max_registers=None,
|
68
|
+
lto=False,
|
69
|
+
nvvm_options=None
|
70
|
+
):
|
64
71
|
"""
|
65
72
|
codegen:
|
66
73
|
Codegen object.
|
@@ -71,6 +78,8 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
|
|
71
78
|
kernel and not a device function.
|
72
79
|
max_registers:
|
73
80
|
The maximum register usage to aim for when linking.
|
81
|
+
lto:
|
82
|
+
Whether to enable link-time optimization.
|
74
83
|
nvvm_options:
|
75
84
|
Dict of options to pass to NVVM.
|
76
85
|
"""
|
@@ -103,6 +112,7 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
|
|
103
112
|
self._cufunc_cache = {}
|
104
113
|
|
105
114
|
self._max_registers = max_registers
|
115
|
+
self._lto = lto
|
106
116
|
if nvvm_options is None:
|
107
117
|
nvvm_options = {}
|
108
118
|
self._nvvm_options = nvvm_options
|
@@ -178,7 +188,9 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
|
|
178
188
|
if cubin:
|
179
189
|
return cubin
|
180
190
|
|
181
|
-
linker = driver.Linker.new(
|
191
|
+
linker = driver.Linker.new(
|
192
|
+
max_registers=self._max_registers, cc=cc, lto=self._lto
|
193
|
+
)
|
182
194
|
|
183
195
|
if linker.lto:
|
184
196
|
ltoir = self.get_ltoir(cc=cc)
|
numba_cuda/numba/cuda/cudadrv/driver.py
CHANGED
@@ -10,7 +10,6 @@ subsequent deallocation could further corrupt the CUDA context and causes the
|
|
10
10
|
system to freeze in some cases.
|
11
11
|
|
12
12
|
"""
|
13
|
-
|
14
13
|
import sys
|
15
14
|
import os
|
16
15
|
import ctypes
|
@@ -19,6 +18,7 @@ import functools
|
|
19
18
|
import warnings
|
20
19
|
import logging
|
21
20
|
import threading
|
21
|
+
import traceback
|
22
22
|
import asyncio
|
23
23
|
import pathlib
|
24
24
|
from itertools import product
|
@@ -35,6 +35,8 @@ from numba.core import utils, serialize, config
|
|
35
35
|
from .error import CudaSupportError, CudaDriverError
|
36
36
|
from .drvapi import API_PROTOTYPES
|
37
37
|
from .drvapi import cu_occupancy_b2d_size, cu_stream_callback_pyobj, cu_uuid
|
38
|
+
from .mappings import FILE_EXTENSION_MAP
|
39
|
+
from .linkable_code import LinkableCode
|
38
40
|
from numba.cuda.cudadrv import enums, drvapi, nvrtc
|
39
41
|
|
40
42
|
USE_NV_BINDING = config.CUDA_USE_NVIDIA_BINDING
|
@@ -56,6 +58,52 @@ _py_decref.argtypes = [ctypes.py_object]
|
|
56
58
|
_py_incref.argtypes = [ctypes.py_object]
|
57
59
|
|
58
60
|
|
61
|
+
def _readenv(name, ctor, default):
|
62
|
+
value = os.environ.get(name)
|
63
|
+
if value is None:
|
64
|
+
return default() if callable(default) else default
|
65
|
+
try:
|
66
|
+
if ctor is bool:
|
67
|
+
return value.lower() in {'1', "true"}
|
68
|
+
return ctor(value)
|
69
|
+
except Exception:
|
70
|
+
warnings.warn(
|
71
|
+
f"Environment variable '{name}' is defined but its associated "
|
72
|
+
f"value '{value}' could not be parsed.\n"
|
73
|
+
"The parse failed with exception:\n"
|
74
|
+
f"{traceback.format_exc()}",
|
75
|
+
RuntimeWarning
|
76
|
+
)
|
77
|
+
return default
|
78
|
+
|
79
|
+
|
80
|
+
_MVC_ERROR_MESSAGE = (
|
81
|
+
"Minor version compatibility requires ptxcompiler and cubinlinker packages "
|
82
|
+
"to be available"
|
83
|
+
)
|
84
|
+
|
85
|
+
ENABLE_PYNVJITLINK = (
|
86
|
+
_readenv("NUMBA_CUDA_ENABLE_PYNVJITLINK", bool, False)
|
87
|
+
or getattr(config, "CUDA_ENABLE_PYNVJITLINK", False)
|
88
|
+
)
|
89
|
+
if not hasattr(config, "CUDA_ENABLE_PYNVJITLINK"):
|
90
|
+
config.CUDA_ENABLE_PYNVJITLINK = ENABLE_PYNVJITLINK
|
91
|
+
|
92
|
+
if ENABLE_PYNVJITLINK:
|
93
|
+
try:
|
94
|
+
from pynvjitlink.api import NvJitLinker, NvJitLinkError
|
95
|
+
except ImportError:
|
96
|
+
raise ImportError(
|
97
|
+
"Using pynvjitlink requires the pynvjitlink package to be available"
|
98
|
+
)
|
99
|
+
|
100
|
+
if config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY:
|
101
|
+
raise ValueError(
|
102
|
+
"Can't set CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY and "
|
103
|
+
"CUDA_ENABLE_PYNVJITLINK at the same time"
|
104
|
+
)
|
105
|
+
|
106
|
+
|
59
107
|
def make_logger():
|
60
108
|
logger = logging.getLogger(__name__)
|
61
109
|
# is logging configured?
|
@@ -432,7 +480,7 @@ class Driver(object):
|
|
432
480
|
|
433
481
|
def get_version(self):
|
434
482
|
"""
|
435
|
-
Returns the CUDA
|
483
|
+
Returns the CUDA Driver version as a tuple (major, minor).
|
436
484
|
"""
|
437
485
|
if USE_NV_BINDING:
|
438
486
|
version = driver.cuDriverGetVersion()
|
@@ -2546,38 +2594,47 @@ def launch_kernel(cufunc_handle,
|
|
2546
2594
|
extra)
|
2547
2595
|
|
2548
2596
|
|
2549
|
-
if USE_NV_BINDING:
|
2550
|
-
jitty = binding.CUjitInputType
|
2551
|
-
FILE_EXTENSION_MAP = {
|
2552
|
-
'o': jitty.CU_JIT_INPUT_OBJECT,
|
2553
|
-
'ptx': jitty.CU_JIT_INPUT_PTX,
|
2554
|
-
'a': jitty.CU_JIT_INPUT_LIBRARY,
|
2555
|
-
'lib': jitty.CU_JIT_INPUT_LIBRARY,
|
2556
|
-
'cubin': jitty.CU_JIT_INPUT_CUBIN,
|
2557
|
-
'fatbin': jitty.CU_JIT_INPUT_FATBINARY,
|
2558
|
-
}
|
2559
|
-
else:
|
2560
|
-
FILE_EXTENSION_MAP = {
|
2561
|
-
'o': enums.CU_JIT_INPUT_OBJECT,
|
2562
|
-
'ptx': enums.CU_JIT_INPUT_PTX,
|
2563
|
-
'a': enums.CU_JIT_INPUT_LIBRARY,
|
2564
|
-
'lib': enums.CU_JIT_INPUT_LIBRARY,
|
2565
|
-
'cubin': enums.CU_JIT_INPUT_CUBIN,
|
2566
|
-
'fatbin': enums.CU_JIT_INPUT_FATBINARY,
|
2567
|
-
}
|
2568
|
-
|
2569
|
-
|
2570
2597
|
class Linker(metaclass=ABCMeta):
|
2571
2598
|
"""Abstract base class for linkers"""
|
2572
2599
|
|
2573
2600
|
@classmethod
|
2574
|
-
def new(cls,
|
2575
|
-
|
2576
|
-
|
2577
|
-
|
2578
|
-
|
2601
|
+
def new(cls,
|
2602
|
+
max_registers=0,
|
2603
|
+
lineinfo=False,
|
2604
|
+
cc=None,
|
2605
|
+
lto=None,
|
2606
|
+
additional_flags=None
|
2607
|
+
):
|
2608
|
+
|
2609
|
+
driver_ver = driver.get_version()
|
2610
|
+
if (
|
2611
|
+
config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY
|
2612
|
+
and driver_ver >= (12, 0)
|
2613
|
+
):
|
2614
|
+
raise ValueError(
|
2615
|
+
"Use CUDA_ENABLE_PYNVJITLINK for CUDA >= 12.0 MVC"
|
2616
|
+
)
|
2617
|
+
if config.CUDA_ENABLE_PYNVJITLINK and driver_ver < (12, 0):
|
2618
|
+
raise ValueError(
|
2619
|
+
"Enabling pynvjitlink requires CUDA 12."
|
2620
|
+
)
|
2621
|
+
if config.CUDA_ENABLE_PYNVJITLINK:
|
2622
|
+
linker = PyNvJitLinker
|
2623
|
+
|
2624
|
+
elif config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY:
|
2625
|
+
linker = MVCLinker
|
2626
|
+
else:
|
2627
|
+
if USE_NV_BINDING:
|
2628
|
+
linker = CudaPythonLinker
|
2629
|
+
else:
|
2630
|
+
linker = CtypesLinker
|
2631
|
+
|
2632
|
+
if linker is PyNvJitLinker:
|
2633
|
+
return linker(max_registers, lineinfo, cc, lto, additional_flags)
|
2634
|
+
elif additional_flags or lto:
|
2635
|
+
raise ValueError("LTO and additional flags require PyNvJitLinker")
|
2579
2636
|
else:
|
2580
|
-
return
|
2637
|
+
return linker(max_registers, lineinfo, cc)
|
2581
2638
|
|
2582
2639
|
@abstractmethod
|
2583
2640
|
def __init__(self, max_registers, lineinfo, cc):
|
@@ -2626,19 +2683,42 @@ class Linker(metaclass=ABCMeta):
|
|
2626
2683
|
cu = f.read()
|
2627
2684
|
self.add_cu(cu, os.path.basename(path))
|
2628
2685
|
|
2629
|
-
def add_file_guess_ext(self,
|
2630
|
-
"""
|
2631
|
-
|
2632
|
-
|
2633
|
-
|
2634
|
-
|
2635
|
-
|
2686
|
+
def add_file_guess_ext(self, path_or_code):
|
2687
|
+
"""
|
2688
|
+
Add a file or LinkableCode object to the link. If a file is
|
2689
|
+
passed, the type will be inferred from the extension. A LinkableCode
|
2690
|
+
object represents a file already in memory.
|
2691
|
+
"""
|
2692
|
+
if isinstance(path_or_code, str):
|
2693
|
+
ext = pathlib.Path(path_or_code).suffix
|
2694
|
+
if ext == '':
|
2695
|
+
raise RuntimeError(
|
2696
|
+
"Don't know how to link file with no extension"
|
2697
|
+
)
|
2698
|
+
elif ext == '.cu':
|
2699
|
+
self.add_cu_file(path_or_code)
|
2700
|
+
else:
|
2701
|
+
kind = FILE_EXTENSION_MAP.get(ext.lstrip('.'), None)
|
2702
|
+
if kind is None:
|
2703
|
+
raise RuntimeError(
|
2704
|
+
"Don't know how to link file with extension "
|
2705
|
+
f"{ext}"
|
2706
|
+
)
|
2707
|
+
self.add_file(path_or_code, kind)
|
2708
|
+
return
|
2636
2709
|
else:
|
2637
|
-
|
2638
|
-
if
|
2639
|
-
raise
|
2640
|
-
|
2641
|
-
|
2710
|
+
# Otherwise, we should have been given a LinkableCode object
|
2711
|
+
if not isinstance(path_or_code, LinkableCode):
|
2712
|
+
raise TypeError(
|
2713
|
+
"Expected path to file or a LinkableCode object"
|
2714
|
+
)
|
2715
|
+
|
2716
|
+
if path_or_code.kind == "cu":
|
2717
|
+
self.add_cu(path_or_code.data, path_or_code.name)
|
2718
|
+
else:
|
2719
|
+
self.add_data(
|
2720
|
+
path_or_code.data, path_or_code.kind, path_or_code.name
|
2721
|
+
)
|
2642
2722
|
|
2643
2723
|
@abstractmethod
|
2644
2724
|
def complete(self):
|
@@ -2649,12 +2729,6 @@ class Linker(metaclass=ABCMeta):
|
|
2649
2729
|
"""
|
2650
2730
|
|
2651
2731
|
|
2652
|
-
_MVC_ERROR_MESSAGE = (
|
2653
|
-
"Minor version compatibility requires ptxcompiler and cubinlinker packages "
|
2654
|
-
"to be available"
|
2655
|
-
)
|
2656
|
-
|
2657
|
-
|
2658
2732
|
class MVCLinker(Linker):
|
2659
2733
|
"""
|
2660
2734
|
Linker supporting Minor Version Compatibility, backed by the cubinlinker
|
@@ -2930,6 +3004,94 @@ class CudaPythonLinker(Linker):
|
|
2930
3004
|
return bytes(np.ctypeslib.as_array(cubin_ptr, shape=(size,)))
|
2931
3005
|
|
2932
3006
|
|
3007
|
+
class PyNvJitLinker(Linker):
|
3008
|
+
def __init__(
|
3009
|
+
self,
|
3010
|
+
max_registers=None,
|
3011
|
+
lineinfo=False,
|
3012
|
+
cc=None,
|
3013
|
+
lto=False,
|
3014
|
+
additional_flags=None,
|
3015
|
+
):
|
3016
|
+
|
3017
|
+
if cc is None:
|
3018
|
+
raise RuntimeError("PyNvJitLinker requires CC to be specified")
|
3019
|
+
if not any(isinstance(cc, t) for t in [list, tuple]):
|
3020
|
+
raise TypeError("`cc` must be a list or tuple of length 2")
|
3021
|
+
|
3022
|
+
sm_ver = f"{cc[0] * 10 + cc[1]}"
|
3023
|
+
arch = f"-arch=sm_{sm_ver}"
|
3024
|
+
options = [arch]
|
3025
|
+
if max_registers:
|
3026
|
+
options.append(f"-maxrregcount={max_registers}")
|
3027
|
+
if lineinfo:
|
3028
|
+
options.append("-lineinfo")
|
3029
|
+
if lto:
|
3030
|
+
options.append("-lto")
|
3031
|
+
if additional_flags is not None:
|
3032
|
+
options.extend(additional_flags)
|
3033
|
+
|
3034
|
+
self._linker = NvJitLinker(*options)
|
3035
|
+
self.lto = lto
|
3036
|
+
self.options = options
|
3037
|
+
|
3038
|
+
@property
|
3039
|
+
def info_log(self):
|
3040
|
+
return self._linker.info_log
|
3041
|
+
|
3042
|
+
@property
|
3043
|
+
def error_log(self):
|
3044
|
+
return self._linker.error_log
|
3045
|
+
|
3046
|
+
def add_ptx(self, ptx, name="<cudapy-ptx>"):
|
3047
|
+
self._linker.add_ptx(ptx, name)
|
3048
|
+
|
3049
|
+
def add_fatbin(self, fatbin, name="<external-fatbin>"):
|
3050
|
+
self._linker.add_fatbin(fatbin, name)
|
3051
|
+
|
3052
|
+
def add_ltoir(self, ltoir, name="<external-ltoir>"):
|
3053
|
+
self._linker.add_ltoir(ltoir, name)
|
3054
|
+
|
3055
|
+
def add_object(self, obj, name="<external-object>"):
|
3056
|
+
self._linker.add_object(obj, name)
|
3057
|
+
|
3058
|
+
def add_file(self, path, kind):
|
3059
|
+
try:
|
3060
|
+
with open(path, "rb") as f:
|
3061
|
+
data = f.read()
|
3062
|
+
except FileNotFoundError:
|
3063
|
+
raise LinkerError(f"{path} not found")
|
3064
|
+
|
3065
|
+
name = pathlib.Path(path).name
|
3066
|
+
self.add_data(data, kind, name)
|
3067
|
+
|
3068
|
+
def add_data(self, data, kind, name):
|
3069
|
+
if kind == FILE_EXTENSION_MAP["cubin"]:
|
3070
|
+
fn = self._linker.add_cubin
|
3071
|
+
elif kind == FILE_EXTENSION_MAP["fatbin"]:
|
3072
|
+
fn = self._linker.add_fatbin
|
3073
|
+
elif kind == FILE_EXTENSION_MAP["a"]:
|
3074
|
+
fn = self._linker.add_library
|
3075
|
+
elif kind == FILE_EXTENSION_MAP["ptx"]:
|
3076
|
+
return self.add_ptx(data, name)
|
3077
|
+
elif kind == FILE_EXTENSION_MAP["o"]:
|
3078
|
+
fn = self._linker.add_object
|
3079
|
+
elif kind == FILE_EXTENSION_MAP["ltoir"]:
|
3080
|
+
fn = self._linker.add_ltoir
|
3081
|
+
else:
|
3082
|
+
raise LinkerError(f"Don't know how to link {kind}")
|
3083
|
+
|
3084
|
+
try:
|
3085
|
+
fn(data, name)
|
3086
|
+
except NvJitLinkError as e:
|
3087
|
+
raise LinkerError from e
|
3088
|
+
|
3089
|
+
def complete(self):
|
3090
|
+
try:
|
3091
|
+
return self._linker.get_linked_cubin()
|
3092
|
+
except NvJitLinkError as e:
|
3093
|
+
raise LinkerError from e
|
3094
|
+
|
2933
3095
|
# -----------------------------------------------------------------------------
|
2934
3096
|
|
2935
3097
|
|
numba_cuda/numba/cuda/cudadrv/linkable_code.py
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
from .mappings import FILE_EXTENSION_MAP
|
2
|
+
|
3
|
+
|
4
|
+
class LinkableCode:
|
5
|
+
"""An object that can be passed in the `link` list argument to `@cuda.jit`
|
6
|
+
kernels to supply code to be linked from memory."""
|
7
|
+
|
8
|
+
def __init__(self, data, name=None):
|
9
|
+
self.data = data
|
10
|
+
self._name = name
|
11
|
+
|
12
|
+
@property
|
13
|
+
def name(self):
|
14
|
+
return self._name or self.default_name
|
15
|
+
|
16
|
+
|
17
|
+
class PTXSource(LinkableCode):
|
18
|
+
"""PTX Source code in memory"""
|
19
|
+
|
20
|
+
kind = FILE_EXTENSION_MAP["ptx"]
|
21
|
+
default_name = "<unnamed-ptx>"
|
22
|
+
|
23
|
+
|
24
|
+
class CUSource(LinkableCode):
|
25
|
+
"""CUDA C/C++ Source code in memory"""
|
26
|
+
|
27
|
+
kind = "cu"
|
28
|
+
default_name = "<unnamed-cu>"
|
29
|
+
|
30
|
+
|
31
|
+
class Fatbin(LinkableCode):
|
32
|
+
"""A fatbin ELF in memory"""
|
33
|
+
|
34
|
+
kind = FILE_EXTENSION_MAP["fatbin"]
|
35
|
+
default_name = "<unnamed-fatbin>"
|
36
|
+
|
37
|
+
|
38
|
+
class Cubin(LinkableCode):
|
39
|
+
"""A cubin ELF in memory"""
|
40
|
+
|
41
|
+
kind = FILE_EXTENSION_MAP["cubin"]
|
42
|
+
default_name = "<unnamed-cubin>"
|
43
|
+
|
44
|
+
|
45
|
+
class Archive(LinkableCode):
|
46
|
+
"""An archive of objects in memory"""
|
47
|
+
|
48
|
+
kind = FILE_EXTENSION_MAP["a"]
|
49
|
+
default_name = "<unnamed-archive>"
|
50
|
+
|
51
|
+
|
52
|
+
class Object(LinkableCode):
|
53
|
+
"""An object file in memory"""
|
54
|
+
|
55
|
+
kind = FILE_EXTENSION_MAP["o"]
|
56
|
+
default_name = "<unnamed-object>"
|
57
|
+
|
58
|
+
|
59
|
+
class LTOIR(LinkableCode):
|
60
|
+
"""An LTOIR file in memory"""
|
61
|
+
|
62
|
+
kind = "ltoir"
|
63
|
+
default_name = "<unnamed-ltoir>"
|
numba_cuda/numba/cuda/cudadrv/mappings.py
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
from numba import config
|
2
|
+
from . import enums
|
3
|
+
if config.CUDA_USE_NVIDIA_BINDING:
|
4
|
+
from cuda import cuda
|
5
|
+
jitty = cuda.CUjitInputType
|
6
|
+
FILE_EXTENSION_MAP = {
|
7
|
+
'o': jitty.CU_JIT_INPUT_OBJECT,
|
8
|
+
'ptx': jitty.CU_JIT_INPUT_PTX,
|
9
|
+
'a': jitty.CU_JIT_INPUT_LIBRARY,
|
10
|
+
'lib': jitty.CU_JIT_INPUT_LIBRARY,
|
11
|
+
'cubin': jitty.CU_JIT_INPUT_CUBIN,
|
12
|
+
'fatbin': jitty.CU_JIT_INPUT_FATBINARY,
|
13
|
+
'ltoir': jitty.CU_JIT_INPUT_NVVM,
|
14
|
+
}
|
15
|
+
else:
|
16
|
+
FILE_EXTENSION_MAP = {
|
17
|
+
'o': enums.CU_JIT_INPUT_OBJECT,
|
18
|
+
'ptx': enums.CU_JIT_INPUT_PTX,
|
19
|
+
'a': enums.CU_JIT_INPUT_LIBRARY,
|
20
|
+
'lib': enums.CU_JIT_INPUT_LIBRARY,
|
21
|
+
'cubin': enums.CU_JIT_INPUT_CUBIN,
|
22
|
+
'fatbin': enums.CU_JIT_INPUT_FATBINARY,
|
23
|
+
'ltoir': enums.CU_JIT_INPUT_NVVM,
|
24
|
+
}
|
numba_cuda/numba/cuda/device_init.py
CHANGED
@@ -31,6 +31,9 @@ from .intrinsic_wrapper import (all_sync, any_sync, eq_sync, ballot_sync,
|
|
31
31
|
shfl_xor_sync)
|
32
32
|
|
33
33
|
from .kernels import reduction
|
34
|
+
from numba.cuda.cudadrv.linkable_code import (
|
35
|
+
Archive, CUSource, Cubin, Fatbin, LinkableCode, LTOIR, Object, PTXSource
|
36
|
+
)
|
34
37
|
|
35
38
|
reduce = Reduce = reduction.Reduce
|
36
39
|
|
numba_cuda/numba/cuda/dispatcher.py
CHANGED
@@ -46,7 +46,7 @@ class _Kernel(serialize.ReduceMixin):
|
|
46
46
|
@global_compiler_lock
|
47
47
|
def __init__(self, py_func, argtypes, link=None, debug=False,
|
48
48
|
lineinfo=False, inline=False, fastmath=False, extensions=None,
|
49
|
-
max_registers=None, opt=True, device=False):
|
49
|
+
max_registers=None, lto=False, opt=True, device=False):
|
50
50
|
|
51
51
|
if device:
|
52
52
|
raise RuntimeError('Cannot compile a device function as a kernel')
|
@@ -94,7 +94,7 @@ class _Kernel(serialize.ReduceMixin):
|
|
94
94
|
lib, kernel = tgt_ctx.prepare_cuda_kernel(cres.library, cres.fndesc,
|
95
95
|
debug, lineinfo, nvvm_options,
|
96
96
|
filename, linenum,
|
97
|
-
max_registers)
|
97
|
+
max_registers, lto)
|
98
98
|
|
99
99
|
if not link:
|
100
100
|
link = []
|
numba_cuda/numba/cuda/intrinsics.py
CHANGED
@@ -4,7 +4,7 @@ from numba import cuda, types
|
|
4
4
|
from numba.core import cgutils
|
5
5
|
from numba.core.errors import RequireLiteralValue
|
6
6
|
from numba.core.typing import signature
|
7
|
-
from numba.core.extending import overload_attribute
|
7
|
+
from numba.core.extending import overload_attribute, overload_method
|
8
8
|
from numba.cuda import nvvmutils
|
9
9
|
from numba.cuda.extending import intrinsic
|
10
10
|
|
@@ -196,3 +196,8 @@ def syncthreads_or(typingctx, predicate):
|
|
196
196
|
'''
|
197
197
|
fname = 'llvm.nvvm.barrier0.or'
|
198
198
|
return _syncthreads_predicate(typingctx, predicate, fname)
|
199
|
+
|
200
|
+
|
201
|
+
@overload_method(types.Integer, 'bit_count', target='cuda')
|
202
|
+
def integer_bit_count(i):
|
203
|
+
return lambda i: cuda.popc(i)
|
numba_cuda/numba/cuda/printimpl.py
CHANGED
@@ -63,6 +63,17 @@ def dim3_print_impl(ty, context, builder, val):
|
|
63
63
|
return rawfmt, [x, y, z]
|
64
64
|
|
65
65
|
|
66
|
+
@print_item.register(types.Boolean)
|
67
|
+
def bool_print_impl(ty, context, builder, val):
|
68
|
+
true_string = context.insert_string_const_addrspace(builder, "True")
|
69
|
+
false_string = context.insert_string_const_addrspace(builder, "False")
|
70
|
+
res_ptr = cgutils.alloca_once_value(builder, false_string)
|
71
|
+
with builder.if_then(val):
|
72
|
+
builder.store(true_string, res_ptr)
|
73
|
+
rawfmt = "%s"
|
74
|
+
return rawfmt, [builder.load(res_ptr)]
|
75
|
+
|
76
|
+
|
66
77
|
@lower(print, types.VarArg(types.Any))
|
67
78
|
def print_varargs(context, builder, sig, args):
|
68
79
|
"""This function is a generic 'print' wrapper for arbitrary types.
|
numba_cuda/numba/cuda/target.py
CHANGED
@@ -148,7 +148,7 @@ class CUDATargetContext(BaseContext):
|
|
148
148
|
|
149
149
|
def prepare_cuda_kernel(self, codelib, fndesc, debug, lineinfo,
|
150
150
|
nvvm_options, filename, linenum,
|
151
|
-
max_registers=None):
|
151
|
+
max_registers=None, lto=False):
|
152
152
|
"""
|
153
153
|
Adapt a code library ``codelib`` with the numba compiled CUDA kernel
|
154
154
|
with name ``fname`` and arguments ``argtypes`` for NVVM.
|
@@ -175,7 +175,9 @@ class CUDATargetContext(BaseContext):
|
|
175
175
|
library = self.codegen().create_library(f'{codelib.name}_kernel_',
|
176
176
|
entry_name=kernel_name,
|
177
177
|
nvvm_options=nvvm_options,
|
178
|
-
max_registers=max_registers
|
178
|
+
max_registers=max_registers,
|
179
|
+
lto=lto
|
180
|
+
)
|
179
181
|
library.add_linking_library(codelib)
|
180
182
|
wrapper = self.generate_kernel_wrapper(library, fndesc, kernel_name,
|
181
183
|
debug, lineinfo, filename,
|
numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py
ADDED
@@ -0,0 +1,199 @@
|
|
1
|
+
from numba.cuda.testing import unittest
|
2
|
+
from numba.cuda.testing import skip_on_cudasim
|
3
|
+
from numba.cuda.testing import CUDATestCase
|
4
|
+
from numba.cuda.cudadrv.driver import PyNvJitLinker
|
5
|
+
|
6
|
+
import itertools
|
7
|
+
import os
|
8
|
+
from numba.cuda import get_current_device
|
9
|
+
from numba import cuda
|
10
|
+
from numba import config
|
11
|
+
|
12
|
+
TEST_BIN_DIR = os.getenv("NUMBA_CUDA_TEST_BIN_DIR")
|
13
|
+
if TEST_BIN_DIR:
|
14
|
+
test_device_functions_a = os.path.join(
|
15
|
+
TEST_BIN_DIR, "test_device_functions.a"
|
16
|
+
)
|
17
|
+
test_device_functions_cubin = os.path.join(
|
18
|
+
TEST_BIN_DIR, "test_device_functions.cubin"
|
19
|
+
)
|
20
|
+
test_device_functions_cu = os.path.join(
|
21
|
+
TEST_BIN_DIR, "test_device_functions.cu"
|
22
|
+
)
|
23
|
+
test_device_functions_fatbin = os.path.join(
|
24
|
+
TEST_BIN_DIR, "test_device_functions.fatbin"
|
25
|
+
)
|
26
|
+
test_device_functions_o = os.path.join(
|
27
|
+
TEST_BIN_DIR, "test_device_functions.o"
|
28
|
+
)
|
29
|
+
test_device_functions_ptx = os.path.join(
|
30
|
+
TEST_BIN_DIR, "test_device_functions.ptx"
|
31
|
+
)
|
32
|
+
test_device_functions_ltoir = os.path.join(
|
33
|
+
TEST_BIN_DIR, "test_device_functions.ltoir"
|
34
|
+
)
|
35
|
+
|
36
|
+
|
37
|
+
@unittest.skipIf(
|
38
|
+
not config.CUDA_ENABLE_PYNVJITLINK or not TEST_BIN_DIR,
|
39
|
+
"pynvjitlink not enabled"
|
40
|
+
)
|
41
|
+
@skip_on_cudasim("Linking unsupported in the simulator")
|
42
|
+
class TestLinker(CUDATestCase):
|
43
|
+
_NUMBA_NVIDIA_BINDING_0_ENV = {"NUMBA_CUDA_USE_NVIDIA_BINDING": "0"}
|
44
|
+
|
45
|
+
def test_nvjitlink_create(self):
|
46
|
+
patched_linker = PyNvJitLinker(cc=(7, 5))
|
47
|
+
assert "-arch=sm_75" in patched_linker.options
|
48
|
+
|
49
|
+
def test_nvjitlink_create_no_cc_error(self):
|
50
|
+
# nvJitLink expects at least the architecture to be specified.
|
51
|
+
with self.assertRaisesRegex(
|
52
|
+
RuntimeError, "PyNvJitLinker requires CC to be specified"
|
53
|
+
):
|
54
|
+
PyNvJitLinker()
|
55
|
+
|
56
|
+
def test_nvjitlink_invalid_arch_error(self):
|
57
|
+
from pynvjitlink.api import NvJitLinkError
|
58
|
+
|
59
|
+
# CC 0.0 is not a valid compute capability
|
60
|
+
with self.assertRaisesRegex(
|
61
|
+
NvJitLinkError, "NVJITLINK_ERROR_UNRECOGNIZED_OPTION error"
|
62
|
+
):
|
63
|
+
PyNvJitLinker(cc=(0, 0))
|
64
|
+
|
65
|
+
def test_nvjitlink_invalid_cc_type_error(self):
|
66
|
+
with self.assertRaisesRegex(
|
67
|
+
TypeError, "`cc` must be a list or tuple of length 2"
|
68
|
+
):
|
69
|
+
PyNvJitLinker(cc=0)
|
70
|
+
|
71
|
+
def test_nvjitlink_ptx_compile_options(self):
|
72
|
+
|
73
|
+
max_registers = (None, 32)
|
74
|
+
lineinfo = (False, True)
|
75
|
+
lto = (False, True)
|
76
|
+
additional_flags = (None, ("-g",), ("-g", "-time"))
|
77
|
+
for (
|
78
|
+
max_registers_i,
|
79
|
+
line_info_i,
|
80
|
+
lto_i,
|
81
|
+
additional_flags_i,
|
82
|
+
) in itertools.product(max_registers, lineinfo, lto, additional_flags):
|
83
|
+
with self.subTest(
|
84
|
+
max_registers=max_registers_i,
|
85
|
+
lineinfo=line_info_i,
|
86
|
+
lto=lto_i,
|
87
|
+
additional_flags=additional_flags_i,
|
88
|
+
):
|
89
|
+
patched_linker = PyNvJitLinker(
|
90
|
+
cc=(7, 5),
|
91
|
+
max_registers=max_registers_i,
|
92
|
+
lineinfo=line_info_i,
|
93
|
+
lto=lto_i,
|
94
|
+
additional_flags=additional_flags_i,
|
95
|
+
)
|
96
|
+
assert "-arch=sm_75" in patched_linker.options
|
97
|
+
|
98
|
+
if max_registers_i:
|
99
|
+
assert (
|
100
|
+
f"-maxrregcount={max_registers_i}"
|
101
|
+
in patched_linker.options
|
102
|
+
)
|
103
|
+
else:
|
104
|
+
assert "-maxrregcount" not in patched_linker.options
|
105
|
+
|
106
|
+
if line_info_i:
|
107
|
+
assert "-lineinfo" in patched_linker.options
|
108
|
+
else:
|
109
|
+
assert "-lineinfo" not in patched_linker.options
|
110
|
+
|
111
|
+
if lto_i:
|
112
|
+
assert "-lto" in patched_linker.options
|
113
|
+
else:
|
114
|
+
assert "-lto" not in patched_linker.options
|
115
|
+
|
116
|
+
if additional_flags_i:
|
117
|
+
for flag in additional_flags_i:
|
118
|
+
assert flag in patched_linker.options
|
119
|
+
|
120
|
+
def test_nvjitlink_add_file_guess_ext_linkable_code(self):
|
121
|
+
files = (
|
122
|
+
test_device_functions_a,
|
123
|
+
test_device_functions_cubin,
|
124
|
+
test_device_functions_cu,
|
125
|
+
test_device_functions_fatbin,
|
126
|
+
test_device_functions_o,
|
127
|
+
test_device_functions_ptx,
|
128
|
+
)
|
129
|
+
for file in files:
|
130
|
+
with self.subTest(file=file):
|
131
|
+
patched_linker = PyNvJitLinker(
|
132
|
+
cc=get_current_device().compute_capability
|
133
|
+
)
|
134
|
+
patched_linker.add_file_guess_ext(file)
|
135
|
+
|
136
|
+
def test_nvjitlink_test_add_file_guess_ext_invalid_input(self):
|
137
|
+
with open(test_device_functions_cubin, "rb") as f:
|
138
|
+
content = f.read()
|
139
|
+
|
140
|
+
patched_linker = PyNvJitLinker(
|
141
|
+
cc=get_current_device().compute_capability
|
142
|
+
)
|
143
|
+
with self.assertRaisesRegex(
|
144
|
+
TypeError, "Expected path to file or a LinkableCode"
|
145
|
+
):
|
146
|
+
# Feeding raw data as bytes to add_file_guess_ext should raise,
|
147
|
+
# because there's no way to know what kind of file to treat it as
|
148
|
+
patched_linker.add_file_guess_ext(content)
|
149
|
+
|
150
|
+
def test_nvjitlink_jit_with_linkable_code(self):
|
151
|
+
files = (
|
152
|
+
test_device_functions_a,
|
153
|
+
test_device_functions_cubin,
|
154
|
+
test_device_functions_cu,
|
155
|
+
test_device_functions_fatbin,
|
156
|
+
test_device_functions_o,
|
157
|
+
test_device_functions_ptx,
|
158
|
+
)
|
159
|
+
for file in files:
|
160
|
+
with self.subTest(file=file):
|
161
|
+
sig = "uint32(uint32, uint32)"
|
162
|
+
add_from_numba = cuda.declare_device("add_from_numba", sig)
|
163
|
+
|
164
|
+
@cuda.jit(link=[file])
|
165
|
+
def kernel(result):
|
166
|
+
result[0] = add_from_numba(1, 2)
|
167
|
+
|
168
|
+
result = cuda.device_array(1)
|
169
|
+
kernel[1, 1](result)
|
170
|
+
assert result[0] == 3
|
171
|
+
|
172
|
+
def test_nvjitlink_jit_with_linkable_code_lto(self):
|
173
|
+
file = test_device_functions_ltoir
|
174
|
+
|
175
|
+
sig = "uint32(uint32, uint32)"
|
176
|
+
add_from_numba = cuda.declare_device("add_from_numba", sig)
|
177
|
+
|
178
|
+
@cuda.jit(link=[file], lto=True)
|
179
|
+
def kernel(result):
|
180
|
+
result[0] = add_from_numba(1, 2)
|
181
|
+
|
182
|
+
result = cuda.device_array(1)
|
183
|
+
kernel[1, 1](result)
|
184
|
+
assert result[0] == 3
|
185
|
+
|
186
|
+
def test_nvjitlink_jit_with_invalid_linkable_code(self):
|
187
|
+
with open(test_device_functions_cubin, "rb") as f:
|
188
|
+
content = f.read()
|
189
|
+
with self.assertRaisesRegex(
|
190
|
+
TypeError, "Expected path to file or a LinkableCode"
|
191
|
+
):
|
192
|
+
|
193
|
+
@cuda.jit("void()", link=[content])
|
194
|
+
def kernel():
|
195
|
+
pass
|
196
|
+
|
197
|
+
|
198
|
+
if __name__ == "__main__":
|
199
|
+
unittest.main()
|
numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py
CHANGED
@@ -68,6 +68,10 @@ def simple_popc(ary, c):
|
|
68
68
|
ary[0] = cuda.popc(c)
|
69
69
|
|
70
70
|
|
71
|
+
def simple_bit_count(ary, c):
|
72
|
+
ary[0] = c.bit_count()
|
73
|
+
|
74
|
+
|
71
75
|
def simple_fma(ary, a, b, c):
|
72
76
|
ary[0] = cuda.fma(a, b, c)
|
73
77
|
|
@@ -550,17 +554,53 @@ class TestCudaIntrinsic(CUDATestCase):
|
|
550
554
|
|
551
555
|
self.assertTrue(np.all(arr))
|
552
556
|
|
557
|
+
def test_popc_u1(self):
|
558
|
+
compiled = cuda.jit("void(int32[:], uint8)")(simple_popc)
|
559
|
+
ary = np.zeros(1, dtype=np.int8)
|
560
|
+
compiled[1, 1](ary, np.uint8(0xFF))
|
561
|
+
self.assertEqual(ary[0], 8)
|
562
|
+
|
563
|
+
def test_popc_u2(self):
|
564
|
+
compiled = cuda.jit("void(int32[:], uint16)")(simple_popc)
|
565
|
+
ary = np.zeros(1, dtype=np.int16)
|
566
|
+
compiled[1, 1](ary, np.uint16(0xFFFF))
|
567
|
+
self.assertEqual(ary[0], 16)
|
568
|
+
|
553
569
|
def test_popc_u4(self):
|
554
570
|
compiled = cuda.jit("void(int32[:], uint32)")(simple_popc)
|
555
571
|
ary = np.zeros(1, dtype=np.int32)
|
556
|
-
compiled[1, 1](ary,
|
557
|
-
self.assertEqual(ary[0],
|
572
|
+
compiled[1, 1](ary, np.uint32(0xFFFFFFFF))
|
573
|
+
self.assertEqual(ary[0], 32)
|
558
574
|
|
559
575
|
def test_popc_u8(self):
|
560
576
|
compiled = cuda.jit("void(int32[:], uint64)")(simple_popc)
|
561
577
|
ary = np.zeros(1, dtype=np.int32)
|
562
|
-
compiled[1, 1](ary,
|
563
|
-
self.assertEqual(ary[0],
|
578
|
+
compiled[1, 1](ary, np.uint64(0xFFFFFFFFFFFFFFFF))
|
579
|
+
self.assertEqual(ary[0], 64)
|
580
|
+
|
581
|
+
def test_bit_count_u1(self):
|
582
|
+
compiled = cuda.jit("void(int32[:], uint8)")(simple_bit_count)
|
583
|
+
ary = np.zeros(1, dtype=np.int8)
|
584
|
+
compiled[1, 1](ary, np.uint8(0xFF))
|
585
|
+
self.assertEqual(ary[0], 8)
|
586
|
+
|
587
|
+
def test_bit_count_u2(self):
|
588
|
+
compiled = cuda.jit("void(int32[:], uint16)")(simple_bit_count)
|
589
|
+
ary = np.zeros(1, dtype=np.int16)
|
590
|
+
compiled[1, 1](ary, np.uint16(0xFFFF))
|
591
|
+
self.assertEqual(ary[0], 16)
|
592
|
+
|
593
|
+
def test_bit_count_u4(self):
|
594
|
+
compiled = cuda.jit("void(int32[:], uint32)")(simple_bit_count)
|
595
|
+
ary = np.zeros(1, dtype=np.int32)
|
596
|
+
compiled[1, 1](ary, np.uint32(0xFFFFFFFF))
|
597
|
+
self.assertEqual(ary[0], 32)
|
598
|
+
|
599
|
+
def test_bit_count_u8(self):
|
600
|
+
compiled = cuda.jit("void(int32[:], uint64)")(simple_bit_count)
|
601
|
+
ary = np.zeros(1, dtype=np.int32)
|
602
|
+
compiled[1, 1](ary, np.uint64(0xFFFFFFFFFFFFFFFF))
|
603
|
+
self.assertEqual(ary[0], 64)
|
564
604
|
|
565
605
|
def test_fma_f4(self):
|
566
606
|
compiled = cuda.jit("void(f4[:], f4, f4, f4)")(simple_fma)
|
numba_cuda/numba/cuda/tests/cudapy/test_print.py
CHANGED
@@ -32,6 +32,21 @@ cuda.synchronize()
|
|
32
32
|
"""
|
33
33
|
|
34
34
|
|
35
|
+
printbool_usecase = """\
|
36
|
+
from numba import cuda
|
37
|
+
|
38
|
+
@cuda.jit
|
39
|
+
def printbool(x):
|
40
|
+
print(True)
|
41
|
+
print(False)
|
42
|
+
print(x == 0)
|
43
|
+
|
44
|
+
printbool[1, 1](0)
|
45
|
+
printbool[1, 1](1)
|
46
|
+
cuda.synchronize()
|
47
|
+
"""
|
48
|
+
|
49
|
+
|
35
50
|
printstring_usecase = """\
|
36
51
|
from numba import cuda
|
37
52
|
|
@@ -109,6 +124,11 @@ class TestPrint(CUDATestCase):
|
|
109
124
|
expected_cases = ["0 23 34.750000 321", "0 23 34.75 321"]
|
110
125
|
self.assertIn(output.strip(), expected_cases)
|
111
126
|
|
127
|
+
def test_bool(self):
|
128
|
+
output, _ = self.run_code(printbool_usecase)
|
129
|
+
expected = "True\nFalse\nTrue\nTrue\nFalse\nFalse"
|
130
|
+
self.assertEqual(output.strip(), expected)
|
131
|
+
|
112
132
|
def test_printempty(self):
|
113
133
|
output, _ = self.run_code(printempty_usecase)
|
114
134
|
self.assertEqual(output.strip(), "")
|
@@ -0,0 +1,51 @@
|
|
1
|
+
# Generates the input files used by the pynvjitlink binding test suite

# Test binaries are built taking into account the CC of the GPU in the test machine
GPU_CC := $(shell nvidia-smi --query-gpu=compute_cap --format=csv | grep -v compute_cap | head -n 1 | sed 's/\.//')

# Fall back to CC 7.5 when detection produced no output. A `?=` assignment
# cannot be used for this: the `:=` above always defines GPU_CC (possibly as
# an empty string), and `?=` only assigns to variables that are undefined.
ifeq ($(strip $(GPU_CC)),)
    GPU_CC := 75
endif

# Use CC 7.0 as an alternative in fatbin testing, unless CC is 7.x
ifeq ($(shell echo "$(GPU_CC)" | cut -c1),7)
    ALT_CC := 80
else
    ALT_CC := 70
endif

# Gencode flags suitable for most tests
GENCODE := -gencode arch=compute_$(GPU_CC),code=sm_$(GPU_CC)

# Fatbin tests need to generate code for an additional compute capability
FATBIN_GENCODE := $(GENCODE) -gencode arch=compute_$(ALT_CC),code=sm_$(ALT_CC)

# LTO-IR tests need to generate for the LTO "architecture" instead
LTOIR_GENCODE := -gencode arch=lto_$(GPU_CC),code=lto_$(GPU_CC)

# Compile with optimization; use relocatable device code to preserve device
# functions in the final output
NVCC_FLAGS := -O3 -rdc true

# Flags specific to output type
CUBIN_FLAGS := $(GENCODE) --cubin
PTX_FLAGS := $(GENCODE) -ptx
OBJECT_FLAGS := $(GENCODE) -dc
LIBRARY_FLAGS := $(GENCODE) -lib
FATBIN_FLAGS := $(FATBIN_GENCODE) --fatbin
LTOIR_FLAGS := $(LTOIR_GENCODE) -dc

OUTPUT_DIR := ./

# `all` names an action rather than a file, so it must always run.
.PHONY: all

all:
	@echo "GPU CC: $(GPU_CC)"
	@echo "Alternative CC: $(ALT_CC)"
	# Compile all test objects
	nvcc $(NVCC_FLAGS) $(CUBIN_FLAGS) -o $(OUTPUT_DIR)/undefined_extern.cubin undefined_extern.cu
	nvcc $(NVCC_FLAGS) $(CUBIN_FLAGS) -o $(OUTPUT_DIR)/test_device_functions.cubin test_device_functions.cu
	nvcc $(NVCC_FLAGS) $(FATBIN_FLAGS) -o $(OUTPUT_DIR)/test_device_functions.fatbin test_device_functions.cu
	nvcc $(NVCC_FLAGS) $(PTX_FLAGS) -o $(OUTPUT_DIR)/test_device_functions.ptx test_device_functions.cu
	nvcc $(NVCC_FLAGS) $(OBJECT_FLAGS) -o $(OUTPUT_DIR)/test_device_functions.o test_device_functions.cu
	nvcc $(NVCC_FLAGS) $(LIBRARY_FLAGS) -o $(OUTPUT_DIR)/test_device_functions.a test_device_functions.cu

	# Generate LTO-IR wrapped in a fatbin
	nvcc $(NVCC_FLAGS) $(LTOIR_FLAGS) -o $(OUTPUT_DIR)/test_device_functions.ltoir.o test_device_functions.cu
	# Generate LTO-IR in a "raw" LTO-IR container
	python generate_raw_ltoir.py --arch sm_$(GPU_CC) -o $(OUTPUT_DIR)/test_device_functions.ltoir test_device_functions.cu
|
@@ -0,0 +1,163 @@
|
|
1
|
+
# Copyright (c) 2024, NVIDIA CORPORATION.
|
2
|
+
|
3
|
+
import argparse
|
4
|
+
import pathlib
|
5
|
+
import subprocess
|
6
|
+
import sys
|
7
|
+
|
8
|
+
from cuda import nvrtc
|
9
|
+
|
10
|
+
# Magic number found at the start of an LTO-IR file
|
11
|
+
LTOIR_MAGIC = 0x7F4E43ED
|
12
|
+
|
13
|
+
|
14
|
+
def check(args):
    """
    Abort and print an error message in the presence of an error result.

    ``args`` is a sequence whose first item is a result object exposing a
    ``value`` attribute (non-zero meaning an error), followed by zero or
    more output values.

    On error, the NVRTC error string is printed to stderr and the process
    exits with status 1.

    Otherwise:
    - Return None if there were no more arguments,
    - Return the singular argument if there was only one further argument,
    - Return the tuple of arguments if multiple followed.
    """

    result, *outputs = args
    value = result.value

    if value:
        error_string = check(nvrtc.nvrtcGetErrorString(result)).decode()
        msg = f"NVRTC error, code {value}: {error_string}"
        print(msg, file=sys.stderr)
        sys.exit(1)

    if not outputs:
        return None
    if len(outputs) == 1:
        return outputs[0]
    # Star-unpacking yields a list; convert it so the documented tuple
    # contract actually holds.
    return tuple(outputs)
|
39
|
+
|
40
|
+
|
41
|
+
def determine_include_flags():
    """
    Determine the include flags NVCC uses for the CUDA headers.

    Runs ``nvcc -v`` on a dummy file name and parses the ``#$ INCLUDES=``
    line from its verbose output.

    Returns the list of include flags, or None (after printing a diagnostic
    to stderr) if NVCC could not be run or its output was not as expected.
    """
    # Local import: only this function needs shlex.
    import shlex

    # Inspired by the logic in FindCUDAToolkit.cmake. We need the CUDA include
    # paths because NVRTC doesn't add them by default, and we can compile a
    # much broader set of test files if the CUDA includes are available.

    # We invoke NVCC in verbose mode ("-v") and give a dummy filename, without
    # which it won't produce output.

    cmd = ["nvcc", "-v", "__dummy"]
    try:
        cp = subprocess.run(cmd, capture_output=True)
    except OSError as e:
        # NVCC is missing or cannot be executed; report it through the same
        # "return None" path as every other failure in this function.
        print(f"Unable to run `nvcc -v`: {e}", file=sys.stderr)
        return None

    # Since the dummy file doesn't actually exist, NVCC is expected to exit
    # with an error code of 1.
    rc = cp.returncode
    if rc != 1:
        print(
            f"Unexpected return code ({rc}) from `nvcc -v`. Expected 1.",
            file=sys.stderr,
        )
        return None

    lines = cp.stderr.decode().splitlines()

    includes_lines = [line for line in lines if line.startswith("#$ INCLUDES=")]
    if len(includes_lines) != 1:
        print(
            f"Expected exactly one INCLUDES line. Got {len(includes_lines)}.",
            file=sys.stderr,
        )
        return None

    # Parse out the arguments following "INCLUDES=" - these are a space
    # separated list of strings that are potentially quoted. shlex keeps a
    # quoted flag containing spaces together as one token and removes the
    # quotes, which a plain str.split() would not.
    # NOTE(review): shlex treats backslashes outside quotes as escapes;
    # confirm NVCC always quotes paths on platforms that use backslashes.
    raw_flags = includes_lines[0].split("INCLUDES=", 1)[1]
    try:
        include_flags = shlex.split(raw_flags)
    except ValueError as e:
        print(
            f"Unable to parse INCLUDES line from `nvcc -v`: {e}",
            file=sys.stderr,
        )
        return None

    print(f"Using CUDA include flags: {include_flags}")

    return include_flags
|
75
|
+
|
76
|
+
|
77
|
+
def get_ltoir(source, name, arch):
    """Compile CUDA C/C++ ``source`` with NVRTC and return the LTO-IR bytes.

    ``name`` is the program name passed to NVRTC and ``arch`` is the value
    used for ``--gpu-architecture``. Any failure is reported on stderr and
    ends the process with exit status 1.
    """

    create_result = nvrtc.nvrtcCreateProgram(
        source.encode(), name.encode(), 0, [], []
    )
    program = check(create_result)

    cuda_include_flags = determine_include_flags()
    if cuda_include_flags is None:
        print("Error determining CUDA include flags. Exiting.", file=sys.stderr)
        sys.exit(1)

    # NVRTC is handed byte strings, so encode every option up front.
    options = [
        opt.encode()
        for opt in (
            f"--gpu-architecture={arch}",
            "-dlto",
            "-rdc",
            "true",
            *cuda_include_flags,
        )
    ]

    compile_result = nvrtc.nvrtcCompileProgram(program, len(options), options)

    # A compilation failure gets its program log shown to the user; every
    # other error code goes through the generic check() below.
    if compile_result[0] == nvrtc.nvrtcResult.NVRTC_ERROR_COMPILATION:
        log_size = check(nvrtc.nvrtcGetProgramLogSize(program))
        log = b" " * log_size
        check(nvrtc.nvrtcGetProgramLog(program, log))
        print("NVRTC compilation error:\n", file=sys.stderr)
        print(log.decode(), file=sys.stderr)
        sys.exit(1)

    check(compile_result)

    # Pre-size a buffer for NVRTC to fill with the LTO-IR.
    ltoir_size = check(nvrtc.nvrtcGetLTOIRSize(program))
    ltoir = b" " * ltoir_size
    check(nvrtc.nvrtcGetLTOIR(program, ltoir))

    # The first four bytes, read little-endian, must be the LTO-IR magic.
    header = int.from_bytes(ltoir[:4], byteorder="little")
    if header == LTOIR_MAGIC:
        return ltoir

    print(
        f"Unexpected header value 0x{header:X}.\n"
        f"Expected LTO-IR magic number 0x{LTOIR_MAGIC:X}."
        "\nExiting.",
        file=sys.stderr,
    )
    sys.exit(1)
|
128
|
+
|
129
|
+
|
130
|
+
def main(sourcepath, outputpath, arch):
    """Compile the CUDA source at ``sourcepath`` to LTO-IR and save it.

    The file is read as text and compiled for ``arch`` by ``get_ltoir``,
    using the source's file name as the program name. The resulting bytes
    are written to ``outputpath``.
    """
    src = pathlib.Path(sourcepath)
    source = src.read_text()
    ltoir = get_ltoir(source, src.name, arch)

    print(f"Writing {outputpath}...")

    pathlib.Path(outputpath).write_bytes(ltoir)
|
141
|
+
|
142
|
+
|
143
|
+
if __name__ == "__main__":
    # Command-line entry point: parse the arguments and hand them to main().
    parser = argparse.ArgumentParser(
        description="Compiles CUDA C/C++ to LTO-IR using NVRTC."
    )
    parser.add_argument("sourcepath", help="path to source file")
    parser.add_argument(
        "-o", "--output", default=None, help="path to output file"
    )
    parser.add_argument(
        "-a",
        "--arch",
        default="sm_50",
        help="compute arch to target (e.g. sm_87). " "Defaults to sm_50.",
    )

    cli = parser.parse_args()

    # Without an explicit output path, write next to the source file with
    # its suffix replaced by ".ltoir".
    if cli.output is None:
        destination = pathlib.Path(cli.sourcepath).with_suffix(".ltoir")
    else:
        destination = cli.output

    main(cli.sourcepath, destination, cli.arch)
|
@@ -0,0 +1,19 @@
|
|
1
|
+
#include <cuda_fp16.h>
|
2
|
+
|
3
|
+
extern __device__ bool __heq(__half arg1, __half arg2);
|
4
|
+
|
5
|
+
// Device function that returns the result of __hadd applied to its two
// half-precision arguments.
__device__ __half test_add_fp16(__half arg1, __half arg2) {
  const __half sum = __hadd(arg1, arg2);
  return sum;
}
|
8
|
+
|
9
|
+
// Device function that forwards its two half-precision arguments to __heq
// and returns that call's boolean result.
__device__ bool test_cmp_fp16(__half arg1, __half arg2) {
  const bool result = __heq(arg1, arg2);
  return result;
}
|
12
|
+
|
13
|
+
typedef unsigned int uint32_t;
|
14
|
+
|
15
|
+
// Stores a + b through `result` and always returns 0. Declared extern "C" so
// the symbol name is not C++-mangled.
// NOTE(review): the int return presumably serves as a status code for the
// Numba caller - confirm against the calling convention in use.
extern "C" __device__ int add_from_numba(uint32_t *result, uint32_t a,
                                         uint32_t b) {
  const uint32_t sum = a + b;
  *result = sum;
  return 0;
}
|
@@ -1,6 +1,6 @@
|
|
1
1
|
_numba_cuda_redirector.pth,sha256=cmfMMmV0JPh3yEpl4bGeM9AuXiVVMSo6Z_b7RaQL3XE,30
|
2
2
|
_numba_cuda_redirector.py,sha256=rc56rnb40w3AtrqnhS66JSgYTSTsi3iTn8yP3NuoQV8,2401
|
3
|
-
numba_cuda/VERSION,sha256=
|
3
|
+
numba_cuda/VERSION,sha256=9p4BNLUELS6P4gQF_geoXDc4ldjt9TTmnJlhGbwWsO0,7
|
4
4
|
numba_cuda/__init__.py,sha256=atXeUvJKR3JHcAiCFbXCVOJQUHgB1TulmsqSL_9RT3Q,114
|
5
5
|
numba_cuda/_version.py,sha256=jbdUsbR7sVllw0KxQNB0-FMd929CGg3kH2fhHdrlkuc,719
|
6
6
|
numba_cuda/numba/cuda/__init__.py,sha256=idyVHOObC9lTYnp62v7rVprSacRM4d5F6vhXfG5ElTI,621
|
@@ -8,7 +8,7 @@ numba_cuda/numba/cuda/api.py,sha256=shLu7NEZHRMcaZAMEXSoyA5Gi5m0tm6ZRymxKLEKCSg,
|
|
8
8
|
numba_cuda/numba/cuda/api_util.py,sha256=aQfUV2-4RM_oGVvckMjbMr5e3effOQNX04v1T0O2EfQ,861
|
9
9
|
numba_cuda/numba/cuda/args.py,sha256=HloHkw_PQal2DT-I70Xf_XbnGObS1jiUgcRrQ85Gq28,1978
|
10
10
|
numba_cuda/numba/cuda/cg.py,sha256=9V1uZqyGOJX1aFd9c6GAPbLSqq83lE8LoP-vxxrKENY,1490
|
11
|
-
numba_cuda/numba/cuda/codegen.py,sha256=
|
11
|
+
numba_cuda/numba/cuda/codegen.py,sha256=9LnTlei-4JK7iq3Rg-H2Y19Oh_u5ZXMC_CPfattANjw,12358
|
12
12
|
numba_cuda/numba/cuda/compiler.py,sha256=47SjuI5p4yWCujAglIq0Cb0ARO8QxRp4fOZropkNMtQ,16001
|
13
13
|
numba_cuda/numba/cuda/cpp_function_wrappers.cu,sha256=iv84_F6Q9kFjV_kclrQz1msh6Dud8mI3qNkswTid7Qc,953
|
14
14
|
numba_cuda/numba/cuda/cuda_fp16.h,sha256=1IC0mdNdkvKbvAe0-f4uYVS7WFrVqOyI1nRUbBiqr6A,126844
|
@@ -19,14 +19,14 @@ numba_cuda/numba/cuda/cudaimpl.py,sha256=3YMxQSCv2KClBrpuXGchrTNICV1F6NIjjL2rie5
|
|
19
19
|
numba_cuda/numba/cuda/cudamath.py,sha256=EFNtdzEytAZuwijdRoFGzVKCeal76UzzaNy7wUFQx8I,3978
|
20
20
|
numba_cuda/numba/cuda/decorators.py,sha256=qSpir16-jPYSe2YuRZ6g9INeobmsMNg6ab9IZpwJocM,7823
|
21
21
|
numba_cuda/numba/cuda/descriptor.py,sha256=rNMaurJkjNjIBmHPozDoLC35DMURE0fn_LtnXRmaG_w,985
|
22
|
-
numba_cuda/numba/cuda/device_init.py,sha256=
|
22
|
+
numba_cuda/numba/cuda/device_init.py,sha256=lP79tCsQ0Np9xcbjv_lXcH4JOiVZvV8nwg3INdETxsc,3586
|
23
23
|
numba_cuda/numba/cuda/deviceufunc.py,sha256=yxAH71dpgJWK8okmCJm0FUV6z2AqdThCYOTZspT7z0M,30775
|
24
|
-
numba_cuda/numba/cuda/dispatcher.py,sha256=
|
24
|
+
numba_cuda/numba/cuda/dispatcher.py,sha256=CwFksBBcjNg9dLSTgC4GgqOy2sLeZYX8mvZvdzscGBw,40206
|
25
25
|
numba_cuda/numba/cuda/errors.py,sha256=XwWHzCllx0DXU6BQdoRH0m3pznGxnTFOBTVYXMmCfqg,1724
|
26
26
|
numba_cuda/numba/cuda/extending.py,sha256=URsyBYls2te-mgE0yvDY6akvawYCA0blBFfD7Lf9DO4,142
|
27
27
|
numba_cuda/numba/cuda/initialize.py,sha256=TQGHGLQoq4ch4J6CLDcJdGsZzXM-g2kDgdyO1u-Rbhg,546
|
28
28
|
numba_cuda/numba/cuda/intrinsic_wrapper.py,sha256=zbcUbegbfF3GdnC2Rl-z26-gozE8xBtaMxpS8LpOhfo,2239
|
29
|
-
numba_cuda/numba/cuda/intrinsics.py,sha256=
|
29
|
+
numba_cuda/numba/cuda/intrinsics.py,sha256=k0mQYAt0FTlJeghE5V8lSBtO4fgKH1jSRRLwHHcH4M0,6100
|
30
30
|
numba_cuda/numba/cuda/libdevice.py,sha256=476LeIEaAth409m-0OO1SMMmY5AHzN2AotXI__k_yYE,60065
|
31
31
|
numba_cuda/numba/cuda/libdevicedecl.py,sha256=xdZbb_rCaftMf8Pbw63g_Lr230N-1QoaYzBxq8udKTg,532
|
32
32
|
numba_cuda/numba/cuda/libdevicefuncs.py,sha256=c80lGpGoFIYkAdgr4fzbxzdNCyJYrLdss64bwa0Mc6w,37471
|
@@ -34,11 +34,11 @@ numba_cuda/numba/cuda/libdeviceimpl.py,sha256=a9BmJ5kRtZ_mB7KjbDWW-PEpRuNiO_SMOx
|
|
34
34
|
numba_cuda/numba/cuda/mathimpl.py,sha256=d_gCoQ4hJzNBFNc2hvRON5h1F052epgQ8zh_RKTlLlI,14416
|
35
35
|
numba_cuda/numba/cuda/models.py,sha256=2c_seT-cWX-VyWYmcapaqOEl1M4FX6_kdIOusj4s5aE,1328
|
36
36
|
numba_cuda/numba/cuda/nvvmutils.py,sha256=W1zr1TpnmFjTkHF0qeu5wnBHub6gzrnpzsvgmu2OLcU,8295
|
37
|
-
numba_cuda/numba/cuda/printimpl.py,sha256=
|
37
|
+
numba_cuda/numba/cuda/printimpl.py,sha256=Y1BCQ7EgO2wQ7O6LibNVYBG3tmjVTvmURATW403rLao,3504
|
38
38
|
numba_cuda/numba/cuda/random.py,sha256=khX8iDdde_RTUPWhAqrxZacHRQAorFr7BokPuxRWzrg,10456
|
39
39
|
numba_cuda/numba/cuda/simulator_init.py,sha256=W_bPRtmPGOQVuiprbgt7ENnnnELv_LPCeLDIsfsvFZ8,460
|
40
40
|
numba_cuda/numba/cuda/stubs.py,sha256=W3tozv4ganMnfbdFqyPjgQXYeX8GQhwx_xXgv8jk6iM,22270
|
41
|
-
numba_cuda/numba/cuda/target.py,sha256=
|
41
|
+
numba_cuda/numba/cuda/target.py,sha256=LUOJRvGrX7Ch3-vLbZcjti21RAwUctdodVVcl82wYJ0,16954
|
42
42
|
numba_cuda/numba/cuda/testing.py,sha256=E0wP2vfno1yWsl0v1zg31kpbU8FrKxTF-5y9Iv4WjA4,6412
|
43
43
|
numba_cuda/numba/cuda/types.py,sha256=WVfjcly_VUpG9FfKueiEPzZm2NV8Hg0XAFg3bNzPdVc,1314
|
44
44
|
numba_cuda/numba/cuda/ufuncs.py,sha256=txw27IxG80W1Yo7e-XwL2AMcQo0fMnxMjBIMy-n5pCo,23317
|
@@ -47,12 +47,14 @@ numba_cuda/numba/cuda/vectorizers.py,sha256=u_0EzaD5tqVH8uOz4Gmqn3FgPC1rckwDAQuR
|
|
47
47
|
numba_cuda/numba/cuda/cudadrv/__init__.py,sha256=0TL4MZcJXUoo9qA7uu0vLv7eHrXRerVmyfi7O149ITw,199
|
48
48
|
numba_cuda/numba/cuda/cudadrv/devicearray.py,sha256=B3ItYQywTnwTWjltxVRx6oaKRq7rxTtvOaiqTWsMQ2w,31123
|
49
49
|
numba_cuda/numba/cuda/cudadrv/devices.py,sha256=6SneNmoq83gue0txFWWx4A65vViAa8xA06FzkApoqAk,7992
|
50
|
-
numba_cuda/numba/cuda/cudadrv/driver.py,sha256=
|
50
|
+
numba_cuda/numba/cuda/cudadrv/driver.py,sha256=uPjKugdtSJfIwVSAo3KgkvQhctbABkQphHAfcq6Q7ec,110892
|
51
51
|
numba_cuda/numba/cuda/cudadrv/drvapi.py,sha256=52ms3X6hfPaQB8E1jb6g7QKqRvHzBMlDQ-V2DM1rXxQ,17178
|
52
52
|
numba_cuda/numba/cuda/cudadrv/dummyarray.py,sha256=nXRngdr-k3h_BNGQuJUxmp89yGNWxqEDJedpwDPEZ44,14209
|
53
|
-
numba_cuda/numba/cuda/cudadrv/enums.py,sha256=
|
53
|
+
numba_cuda/numba/cuda/cudadrv/enums.py,sha256=37zZmyrLvT-7R8wWtwKJkQhN8siLMxsDGiA3_NQ-yx8,23740
|
54
54
|
numba_cuda/numba/cuda/cudadrv/error.py,sha256=zEIryW6aIy8GG4ypmTliB6RgY4Gy2n8ckz7I6W99LUM,524
|
55
55
|
numba_cuda/numba/cuda/cudadrv/libs.py,sha256=PRyxal4bz9jVZmuLpKiYw-VaR59LekfwJgWKo7R5uRY,6005
|
56
|
+
numba_cuda/numba/cuda/cudadrv/linkable_code.py,sha256=Q_YTv0apBo9t8pkMlKrthPPSVeLd376ZTmVDF5NtVVo,1328
|
57
|
+
numba_cuda/numba/cuda/cudadrv/mappings.py,sha256=-dTPHvAkDjdH6vS5OjgrB71AFuqKO6CRgf7hpOk2wiw,802
|
56
58
|
numba_cuda/numba/cuda/cudadrv/ndarray.py,sha256=HtULWWFyDlgqvrH5459yyPTvU4UbUo2DSdtcNfvbH00,473
|
57
59
|
numba_cuda/numba/cuda/cudadrv/nvrtc.py,sha256=CLpuD9VzPcYoXj8dZ2meSoqbWXHOOC5V5D6dFNdXqmg,9693
|
58
60
|
numba_cuda/numba/cuda/cudadrv/nvvm.py,sha256=v2hJJTAQeRmoG59-hnhgMEp5BSVA73QHtEoy636VKao,24107
|
@@ -100,6 +102,7 @@ numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py,sha256=0KPe4E9wOZsSV_0QI0Lmj
|
|
100
102
|
numba_cuda/numba/cuda/tests/cudadrv/test_linker.py,sha256=_l2_EQEko2Jet5ooj4XMT0L4BjOuqLjbONGj1_MVI50,10161
|
101
103
|
numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py,sha256=kYXYMkx_3GPAITKp4reLeM8KSzKkpxiC8nxnBvXpaTA,4979
|
102
104
|
numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py,sha256=984jATSa01SRoSrVqxPeO6ujJ7w2jsnZa39ABInFLVI,1529
|
105
|
+
numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py,sha256=m5zv6K6PHLnm-AqHKo5x9f_ZBrn3rmvPX_ZGjjrkPfI,6807
|
103
106
|
numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py,sha256=DF7KV5uh-yMztks0f47NhpalV64dvsNy-f8HY6GhAhE,7373
|
104
107
|
numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py,sha256=u_TthSS2N-2J4eBIuF4PGg33AjD-wxly7MKpz0vRAKc,944
|
105
108
|
numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py,sha256=MQWZx1j3lbEpWmIpQ1bV9szrGOV3VHN0QrEnJRjAhW4,508
|
@@ -151,7 +154,7 @@ numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py,sha256=Uhe8Q0u42jySrpwA
|
|
151
154
|
numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py,sha256=luDtBxFS_5ZbVemXe1Z7gfqMliaU_EAOR4SuLsU5rhw,2677
|
152
155
|
numba_cuda/numba/cuda/tests/cudapy/test_idiv.py,sha256=HLJ_f2lX8m_NNJjUbl_8zZ0-8GsBlRdBP2CUo_yWb0Y,1056
|
153
156
|
numba_cuda/numba/cuda/tests/cudapy/test_inspect.py,sha256=lP9-8SbWFn2Xc-qmF6UNhcY6LreKTnveaK5CGW2pu8E,5196
|
154
|
-
numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py,sha256=
|
157
|
+
numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py,sha256=M6-pad8nVM0fuL18uFxvE6tmHw0spLNhnMBLVlO0FKU,36400
|
155
158
|
numba_cuda/numba/cuda/tests/cudapy/test_ipc.py,sha256=fggyy-kmsOkCb906_q3kXPGRziccWu7Co7ir83zBMwM,10536
|
156
159
|
numba_cuda/numba/cuda/tests/cudapy/test_iterators.py,sha256=daQW3kSkp7icCmlTn9pCvnaauz60k_eBf4x1UQF-XVY,2344
|
157
160
|
numba_cuda/numba/cuda/tests/cudapy/test_lang.py,sha256=U1BCVZMjU1AZ4wDSmjsRIPPcAReiq4dB77Cz7GmrdmA,1691
|
@@ -172,7 +175,7 @@ numba_cuda/numba/cuda/tests/cudapy/test_operator.py,sha256=0nJej4D898_JU-jhlif44
|
|
172
175
|
numba_cuda/numba/cuda/tests/cudapy/test_optimization.py,sha256=SvqRsSFgcGxkFDZS-kul5B-mi8GxINTS98uUzAy4dhw,2647
|
173
176
|
numba_cuda/numba/cuda/tests/cudapy/test_overload.py,sha256=u4yUDVFcV9E3NWMlNjM81e3IW4KaIkcDtXig8JYevsw,8538
|
174
177
|
numba_cuda/numba/cuda/tests/cudapy/test_powi.py,sha256=TI82rYRnkSnwv9VN6PMpBnr9JqMJ_F3HhH4cKY6O8tw,3276
|
175
|
-
numba_cuda/numba/cuda/tests/cudapy/test_print.py,sha256
|
178
|
+
numba_cuda/numba/cuda/tests/cudapy/test_print.py,sha256=QXhhhnEz1d5BlldLINQVnmuHeM_dT3aLvfGS7jm24nE,4451
|
176
179
|
numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py,sha256=R88Vfgg3mSAZ0Jy6WT6dJNmkFTsxnVnEmO7XqpqyxuU,986
|
177
180
|
numba_cuda/numba/cuda/tests/cudapy/test_random.py,sha256=rLw7_8a7BBhD_8GNqMal0l_AbWXzLs_Q0hC6_X8gdjA,3467
|
178
181
|
numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py,sha256=grR64kdRlsLcR0K3IxSfI2VKsTrrqxsXuROOpvj-6nw,18769
|
@@ -224,8 +227,12 @@ numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py,sha256=o4DYocyHK7
|
|
224
227
|
numba_cuda/numba/cuda/tests/nocuda/test_import.py,sha256=teiL8rpFGQOh41kyBSSNHHFYAJYgpdStXkTcpK4_fxo,1641
|
225
228
|
numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py,sha256=7kJOPHEcrjy_kTA9Ym-iT_B972bgFRu3UkRtwIgWtuI,7948
|
226
229
|
numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py,sha256=n0_-xFaw6QqiZbhe55oy7lnEeOwqTvA55p5EUFiTpNw,2006
|
227
|
-
numba_cuda
|
228
|
-
numba_cuda
|
229
|
-
numba_cuda
|
230
|
-
numba_cuda
|
231
|
-
numba_cuda-0.0.
|
230
|
+
numba_cuda/numba/cuda/tests/test_binary_generation/Makefile,sha256=OFC_6irwscCNGAyJJKq7fTchzWosCUuiVWU02m0bcUQ,2248
|
231
|
+
numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py,sha256=OqqmFhDk3c0Edt4AvAGm0MQRCXb9jLSO2wpQ72oiXXI,4838
|
232
|
+
numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu,sha256=cUf-t6ZM9MK_x7X_aKwsrKW1LdR97XcpR-qnYr5faOE,453
|
233
|
+
numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu,sha256=q3oxZziT8KDodeNcEBiWULH6vMrHCWucmJmtrg8C0d0,128
|
234
|
+
numba_cuda-0.0.18.dist-info/LICENSE,sha256=eHeYE-XjASmwbxfsP5AImgfzRwZurZGqH1f6OFwJ4io,1326
|
235
|
+
numba_cuda-0.0.18.dist-info/METADATA,sha256=kJletXn1FHyLocorf4n5QLO1TH0v6G_8uNkbqBAwiWY,1393
|
236
|
+
numba_cuda-0.0.18.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
|
237
|
+
numba_cuda-0.0.18.dist-info/top_level.txt,sha256=C50SsH-8tXDmt7I0Y3nlJYhS5s6pqWflCPdobe9vx2M,11
|
238
|
+
numba_cuda-0.0.18.dist-info/RECORD,,
|
File without changes
|
File without changes
|