numba-cuda 0.19.1__py3-none-any.whl → 0.20.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of numba-cuda might be problematic. Click here for more details.
- numba_cuda/VERSION +1 -1
- numba_cuda/numba/cuda/__init__.py +1 -1
- numba_cuda/numba/cuda/_internal/cuda_bf16.py +12706 -1470
- numba_cuda/numba/cuda/_internal/cuda_fp16.py +2653 -8769
- numba_cuda/numba/cuda/api.py +6 -1
- numba_cuda/numba/cuda/bf16.py +285 -2
- numba_cuda/numba/cuda/cgutils.py +2 -2
- numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
- numba_cuda/numba/cuda/codegen.py +1 -1
- numba_cuda/numba/cuda/compiler.py +373 -30
- numba_cuda/numba/cuda/core/analysis.py +319 -0
- numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
- numba_cuda/numba/cuda/core/annotations/type_annotations.py +304 -0
- numba_cuda/numba/cuda/core/base.py +1289 -0
- numba_cuda/numba/cuda/core/bytecode.py +727 -0
- numba_cuda/numba/cuda/core/caching.py +2 -2
- numba_cuda/numba/cuda/core/compiler.py +6 -14
- numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
- numba_cuda/numba/cuda/core/config.py +747 -0
- numba_cuda/numba/cuda/core/consts.py +124 -0
- numba_cuda/numba/cuda/core/cpu.py +370 -0
- numba_cuda/numba/cuda/core/environment.py +68 -0
- numba_cuda/numba/cuda/core/event.py +511 -0
- numba_cuda/numba/cuda/core/funcdesc.py +330 -0
- numba_cuda/numba/cuda/core/inline_closurecall.py +1889 -0
- numba_cuda/numba/cuda/core/interpreter.py +48 -26
- numba_cuda/numba/cuda/core/ir_utils.py +15 -26
- numba_cuda/numba/cuda/core/options.py +262 -0
- numba_cuda/numba/cuda/core/postproc.py +249 -0
- numba_cuda/numba/cuda/core/pythonapi.py +1868 -0
- numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
- numba_cuda/numba/cuda/core/rewrites/ir_print.py +90 -0
- numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
- numba_cuda/numba/cuda/core/rewrites/static_binop.py +40 -0
- numba_cuda/numba/cuda/core/rewrites/static_getitem.py +187 -0
- numba_cuda/numba/cuda/core/rewrites/static_raise.py +98 -0
- numba_cuda/numba/cuda/core/ssa.py +496 -0
- numba_cuda/numba/cuda/core/targetconfig.py +329 -0
- numba_cuda/numba/cuda/core/tracing.py +231 -0
- numba_cuda/numba/cuda/core/transforms.py +952 -0
- numba_cuda/numba/cuda/core/typed_passes.py +738 -7
- numba_cuda/numba/cuda/core/typeinfer.py +1948 -0
- numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/eh.py +66 -0
- numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
- numba_cuda/numba/cuda/core/untyped_passes.py +1983 -0
- numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
- numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
- numba_cuda/numba/cuda/cpython/numbers.py +1474 -0
- numba_cuda/numba/cuda/cuda_paths.py +422 -246
- numba_cuda/numba/cuda/cudadecl.py +1 -1
- numba_cuda/numba/cuda/cudadrv/__init__.py +1 -1
- numba_cuda/numba/cuda/cudadrv/devicearray.py +2 -1
- numba_cuda/numba/cuda/cudadrv/driver.py +11 -140
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +111 -24
- numba_cuda/numba/cuda/cudadrv/libs.py +5 -5
- numba_cuda/numba/cuda/cudadrv/mappings.py +1 -1
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +19 -8
- numba_cuda/numba/cuda/cudadrv/nvvm.py +1 -4
- numba_cuda/numba/cuda/cudadrv/runtime.py +1 -1
- numba_cuda/numba/cuda/cudaimpl.py +5 -1
- numba_cuda/numba/cuda/debuginfo.py +85 -2
- numba_cuda/numba/cuda/decorators.py +3 -3
- numba_cuda/numba/cuda/descriptor.py +3 -4
- numba_cuda/numba/cuda/deviceufunc.py +66 -2
- numba_cuda/numba/cuda/dispatcher.py +18 -39
- numba_cuda/numba/cuda/flags.py +141 -1
- numba_cuda/numba/cuda/fp16.py +0 -2
- numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/lowering.py +7 -144
- numba_cuda/numba/cuda/mathimpl.py +2 -1
- numba_cuda/numba/cuda/memory_management/nrt.py +43 -17
- numba_cuda/numba/cuda/misc/findlib.py +75 -0
- numba_cuda/numba/cuda/models.py +9 -1
- numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
- numba_cuda/numba/cuda/np/npyfuncs.py +1807 -0
- numba_cuda/numba/cuda/np/numpy_support.py +553 -0
- numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +59 -0
- numba_cuda/numba/cuda/nvvmutils.py +1 -1
- numba_cuda/numba/cuda/printimpl.py +12 -1
- numba_cuda/numba/cuda/random.py +1 -1
- numba_cuda/numba/cuda/serialize.py +1 -1
- numba_cuda/numba/cuda/simulator/__init__.py +1 -1
- numba_cuda/numba/cuda/simulator/api.py +1 -1
- numba_cuda/numba/cuda/simulator/compiler.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +1 -1
- numba_cuda/numba/cuda/simulator/kernelapi.py +1 -1
- numba_cuda/numba/cuda/simulator/memory_management/nrt.py +14 -2
- numba_cuda/numba/cuda/target.py +35 -17
- numba_cuda/numba/cuda/testing.py +4 -19
- numba_cuda/numba/cuda/tests/__init__.py +1 -1
- numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
- numba_cuda/numba/cuda/tests/core/test_serialize.py +4 -4
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +6 -3
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +18 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +2 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +2 -1
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +539 -2
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +81 -1
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +2 -3
- numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +130 -0
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +293 -4
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +2 -1
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +18 -8
- numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +10 -37
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +20 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +453 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +263 -2
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +112 -6
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +1 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +0 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +3 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +0 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +0 -2
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +3 -1
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +24 -12
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +2 -1
- numba_cuda/numba/cuda/tests/support.py +55 -15
- numba_cuda/numba/cuda/tests/test_tracing.py +200 -0
- numba_cuda/numba/cuda/types.py +56 -0
- numba_cuda/numba/cuda/typing/__init__.py +9 -1
- numba_cuda/numba/cuda/typing/cffi_utils.py +55 -0
- numba_cuda/numba/cuda/typing/context.py +751 -0
- numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
- numba_cuda/numba/cuda/typing/npydecl.py +658 -0
- numba_cuda/numba/cuda/typing/templates.py +7 -6
- numba_cuda/numba/cuda/ufuncs.py +3 -3
- numba_cuda/numba/cuda/utils.py +6 -112
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.0.dist-info}/METADATA +2 -1
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.0.dist-info}/RECORD +170 -115
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +0 -60
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.0.dist-info}/WHEEL +0 -0
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.0.dist-info}/licenses/LICENSE.numba +0 -0
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.0.dist-info}/top_level.txt +0 -0
|
@@ -19,7 +19,8 @@ import numba
|
|
|
19
19
|
from numba import _devicearray
|
|
20
20
|
from numba.cuda.cudadrv import devices, dummyarray
|
|
21
21
|
from numba.cuda.cudadrv import driver as _driver
|
|
22
|
-
from numba.core import types, config
|
|
22
|
+
from numba.core import types
|
|
23
|
+
from numba.cuda.core import config
|
|
23
24
|
from numba.np.unsafe.ndarray import to_fixed_tuple
|
|
24
25
|
from numba.np.numpy_support import numpy_version
|
|
25
26
|
from numba.np import numpy_support
|
|
@@ -47,7 +47,7 @@ from collections import namedtuple, deque
|
|
|
47
47
|
|
|
48
48
|
|
|
49
49
|
from numba import mviewbuf
|
|
50
|
-
from numba.core import config
|
|
50
|
+
from numba.cuda.core import config
|
|
51
51
|
from numba.cuda import utils, serialize
|
|
52
52
|
from .error import CudaSupportError, CudaDriverError
|
|
53
53
|
from .drvapi import API_PROTOTYPES
|
|
@@ -82,12 +82,6 @@ _py_incref = ctypes.pythonapi.Py_IncRef
|
|
|
82
82
|
_py_decref.argtypes = [ctypes.py_object]
|
|
83
83
|
_py_incref.argtypes = [ctypes.py_object]
|
|
84
84
|
|
|
85
|
-
|
|
86
|
-
_MVC_ERROR_MESSAGE = (
|
|
87
|
-
"Minor version compatibility requires ptxcompiler and cubinlinker packages "
|
|
88
|
-
"to be available"
|
|
89
|
-
)
|
|
90
|
-
|
|
91
85
|
USE_NV_BINDING = config.CUDA_USE_NVIDIA_BINDING
|
|
92
86
|
|
|
93
87
|
if USE_NV_BINDING:
|
|
@@ -137,7 +131,7 @@ def _have_nvjitlink():
|
|
|
137
131
|
nvjitlink_internal._inspect_function_pointer("__nvJitLinkVersion")
|
|
138
132
|
!= 0
|
|
139
133
|
)
|
|
140
|
-
except NotSupportedError:
|
|
134
|
+
except (RuntimeError, NotSupportedError):
|
|
141
135
|
# no driver
|
|
142
136
|
return False
|
|
143
137
|
|
|
@@ -161,12 +155,6 @@ class CudaAPIError(CudaDriverError):
|
|
|
161
155
|
|
|
162
156
|
|
|
163
157
|
def locate_driver_and_loader():
|
|
164
|
-
envpath = config.CUDA_DRIVER
|
|
165
|
-
|
|
166
|
-
if envpath == "0":
|
|
167
|
-
# Force fail
|
|
168
|
-
_raise_driver_not_found()
|
|
169
|
-
|
|
170
158
|
# Determine DLL type
|
|
171
159
|
if sys.platform == "win32":
|
|
172
160
|
dlloader = ctypes.WinDLL
|
|
@@ -182,26 +170,11 @@ def locate_driver_and_loader():
|
|
|
182
170
|
dldir = ["/usr/lib", "/usr/lib64"]
|
|
183
171
|
dlnames = ["libcuda.so", "libcuda.so.1"]
|
|
184
172
|
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
"NUMBA_CUDA_DRIVER %s is not a valid path" % envpath
|
|
191
|
-
)
|
|
192
|
-
if not os.path.isfile(envpath):
|
|
193
|
-
raise ValueError(
|
|
194
|
-
"NUMBA_CUDA_DRIVER %s is not a valid file "
|
|
195
|
-
"path. Note it must be a filepath of the .so/"
|
|
196
|
-
".dll/.dylib or the driver" % envpath
|
|
197
|
-
)
|
|
198
|
-
candidates = [envpath]
|
|
199
|
-
else:
|
|
200
|
-
# First search for the name in the default library path.
|
|
201
|
-
# If that is not found, try the specific path.
|
|
202
|
-
candidates = dlnames + [
|
|
203
|
-
os.path.join(x, y) for x, y in product(dldir, dlnames)
|
|
204
|
-
]
|
|
173
|
+
# First search for the name in the default library path.
|
|
174
|
+
# If that is not found, try specific common paths.
|
|
175
|
+
candidates = dlnames + [
|
|
176
|
+
os.path.join(x, y) for x, y in product(dldir, dlnames)
|
|
177
|
+
]
|
|
205
178
|
|
|
206
179
|
return dlloader, candidates
|
|
207
180
|
|
|
@@ -237,9 +210,7 @@ def find_driver():
|
|
|
237
210
|
|
|
238
211
|
DRIVER_NOT_FOUND_MSG = """
|
|
239
212
|
CUDA driver library cannot be found.
|
|
240
|
-
|
|
241
|
-
try setting environment variable NUMBA_CUDA_DRIVER
|
|
242
|
-
with the file path of the CUDA driver shared library.
|
|
213
|
+
Ensure that a compatible NVIDIA driver is installed and available on your system path.
|
|
243
214
|
"""
|
|
244
215
|
|
|
245
216
|
DRIVER_LOAD_ERROR_MSG = """
|
|
@@ -2842,10 +2813,7 @@ class _LinkerBase(metaclass=ABCMeta):
|
|
|
2842
2813
|
def add_cu(self, cu, name):
|
|
2843
2814
|
"""Add CUDA source in a string to the link. The name of the source
|
|
2844
2815
|
file should be specified in `name`."""
|
|
2845
|
-
|
|
2846
|
-
dev = driver.get_device(ac.devnum)
|
|
2847
|
-
cc = dev.compute_capability
|
|
2848
|
-
ptx, log = nvrtc.compile(cu, name, cc)
|
|
2816
|
+
ptx, log = nvrtc.compile(cu, name, self.cc)
|
|
2849
2817
|
|
|
2850
2818
|
if config.DUMP_ASSEMBLY:
|
|
2851
2819
|
print(("ASSEMBLY %s" % name).center(80, "-"))
|
|
@@ -3009,10 +2977,7 @@ class _Linker(_LinkerBase):
|
|
|
3009
2977
|
self._object_codes.append(obj)
|
|
3010
2978
|
|
|
3011
2979
|
def add_cu(self, cu, name="<cudapy-cu>"):
|
|
3012
|
-
|
|
3013
|
-
dev = driver.get_device(ac.devnum)
|
|
3014
|
-
cc = dev.compute_capability
|
|
3015
|
-
obj, log = nvrtc.compile(cu, name, cc, ltoir=self.lto)
|
|
2980
|
+
obj, log = nvrtc.compile(cu, name, self.cc, ltoir=self.lto)
|
|
3016
2981
|
|
|
3017
2982
|
if not self.lto and config.DUMP_ASSEMBLY:
|
|
3018
2983
|
print(("ASSEMBLY %s" % name).center(80, "-"))
|
|
@@ -3099,101 +3064,6 @@ class _Linker(_LinkerBase):
|
|
|
3099
3064
|
return result
|
|
3100
3065
|
|
|
3101
3066
|
|
|
3102
|
-
class MVCLinker(_LinkerBase):
|
|
3103
|
-
"""
|
|
3104
|
-
Linker supporting Minor Version Compatibility, backed by the cubinlinker
|
|
3105
|
-
package.
|
|
3106
|
-
"""
|
|
3107
|
-
|
|
3108
|
-
def __init__(self, max_registers=None, lineinfo=False, cc=None):
|
|
3109
|
-
try:
|
|
3110
|
-
from cubinlinker import CubinLinker
|
|
3111
|
-
except ImportError as err:
|
|
3112
|
-
raise ImportError(_MVC_ERROR_MESSAGE) from err
|
|
3113
|
-
|
|
3114
|
-
if cc is None:
|
|
3115
|
-
raise RuntimeError(
|
|
3116
|
-
"MVCLinker requires Compute Capability to be "
|
|
3117
|
-
"specified, but cc is None"
|
|
3118
|
-
)
|
|
3119
|
-
|
|
3120
|
-
super().__init__(max_registers, lineinfo, cc)
|
|
3121
|
-
|
|
3122
|
-
arch = f"sm_{cc[0] * 10 + cc[1]}"
|
|
3123
|
-
ptx_compile_opts = ["--gpu-name", arch, "-c"]
|
|
3124
|
-
if max_registers:
|
|
3125
|
-
arg = f"--maxrregcount={max_registers}"
|
|
3126
|
-
ptx_compile_opts.append(arg)
|
|
3127
|
-
if lineinfo:
|
|
3128
|
-
ptx_compile_opts.append("--generate-line-info")
|
|
3129
|
-
self.ptx_compile_options = tuple(ptx_compile_opts)
|
|
3130
|
-
|
|
3131
|
-
self._linker = CubinLinker(f"--arch={arch}")
|
|
3132
|
-
|
|
3133
|
-
@property
|
|
3134
|
-
def info_log(self):
|
|
3135
|
-
return self._linker.info_log
|
|
3136
|
-
|
|
3137
|
-
@property
|
|
3138
|
-
def error_log(self):
|
|
3139
|
-
return self._linker.error_log
|
|
3140
|
-
|
|
3141
|
-
def add_ptx(self, ptx, name="<cudapy-ptx>"):
|
|
3142
|
-
try:
|
|
3143
|
-
from ptxcompiler import compile_ptx
|
|
3144
|
-
from cubinlinker import CubinLinkerError
|
|
3145
|
-
except ImportError as err:
|
|
3146
|
-
raise ImportError(_MVC_ERROR_MESSAGE) from err
|
|
3147
|
-
compile_result = compile_ptx(ptx.decode(), self.ptx_compile_options)
|
|
3148
|
-
try:
|
|
3149
|
-
self._linker.add_cubin(compile_result.compiled_program, name)
|
|
3150
|
-
except CubinLinkerError as e:
|
|
3151
|
-
raise LinkerError from e
|
|
3152
|
-
|
|
3153
|
-
def add_data(self, data, kind, name):
|
|
3154
|
-
msg = "Adding in-memory data unsupported in the MVC linker"
|
|
3155
|
-
raise LinkerError(msg)
|
|
3156
|
-
|
|
3157
|
-
def add_file(self, path, kind):
|
|
3158
|
-
try:
|
|
3159
|
-
from cubinlinker import CubinLinkerError
|
|
3160
|
-
except ImportError as err:
|
|
3161
|
-
raise ImportError(_MVC_ERROR_MESSAGE) from err
|
|
3162
|
-
|
|
3163
|
-
try:
|
|
3164
|
-
data = cached_file_read(path, how="rb")
|
|
3165
|
-
except FileNotFoundError:
|
|
3166
|
-
raise LinkerError(f"{path} not found")
|
|
3167
|
-
|
|
3168
|
-
name = pathlib.Path(path).name
|
|
3169
|
-
if kind == FILE_EXTENSION_MAP["cubin"]:
|
|
3170
|
-
fn = self._linker.add_cubin
|
|
3171
|
-
elif kind == FILE_EXTENSION_MAP["fatbin"]:
|
|
3172
|
-
fn = self._linker.add_fatbin
|
|
3173
|
-
elif kind == FILE_EXTENSION_MAP["a"]:
|
|
3174
|
-
raise LinkerError(f"Don't know how to link {kind}")
|
|
3175
|
-
elif kind == FILE_EXTENSION_MAP["ptx"]:
|
|
3176
|
-
return self.add_ptx(data, name)
|
|
3177
|
-
else:
|
|
3178
|
-
raise LinkerError(f"Don't know how to link {kind}")
|
|
3179
|
-
|
|
3180
|
-
try:
|
|
3181
|
-
fn(data, name)
|
|
3182
|
-
except CubinLinkerError as e:
|
|
3183
|
-
raise LinkerError from e
|
|
3184
|
-
|
|
3185
|
-
def complete(self):
|
|
3186
|
-
try:
|
|
3187
|
-
from cubinlinker import CubinLinkerError
|
|
3188
|
-
except ImportError as err:
|
|
3189
|
-
raise ImportError(_MVC_ERROR_MESSAGE) from err
|
|
3190
|
-
|
|
3191
|
-
try:
|
|
3192
|
-
return self._linker.complete()
|
|
3193
|
-
except CubinLinkerError as e:
|
|
3194
|
-
raise LinkerError from e
|
|
3195
|
-
|
|
3196
|
-
|
|
3197
3067
|
class CtypesLinker(_LinkerBase):
|
|
3198
3068
|
"""
|
|
3199
3069
|
Links for current device if no CC given
|
|
@@ -3218,6 +3088,7 @@ class CtypesLinker(_LinkerBase):
|
|
|
3218
3088
|
if lineinfo:
|
|
3219
3089
|
options[enums.CU_JIT_GENERATE_LINE_INFO] = c_void_p(1)
|
|
3220
3090
|
|
|
3091
|
+
self.cc = cc
|
|
3221
3092
|
if cc is None:
|
|
3222
3093
|
# No option value is needed, but we need something as a placeholder
|
|
3223
3094
|
options[enums.CU_JIT_TARGET_FROM_CUCONTEXT] = 1
|
|
@@ -5,25 +5,118 @@ from collections import namedtuple
|
|
|
5
5
|
import itertools
|
|
6
6
|
import functools
|
|
7
7
|
import operator
|
|
8
|
-
import ctypes
|
|
9
8
|
|
|
10
|
-
import numpy as np
|
|
11
|
-
|
|
12
|
-
from numba import _helperlib
|
|
13
9
|
|
|
14
10
|
Extent = namedtuple("Extent", ["begin", "end"])
|
|
15
11
|
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
12
|
+
|
|
13
|
+
def attempt_nocopy_reshape(
|
|
14
|
+
nd, dims, strides, newnd, newdims, newstrides, itemsize, is_f_order
|
|
15
|
+
):
|
|
16
|
+
"""
|
|
17
|
+
Attempt to reshape an array without copying data.
|
|
18
|
+
|
|
19
|
+
This function should correctly handle all reshapes, including
|
|
20
|
+
axes of length 1. Zero strides should work but are untested.
|
|
21
|
+
|
|
22
|
+
If a copy is needed, returns 0
|
|
23
|
+
If no copy is needed, returns 1 and fills `newstrides`
|
|
24
|
+
with appropriate strides
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
olddims = []
|
|
28
|
+
oldstrides = []
|
|
29
|
+
oldnd = 0
|
|
30
|
+
|
|
31
|
+
# Remove axes with dimension 1 from the old array. They have no effect
|
|
32
|
+
# but would need special cases since their strides do not matter.
|
|
33
|
+
for oi in range(nd):
|
|
34
|
+
if dims[oi] != 1:
|
|
35
|
+
olddims.append(dims[oi])
|
|
36
|
+
oldstrides.append(strides[oi])
|
|
37
|
+
oldnd += 1
|
|
38
|
+
|
|
39
|
+
# Calculate total sizes
|
|
40
|
+
np_total = 1
|
|
41
|
+
for ni in range(newnd):
|
|
42
|
+
np_total *= newdims[ni]
|
|
43
|
+
|
|
44
|
+
op_total = 1
|
|
45
|
+
for oi in range(oldnd):
|
|
46
|
+
op_total *= olddims[oi]
|
|
47
|
+
|
|
48
|
+
if np_total != op_total:
|
|
49
|
+
# Different total sizes; no hope
|
|
50
|
+
return 0
|
|
51
|
+
|
|
52
|
+
if np_total == 0:
|
|
53
|
+
# Handle zero-sized arrays
|
|
54
|
+
# Just make the strides vaguely reasonable
|
|
55
|
+
# (they can have any value in theory).
|
|
56
|
+
for i in range(newnd):
|
|
57
|
+
newstrides[i] = itemsize
|
|
58
|
+
return 1
|
|
59
|
+
|
|
60
|
+
# oi to oj and ni to nj give the axis ranges currently worked with
|
|
61
|
+
oi = 0
|
|
62
|
+
oj = 1
|
|
63
|
+
ni = 0
|
|
64
|
+
nj = 1
|
|
65
|
+
|
|
66
|
+
while ni < newnd and oi < oldnd:
|
|
67
|
+
np = newdims[ni]
|
|
68
|
+
op = olddims[oi]
|
|
69
|
+
|
|
70
|
+
while np != op:
|
|
71
|
+
if np < op:
|
|
72
|
+
# Misses trailing 1s, these are handled later
|
|
73
|
+
np *= newdims[nj]
|
|
74
|
+
nj += 1
|
|
75
|
+
else:
|
|
76
|
+
op *= olddims[oj]
|
|
77
|
+
oj += 1
|
|
78
|
+
|
|
79
|
+
# Check whether the original axes can be combined
|
|
80
|
+
for ok in range(oi, oj - 1):
|
|
81
|
+
if is_f_order:
|
|
82
|
+
if oldstrides[ok + 1] != olddims[ok] * oldstrides[ok]:
|
|
83
|
+
# not contiguous enough
|
|
84
|
+
return 0
|
|
85
|
+
else:
|
|
86
|
+
# C order
|
|
87
|
+
if oldstrides[ok] != olddims[ok + 1] * oldstrides[ok + 1]:
|
|
88
|
+
# not contiguous enough
|
|
89
|
+
return 0
|
|
90
|
+
|
|
91
|
+
# Calculate new strides for all axes currently worked with
|
|
92
|
+
if is_f_order:
|
|
93
|
+
newstrides[ni] = oldstrides[oi]
|
|
94
|
+
for nk in range(ni + 1, nj):
|
|
95
|
+
newstrides[nk] = newstrides[nk - 1] * newdims[nk - 1]
|
|
96
|
+
else:
|
|
97
|
+
# C order
|
|
98
|
+
newstrides[nj - 1] = oldstrides[oj - 1]
|
|
99
|
+
for nk in range(nj - 1, ni, -1):
|
|
100
|
+
newstrides[nk - 1] = newstrides[nk] * newdims[nk]
|
|
101
|
+
|
|
102
|
+
ni = nj
|
|
103
|
+
nj += 1
|
|
104
|
+
oi = oj
|
|
105
|
+
oj += 1
|
|
106
|
+
|
|
107
|
+
# Set strides corresponding to trailing 1s of the new shape
|
|
108
|
+
if ni >= 1:
|
|
109
|
+
last_stride = newstrides[ni - 1]
|
|
110
|
+
else:
|
|
111
|
+
last_stride = itemsize
|
|
112
|
+
|
|
113
|
+
if is_f_order:
|
|
114
|
+
last_stride *= newdims[ni - 1]
|
|
115
|
+
|
|
116
|
+
for nk in range(ni, newnd):
|
|
117
|
+
newstrides[nk] = last_stride
|
|
118
|
+
|
|
119
|
+
return 1
|
|
27
120
|
|
|
28
121
|
|
|
29
122
|
class Dim(object):
|
|
@@ -333,18 +426,12 @@ class Array(object):
|
|
|
333
426
|
else:
|
|
334
427
|
raise AssertionError("unreachable")
|
|
335
428
|
else:
|
|
336
|
-
newstrides =
|
|
337
|
-
|
|
338
|
-
# need to keep these around in variables, not temporaries, so they
|
|
339
|
-
# don't get GC'ed before we call into the C code
|
|
340
|
-
olddims = np.array(self.shape, dtype=np.ctypeslib.c_intp)
|
|
341
|
-
oldstrides = np.array(self.strides, dtype=np.ctypeslib.c_intp)
|
|
342
|
-
newdims = np.array(newdims, dtype=np.ctypeslib.c_intp)
|
|
429
|
+
newstrides = [0] * newnd
|
|
343
430
|
|
|
344
431
|
if not attempt_nocopy_reshape(
|
|
345
432
|
oldnd,
|
|
346
|
-
|
|
347
|
-
|
|
433
|
+
self.shape,
|
|
434
|
+
self.strides,
|
|
348
435
|
newnd,
|
|
349
436
|
newdims,
|
|
350
437
|
newstrides,
|
|
@@ -16,11 +16,11 @@ import os
|
|
|
16
16
|
import sys
|
|
17
17
|
import ctypes
|
|
18
18
|
|
|
19
|
-
from numba.misc.findlib import find_lib
|
|
19
|
+
from numba.cuda.misc.findlib import find_lib
|
|
20
20
|
from numba.cuda.cuda_paths import get_cuda_paths
|
|
21
21
|
from numba.cuda.cudadrv.driver import locate_driver_and_loader, load_driver
|
|
22
22
|
from numba.cuda.cudadrv.error import CudaSupportError
|
|
23
|
-
from numba.core import config
|
|
23
|
+
from numba.cuda.core import config
|
|
24
24
|
|
|
25
25
|
|
|
26
26
|
if sys.platform == "win32":
|
|
@@ -54,9 +54,9 @@ def get_cudalib(lib, static=False):
|
|
|
54
54
|
"""
|
|
55
55
|
if lib in {"nvrtc", "nvvm"}:
|
|
56
56
|
return get_cuda_paths()[lib].info or _dllnamepattern % lib
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
57
|
+
|
|
58
|
+
dir_type = "static_cudalib_dir" if static else "cudalib_dir"
|
|
59
|
+
libdir = get_cuda_paths()[dir_type].info
|
|
60
60
|
|
|
61
61
|
candidates = find_lib(lib, libdir, static=static)
|
|
62
62
|
namepattern = _staticnamepattern if static else _dllnamepattern
|
|
@@ -10,7 +10,7 @@ from numba.cuda.cudadrv.error import (
|
|
|
10
10
|
NvrtcCompilationError,
|
|
11
11
|
NvrtcSupportError,
|
|
12
12
|
)
|
|
13
|
-
from numba import config
|
|
13
|
+
from numba.cuda import config
|
|
14
14
|
from numba.cuda.cuda_paths import get_cuda_paths
|
|
15
15
|
from numba.cuda.utils import _readenv
|
|
16
16
|
|
|
@@ -21,8 +21,8 @@ import warnings
|
|
|
21
21
|
|
|
22
22
|
NVRTC_EXTRA_SEARCH_PATHS = _readenv(
|
|
23
23
|
"NUMBA_CUDA_NVRTC_EXTRA_SEARCH_PATHS", str, ""
|
|
24
|
-
) or getattr(config, "
|
|
25
|
-
if not hasattr(config, "
|
|
24
|
+
) or getattr(config, "CUDA_NVRTC_EXTRA_SEARCH_PATHS", "")
|
|
25
|
+
if not hasattr(config, "CUDA_NVRTC_EXTRA_SEARCH_PATHS"):
|
|
26
26
|
config.CUDA_NVRTC_EXTRA_SEARCH_PATHS = NVRTC_EXTRA_SEARCH_PATHS
|
|
27
27
|
|
|
28
28
|
# Opaque handle for compilation unit
|
|
@@ -347,15 +347,26 @@ def compile(src, name, cc, ltoir=False):
|
|
|
347
347
|
arch = f"--gpu-architecture=compute_{major}{minor}"
|
|
348
348
|
|
|
349
349
|
cuda_include_dir = get_cuda_paths()["include_dir"].info
|
|
350
|
-
cuda_includes = [
|
|
351
|
-
f"{cuda_include_dir}",
|
|
352
|
-
f"{os.path.join(cuda_include_dir, 'cccl')}",
|
|
353
|
-
]
|
|
350
|
+
cuda_includes = [f"{cuda_include_dir}"]
|
|
354
351
|
|
|
355
352
|
cudadrv_path = os.path.dirname(os.path.abspath(__file__))
|
|
356
353
|
numba_cuda_path = os.path.dirname(cudadrv_path)
|
|
357
354
|
|
|
358
|
-
|
|
355
|
+
nvrtc_ver_major = version[0]
|
|
356
|
+
if nvrtc_ver_major == 12:
|
|
357
|
+
numba_include = f"{os.path.join(numba_cuda_path, 'include', '12')}"
|
|
358
|
+
# For CUDA 12 wheels, `cuda_include_dir` is `site-packages/nvidia/cuda_runtime/include`
|
|
359
|
+
# We need to find CCCL at `site-packages/nvidia/cuda_cccl/include`
|
|
360
|
+
# For CUDA 12 conda / system install, CCCL is just in the `include` directory
|
|
361
|
+
cuda_includes.append(
|
|
362
|
+
f"{os.path.join(cuda_include_dir, '..', '..', 'cuda_cccl', 'include')}"
|
|
363
|
+
)
|
|
364
|
+
elif nvrtc_ver_major == 13:
|
|
365
|
+
numba_include = f"{os.path.join(numba_cuda_path, 'include', '13')}"
|
|
366
|
+
# For CUDA 13 wheels, `cuda_include_dir` is `site-packages/nvidia/cu13/include`
|
|
367
|
+
# We need to find CCCL at `site-packages/nvidia/cu13/include/cccl`
|
|
368
|
+
# For CUDA 13 conda / system install, CCCL is in the `include/cccl` directory
|
|
369
|
+
cuda_includes.append(f"{os.path.join(cuda_include_dir, 'cccl')}")
|
|
359
370
|
|
|
360
371
|
if config.CUDA_NVRTC_EXTRA_SEARCH_PATHS:
|
|
361
372
|
extra_includes = config.CUDA_NVRTC_EXTRA_SEARCH_PATHS.split(":")
|
|
@@ -154,10 +154,7 @@ class NVVM(object):
|
|
|
154
154
|
inst.driver = open_cudalib("nvvm")
|
|
155
155
|
except OSError as e:
|
|
156
156
|
cls.__INSTANCE = None
|
|
157
|
-
errmsg = (
|
|
158
|
-
"libNVVM cannot be found. Do `conda install "
|
|
159
|
-
"cudatoolkit`:\n%s"
|
|
160
|
-
)
|
|
157
|
+
errmsg = "libNVVM cannot be found. Please install the cuda-toolkit conda package:\n%s"
|
|
161
158
|
raise NvvmSupportError(errmsg % e)
|
|
162
159
|
|
|
163
160
|
# Find & populate functions
|
|
@@ -8,7 +8,7 @@ The toolkit version can now be obtained from NVRTC, so we don't use a binding
|
|
|
8
8
|
to the runtime anymore. This file is provided to maintain the existing API.
|
|
9
9
|
"""
|
|
10
10
|
|
|
11
|
-
from numba import config
|
|
11
|
+
from numba.cuda import config
|
|
12
12
|
from numba.cuda.cudadrv.nvrtc import NVRTC
|
|
13
13
|
|
|
14
14
|
|
|
@@ -10,7 +10,7 @@ from llvmlite import ir
|
|
|
10
10
|
import llvmlite.binding as ll
|
|
11
11
|
|
|
12
12
|
from numba.core.imputils import Registry
|
|
13
|
-
from numba.core.typing.npydecl import parse_dtype
|
|
13
|
+
from numba.cuda.typing.npydecl import parse_dtype
|
|
14
14
|
from numba.core.datamodel import models
|
|
15
15
|
from numba.core import types
|
|
16
16
|
from numba.cuda import cgutils
|
|
@@ -25,6 +25,10 @@ registry = Registry()
|
|
|
25
25
|
lower = registry.lower
|
|
26
26
|
lower_attr = registry.lower_getattr
|
|
27
27
|
lower_constant = registry.lower_constant
|
|
28
|
+
lower_getattr_generic = registry.lower_getattr_generic
|
|
29
|
+
lower_setattr = registry.lower_setattr
|
|
30
|
+
lower_setattr_generic = registry.lower_setattr_generic
|
|
31
|
+
lower_cast = registry.lower_cast
|
|
28
32
|
|
|
29
33
|
|
|
30
34
|
def initialize_dim3(builder, prefix):
|
|
@@ -1,15 +1,98 @@
|
|
|
1
1
|
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
3
|
|
|
4
|
+
import abc
|
|
4
5
|
import os
|
|
6
|
+
from contextlib import contextmanager
|
|
5
7
|
|
|
6
8
|
from llvmlite import ir
|
|
7
|
-
from numba.core import types, config
|
|
9
|
+
from numba.core import types
|
|
10
|
+
from numba.cuda.core import config
|
|
8
11
|
from numba.cuda import cgutils
|
|
9
12
|
from numba.core.datamodel.models import ComplexModel, UnionModel, UniTupleModel
|
|
10
|
-
from numba.core.debuginfo import AbstractDIBuilder
|
|
11
13
|
from numba.cuda.types import GridGroup
|
|
12
14
|
|
|
15
|
+
|
|
16
|
+
@contextmanager
|
|
17
|
+
def suspend_emission(builder):
|
|
18
|
+
"""Suspends the emission of debug_metadata for the duration of the context
|
|
19
|
+
managed block."""
|
|
20
|
+
ref = builder.debug_metadata
|
|
21
|
+
builder.debug_metadata = None
|
|
22
|
+
try:
|
|
23
|
+
yield
|
|
24
|
+
finally:
|
|
25
|
+
builder.debug_metadata = ref
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class AbstractDIBuilder(metaclass=abc.ABCMeta):
|
|
29
|
+
@abc.abstractmethod
|
|
30
|
+
def mark_variable(
|
|
31
|
+
self,
|
|
32
|
+
builder,
|
|
33
|
+
allocavalue,
|
|
34
|
+
name,
|
|
35
|
+
lltype,
|
|
36
|
+
size,
|
|
37
|
+
line,
|
|
38
|
+
datamodel=None,
|
|
39
|
+
argidx=None,
|
|
40
|
+
):
|
|
41
|
+
"""Emit debug info for the variable."""
|
|
42
|
+
pass
|
|
43
|
+
|
|
44
|
+
@abc.abstractmethod
|
|
45
|
+
def mark_location(self, builder, line):
|
|
46
|
+
"""Emit source location information to the given IRBuilder."""
|
|
47
|
+
pass
|
|
48
|
+
|
|
49
|
+
@abc.abstractmethod
|
|
50
|
+
def mark_subprogram(self, function, qualname, argnames, argtypes, line):
|
|
51
|
+
"""Emit source location information for the given function."""
|
|
52
|
+
pass
|
|
53
|
+
|
|
54
|
+
@abc.abstractmethod
|
|
55
|
+
def initialize(self):
|
|
56
|
+
"""Initialize the debug info. An opportunity for the debuginfo to
|
|
57
|
+
prepare any necessary data structures.
|
|
58
|
+
"""
|
|
59
|
+
|
|
60
|
+
@abc.abstractmethod
|
|
61
|
+
def finalize(self):
|
|
62
|
+
"""Finalize the debuginfo by emitting all necessary metadata."""
|
|
63
|
+
pass
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class DummyDIBuilder(AbstractDIBuilder):
|
|
67
|
+
def __init__(self, module, filepath, cgctx, directives_only):
|
|
68
|
+
pass
|
|
69
|
+
|
|
70
|
+
def mark_variable(
|
|
71
|
+
self,
|
|
72
|
+
builder,
|
|
73
|
+
allocavalue,
|
|
74
|
+
name,
|
|
75
|
+
lltype,
|
|
76
|
+
size,
|
|
77
|
+
line,
|
|
78
|
+
datamodel=None,
|
|
79
|
+
argidx=None,
|
|
80
|
+
):
|
|
81
|
+
pass
|
|
82
|
+
|
|
83
|
+
def mark_location(self, builder, line):
|
|
84
|
+
pass
|
|
85
|
+
|
|
86
|
+
def mark_subprogram(self, function, qualname, argnames, argtypes, line):
|
|
87
|
+
pass
|
|
88
|
+
|
|
89
|
+
def initialize(self):
|
|
90
|
+
pass
|
|
91
|
+
|
|
92
|
+
def finalize(self):
|
|
93
|
+
pass
|
|
94
|
+
|
|
95
|
+
|
|
13
96
|
_BYTE_SIZE = 8
|
|
14
97
|
|
|
15
98
|
|
|
@@ -2,10 +2,10 @@
|
|
|
2
2
|
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
3
|
|
|
4
4
|
from warnings import warn
|
|
5
|
-
from numba.core import types, config
|
|
5
|
+
from numba.core import types
|
|
6
6
|
from numba.core.errors import DeprecationError, NumbaInvalidConfigWarning
|
|
7
7
|
from numba.cuda.compiler import declare_device_function
|
|
8
|
-
from numba.cuda.core import sigutils
|
|
8
|
+
from numba.cuda.core import sigutils, config
|
|
9
9
|
from numba.cuda.dispatcher import CUDADispatcher
|
|
10
10
|
from numba.cuda.simulator.kernel import FakeCUDAKernel
|
|
11
11
|
from numba.cuda.cudadrv.driver import _have_nvjitlink
|
|
@@ -199,7 +199,7 @@ def jit(
|
|
|
199
199
|
raise TypeError("CUDA kernel must have void return type.")
|
|
200
200
|
|
|
201
201
|
if device:
|
|
202
|
-
from numba.core import typeinfer
|
|
202
|
+
from numba.cuda.core import typeinfer
|
|
203
203
|
|
|
204
204
|
with typeinfer.register_dispatcher(disp):
|
|
205
205
|
disp.compile_device(argtypes, restype)
|
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
3
|
|
|
4
|
-
from numba.core.descriptors import TargetDescriptor
|
|
5
|
-
from numba.core.options import TargetOptions
|
|
4
|
+
from numba.cuda.core.options import TargetOptions
|
|
6
5
|
from .target import CUDATargetContext, CUDATypingContext
|
|
7
6
|
|
|
8
7
|
|
|
@@ -10,7 +9,7 @@ class CUDATargetOptions(TargetOptions):
|
|
|
10
9
|
pass
|
|
11
10
|
|
|
12
11
|
|
|
13
|
-
class CUDATarget(TargetDescriptor):
|
|
12
|
+
class CUDATarget:
|
|
14
13
|
def __init__(self, name):
|
|
15
14
|
self.options = CUDATargetOptions
|
|
16
15
|
# The typing and target contexts are initialized only when needed -
|
|
@@ -18,7 +17,7 @@ class CUDATarget(TargetDescriptor):
|
|
|
18
17
|
# systems that might not have them present.
|
|
19
18
|
self._typingctx = None
|
|
20
19
|
self._targetctx = None
|
|
21
|
-
|
|
20
|
+
self._target_name = name
|
|
22
21
|
|
|
23
22
|
@property
|
|
24
23
|
def typing_context(self):
|