numba-cuda 0.8.1__py3-none-any.whl → 0.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.py +17 -13
- numba_cuda/VERSION +1 -1
- numba_cuda/_version.py +4 -1
- numba_cuda/numba/cuda/__init__.py +6 -2
- numba_cuda/numba/cuda/api.py +129 -86
- numba_cuda/numba/cuda/api_util.py +3 -3
- numba_cuda/numba/cuda/args.py +12 -16
- numba_cuda/numba/cuda/cg.py +6 -6
- numba_cuda/numba/cuda/codegen.py +74 -43
- numba_cuda/numba/cuda/compiler.py +246 -114
- numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
- numba_cuda/numba/cuda/cuda_bf16.py +5155 -0
- numba_cuda/numba/cuda/cuda_paths.py +293 -99
- numba_cuda/numba/cuda/cudadecl.py +93 -79
- numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
- numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
- numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
- numba_cuda/numba/cuda/cudadrv/driver.py +460 -297
- numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
- numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
- numba_cuda/numba/cuda/cudadrv/error.py +6 -2
- numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +27 -3
- numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +146 -30
- numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
- numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
- numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
- numba_cuda/numba/cuda/cudaimpl.py +296 -275
- numba_cuda/numba/cuda/cudamath.py +1 -1
- numba_cuda/numba/cuda/debuginfo.py +99 -7
- numba_cuda/numba/cuda/decorators.py +87 -45
- numba_cuda/numba/cuda/descriptor.py +1 -1
- numba_cuda/numba/cuda/device_init.py +68 -18
- numba_cuda/numba/cuda/deviceufunc.py +143 -98
- numba_cuda/numba/cuda/dispatcher.py +300 -213
- numba_cuda/numba/cuda/errors.py +13 -10
- numba_cuda/numba/cuda/extending.py +55 -1
- numba_cuda/numba/cuda/include/11/cuda_bf16.h +3749 -0
- numba_cuda/numba/cuda/include/11/cuda_bf16.hpp +2683 -0
- numba_cuda/numba/cuda/{cuda_fp16.h → include/11/cuda_fp16.h} +1090 -927
- numba_cuda/numba/cuda/{cuda_fp16.hpp → include/11/cuda_fp16.hpp} +468 -319
- numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/initialize.py +5 -3
- numba_cuda/numba/cuda/intrinsic_wrapper.py +0 -39
- numba_cuda/numba/cuda/intrinsics.py +203 -28
- numba_cuda/numba/cuda/kernels/reduction.py +13 -13
- numba_cuda/numba/cuda/kernels/transpose.py +3 -6
- numba_cuda/numba/cuda/libdevice.py +317 -317
- numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
- numba_cuda/numba/cuda/locks.py +16 -0
- numba_cuda/numba/cuda/lowering.py +43 -0
- numba_cuda/numba/cuda/mathimpl.py +62 -57
- numba_cuda/numba/cuda/models.py +1 -5
- numba_cuda/numba/cuda/nvvmutils.py +103 -88
- numba_cuda/numba/cuda/printimpl.py +9 -5
- numba_cuda/numba/cuda/random.py +46 -36
- numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
- numba_cuda/numba/cuda/runtime/__init__.py +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
- numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
- numba_cuda/numba/cuda/runtime/nrt.py +48 -43
- numba_cuda/numba/cuda/simulator/__init__.py +22 -12
- numba_cuda/numba/cuda/simulator/api.py +38 -22
- numba_cuda/numba/cuda/simulator/compiler.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
- numba_cuda/numba/cuda/simulator/kernel.py +43 -34
- numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
- numba_cuda/numba/cuda/simulator/reduction.py +1 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
- numba_cuda/numba/cuda/simulator_init.py +2 -4
- numba_cuda/numba/cuda/stubs.py +134 -108
- numba_cuda/numba/cuda/target.py +92 -47
- numba_cuda/numba/cuda/testing.py +24 -19
- numba_cuda/numba/cuda/tests/__init__.py +14 -12
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +10 -7
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +257 -0
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +59 -23
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +77 -28
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +24 -7
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +21 -12
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +59 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +81 -30
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
- numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
- numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
- numba_cuda/numba/cuda/types.py +5 -2
- numba_cuda/numba/cuda/ufuncs.py +382 -362
- numba_cuda/numba/cuda/utils.py +2 -2
- numba_cuda/numba/cuda/vector_types.py +5 -3
- numba_cuda/numba/cuda/vectorizers.py +38 -33
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/METADATA +1 -1
- numba_cuda-0.10.0.dist-info/RECORD +263 -0
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/WHEEL +1 -1
- numba_cuda-0.8.1.dist-info/RECORD +0 -251
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/top_level.txt +0 -0
@@ -20,7 +20,7 @@ attempt_nocopy_reshape = ctypes.CFUNCTYPE(
|
|
20
20
|
np.ctypeslib.ndpointer(np.ctypeslib.c_intp, ndim=1), # newstrides
|
21
21
|
ctypes.c_long, # itemsize
|
22
22
|
ctypes.c_int, # is_f_order
|
23
|
-
)(_helperlib.c_helpers[
|
23
|
+
)(_helperlib.c_helpers["attempt_nocopy_reshape"])
|
24
24
|
|
25
25
|
|
26
26
|
class Dim(object):
|
@@ -37,7 +37,8 @@ class Dim(object):
|
|
37
37
|
stride:
|
38
38
|
item stride
|
39
39
|
"""
|
40
|
-
|
40
|
+
|
41
|
+
__slots__ = "start", "stop", "size", "stride", "single"
|
41
42
|
|
42
43
|
def __init__(self, start, stop, size, stride, single):
|
43
44
|
self.start = start
|
@@ -58,15 +59,11 @@ class Dim(object):
|
|
58
59
|
else:
|
59
60
|
size = _compute_size(start, stop, stride)
|
60
61
|
ret = Dim(
|
61
|
-
start=start,
|
62
|
-
stop=stop,
|
63
|
-
size=size,
|
64
|
-
stride=stride,
|
65
|
-
single=False
|
62
|
+
start=start, stop=stop, size=size, stride=stride, single=False
|
66
63
|
)
|
67
64
|
return ret
|
68
65
|
else:
|
69
|
-
sliced = self[item:item + 1] if item != -1 else self[-1:]
|
66
|
+
sliced = self[item : item + 1] if item != -1 else self[-1:]
|
70
67
|
if sliced.size != 1:
|
71
68
|
raise IndexError
|
72
69
|
return Dim(
|
@@ -85,8 +82,13 @@ class Dim(object):
|
|
85
82
|
return strfmt % (self.start, self.stop, self.size, self.stride)
|
86
83
|
|
87
84
|
def normalize(self, base):
|
88
|
-
return Dim(
|
89
|
-
|
85
|
+
return Dim(
|
86
|
+
start=self.start - base,
|
87
|
+
stop=self.stop - base,
|
88
|
+
size=self.size,
|
89
|
+
stride=self.stride,
|
90
|
+
single=self.single,
|
91
|
+
)
|
90
92
|
|
91
93
|
def copy(self, start=None, stop=None, size=None, stride=None, single=None):
|
92
94
|
if start is None:
|
@@ -143,14 +145,16 @@ class Array(object):
|
|
143
145
|
extent: (start, end)
|
144
146
|
start and end offset containing the memory region
|
145
147
|
"""
|
148
|
+
|
146
149
|
is_array = True
|
147
150
|
|
148
151
|
@classmethod
|
149
152
|
def from_desc(cls, offset, shape, strides, itemsize):
|
150
153
|
dims = []
|
151
154
|
for ashape, astride in zip(shape, strides):
|
152
|
-
dim = Dim(
|
153
|
-
|
155
|
+
dim = Dim(
|
156
|
+
offset, offset + ashape * astride, ashape, astride, single=False
|
157
|
+
)
|
154
158
|
dims.append(dim)
|
155
159
|
offset = 0 # offset only applies to first dimension
|
156
160
|
return cls(dims, itemsize)
|
@@ -173,23 +177,23 @@ class Array(object):
|
|
173
177
|
|
174
178
|
# Records have no dims, and we can treat them as contiguous
|
175
179
|
if not self.dims:
|
176
|
-
return {
|
180
|
+
return {"C_CONTIGUOUS": True, "F_CONTIGUOUS": True}
|
177
181
|
|
178
182
|
# If this is a broadcast array then it is not contiguous
|
179
183
|
if any([dim.stride == 0 for dim in self.dims]):
|
180
|
-
return {
|
184
|
+
return {"C_CONTIGUOUS": False, "F_CONTIGUOUS": False}
|
181
185
|
|
182
|
-
flags = {
|
186
|
+
flags = {"C_CONTIGUOUS": True, "F_CONTIGUOUS": True}
|
183
187
|
|
184
188
|
# Check C contiguity
|
185
189
|
sd = self.itemsize
|
186
190
|
for dim in reversed(self.dims):
|
187
191
|
if dim.size == 0:
|
188
192
|
# Contiguous by definition
|
189
|
-
return {
|
193
|
+
return {"C_CONTIGUOUS": True, "F_CONTIGUOUS": True}
|
190
194
|
if dim.size != 1:
|
191
195
|
if dim.stride != sd:
|
192
|
-
flags[
|
196
|
+
flags["C_CONTIGUOUS"] = False
|
193
197
|
sd *= dim.size
|
194
198
|
|
195
199
|
# Check F contiguity
|
@@ -197,7 +201,7 @@ class Array(object):
|
|
197
201
|
for dim in self.dims:
|
198
202
|
if dim.size != 1:
|
199
203
|
if dim.stride != sd:
|
200
|
-
flags[
|
204
|
+
flags["F_CONTIGUOUS"] = False
|
201
205
|
return flags
|
202
206
|
sd *= dim.size
|
203
207
|
|
@@ -208,11 +212,11 @@ class Array(object):
|
|
208
212
|
lastidx = [s - 1 for s in self.shape]
|
209
213
|
start = compute_index(firstidx, self.dims)
|
210
214
|
stop = compute_index(lastidx, self.dims) + self.itemsize
|
211
|
-
stop = max(stop, start)
|
215
|
+
stop = max(stop, start) # ensure positive extent
|
212
216
|
return Extent(start, stop)
|
213
217
|
|
214
218
|
def __repr__(self):
|
215
|
-
return
|
219
|
+
return "<Array dims=%s itemsize=%s>" % (self.dims, self.itemsize)
|
216
220
|
|
217
221
|
def __getitem__(self, item):
|
218
222
|
if not isinstance(item, tuple):
|
@@ -240,15 +244,14 @@ class Array(object):
|
|
240
244
|
|
241
245
|
@property
|
242
246
|
def is_c_contig(self):
|
243
|
-
return self.flags[
|
247
|
+
return self.flags["C_CONTIGUOUS"]
|
244
248
|
|
245
249
|
@property
|
246
250
|
def is_f_contig(self):
|
247
|
-
return self.flags[
|
251
|
+
return self.flags["F_CONTIGUOUS"]
|
248
252
|
|
249
253
|
def iter_contiguous_extent(self):
|
250
|
-
"""
|
251
|
-
"""
|
254
|
+
"""Generates extents"""
|
252
255
|
if self.is_c_contig or self.is_f_contig:
|
253
256
|
yield self.extent
|
254
257
|
else:
|
@@ -279,11 +282,11 @@ class Array(object):
|
|
279
282
|
if newdims == self.shape:
|
280
283
|
return self, None
|
281
284
|
|
282
|
-
order = kws.pop(
|
285
|
+
order = kws.pop("order", "C")
|
283
286
|
if kws:
|
284
|
-
raise TypeError(
|
285
|
-
if order not in
|
286
|
-
raise ValueError(
|
287
|
+
raise TypeError("unknown keyword arguments %s" % kws.keys())
|
288
|
+
if order not in "CFA":
|
289
|
+
raise ValueError("order not C|F|A")
|
287
290
|
|
288
291
|
# check for exactly one instance of -1 in newdims
|
289
292
|
# https://github.com/numpy/numpy/blob/623bc1fae1d47df24e7f1e29321d0c0ba2771ce0/numpy/core/src/multiarray/shape.c#L470-L515 # noqa: E501
|
@@ -301,25 +304,28 @@ class Array(object):
|
|
301
304
|
# compute the missing dimension
|
302
305
|
if unknownidx >= 0:
|
303
306
|
if knownsize == 0 or self.size % knownsize != 0:
|
304
|
-
raise ValueError(
|
305
|
-
|
307
|
+
raise ValueError(
|
308
|
+
"cannot infer valid shape for unknown dimension"
|
309
|
+
)
|
306
310
|
else:
|
307
|
-
newdims =
|
308
|
-
|
309
|
-
+
|
311
|
+
newdims = (
|
312
|
+
newdims[0:unknownidx]
|
313
|
+
+ (self.size // knownsize,)
|
314
|
+
+ newdims[unknownidx + 1 :]
|
315
|
+
)
|
310
316
|
|
311
317
|
newsize = functools.reduce(operator.mul, newdims, 1)
|
312
318
|
|
313
|
-
if order ==
|
314
|
-
order =
|
319
|
+
if order == "A":
|
320
|
+
order = "F" if self.is_f_contig else "C"
|
315
321
|
|
316
322
|
if newsize != self.size:
|
317
323
|
raise ValueError("reshape changes the size of the array")
|
318
324
|
|
319
325
|
if self.is_c_contig or self.is_f_contig:
|
320
|
-
if order ==
|
326
|
+
if order == "C":
|
321
327
|
newstrides = list(iter_strides_c_contig(self, newdims))
|
322
|
-
elif order ==
|
328
|
+
elif order == "F":
|
323
329
|
newstrides = list(iter_strides_f_contig(self, newdims))
|
324
330
|
else:
|
325
331
|
raise AssertionError("unreachable")
|
@@ -340,12 +346,16 @@ class Array(object):
|
|
340
346
|
newdims,
|
341
347
|
newstrides,
|
342
348
|
self.itemsize,
|
343
|
-
order ==
|
349
|
+
order == "F",
|
344
350
|
):
|
345
|
-
raise NotImplementedError(
|
351
|
+
raise NotImplementedError("reshape would require copy")
|
346
352
|
|
347
|
-
ret = self.from_desc(
|
348
|
-
|
353
|
+
ret = self.from_desc(
|
354
|
+
self.extent.begin,
|
355
|
+
shape=newdims,
|
356
|
+
strides=newstrides,
|
357
|
+
itemsize=self.itemsize,
|
358
|
+
)
|
349
359
|
|
350
360
|
return ret, list(self.iter_contiguous_extent())
|
351
361
|
|
@@ -377,16 +387,21 @@ class Array(object):
|
|
377
387
|
)
|
378
388
|
return newarr, list(self.iter_contiguous_extent())
|
379
389
|
|
380
|
-
def ravel(self, order=
|
381
|
-
if order not in
|
382
|
-
raise ValueError(
|
390
|
+
def ravel(self, order="C"):
|
391
|
+
if order not in "CFA":
|
392
|
+
raise ValueError("order not C|F|A")
|
383
393
|
|
384
|
-
if (
|
385
|
-
|
394
|
+
if (
|
395
|
+
order in "CA"
|
396
|
+
and self.is_c_contig
|
397
|
+
or order in "FA"
|
398
|
+
and self.is_f_contig
|
399
|
+
):
|
386
400
|
newshape = (self.size,)
|
387
401
|
newstrides = (self.itemsize,)
|
388
|
-
arr = self.from_desc(
|
389
|
-
|
402
|
+
arr = self.from_desc(
|
403
|
+
self.extent.begin, newshape, newstrides, self.itemsize
|
404
|
+
)
|
390
405
|
return arr, list(self.iter_contiguous_extent())
|
391
406
|
|
392
407
|
else:
|
@@ -394,8 +409,7 @@ class Array(object):
|
|
394
409
|
|
395
410
|
|
396
411
|
def iter_strides_f_contig(arr, shape=None):
|
397
|
-
"""yields the f-contiguous strides
|
398
|
-
"""
|
412
|
+
"""yields the f-contiguous strides"""
|
399
413
|
shape = arr.shape if shape is None else shape
|
400
414
|
itemsize = arr.itemsize
|
401
415
|
yield itemsize
|
@@ -406,8 +420,7 @@ def iter_strides_f_contig(arr, shape=None):
|
|
406
420
|
|
407
421
|
|
408
422
|
def iter_strides_c_contig(arr, shape=None):
|
409
|
-
"""yields the c-contiguous strides
|
410
|
-
"""
|
423
|
+
"""yields the c-contiguous strides"""
|
411
424
|
shape = arr.shape if shape is None else shape
|
412
425
|
itemsize = arr.itemsize
|
413
426
|
|
@@ -438,8 +451,7 @@ def is_element_indexing(item, ndim):
|
|
438
451
|
|
439
452
|
|
440
453
|
def _compute_size(start, stop, step):
|
441
|
-
"""Algorithm adapted from cpython rangeobject.c
|
442
|
-
"""
|
454
|
+
"""Algorithm adapted from cpython rangeobject.c"""
|
443
455
|
if step > 0:
|
444
456
|
lo = start
|
445
457
|
hi = stop
|
@@ -12,7 +12,7 @@ class CudaSupportError(ImportError):
|
|
12
12
|
|
13
13
|
class NvvmError(Exception):
|
14
14
|
def __str__(self):
|
15
|
-
return
|
15
|
+
return "\n".join(map(str, self.args))
|
16
16
|
|
17
17
|
|
18
18
|
class NvvmSupportError(ImportError):
|
@@ -25,12 +25,16 @@ class NvvmWarning(Warning):
|
|
25
25
|
|
26
26
|
class NvrtcError(Exception):
|
27
27
|
def __str__(self):
|
28
|
-
return
|
28
|
+
return "\n".join(map(str, self.args))
|
29
29
|
|
30
30
|
|
31
31
|
class NvrtcCompilationError(NvrtcError):
|
32
32
|
pass
|
33
33
|
|
34
34
|
|
35
|
+
class NvrtcBuiltinOperationFailure(NvrtcError):
|
36
|
+
pass
|
37
|
+
|
38
|
+
|
35
39
|
class NvrtcSupportError(ImportError):
|
36
40
|
pass
|
@@ -21,25 +21,25 @@ from numba.cuda.cudadrv.error import CudaSupportError
|
|
21
21
|
from numba.core import config
|
22
22
|
|
23
23
|
|
24
|
-
if sys.platform ==
|
25
|
-
_dllnamepattern =
|
26
|
-
_staticnamepattern =
|
27
|
-
elif sys.platform ==
|
28
|
-
_dllnamepattern =
|
29
|
-
_staticnamepattern =
|
24
|
+
if sys.platform == "win32":
|
25
|
+
_dllnamepattern = "%s.dll"
|
26
|
+
_staticnamepattern = "%s.lib"
|
27
|
+
elif sys.platform == "darwin":
|
28
|
+
_dllnamepattern = "lib%s.dylib"
|
29
|
+
_staticnamepattern = "lib%s.a"
|
30
30
|
else:
|
31
|
-
_dllnamepattern =
|
32
|
-
_staticnamepattern =
|
31
|
+
_dllnamepattern = "lib%s.so"
|
32
|
+
_staticnamepattern = "lib%s.a"
|
33
33
|
|
34
34
|
|
35
35
|
def get_libdevice():
|
36
36
|
d = get_cuda_paths()
|
37
|
-
paths = d[
|
37
|
+
paths = d["libdevice"].info
|
38
38
|
return paths
|
39
39
|
|
40
40
|
|
41
41
|
def open_libdevice():
|
42
|
-
with open(get_libdevice(),
|
42
|
+
with open(get_libdevice(), "rb") as bcfile:
|
43
43
|
return bcfile.read()
|
44
44
|
|
45
45
|
|
@@ -50,10 +50,10 @@ def get_cudalib(lib, static=False):
|
|
50
50
|
'libnvvm.so' for 'nvvm') so that we may attempt to load it using the system
|
51
51
|
loader's search mechanism.
|
52
52
|
"""
|
53
|
-
if lib
|
54
|
-
return get_cuda_paths()[
|
53
|
+
if lib in {"nvrtc", "nvvm"}:
|
54
|
+
return get_cuda_paths()[lib].info or _dllnamepattern % lib
|
55
55
|
else:
|
56
|
-
dir_type =
|
56
|
+
dir_type = "static_cudalib_dir" if static else "cudalib_dir"
|
57
57
|
libdir = get_cuda_paths()[dir_type].info
|
58
58
|
|
59
59
|
candidates = find_lib(lib, libdir, static=static)
|
@@ -68,7 +68,7 @@ def get_cuda_include_dir():
|
|
68
68
|
configuration.
|
69
69
|
"""
|
70
70
|
|
71
|
-
return get_cuda_paths()[
|
71
|
+
return get_cuda_paths()["include_dir"].info
|
72
72
|
|
73
73
|
|
74
74
|
def check_cuda_include_dir(path):
|
@@ -86,39 +86,40 @@ def open_cudalib(lib):
|
|
86
86
|
|
87
87
|
def check_static_lib(path):
|
88
88
|
if not os.path.isfile(path):
|
89
|
-
raise FileNotFoundError(f
|
89
|
+
raise FileNotFoundError(f"{path} not found")
|
90
90
|
|
91
91
|
|
92
92
|
def _get_source_variable(lib, static=False):
|
93
|
-
if lib ==
|
94
|
-
return get_cuda_paths()[
|
95
|
-
elif lib ==
|
96
|
-
return get_cuda_paths()[
|
97
|
-
elif lib ==
|
98
|
-
return get_cuda_paths()[
|
93
|
+
if lib == "nvvm":
|
94
|
+
return get_cuda_paths()["nvvm"].by
|
95
|
+
elif lib == "nvrtc":
|
96
|
+
return get_cuda_paths()["nvrtc"].by
|
97
|
+
elif lib == "libdevice":
|
98
|
+
return get_cuda_paths()["libdevice"].by
|
99
|
+
elif lib == "include_dir":
|
100
|
+
return get_cuda_paths()["include_dir"].by
|
99
101
|
else:
|
100
|
-
dir_type =
|
102
|
+
dir_type = "static_cudalib_dir" if static else "cudalib_dir"
|
101
103
|
return get_cuda_paths()[dir_type].by
|
102
104
|
|
103
105
|
|
104
106
|
def test():
|
105
|
-
"""Test library lookup. Path info is printed to stdout.
|
106
|
-
"""
|
107
|
+
"""Test library lookup. Path info is printed to stdout."""
|
107
108
|
failed = False
|
108
109
|
|
109
110
|
# Check for the driver
|
110
111
|
try:
|
111
112
|
dlloader, candidates = locate_driver_and_loader()
|
112
|
-
print(
|
113
|
+
print("Finding driver from candidates:")
|
113
114
|
for location in candidates:
|
114
|
-
print(f
|
115
|
-
print(f
|
116
|
-
print(
|
115
|
+
print(f"\t{location}")
|
116
|
+
print(f"Using loader {dlloader}")
|
117
|
+
print("\tTrying to load driver", end="...")
|
117
118
|
dll, path = load_driver(dlloader, candidates)
|
118
|
-
print(
|
119
|
-
print(f
|
119
|
+
print("\tok")
|
120
|
+
print(f"\t\tLoaded from {path}")
|
120
121
|
except CudaSupportError as e:
|
121
|
-
print(f
|
122
|
+
print(f"\tERROR: failed to open driver: {e}")
|
122
123
|
failed = True
|
123
124
|
|
124
125
|
# Find the absolute location of the driver on Linux. Various driver-related
|
@@ -127,9 +128,9 @@ def test():
|
|
127
128
|
# Providing the absolute location of the driver indicates its version
|
128
129
|
# number in the soname (e.g. "libcuda.so.530.30.02"), which can be used to
|
129
130
|
# look up whether the driver was intended for "native" Linux.
|
130
|
-
if sys.platform ==
|
131
|
+
if sys.platform == "linux" and not failed:
|
131
132
|
pid = os.getpid()
|
132
|
-
mapsfile = os.path.join(os.path.sep,
|
133
|
+
mapsfile = os.path.join(os.path.sep, "proc", f"{pid}", "maps")
|
133
134
|
try:
|
134
135
|
with open(mapsfile) as f:
|
135
136
|
maps = f.read()
|
@@ -140,58 +141,61 @@ def test():
|
|
140
141
|
# It's helpful to report that this went wrong to the user, but we
|
141
142
|
# don't set failed to True because this doesn't have any connection
|
142
143
|
# to actual CUDA functionality.
|
143
|
-
print(
|
144
|
-
|
144
|
+
print(
|
145
|
+
f"\tERROR: Could not open {mapsfile} to determine absolute "
|
146
|
+
"path to libcuda.so"
|
147
|
+
)
|
145
148
|
else:
|
146
149
|
# In this case we could read the maps, so we can report the
|
147
150
|
# relevant ones to the user
|
148
|
-
locations = set(s for s in maps.split() if
|
149
|
-
print(
|
151
|
+
locations = set(s for s in maps.split() if "libcuda.so" in s)
|
152
|
+
print("\tMapped libcuda.so paths:")
|
150
153
|
for location in locations:
|
151
|
-
print(f
|
154
|
+
print(f"\t\t{location}")
|
152
155
|
|
153
156
|
# Checks for dynamic libraries
|
154
|
-
libs =
|
157
|
+
libs = "nvvm nvrtc cudart".split()
|
155
158
|
for lib in libs:
|
156
159
|
path = get_cudalib(lib)
|
157
|
-
print(
|
158
|
-
print(
|
160
|
+
print("Finding {} from {}".format(lib, _get_source_variable(lib)))
|
161
|
+
print("\tLocated at", path)
|
159
162
|
|
160
163
|
try:
|
161
|
-
print(
|
164
|
+
print("\tTrying to open library", end="...")
|
162
165
|
open_cudalib(lib)
|
163
|
-
print(
|
166
|
+
print("\tok")
|
164
167
|
except OSError as e:
|
165
|
-
print(
|
168
|
+
print("\tERROR: failed to open %s:\n%s" % (lib, e))
|
166
169
|
failed = True
|
167
170
|
|
168
171
|
# Check for cudadevrt (the only static library)
|
169
|
-
lib =
|
172
|
+
lib = "cudadevrt"
|
170
173
|
path = get_cudalib(lib, static=True)
|
171
|
-
print(
|
172
|
-
|
173
|
-
|
174
|
+
print(
|
175
|
+
"Finding {} from {}".format(lib, _get_source_variable(lib, static=True))
|
176
|
+
)
|
177
|
+
print("\tLocated at", path)
|
174
178
|
|
175
179
|
try:
|
176
|
-
print(
|
180
|
+
print("\tChecking library", end="...")
|
177
181
|
check_static_lib(path)
|
178
|
-
print(
|
182
|
+
print("\tok")
|
179
183
|
except FileNotFoundError as e:
|
180
|
-
print(
|
184
|
+
print("\tERROR: failed to find %s:\n%s" % (lib, e))
|
181
185
|
failed = True
|
182
186
|
|
183
187
|
# Check for libdevice
|
184
|
-
where = _get_source_variable(
|
185
|
-
print(f
|
188
|
+
where = _get_source_variable("libdevice")
|
189
|
+
print(f"Finding libdevice from {where}")
|
186
190
|
path = get_libdevice()
|
187
|
-
print(
|
191
|
+
print("\tLocated at", path)
|
188
192
|
|
189
193
|
try:
|
190
|
-
print(
|
194
|
+
print("\tChecking library", end="...")
|
191
195
|
check_static_lib(path)
|
192
|
-
print(
|
196
|
+
print("\tok")
|
193
197
|
except FileNotFoundError as e:
|
194
|
-
print(
|
198
|
+
print("\tERROR: failed to find %s:\n%s" % (lib, e))
|
195
199
|
failed = True
|
196
200
|
|
197
201
|
# Check cuda include paths
|
@@ -199,16 +203,16 @@ def test():
|
|
199
203
|
print("Include directory configuration variable:")
|
200
204
|
print(f"\tCUDA_INCLUDE_PATH={config.CUDA_INCLUDE_PATH}")
|
201
205
|
|
202
|
-
where = _get_source_variable(
|
203
|
-
print(f
|
206
|
+
where = _get_source_variable("include_dir")
|
207
|
+
print(f"Finding include directory from {where}")
|
204
208
|
include = get_cuda_include_dir()
|
205
|
-
print(
|
209
|
+
print("\tLocated at", include)
|
206
210
|
try:
|
207
|
-
print(
|
211
|
+
print("\tChecking include directory", end="...")
|
208
212
|
check_cuda_include_dir(include)
|
209
|
-
print(
|
213
|
+
print("\tok")
|
210
214
|
except FileNotFoundError as e:
|
211
|
-
print(
|
215
|
+
print("\tERROR: failed to find cuda include directory:\n%s" % e)
|
212
216
|
failed = True
|
213
217
|
|
214
218
|
return not failed
|
@@ -1,22 +1,46 @@
|
|
1
|
+
import io
|
1
2
|
from .mappings import FILE_EXTENSION_MAP
|
2
3
|
|
3
4
|
|
4
5
|
class LinkableCode:
|
5
6
|
"""An object that holds code to be linked from memory.
|
6
7
|
|
7
|
-
:param data: A buffer containing the data to link.
|
8
|
+
:param data: A buffer, StringIO or BytesIO containing the data to link.
|
9
|
+
If a file object is passed, the content in the object is
|
10
|
+
read when `data` property is accessed.
|
8
11
|
:param name: The name of the file to be referenced in any compilation or
|
9
12
|
linking errors that may be produced.
|
13
|
+
:param setup_callback: A function called prior to the launch of a kernel
|
14
|
+
contained within a module that has this code object
|
15
|
+
linked into it.
|
16
|
+
:param teardown_callback: A function called just prior to the unloading of
|
17
|
+
a module that has this code object linked into
|
18
|
+
it.
|
10
19
|
"""
|
11
20
|
|
12
|
-
def __init__(
|
13
|
-
self
|
21
|
+
def __init__(
|
22
|
+
self, data, name=None, setup_callback=None, teardown_callback=None
|
23
|
+
):
|
24
|
+
if setup_callback and not callable(setup_callback):
|
25
|
+
raise TypeError("setup_callback must be callable")
|
26
|
+
if teardown_callback and not callable(teardown_callback):
|
27
|
+
raise TypeError("teardown_callback must be callable")
|
28
|
+
|
14
29
|
self._name = name
|
30
|
+
self._data = data
|
31
|
+
self.setup_callback = setup_callback
|
32
|
+
self.teardown_callback = teardown_callback
|
15
33
|
|
16
34
|
@property
|
17
35
|
def name(self):
|
18
36
|
return self._name or self.default_name
|
19
37
|
|
38
|
+
@property
|
39
|
+
def data(self):
|
40
|
+
if isinstance(self._data, (io.StringIO, io.BytesIO)):
|
41
|
+
return self._data.getvalue()
|
42
|
+
return self._data
|
43
|
+
|
20
44
|
|
21
45
|
class PTXSource(LinkableCode):
|
22
46
|
"""PTX source code in memory."""
|
@@ -1,24 +1,26 @@
|
|
1
1
|
from numba import config
|
2
2
|
from . import enums
|
3
|
+
|
3
4
|
if config.CUDA_USE_NVIDIA_BINDING:
|
4
5
|
from cuda import cuda
|
6
|
+
|
5
7
|
jitty = cuda.CUjitInputType
|
6
8
|
FILE_EXTENSION_MAP = {
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
9
|
+
"o": jitty.CU_JIT_INPUT_OBJECT,
|
10
|
+
"ptx": jitty.CU_JIT_INPUT_PTX,
|
11
|
+
"a": jitty.CU_JIT_INPUT_LIBRARY,
|
12
|
+
"lib": jitty.CU_JIT_INPUT_LIBRARY,
|
13
|
+
"cubin": jitty.CU_JIT_INPUT_CUBIN,
|
14
|
+
"fatbin": jitty.CU_JIT_INPUT_FATBINARY,
|
15
|
+
"ltoir": jitty.CU_JIT_INPUT_NVVM,
|
14
16
|
}
|
15
17
|
else:
|
16
18
|
FILE_EXTENSION_MAP = {
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
19
|
+
"o": enums.CU_JIT_INPUT_OBJECT,
|
20
|
+
"ptx": enums.CU_JIT_INPUT_PTX,
|
21
|
+
"a": enums.CU_JIT_INPUT_LIBRARY,
|
22
|
+
"lib": enums.CU_JIT_INPUT_LIBRARY,
|
23
|
+
"cubin": enums.CU_JIT_INPUT_CUBIN,
|
24
|
+
"fatbin": enums.CU_JIT_INPUT_FATBINARY,
|
25
|
+
"ltoir": enums.CU_JIT_INPUT_NVVM,
|
24
26
|
}
|