numba-cuda 0.19.1__py3-none-any.whl → 0.20.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of numba-cuda might be problematic. Click here for more details.
- numba_cuda/VERSION +1 -1
- numba_cuda/numba/cuda/__init__.py +1 -1
- numba_cuda/numba/cuda/_internal/cuda_bf16.py +12706 -1470
- numba_cuda/numba/cuda/_internal/cuda_fp16.py +2653 -8769
- numba_cuda/numba/cuda/api.py +6 -1
- numba_cuda/numba/cuda/bf16.py +285 -2
- numba_cuda/numba/cuda/cgutils.py +2 -2
- numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
- numba_cuda/numba/cuda/codegen.py +1 -1
- numba_cuda/numba/cuda/compiler.py +373 -30
- numba_cuda/numba/cuda/core/analysis.py +319 -0
- numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
- numba_cuda/numba/cuda/core/annotations/type_annotations.py +304 -0
- numba_cuda/numba/cuda/core/base.py +1289 -0
- numba_cuda/numba/cuda/core/bytecode.py +727 -0
- numba_cuda/numba/cuda/core/caching.py +2 -2
- numba_cuda/numba/cuda/core/compiler.py +6 -14
- numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
- numba_cuda/numba/cuda/core/config.py +747 -0
- numba_cuda/numba/cuda/core/consts.py +124 -0
- numba_cuda/numba/cuda/core/cpu.py +370 -0
- numba_cuda/numba/cuda/core/environment.py +68 -0
- numba_cuda/numba/cuda/core/event.py +511 -0
- numba_cuda/numba/cuda/core/funcdesc.py +330 -0
- numba_cuda/numba/cuda/core/inline_closurecall.py +1889 -0
- numba_cuda/numba/cuda/core/interpreter.py +48 -26
- numba_cuda/numba/cuda/core/ir_utils.py +15 -26
- numba_cuda/numba/cuda/core/options.py +262 -0
- numba_cuda/numba/cuda/core/postproc.py +249 -0
- numba_cuda/numba/cuda/core/pythonapi.py +1868 -0
- numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
- numba_cuda/numba/cuda/core/rewrites/ir_print.py +90 -0
- numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
- numba_cuda/numba/cuda/core/rewrites/static_binop.py +40 -0
- numba_cuda/numba/cuda/core/rewrites/static_getitem.py +187 -0
- numba_cuda/numba/cuda/core/rewrites/static_raise.py +98 -0
- numba_cuda/numba/cuda/core/ssa.py +496 -0
- numba_cuda/numba/cuda/core/targetconfig.py +329 -0
- numba_cuda/numba/cuda/core/tracing.py +231 -0
- numba_cuda/numba/cuda/core/transforms.py +952 -0
- numba_cuda/numba/cuda/core/typed_passes.py +738 -7
- numba_cuda/numba/cuda/core/typeinfer.py +1948 -0
- numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/eh.py +66 -0
- numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
- numba_cuda/numba/cuda/core/untyped_passes.py +1983 -0
- numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
- numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
- numba_cuda/numba/cuda/cpython/numbers.py +1474 -0
- numba_cuda/numba/cuda/cuda_paths.py +422 -246
- numba_cuda/numba/cuda/cudadecl.py +1 -1
- numba_cuda/numba/cuda/cudadrv/__init__.py +1 -1
- numba_cuda/numba/cuda/cudadrv/devicearray.py +2 -1
- numba_cuda/numba/cuda/cudadrv/driver.py +11 -140
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +111 -24
- numba_cuda/numba/cuda/cudadrv/libs.py +5 -5
- numba_cuda/numba/cuda/cudadrv/mappings.py +1 -1
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +19 -8
- numba_cuda/numba/cuda/cudadrv/nvvm.py +1 -4
- numba_cuda/numba/cuda/cudadrv/runtime.py +1 -1
- numba_cuda/numba/cuda/cudaimpl.py +5 -1
- numba_cuda/numba/cuda/debuginfo.py +85 -2
- numba_cuda/numba/cuda/decorators.py +3 -3
- numba_cuda/numba/cuda/descriptor.py +3 -4
- numba_cuda/numba/cuda/deviceufunc.py +66 -2
- numba_cuda/numba/cuda/dispatcher.py +18 -39
- numba_cuda/numba/cuda/flags.py +141 -1
- numba_cuda/numba/cuda/fp16.py +0 -2
- numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/lowering.py +7 -144
- numba_cuda/numba/cuda/mathimpl.py +2 -1
- numba_cuda/numba/cuda/memory_management/nrt.py +43 -17
- numba_cuda/numba/cuda/misc/findlib.py +75 -0
- numba_cuda/numba/cuda/models.py +9 -1
- numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
- numba_cuda/numba/cuda/np/npyfuncs.py +1807 -0
- numba_cuda/numba/cuda/np/numpy_support.py +553 -0
- numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +59 -0
- numba_cuda/numba/cuda/nvvmutils.py +1 -1
- numba_cuda/numba/cuda/printimpl.py +12 -1
- numba_cuda/numba/cuda/random.py +1 -1
- numba_cuda/numba/cuda/serialize.py +1 -1
- numba_cuda/numba/cuda/simulator/__init__.py +1 -1
- numba_cuda/numba/cuda/simulator/api.py +1 -1
- numba_cuda/numba/cuda/simulator/compiler.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +1 -1
- numba_cuda/numba/cuda/simulator/kernelapi.py +1 -1
- numba_cuda/numba/cuda/simulator/memory_management/nrt.py +14 -2
- numba_cuda/numba/cuda/target.py +35 -17
- numba_cuda/numba/cuda/testing.py +7 -19
- numba_cuda/numba/cuda/tests/__init__.py +1 -1
- numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
- numba_cuda/numba/cuda/tests/core/test_serialize.py +4 -4
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +6 -3
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +18 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +2 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +2 -1
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +539 -2
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +81 -1
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +2 -3
- numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +130 -0
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +293 -4
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +2 -1
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +18 -8
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +23 -21
- numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +10 -37
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +20 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +453 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +263 -2
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +112 -6
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +1 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +0 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +3 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +0 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +0 -2
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +3 -1
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +24 -12
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +2 -1
- numba_cuda/numba/cuda/tests/support.py +55 -15
- numba_cuda/numba/cuda/tests/test_tracing.py +200 -0
- numba_cuda/numba/cuda/types.py +56 -0
- numba_cuda/numba/cuda/typing/__init__.py +9 -1
- numba_cuda/numba/cuda/typing/cffi_utils.py +55 -0
- numba_cuda/numba/cuda/typing/context.py +751 -0
- numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
- numba_cuda/numba/cuda/typing/npydecl.py +658 -0
- numba_cuda/numba/cuda/typing/templates.py +7 -6
- numba_cuda/numba/cuda/ufuncs.py +3 -3
- numba_cuda/numba/cuda/utils.py +6 -112
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/METADATA +4 -3
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/RECORD +171 -116
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +0 -60
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/WHEEL +0 -0
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/licenses/LICENSE.numba +0 -0
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/top_level.txt +0 -0
|
@@ -2,12 +2,273 @@
|
|
|
2
2
|
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
3
|
|
|
4
4
|
import functools
|
|
5
|
+
import warnings
|
|
5
6
|
import numpy as np
|
|
6
7
|
import unittest
|
|
7
8
|
|
|
8
|
-
from numba import
|
|
9
|
+
from numba import cuda, types, njit, typeof
|
|
10
|
+
from numba.cuda import config
|
|
11
|
+
from numba.np import numpy_support
|
|
9
12
|
from numba.cuda.tests.support import TestCase
|
|
10
|
-
from numba.tests.
|
|
13
|
+
from numba.cuda.tests.support import MemoryLeakMixin
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class BaseUFuncTest(MemoryLeakMixin):
|
|
17
|
+
def setUp(self):
|
|
18
|
+
super(BaseUFuncTest, self).setUp()
|
|
19
|
+
self.inputs = [
|
|
20
|
+
(np.uint32(0), types.uint32),
|
|
21
|
+
(np.uint32(1), types.uint32),
|
|
22
|
+
(np.int32(-1), types.int32),
|
|
23
|
+
(np.int32(0), types.int32),
|
|
24
|
+
(np.int32(1), types.int32),
|
|
25
|
+
(np.uint64(0), types.uint64),
|
|
26
|
+
(np.uint64(1), types.uint64),
|
|
27
|
+
(np.int64(-1), types.int64),
|
|
28
|
+
(np.int64(0), types.int64),
|
|
29
|
+
(np.int64(1), types.int64),
|
|
30
|
+
(np.float32(-0.5), types.float32),
|
|
31
|
+
(np.float32(0.0), types.float32),
|
|
32
|
+
(np.float32(0.5), types.float32),
|
|
33
|
+
(np.float64(-0.5), types.float64),
|
|
34
|
+
(np.float64(0.0), types.float64),
|
|
35
|
+
(np.float64(0.5), types.float64),
|
|
36
|
+
(np.array([0, 1], dtype="u4"), types.Array(types.uint32, 1, "C")),
|
|
37
|
+
(np.array([0, 1], dtype="u8"), types.Array(types.uint64, 1, "C")),
|
|
38
|
+
(
|
|
39
|
+
np.array([-1, 0, 1], dtype="i4"),
|
|
40
|
+
types.Array(types.int32, 1, "C"),
|
|
41
|
+
),
|
|
42
|
+
(
|
|
43
|
+
np.array([-1, 0, 1], dtype="i8"),
|
|
44
|
+
types.Array(types.int64, 1, "C"),
|
|
45
|
+
),
|
|
46
|
+
(
|
|
47
|
+
np.array([-0.5, 0.0, 0.5], dtype="f4"),
|
|
48
|
+
types.Array(types.float32, 1, "C"),
|
|
49
|
+
),
|
|
50
|
+
(
|
|
51
|
+
np.array([-0.5, 0.0, 0.5], dtype="f8"),
|
|
52
|
+
types.Array(types.float64, 1, "C"),
|
|
53
|
+
),
|
|
54
|
+
(np.array([0, 1], dtype=np.int8), types.Array(types.int8, 1, "C")),
|
|
55
|
+
(
|
|
56
|
+
np.array([0, 1], dtype=np.int16),
|
|
57
|
+
types.Array(types.int16, 1, "C"),
|
|
58
|
+
),
|
|
59
|
+
(
|
|
60
|
+
np.array([0, 1], dtype=np.uint8),
|
|
61
|
+
types.Array(types.uint8, 1, "C"),
|
|
62
|
+
),
|
|
63
|
+
(
|
|
64
|
+
np.array([0, 1], dtype=np.uint16),
|
|
65
|
+
types.Array(types.uint16, 1, "C"),
|
|
66
|
+
),
|
|
67
|
+
]
|
|
68
|
+
|
|
69
|
+
@functools.lru_cache(maxsize=None)
|
|
70
|
+
def _compile(self, pyfunc, args, nrt=False):
|
|
71
|
+
# NOTE: to test the implementation of Numpy ufuncs, we disable
|
|
72
|
+
# rewriting of array expressions.
|
|
73
|
+
return njit(args, _nrt=nrt, no_rewrites=True)(pyfunc)
|
|
74
|
+
|
|
75
|
+
def _determine_output_type(
|
|
76
|
+
self, input_type, int_output_type=None, float_output_type=None
|
|
77
|
+
):
|
|
78
|
+
ty = input_type
|
|
79
|
+
if isinstance(ty, types.Array):
|
|
80
|
+
ndim = ty.ndim
|
|
81
|
+
ty = ty.dtype
|
|
82
|
+
else:
|
|
83
|
+
ndim = 1
|
|
84
|
+
|
|
85
|
+
if ty in types.signed_domain:
|
|
86
|
+
if int_output_type:
|
|
87
|
+
output_type = types.Array(int_output_type, ndim, "C")
|
|
88
|
+
else:
|
|
89
|
+
output_type = types.Array(ty, ndim, "C")
|
|
90
|
+
elif ty in types.unsigned_domain:
|
|
91
|
+
if int_output_type:
|
|
92
|
+
output_type = types.Array(int_output_type, ndim, "C")
|
|
93
|
+
else:
|
|
94
|
+
output_type = types.Array(ty, ndim, "C")
|
|
95
|
+
else:
|
|
96
|
+
if float_output_type:
|
|
97
|
+
output_type = types.Array(float_output_type, ndim, "C")
|
|
98
|
+
else:
|
|
99
|
+
output_type = types.Array(ty, ndim, "C")
|
|
100
|
+
return output_type
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
class BasicUFuncTest(BaseUFuncTest):
|
|
104
|
+
def _make_ufunc_usecase(self, ufunc):
|
|
105
|
+
return _make_ufunc_usecase(ufunc)
|
|
106
|
+
|
|
107
|
+
def basic_ufunc_test(
|
|
108
|
+
self,
|
|
109
|
+
ufunc,
|
|
110
|
+
skip_inputs=[],
|
|
111
|
+
additional_inputs=[],
|
|
112
|
+
int_output_type=None,
|
|
113
|
+
float_output_type=None,
|
|
114
|
+
kinds="ifc",
|
|
115
|
+
positive_only=False,
|
|
116
|
+
):
|
|
117
|
+
# Necessary to avoid some Numpy warnings being silenced, despite
|
|
118
|
+
# the simplefilter() call below.
|
|
119
|
+
self.reset_module_warnings(__name__)
|
|
120
|
+
|
|
121
|
+
pyfunc = self._make_ufunc_usecase(ufunc)
|
|
122
|
+
|
|
123
|
+
inputs = list(self.inputs) + additional_inputs
|
|
124
|
+
|
|
125
|
+
for input_tuple in inputs:
|
|
126
|
+
input_operand = input_tuple[0]
|
|
127
|
+
input_type = input_tuple[1]
|
|
128
|
+
|
|
129
|
+
is_tuple = isinstance(input_operand, tuple)
|
|
130
|
+
if is_tuple:
|
|
131
|
+
args = input_operand
|
|
132
|
+
else:
|
|
133
|
+
args = (input_operand,) * ufunc.nin
|
|
134
|
+
|
|
135
|
+
if input_type in skip_inputs:
|
|
136
|
+
continue
|
|
137
|
+
if positive_only and np.any(args[0] < 0):
|
|
138
|
+
continue
|
|
139
|
+
|
|
140
|
+
# Some ufuncs don't allow all kinds of arguments
|
|
141
|
+
if args[0].dtype.kind not in kinds:
|
|
142
|
+
continue
|
|
143
|
+
|
|
144
|
+
output_type = self._determine_output_type(
|
|
145
|
+
input_type, int_output_type, float_output_type
|
|
146
|
+
)
|
|
147
|
+
|
|
148
|
+
input_types = (input_type,) * ufunc.nin
|
|
149
|
+
output_types = (output_type,) * ufunc.nout
|
|
150
|
+
argtys = input_types + output_types
|
|
151
|
+
cfunc = self._compile(pyfunc, argtys)
|
|
152
|
+
|
|
153
|
+
if isinstance(args[0], np.ndarray):
|
|
154
|
+
results = [
|
|
155
|
+
np.zeros(args[0].shape, dtype=out_ty.dtype.name)
|
|
156
|
+
for out_ty in output_types
|
|
157
|
+
]
|
|
158
|
+
expected = [
|
|
159
|
+
np.zeros(args[0].shape, dtype=out_ty.dtype.name)
|
|
160
|
+
for out_ty in output_types
|
|
161
|
+
]
|
|
162
|
+
else:
|
|
163
|
+
results = [
|
|
164
|
+
np.zeros(1, dtype=out_ty.dtype.name)
|
|
165
|
+
for out_ty in output_types
|
|
166
|
+
]
|
|
167
|
+
expected = [
|
|
168
|
+
np.zeros(1, dtype=out_ty.dtype.name)
|
|
169
|
+
for out_ty in output_types
|
|
170
|
+
]
|
|
171
|
+
|
|
172
|
+
invalid_flag = False
|
|
173
|
+
with warnings.catch_warnings(record=True) as warnlist:
|
|
174
|
+
warnings.simplefilter("always")
|
|
175
|
+
pyfunc(*args, *expected)
|
|
176
|
+
|
|
177
|
+
warnmsg = "invalid value encountered"
|
|
178
|
+
for thiswarn in warnlist:
|
|
179
|
+
if issubclass(thiswarn.category, RuntimeWarning) and str(
|
|
180
|
+
thiswarn.message
|
|
181
|
+
).startswith(warnmsg):
|
|
182
|
+
invalid_flag = True
|
|
183
|
+
|
|
184
|
+
cfunc(*args, *results)
|
|
185
|
+
|
|
186
|
+
for expected_i, result_i in zip(expected, results):
|
|
187
|
+
msg = "\n".join(
|
|
188
|
+
[
|
|
189
|
+
"ufunc '{0}' failed",
|
|
190
|
+
"inputs ({1}):",
|
|
191
|
+
"{2}",
|
|
192
|
+
"got({3})",
|
|
193
|
+
"{4}",
|
|
194
|
+
"expected ({5}):",
|
|
195
|
+
"{6}",
|
|
196
|
+
]
|
|
197
|
+
).format(
|
|
198
|
+
ufunc.__name__,
|
|
199
|
+
input_type,
|
|
200
|
+
input_operand,
|
|
201
|
+
output_type,
|
|
202
|
+
result_i,
|
|
203
|
+
expected_i.dtype,
|
|
204
|
+
expected_i,
|
|
205
|
+
)
|
|
206
|
+
try:
|
|
207
|
+
np.testing.assert_array_almost_equal(
|
|
208
|
+
expected_i, result_i, decimal=5, err_msg=msg
|
|
209
|
+
)
|
|
210
|
+
except AssertionError:
|
|
211
|
+
if invalid_flag:
|
|
212
|
+
# Allow output to mismatch for invalid input
|
|
213
|
+
print(
|
|
214
|
+
"Output mismatch for invalid input",
|
|
215
|
+
input_tuple,
|
|
216
|
+
result_i,
|
|
217
|
+
expected_i,
|
|
218
|
+
)
|
|
219
|
+
else:
|
|
220
|
+
raise
|
|
221
|
+
|
|
222
|
+
def signed_unsigned_cmp_test(self, comparison_ufunc):
|
|
223
|
+
self.basic_ufunc_test(comparison_ufunc)
|
|
224
|
+
|
|
225
|
+
if numpy_support.numpy_version < (1, 25):
|
|
226
|
+
return
|
|
227
|
+
|
|
228
|
+
# Test additional implementations that specifically handle signed /
|
|
229
|
+
# unsigned comparisons added in NumPy 1.25:
|
|
230
|
+
# https://github.com/numpy/numpy/pull/23713
|
|
231
|
+
additional_inputs = (
|
|
232
|
+
(np.int64(-1), np.uint64(0)),
|
|
233
|
+
(np.int64(-1), np.uint64(1)),
|
|
234
|
+
(np.int64(0), np.uint64(0)),
|
|
235
|
+
(np.int64(0), np.uint64(1)),
|
|
236
|
+
(np.int64(1), np.uint64(0)),
|
|
237
|
+
(np.int64(1), np.uint64(1)),
|
|
238
|
+
(np.uint64(0), np.int64(-1)),
|
|
239
|
+
(np.uint64(0), np.int64(0)),
|
|
240
|
+
(np.uint64(0), np.int64(1)),
|
|
241
|
+
(np.uint64(1), np.int64(-1)),
|
|
242
|
+
(np.uint64(1), np.int64(0)),
|
|
243
|
+
(np.uint64(1), np.int64(1)),
|
|
244
|
+
(
|
|
245
|
+
np.array([-1, -1, 0, 0, 1, 1], dtype=np.int64),
|
|
246
|
+
np.array([0, 1, 0, 1, 0, 1], dtype=np.uint64),
|
|
247
|
+
),
|
|
248
|
+
(
|
|
249
|
+
np.array([0, 1, 0, 1, 0, 1], dtype=np.uint64),
|
|
250
|
+
np.array([-1, -1, 0, 0, 1, 1], dtype=np.int64),
|
|
251
|
+
),
|
|
252
|
+
)
|
|
253
|
+
|
|
254
|
+
pyfunc = self._make_ufunc_usecase(comparison_ufunc)
|
|
255
|
+
|
|
256
|
+
for a, b in additional_inputs:
|
|
257
|
+
input_types = (typeof(a), typeof(b))
|
|
258
|
+
output_type = types.Array(types.bool_, 1, "C")
|
|
259
|
+
argtys = input_types + (output_type,)
|
|
260
|
+
cfunc = self._compile(pyfunc, argtys)
|
|
261
|
+
|
|
262
|
+
if isinstance(a, np.ndarray):
|
|
263
|
+
result = np.zeros(a.shape, dtype=np.bool_)
|
|
264
|
+
else:
|
|
265
|
+
result = np.zeros(1, dtype=np.bool_)
|
|
266
|
+
|
|
267
|
+
expected = np.zeros_like(result)
|
|
268
|
+
|
|
269
|
+
pyfunc(a, b, expected)
|
|
270
|
+
cfunc(a, b, result)
|
|
271
|
+
np.testing.assert_equal(expected, result)
|
|
11
272
|
|
|
12
273
|
|
|
13
274
|
def _make_ufunc_usecase(ufunc):
|
|
@@ -2,17 +2,123 @@
|
|
|
2
2
|
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
3
|
|
|
4
4
|
import numpy as np
|
|
5
|
+
import math
|
|
5
6
|
|
|
6
|
-
from numba import vectorize, cuda
|
|
7
|
-
from numba.tests.npyufunc.test_vectorize_decor import (
|
|
8
|
-
BaseVectorizeDecor,
|
|
9
|
-
BaseVectorizeNopythonArg,
|
|
10
|
-
BaseVectorizeUnrecognizedArg,
|
|
11
|
-
)
|
|
7
|
+
from numba import vectorize, cuda, int32, uint32, float32, float64
|
|
12
8
|
from numba.cuda.testing import skip_on_cudasim, CUDATestCase
|
|
9
|
+
from numba.cuda.tests.support import CheckWarningsMixin
|
|
10
|
+
|
|
13
11
|
import unittest
|
|
14
12
|
|
|
15
13
|
|
|
14
|
+
pi = math.pi
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def sinc(x):
|
|
18
|
+
if x == 0.0:
|
|
19
|
+
return 1.0
|
|
20
|
+
else:
|
|
21
|
+
return math.sin(x * pi) / (pi * x)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def scaled_sinc(x, scale):
|
|
25
|
+
if x == 0.0:
|
|
26
|
+
return scale
|
|
27
|
+
else:
|
|
28
|
+
return scale * (math.sin(x * pi) / (pi * x))
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def vector_add(a, b):
|
|
32
|
+
return a + b
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class BaseVectorizeDecor(object):
|
|
36
|
+
target = None
|
|
37
|
+
wrapper = None
|
|
38
|
+
funcs = {
|
|
39
|
+
"func1": sinc,
|
|
40
|
+
"func2": scaled_sinc,
|
|
41
|
+
"func3": vector_add,
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
@classmethod
|
|
45
|
+
def _run_and_compare(cls, func, sig, A, *args, **kwargs):
|
|
46
|
+
if cls.wrapper is not None:
|
|
47
|
+
func = cls.wrapper(func)
|
|
48
|
+
numba_func = vectorize(sig, target=cls.target)(func)
|
|
49
|
+
numpy_func = np.vectorize(func)
|
|
50
|
+
result = numba_func(A, *args)
|
|
51
|
+
gold = numpy_func(A, *args)
|
|
52
|
+
np.testing.assert_allclose(result, gold, **kwargs)
|
|
53
|
+
|
|
54
|
+
def test_1(self):
|
|
55
|
+
sig = ["float64(float64)", "float32(float32)"]
|
|
56
|
+
func = self.funcs["func1"]
|
|
57
|
+
A = np.arange(100, dtype=np.float64)
|
|
58
|
+
self._run_and_compare(func, sig, A)
|
|
59
|
+
|
|
60
|
+
def test_2(self):
|
|
61
|
+
sig = [float64(float64), float32(float32)]
|
|
62
|
+
func = self.funcs["func1"]
|
|
63
|
+
A = np.arange(100, dtype=np.float64)
|
|
64
|
+
self._run_and_compare(func, sig, A)
|
|
65
|
+
|
|
66
|
+
def test_3(self):
|
|
67
|
+
sig = ["float64(float64, uint32)"]
|
|
68
|
+
func = self.funcs["func2"]
|
|
69
|
+
A = np.arange(100, dtype=np.float64)
|
|
70
|
+
scale = np.uint32(3)
|
|
71
|
+
self._run_and_compare(func, sig, A, scale, atol=1e-8)
|
|
72
|
+
|
|
73
|
+
def test_4(self):
|
|
74
|
+
sig = [
|
|
75
|
+
int32(int32, int32),
|
|
76
|
+
uint32(uint32, uint32),
|
|
77
|
+
float32(float32, float32),
|
|
78
|
+
float64(float64, float64),
|
|
79
|
+
]
|
|
80
|
+
func = self.funcs["func3"]
|
|
81
|
+
A = np.arange(100, dtype=np.float64)
|
|
82
|
+
self._run_and_compare(func, sig, A, A)
|
|
83
|
+
A = A.astype(np.float32)
|
|
84
|
+
self._run_and_compare(func, sig, A, A)
|
|
85
|
+
A = A.astype(np.int32)
|
|
86
|
+
self._run_and_compare(func, sig, A, A)
|
|
87
|
+
A = A.astype(np.uint32)
|
|
88
|
+
self._run_and_compare(func, sig, A, A)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class BaseVectorizeNopythonArg(unittest.TestCase, CheckWarningsMixin):
|
|
92
|
+
"""
|
|
93
|
+
Test passing the nopython argument to the vectorize decorator.
|
|
94
|
+
"""
|
|
95
|
+
|
|
96
|
+
def _test_target_nopython(self, target, warnings, with_sig=True):
|
|
97
|
+
a = np.array([2.0], dtype=np.float32)
|
|
98
|
+
b = np.array([3.0], dtype=np.float32)
|
|
99
|
+
sig = [float32(float32, float32)]
|
|
100
|
+
args = with_sig and [sig] or []
|
|
101
|
+
with self.check_warnings(warnings):
|
|
102
|
+
f = vectorize(*args, target=target, nopython=True)(vector_add)
|
|
103
|
+
f(a, b)
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
class BaseVectorizeUnrecognizedArg(unittest.TestCase, CheckWarningsMixin):
|
|
107
|
+
"""
|
|
108
|
+
Test passing an unrecognized argument to the vectorize decorator.
|
|
109
|
+
"""
|
|
110
|
+
|
|
111
|
+
def _test_target_unrecognized_arg(self, target, with_sig=True):
|
|
112
|
+
a = np.array([2.0], dtype=np.float32)
|
|
113
|
+
b = np.array([3.0], dtype=np.float32)
|
|
114
|
+
sig = [float32(float32, float32)]
|
|
115
|
+
args = with_sig and [sig] or []
|
|
116
|
+
with self.assertRaises(KeyError) as raises:
|
|
117
|
+
f = vectorize(*args, target=target, nonexistent=2)(vector_add)
|
|
118
|
+
f(a, b)
|
|
119
|
+
self.assertIn("Unrecognized options", str(raises.exception))
|
|
120
|
+
|
|
121
|
+
|
|
16
122
|
@skip_on_cudasim("ufunc API unsupported in the simulator")
|
|
17
123
|
class TestVectorizeDecor(CUDATestCase, BaseVectorizeDecor):
|
|
18
124
|
"""
|
|
@@ -7,7 +7,7 @@ import numpy as np
|
|
|
7
7
|
from numba import cuda, int32, int64, float32, float64
|
|
8
8
|
from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
|
|
9
9
|
from numba.cuda.compiler import compile_ptx
|
|
10
|
-
from numba.core import config
|
|
10
|
+
from numba.cuda.core import config
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
def useful_syncwarp(ary):
|
|
@@ -10,13 +10,11 @@ from numba.cuda.testing import (
|
|
|
10
10
|
skip_on_cudasim,
|
|
11
11
|
skip_if_cudadevrt_missing,
|
|
12
12
|
skip_unless_cc_60,
|
|
13
|
-
skip_if_mvc_enabled,
|
|
14
13
|
)
|
|
15
14
|
|
|
16
15
|
|
|
17
16
|
@skip_if_cudadevrt_missing
|
|
18
17
|
@skip_unless_cc_60
|
|
19
|
-
@skip_if_mvc_enabled("CG not supported with MVC")
|
|
20
18
|
@skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
|
|
21
19
|
class TestCooperativeGroups(CUDATestCase):
|
|
22
20
|
def test_ex_grid_sync(self):
|
|
@@ -90,7 +90,8 @@ class TestFFI(CUDATestCase):
|
|
|
90
90
|
|
|
91
91
|
def test_ex_extra_includes(self):
|
|
92
92
|
import numpy as np
|
|
93
|
-
from numba import cuda
|
|
93
|
+
from numba import cuda
|
|
94
|
+
from numba.cuda import config
|
|
94
95
|
import os
|
|
95
96
|
|
|
96
97
|
basedir = os.path.dirname(os.path.abspath(__file__))
|
|
@@ -103,7 +104,7 @@ class TestFFI(CUDATestCase):
|
|
|
103
104
|
includedir = ":".join([mul_dir, add_dir])
|
|
104
105
|
with override_config("CUDA_NVRTC_EXTRA_SEARCH_PATHS", includedir):
|
|
105
106
|
# magictoken.ex_extra_search_paths.begin
|
|
106
|
-
from numba import config
|
|
107
|
+
from numba.cuda import config
|
|
107
108
|
|
|
108
109
|
includedir = ":".join([mul_dir, add_dir])
|
|
109
110
|
config.CUDA_NVRTC_EXTRA_SEARCH_PATHS = includedir
|
|
@@ -8,14 +8,12 @@ from numba.cuda.testing import (
|
|
|
8
8
|
skip_if_cudadevrt_missing,
|
|
9
9
|
skip_on_cudasim,
|
|
10
10
|
skip_unless_cc_60,
|
|
11
|
-
skip_if_mvc_enabled,
|
|
12
11
|
)
|
|
13
12
|
from numba.cuda.tests.support import captured_stdout
|
|
14
13
|
|
|
15
14
|
|
|
16
15
|
@skip_if_cudadevrt_missing
|
|
17
16
|
@skip_unless_cc_60
|
|
18
|
-
@skip_if_mvc_enabled("CG not supported with MVC")
|
|
19
17
|
@skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
|
|
20
18
|
class TestLaplace(CUDATestCase):
|
|
21
19
|
"""
|
|
@@ -8,14 +8,12 @@ from numba.cuda.testing import (
|
|
|
8
8
|
skip_if_cudadevrt_missing,
|
|
9
9
|
skip_on_cudasim,
|
|
10
10
|
skip_unless_cc_60,
|
|
11
|
-
skip_if_mvc_enabled,
|
|
12
11
|
)
|
|
13
12
|
from numba.cuda.tests.support import captured_stdout
|
|
14
13
|
|
|
15
14
|
|
|
16
15
|
@skip_if_cudadevrt_missing
|
|
17
16
|
@skip_unless_cc_60
|
|
18
|
-
@skip_if_mvc_enabled("CG not supported with MVC")
|
|
19
17
|
@skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
|
|
20
18
|
class TestSessionization(CUDATestCase):
|
|
21
19
|
"""
|
|
@@ -26,6 +26,9 @@ class TestImport(unittest.TestCase):
|
|
|
26
26
|
"numba.cpython.mathimpl",
|
|
27
27
|
"numba.cpython.printimpl",
|
|
28
28
|
"numba.cpython.randomimpl",
|
|
29
|
+
"numba.cuda.cpython.numbers",
|
|
30
|
+
"numba.cuda.cpython.cmathimpl",
|
|
31
|
+
"numba.cuda.cpython.mathimpl",
|
|
29
32
|
"numba.core.optional",
|
|
30
33
|
"numba.misc.gdb_hook",
|
|
31
34
|
"numba.misc.literal",
|
|
@@ -37,7 +40,6 @@ class TestImport(unittest.TestCase):
|
|
|
37
40
|
"numba.np.npyimpl",
|
|
38
41
|
"numba.typed.typeddict",
|
|
39
42
|
"numba.typed.typedlist",
|
|
40
|
-
"numba.experimental.jitclass.base",
|
|
41
43
|
)
|
|
42
44
|
|
|
43
45
|
code = "import sys; from numba import cuda; print(list(sys.modules))"
|
|
@@ -6,7 +6,8 @@ import os
|
|
|
6
6
|
import multiprocessing as mp
|
|
7
7
|
import warnings
|
|
8
8
|
|
|
9
|
-
|
|
9
|
+
|
|
10
|
+
from numba.cuda.core.config import IS_WIN32
|
|
10
11
|
from numba.core.errors import NumbaWarning
|
|
11
12
|
from numba.cuda.cudadrv import nvvm
|
|
12
13
|
from numba.cuda.testing import (
|
|
@@ -19,6 +20,7 @@ from numba.cuda.cuda_paths import (
|
|
|
19
20
|
_get_nvvm_path_decision,
|
|
20
21
|
_get_cudalib_dir_path_decision,
|
|
21
22
|
get_system_ctk,
|
|
23
|
+
get_system_ctk_libdir,
|
|
22
24
|
)
|
|
23
25
|
|
|
24
26
|
|
|
@@ -102,10 +104,12 @@ class TestLibDeviceLookUp(LibraryLookupBase):
|
|
|
102
104
|
# Check that CUDA_HOME works by removing conda-env
|
|
103
105
|
by, info, warns = self.remote_do(self.do_set_cuda_home)
|
|
104
106
|
self.assertEqual(by, "CUDA_HOME")
|
|
105
|
-
self.
|
|
107
|
+
self.assertTrue(
|
|
108
|
+
info.startswith(os.path.join("mycudahome", "nvvm", "libdevice"))
|
|
109
|
+
)
|
|
106
110
|
self.assertFalse(warns)
|
|
107
111
|
|
|
108
|
-
if get_system_ctk() is None:
|
|
112
|
+
if get_system_ctk("nvvm", "libdevice") is None:
|
|
109
113
|
# Fake remove conda environment so no cudatoolkit is available
|
|
110
114
|
by, info, warns = self.remote_do(self.do_clear_envs)
|
|
111
115
|
self.assertEqual(by, "<unknown>")
|
|
@@ -148,13 +152,16 @@ class TestNvvmLookUp(LibraryLookupBase):
|
|
|
148
152
|
self.assertEqual(by, "CUDA_HOME")
|
|
149
153
|
self.assertFalse(warns)
|
|
150
154
|
if IS_WIN32:
|
|
151
|
-
self.assertEqual(
|
|
152
|
-
|
|
153
|
-
|
|
155
|
+
self.assertEqual(
|
|
156
|
+
os.path.dirname(info), os.path.join("mycudahome", "nvvm", "bin")
|
|
157
|
+
)
|
|
154
158
|
else:
|
|
155
|
-
self.assertEqual(
|
|
159
|
+
self.assertEqual(
|
|
160
|
+
os.path.dirname(info),
|
|
161
|
+
os.path.join("mycudahome", "nvvm", "lib64"),
|
|
162
|
+
)
|
|
156
163
|
|
|
157
|
-
if get_system_ctk() is None:
|
|
164
|
+
if get_system_ctk("nvvm") is None:
|
|
158
165
|
# Fake remove conda environment so no cudatoolkit is available
|
|
159
166
|
by, info, warns = self.remote_do(self.do_clear_envs)
|
|
160
167
|
self.assertEqual(by, "<unknown>")
|
|
@@ -199,12 +206,17 @@ class TestCudaLibLookUp(LibraryLookupBase):
|
|
|
199
206
|
self.assertEqual(by, "CUDA_HOME")
|
|
200
207
|
self.assertFalse(warns)
|
|
201
208
|
if IS_WIN32:
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
209
|
+
# I think only wheels don't have the "Library" directory?
|
|
210
|
+
self.assertTrue(
|
|
211
|
+
info
|
|
212
|
+
in (
|
|
213
|
+
os.path.join("mycudahome", "bin"),
|
|
214
|
+
os.path.join("mycudahome", "Library", "bin"),
|
|
215
|
+
)
|
|
216
|
+
)
|
|
205
217
|
else:
|
|
206
218
|
self.assertEqual(info, os.path.join("mycudahome", "lib64"))
|
|
207
|
-
if
|
|
219
|
+
if get_system_ctk_libdir() is None:
|
|
208
220
|
# Fake remove conda environment so no cudatoolkit is available
|
|
209
221
|
by, info, warns = self.remote_do(self.do_clear_envs)
|
|
210
222
|
self.assertEqual(by, "<unknown>")
|
|
@@ -10,9 +10,10 @@ from numba.cuda.testing import CUDATestCase, skip_on_cudasim
|
|
|
10
10
|
from numba.cuda.tests.support import run_in_subprocess, override_config
|
|
11
11
|
from numba.cuda import get_current_device
|
|
12
12
|
from numba.cuda.cudadrv.nvrtc import compile
|
|
13
|
-
from numba import
|
|
13
|
+
from numba import types
|
|
14
14
|
from numba.core.typing import signature
|
|
15
15
|
from numba import cuda
|
|
16
|
+
from numba.cuda import config
|
|
16
17
|
from numba.cuda.typing.templates import AbstractTemplate
|
|
17
18
|
from numba.cuda.cudadrv.linkable_code import (
|
|
18
19
|
CUSource,
|