numba-cuda 0.19.1__py3-none-any.whl → 0.20.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of numba-cuda might be problematic. Click here for more details.

Files changed (171) hide show
  1. numba_cuda/VERSION +1 -1
  2. numba_cuda/numba/cuda/__init__.py +1 -1
  3. numba_cuda/numba/cuda/_internal/cuda_bf16.py +12706 -1470
  4. numba_cuda/numba/cuda/_internal/cuda_fp16.py +2653 -8769
  5. numba_cuda/numba/cuda/api.py +6 -1
  6. numba_cuda/numba/cuda/bf16.py +285 -2
  7. numba_cuda/numba/cuda/cgutils.py +2 -2
  8. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  9. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  10. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  11. numba_cuda/numba/cuda/codegen.py +1 -1
  12. numba_cuda/numba/cuda/compiler.py +373 -30
  13. numba_cuda/numba/cuda/core/analysis.py +319 -0
  14. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  15. numba_cuda/numba/cuda/core/annotations/type_annotations.py +304 -0
  16. numba_cuda/numba/cuda/core/base.py +1289 -0
  17. numba_cuda/numba/cuda/core/bytecode.py +727 -0
  18. numba_cuda/numba/cuda/core/caching.py +2 -2
  19. numba_cuda/numba/cuda/core/compiler.py +6 -14
  20. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  21. numba_cuda/numba/cuda/core/config.py +747 -0
  22. numba_cuda/numba/cuda/core/consts.py +124 -0
  23. numba_cuda/numba/cuda/core/cpu.py +370 -0
  24. numba_cuda/numba/cuda/core/environment.py +68 -0
  25. numba_cuda/numba/cuda/core/event.py +511 -0
  26. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  27. numba_cuda/numba/cuda/core/inline_closurecall.py +1889 -0
  28. numba_cuda/numba/cuda/core/interpreter.py +48 -26
  29. numba_cuda/numba/cuda/core/ir_utils.py +15 -26
  30. numba_cuda/numba/cuda/core/options.py +262 -0
  31. numba_cuda/numba/cuda/core/postproc.py +249 -0
  32. numba_cuda/numba/cuda/core/pythonapi.py +1868 -0
  33. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  34. numba_cuda/numba/cuda/core/rewrites/ir_print.py +90 -0
  35. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  36. numba_cuda/numba/cuda/core/rewrites/static_binop.py +40 -0
  37. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +187 -0
  38. numba_cuda/numba/cuda/core/rewrites/static_raise.py +98 -0
  39. numba_cuda/numba/cuda/core/ssa.py +496 -0
  40. numba_cuda/numba/cuda/core/targetconfig.py +329 -0
  41. numba_cuda/numba/cuda/core/tracing.py +231 -0
  42. numba_cuda/numba/cuda/core/transforms.py +952 -0
  43. numba_cuda/numba/cuda/core/typed_passes.py +738 -7
  44. numba_cuda/numba/cuda/core/typeinfer.py +1948 -0
  45. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  46. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  47. numba_cuda/numba/cuda/core/unsafe/eh.py +66 -0
  48. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  49. numba_cuda/numba/cuda/core/untyped_passes.py +1983 -0
  50. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  51. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  52. numba_cuda/numba/cuda/cpython/numbers.py +1474 -0
  53. numba_cuda/numba/cuda/cuda_paths.py +422 -246
  54. numba_cuda/numba/cuda/cudadecl.py +1 -1
  55. numba_cuda/numba/cuda/cudadrv/__init__.py +1 -1
  56. numba_cuda/numba/cuda/cudadrv/devicearray.py +2 -1
  57. numba_cuda/numba/cuda/cudadrv/driver.py +11 -140
  58. numba_cuda/numba/cuda/cudadrv/dummyarray.py +111 -24
  59. numba_cuda/numba/cuda/cudadrv/libs.py +5 -5
  60. numba_cuda/numba/cuda/cudadrv/mappings.py +1 -1
  61. numba_cuda/numba/cuda/cudadrv/nvrtc.py +19 -8
  62. numba_cuda/numba/cuda/cudadrv/nvvm.py +1 -4
  63. numba_cuda/numba/cuda/cudadrv/runtime.py +1 -1
  64. numba_cuda/numba/cuda/cudaimpl.py +5 -1
  65. numba_cuda/numba/cuda/debuginfo.py +85 -2
  66. numba_cuda/numba/cuda/decorators.py +3 -3
  67. numba_cuda/numba/cuda/descriptor.py +3 -4
  68. numba_cuda/numba/cuda/deviceufunc.py +66 -2
  69. numba_cuda/numba/cuda/dispatcher.py +18 -39
  70. numba_cuda/numba/cuda/flags.py +141 -1
  71. numba_cuda/numba/cuda/fp16.py +0 -2
  72. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  73. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  74. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  75. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  76. numba_cuda/numba/cuda/lowering.py +7 -144
  77. numba_cuda/numba/cuda/mathimpl.py +2 -1
  78. numba_cuda/numba/cuda/memory_management/nrt.py +43 -17
  79. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  80. numba_cuda/numba/cuda/models.py +9 -1
  81. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  82. numba_cuda/numba/cuda/np/npyfuncs.py +1807 -0
  83. numba_cuda/numba/cuda/np/numpy_support.py +553 -0
  84. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +59 -0
  85. numba_cuda/numba/cuda/nvvmutils.py +1 -1
  86. numba_cuda/numba/cuda/printimpl.py +12 -1
  87. numba_cuda/numba/cuda/random.py +1 -1
  88. numba_cuda/numba/cuda/serialize.py +1 -1
  89. numba_cuda/numba/cuda/simulator/__init__.py +1 -1
  90. numba_cuda/numba/cuda/simulator/api.py +1 -1
  91. numba_cuda/numba/cuda/simulator/compiler.py +4 -0
  92. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +1 -1
  93. numba_cuda/numba/cuda/simulator/kernelapi.py +1 -1
  94. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +14 -2
  95. numba_cuda/numba/cuda/target.py +35 -17
  96. numba_cuda/numba/cuda/testing.py +4 -19
  97. numba_cuda/numba/cuda/tests/__init__.py +1 -1
  98. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  99. numba_cuda/numba/cuda/tests/core/test_serialize.py +4 -4
  100. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +1 -1
  101. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +1 -1
  102. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +1 -1
  103. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +6 -3
  104. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +1 -1
  105. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +18 -2
  106. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +2 -1
  107. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +1 -1
  108. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +1 -1
  109. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +1 -1
  110. numba_cuda/numba/cuda/tests/cudapy/test_array.py +2 -1
  111. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1 -1
  112. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +539 -2
  113. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +81 -1
  114. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +1 -3
  115. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +1 -1
  116. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +1 -1
  117. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +2 -3
  118. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +130 -0
  119. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +1 -1
  120. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +1 -1
  121. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +293 -4
  122. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +1 -1
  123. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +1 -1
  124. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +1 -1
  125. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +1 -1
  126. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +2 -1
  127. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +18 -8
  128. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +10 -37
  129. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +1 -1
  130. numba_cuda/numba/cuda/tests/cudapy/test_math.py +1 -1
  131. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -1
  132. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +1 -1
  133. numba_cuda/numba/cuda/tests/cudapy/test_print.py +20 -0
  134. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +1 -1
  135. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +1 -1
  136. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +1 -1
  137. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +1 -1
  138. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +453 -0
  139. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +1 -1
  140. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  141. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +263 -2
  142. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +1 -1
  143. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +1 -1
  144. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +112 -6
  145. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +1 -1
  146. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +1 -1
  147. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +0 -2
  148. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +3 -2
  149. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +0 -2
  150. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +0 -2
  151. numba_cuda/numba/cuda/tests/nocuda/test_import.py +3 -1
  152. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +24 -12
  153. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +2 -1
  154. numba_cuda/numba/cuda/tests/support.py +55 -15
  155. numba_cuda/numba/cuda/tests/test_tracing.py +200 -0
  156. numba_cuda/numba/cuda/types.py +56 -0
  157. numba_cuda/numba/cuda/typing/__init__.py +9 -1
  158. numba_cuda/numba/cuda/typing/cffi_utils.py +55 -0
  159. numba_cuda/numba/cuda/typing/context.py +751 -0
  160. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  161. numba_cuda/numba/cuda/typing/npydecl.py +658 -0
  162. numba_cuda/numba/cuda/typing/templates.py +7 -6
  163. numba_cuda/numba/cuda/ufuncs.py +3 -3
  164. numba_cuda/numba/cuda/utils.py +6 -112
  165. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.0.dist-info}/METADATA +2 -1
  166. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.0.dist-info}/RECORD +170 -115
  167. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +0 -60
  168. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.0.dist-info}/WHEEL +0 -0
  169. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.0.dist-info}/licenses/LICENSE +0 -0
  170. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.0.dist-info}/licenses/LICENSE.numba +0 -0
  171. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.0.dist-info}/top_level.txt +0 -0
@@ -2,12 +2,273 @@
2
2
  # SPDX-License-Identifier: BSD-2-Clause
3
3
 
4
4
  import functools
5
+ import warnings
5
6
  import numpy as np
6
7
  import unittest
7
8
 
8
- from numba import config, cuda, types
9
+ from numba import cuda, types, njit, typeof
10
+ from numba.cuda import config
11
+ from numba.np import numpy_support
9
12
  from numba.cuda.tests.support import TestCase
10
- from numba.tests.test_ufuncs import BasicUFuncTest
13
+ from numba.cuda.tests.support import MemoryLeakMixin
14
+
15
+
16
+ class BaseUFuncTest(MemoryLeakMixin):
17
+ def setUp(self):
18
+ super(BaseUFuncTest, self).setUp()
19
+ self.inputs = [
20
+ (np.uint32(0), types.uint32),
21
+ (np.uint32(1), types.uint32),
22
+ (np.int32(-1), types.int32),
23
+ (np.int32(0), types.int32),
24
+ (np.int32(1), types.int32),
25
+ (np.uint64(0), types.uint64),
26
+ (np.uint64(1), types.uint64),
27
+ (np.int64(-1), types.int64),
28
+ (np.int64(0), types.int64),
29
+ (np.int64(1), types.int64),
30
+ (np.float32(-0.5), types.float32),
31
+ (np.float32(0.0), types.float32),
32
+ (np.float32(0.5), types.float32),
33
+ (np.float64(-0.5), types.float64),
34
+ (np.float64(0.0), types.float64),
35
+ (np.float64(0.5), types.float64),
36
+ (np.array([0, 1], dtype="u4"), types.Array(types.uint32, 1, "C")),
37
+ (np.array([0, 1], dtype="u8"), types.Array(types.uint64, 1, "C")),
38
+ (
39
+ np.array([-1, 0, 1], dtype="i4"),
40
+ types.Array(types.int32, 1, "C"),
41
+ ),
42
+ (
43
+ np.array([-1, 0, 1], dtype="i8"),
44
+ types.Array(types.int64, 1, "C"),
45
+ ),
46
+ (
47
+ np.array([-0.5, 0.0, 0.5], dtype="f4"),
48
+ types.Array(types.float32, 1, "C"),
49
+ ),
50
+ (
51
+ np.array([-0.5, 0.0, 0.5], dtype="f8"),
52
+ types.Array(types.float64, 1, "C"),
53
+ ),
54
+ (np.array([0, 1], dtype=np.int8), types.Array(types.int8, 1, "C")),
55
+ (
56
+ np.array([0, 1], dtype=np.int16),
57
+ types.Array(types.int16, 1, "C"),
58
+ ),
59
+ (
60
+ np.array([0, 1], dtype=np.uint8),
61
+ types.Array(types.uint8, 1, "C"),
62
+ ),
63
+ (
64
+ np.array([0, 1], dtype=np.uint16),
65
+ types.Array(types.uint16, 1, "C"),
66
+ ),
67
+ ]
68
+
69
+ @functools.lru_cache(maxsize=None)
70
+ def _compile(self, pyfunc, args, nrt=False):
71
+ # NOTE: to test the implementation of Numpy ufuncs, we disable
72
+ # rewriting of array expressions.
73
+ return njit(args, _nrt=nrt, no_rewrites=True)(pyfunc)
74
+
75
+ def _determine_output_type(
76
+ self, input_type, int_output_type=None, float_output_type=None
77
+ ):
78
+ ty = input_type
79
+ if isinstance(ty, types.Array):
80
+ ndim = ty.ndim
81
+ ty = ty.dtype
82
+ else:
83
+ ndim = 1
84
+
85
+ if ty in types.signed_domain:
86
+ if int_output_type:
87
+ output_type = types.Array(int_output_type, ndim, "C")
88
+ else:
89
+ output_type = types.Array(ty, ndim, "C")
90
+ elif ty in types.unsigned_domain:
91
+ if int_output_type:
92
+ output_type = types.Array(int_output_type, ndim, "C")
93
+ else:
94
+ output_type = types.Array(ty, ndim, "C")
95
+ else:
96
+ if float_output_type:
97
+ output_type = types.Array(float_output_type, ndim, "C")
98
+ else:
99
+ output_type = types.Array(ty, ndim, "C")
100
+ return output_type
101
+
102
+
103
+ class BasicUFuncTest(BaseUFuncTest):
104
+ def _make_ufunc_usecase(self, ufunc):
105
+ return _make_ufunc_usecase(ufunc)
106
+
107
+ def basic_ufunc_test(
108
+ self,
109
+ ufunc,
110
+ skip_inputs=[],
111
+ additional_inputs=[],
112
+ int_output_type=None,
113
+ float_output_type=None,
114
+ kinds="ifc",
115
+ positive_only=False,
116
+ ):
117
+ # Necessary to avoid some Numpy warnings being silenced, despite
118
+ # the simplefilter() call below.
119
+ self.reset_module_warnings(__name__)
120
+
121
+ pyfunc = self._make_ufunc_usecase(ufunc)
122
+
123
+ inputs = list(self.inputs) + additional_inputs
124
+
125
+ for input_tuple in inputs:
126
+ input_operand = input_tuple[0]
127
+ input_type = input_tuple[1]
128
+
129
+ is_tuple = isinstance(input_operand, tuple)
130
+ if is_tuple:
131
+ args = input_operand
132
+ else:
133
+ args = (input_operand,) * ufunc.nin
134
+
135
+ if input_type in skip_inputs:
136
+ continue
137
+ if positive_only and np.any(args[0] < 0):
138
+ continue
139
+
140
+ # Some ufuncs don't allow all kinds of arguments
141
+ if args[0].dtype.kind not in kinds:
142
+ continue
143
+
144
+ output_type = self._determine_output_type(
145
+ input_type, int_output_type, float_output_type
146
+ )
147
+
148
+ input_types = (input_type,) * ufunc.nin
149
+ output_types = (output_type,) * ufunc.nout
150
+ argtys = input_types + output_types
151
+ cfunc = self._compile(pyfunc, argtys)
152
+
153
+ if isinstance(args[0], np.ndarray):
154
+ results = [
155
+ np.zeros(args[0].shape, dtype=out_ty.dtype.name)
156
+ for out_ty in output_types
157
+ ]
158
+ expected = [
159
+ np.zeros(args[0].shape, dtype=out_ty.dtype.name)
160
+ for out_ty in output_types
161
+ ]
162
+ else:
163
+ results = [
164
+ np.zeros(1, dtype=out_ty.dtype.name)
165
+ for out_ty in output_types
166
+ ]
167
+ expected = [
168
+ np.zeros(1, dtype=out_ty.dtype.name)
169
+ for out_ty in output_types
170
+ ]
171
+
172
+ invalid_flag = False
173
+ with warnings.catch_warnings(record=True) as warnlist:
174
+ warnings.simplefilter("always")
175
+ pyfunc(*args, *expected)
176
+
177
+ warnmsg = "invalid value encountered"
178
+ for thiswarn in warnlist:
179
+ if issubclass(thiswarn.category, RuntimeWarning) and str(
180
+ thiswarn.message
181
+ ).startswith(warnmsg):
182
+ invalid_flag = True
183
+
184
+ cfunc(*args, *results)
185
+
186
+ for expected_i, result_i in zip(expected, results):
187
+ msg = "\n".join(
188
+ [
189
+ "ufunc '{0}' failed",
190
+ "inputs ({1}):",
191
+ "{2}",
192
+ "got({3})",
193
+ "{4}",
194
+ "expected ({5}):",
195
+ "{6}",
196
+ ]
197
+ ).format(
198
+ ufunc.__name__,
199
+ input_type,
200
+ input_operand,
201
+ output_type,
202
+ result_i,
203
+ expected_i.dtype,
204
+ expected_i,
205
+ )
206
+ try:
207
+ np.testing.assert_array_almost_equal(
208
+ expected_i, result_i, decimal=5, err_msg=msg
209
+ )
210
+ except AssertionError:
211
+ if invalid_flag:
212
+ # Allow output to mismatch for invalid input
213
+ print(
214
+ "Output mismatch for invalid input",
215
+ input_tuple,
216
+ result_i,
217
+ expected_i,
218
+ )
219
+ else:
220
+ raise
221
+
222
+ def signed_unsigned_cmp_test(self, comparison_ufunc):
223
+ self.basic_ufunc_test(comparison_ufunc)
224
+
225
+ if numpy_support.numpy_version < (1, 25):
226
+ return
227
+
228
+ # Test additional implementations that specifically handle signed /
229
+ # unsigned comparisons added in NumPy 1.25:
230
+ # https://github.com/numpy/numpy/pull/23713
231
+ additional_inputs = (
232
+ (np.int64(-1), np.uint64(0)),
233
+ (np.int64(-1), np.uint64(1)),
234
+ (np.int64(0), np.uint64(0)),
235
+ (np.int64(0), np.uint64(1)),
236
+ (np.int64(1), np.uint64(0)),
237
+ (np.int64(1), np.uint64(1)),
238
+ (np.uint64(0), np.int64(-1)),
239
+ (np.uint64(0), np.int64(0)),
240
+ (np.uint64(0), np.int64(1)),
241
+ (np.uint64(1), np.int64(-1)),
242
+ (np.uint64(1), np.int64(0)),
243
+ (np.uint64(1), np.int64(1)),
244
+ (
245
+ np.array([-1, -1, 0, 0, 1, 1], dtype=np.int64),
246
+ np.array([0, 1, 0, 1, 0, 1], dtype=np.uint64),
247
+ ),
248
+ (
249
+ np.array([0, 1, 0, 1, 0, 1], dtype=np.uint64),
250
+ np.array([-1, -1, 0, 0, 1, 1], dtype=np.int64),
251
+ ),
252
+ )
253
+
254
+ pyfunc = self._make_ufunc_usecase(comparison_ufunc)
255
+
256
+ for a, b in additional_inputs:
257
+ input_types = (typeof(a), typeof(b))
258
+ output_type = types.Array(types.bool_, 1, "C")
259
+ argtys = input_types + (output_type,)
260
+ cfunc = self._compile(pyfunc, argtys)
261
+
262
+ if isinstance(a, np.ndarray):
263
+ result = np.zeros(a.shape, dtype=np.bool_)
264
+ else:
265
+ result = np.zeros(1, dtype=np.bool_)
266
+
267
+ expected = np.zeros_like(result)
268
+
269
+ pyfunc(a, b, expected)
270
+ cfunc(a, b, result)
271
+ np.testing.assert_equal(expected, result)
11
272
 
12
273
 
13
274
  def _make_ufunc_usecase(ufunc):
@@ -3,7 +3,7 @@
3
3
 
4
4
  from numba.cuda.testing import unittest, CUDATestCase
5
5
  from numba import cuda
6
- from numba.core import config
6
+ from numba.cuda.core import config
7
7
 
8
8
 
9
9
  class MyError(Exception):
@@ -11,7 +11,7 @@ corresponding vector type from `cuda` module in kernel to use them.
11
11
 
12
12
  import numpy as np
13
13
 
14
- from numba.core import config
14
+ from numba.cuda.core import config
15
15
  from numba.cuda.testing import CUDATestCase
16
16
 
17
17
  from numba import cuda
@@ -2,17 +2,123 @@
2
2
  # SPDX-License-Identifier: BSD-2-Clause
3
3
 
4
4
  import numpy as np
5
+ import math
5
6
 
6
- from numba import vectorize, cuda
7
- from numba.tests.npyufunc.test_vectorize_decor import (
8
- BaseVectorizeDecor,
9
- BaseVectorizeNopythonArg,
10
- BaseVectorizeUnrecognizedArg,
11
- )
7
+ from numba import vectorize, cuda, int32, uint32, float32, float64
12
8
  from numba.cuda.testing import skip_on_cudasim, CUDATestCase
9
+ from numba.cuda.tests.support import CheckWarningsMixin
10
+
13
11
  import unittest
14
12
 
15
13
 
14
+ pi = math.pi
15
+
16
+
17
+ def sinc(x):
18
+ if x == 0.0:
19
+ return 1.0
20
+ else:
21
+ return math.sin(x * pi) / (pi * x)
22
+
23
+
24
+ def scaled_sinc(x, scale):
25
+ if x == 0.0:
26
+ return scale
27
+ else:
28
+ return scale * (math.sin(x * pi) / (pi * x))
29
+
30
+
31
+ def vector_add(a, b):
32
+ return a + b
33
+
34
+
35
+ class BaseVectorizeDecor(object):
36
+ target = None
37
+ wrapper = None
38
+ funcs = {
39
+ "func1": sinc,
40
+ "func2": scaled_sinc,
41
+ "func3": vector_add,
42
+ }
43
+
44
+ @classmethod
45
+ def _run_and_compare(cls, func, sig, A, *args, **kwargs):
46
+ if cls.wrapper is not None:
47
+ func = cls.wrapper(func)
48
+ numba_func = vectorize(sig, target=cls.target)(func)
49
+ numpy_func = np.vectorize(func)
50
+ result = numba_func(A, *args)
51
+ gold = numpy_func(A, *args)
52
+ np.testing.assert_allclose(result, gold, **kwargs)
53
+
54
+ def test_1(self):
55
+ sig = ["float64(float64)", "float32(float32)"]
56
+ func = self.funcs["func1"]
57
+ A = np.arange(100, dtype=np.float64)
58
+ self._run_and_compare(func, sig, A)
59
+
60
+ def test_2(self):
61
+ sig = [float64(float64), float32(float32)]
62
+ func = self.funcs["func1"]
63
+ A = np.arange(100, dtype=np.float64)
64
+ self._run_and_compare(func, sig, A)
65
+
66
+ def test_3(self):
67
+ sig = ["float64(float64, uint32)"]
68
+ func = self.funcs["func2"]
69
+ A = np.arange(100, dtype=np.float64)
70
+ scale = np.uint32(3)
71
+ self._run_and_compare(func, sig, A, scale, atol=1e-8)
72
+
73
+ def test_4(self):
74
+ sig = [
75
+ int32(int32, int32),
76
+ uint32(uint32, uint32),
77
+ float32(float32, float32),
78
+ float64(float64, float64),
79
+ ]
80
+ func = self.funcs["func3"]
81
+ A = np.arange(100, dtype=np.float64)
82
+ self._run_and_compare(func, sig, A, A)
83
+ A = A.astype(np.float32)
84
+ self._run_and_compare(func, sig, A, A)
85
+ A = A.astype(np.int32)
86
+ self._run_and_compare(func, sig, A, A)
87
+ A = A.astype(np.uint32)
88
+ self._run_and_compare(func, sig, A, A)
89
+
90
+
91
+ class BaseVectorizeNopythonArg(unittest.TestCase, CheckWarningsMixin):
92
+ """
93
+ Test passing the nopython argument to the vectorize decorator.
94
+ """
95
+
96
+ def _test_target_nopython(self, target, warnings, with_sig=True):
97
+ a = np.array([2.0], dtype=np.float32)
98
+ b = np.array([3.0], dtype=np.float32)
99
+ sig = [float32(float32, float32)]
100
+ args = with_sig and [sig] or []
101
+ with self.check_warnings(warnings):
102
+ f = vectorize(*args, target=target, nopython=True)(vector_add)
103
+ f(a, b)
104
+
105
+
106
+ class BaseVectorizeUnrecognizedArg(unittest.TestCase, CheckWarningsMixin):
107
+ """
108
+ Test passing an unrecognized argument to the vectorize decorator.
109
+ """
110
+
111
+ def _test_target_unrecognized_arg(self, target, with_sig=True):
112
+ a = np.array([2.0], dtype=np.float32)
113
+ b = np.array([3.0], dtype=np.float32)
114
+ sig = [float32(float32, float32)]
115
+ args = with_sig and [sig] or []
116
+ with self.assertRaises(KeyError) as raises:
117
+ f = vectorize(*args, target=target, nonexistent=2)(vector_add)
118
+ f(a, b)
119
+ self.assertIn("Unrecognized options", str(raises.exception))
120
+
121
+
16
122
  @skip_on_cudasim("ufunc API unsupported in the simulator")
17
123
  class TestVectorizeDecor(CUDATestCase, BaseVectorizeDecor):
18
124
  """
@@ -15,7 +15,7 @@ from numba.cuda.tests.support import (
15
15
  run_in_subprocess,
16
16
  )
17
17
  from numba.core.errors import NumbaPerformanceWarning
18
- from numba.core import config
18
+ from numba.cuda.core import config
19
19
  import warnings
20
20
 
21
21
 
@@ -7,7 +7,7 @@ import numpy as np
7
7
  from numba import cuda, int32, int64, float32, float64
8
8
  from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
9
9
  from numba.cuda.compiler import compile_ptx
10
- from numba.core import config
10
+ from numba.cuda.core import config
11
11
 
12
12
 
13
13
  def useful_syncwarp(ary):
@@ -10,13 +10,11 @@ from numba.cuda.testing import (
10
10
  skip_on_cudasim,
11
11
  skip_if_cudadevrt_missing,
12
12
  skip_unless_cc_60,
13
- skip_if_mvc_enabled,
14
13
  )
15
14
 
16
15
 
17
16
  @skip_if_cudadevrt_missing
18
17
  @skip_unless_cc_60
19
- @skip_if_mvc_enabled("CG not supported with MVC")
20
18
  @skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
21
19
  class TestCooperativeGroups(CUDATestCase):
22
20
  def test_ex_grid_sync(self):
@@ -90,7 +90,8 @@ class TestFFI(CUDATestCase):
90
90
 
91
91
  def test_ex_extra_includes(self):
92
92
  import numpy as np
93
- from numba import cuda, config
93
+ from numba import cuda
94
+ from numba.cuda import config
94
95
  import os
95
96
 
96
97
  basedir = os.path.dirname(os.path.abspath(__file__))
@@ -103,7 +104,7 @@ class TestFFI(CUDATestCase):
103
104
  includedir = ":".join([mul_dir, add_dir])
104
105
  with override_config("CUDA_NVRTC_EXTRA_SEARCH_PATHS", includedir):
105
106
  # magictoken.ex_extra_search_paths.begin
106
- from numba import config
107
+ from numba.cuda import config
107
108
 
108
109
  includedir = ":".join([mul_dir, add_dir])
109
110
  config.CUDA_NVRTC_EXTRA_SEARCH_PATHS = includedir
@@ -8,14 +8,12 @@ from numba.cuda.testing import (
8
8
  skip_if_cudadevrt_missing,
9
9
  skip_on_cudasim,
10
10
  skip_unless_cc_60,
11
- skip_if_mvc_enabled,
12
11
  )
13
12
  from numba.cuda.tests.support import captured_stdout
14
13
 
15
14
 
16
15
  @skip_if_cudadevrt_missing
17
16
  @skip_unless_cc_60
18
- @skip_if_mvc_enabled("CG not supported with MVC")
19
17
  @skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
20
18
  class TestLaplace(CUDATestCase):
21
19
  """
@@ -8,14 +8,12 @@ from numba.cuda.testing import (
8
8
  skip_if_cudadevrt_missing,
9
9
  skip_on_cudasim,
10
10
  skip_unless_cc_60,
11
- skip_if_mvc_enabled,
12
11
  )
13
12
  from numba.cuda.tests.support import captured_stdout
14
13
 
15
14
 
16
15
  @skip_if_cudadevrt_missing
17
16
  @skip_unless_cc_60
18
- @skip_if_mvc_enabled("CG not supported with MVC")
19
17
  @skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
20
18
  class TestSessionization(CUDATestCase):
21
19
  """
@@ -26,6 +26,9 @@ class TestImport(unittest.TestCase):
26
26
  "numba.cpython.mathimpl",
27
27
  "numba.cpython.printimpl",
28
28
  "numba.cpython.randomimpl",
29
+ "numba.cuda.cpython.numbers",
30
+ "numba.cuda.cpython.cmathimpl",
31
+ "numba.cuda.cpython.mathimpl",
29
32
  "numba.core.optional",
30
33
  "numba.misc.gdb_hook",
31
34
  "numba.misc.literal",
@@ -37,7 +40,6 @@ class TestImport(unittest.TestCase):
37
40
  "numba.np.npyimpl",
38
41
  "numba.typed.typeddict",
39
42
  "numba.typed.typedlist",
40
- "numba.experimental.jitclass.base",
41
43
  )
42
44
 
43
45
  code = "import sys; from numba import cuda; print(list(sys.modules))"
@@ -6,7 +6,8 @@ import os
6
6
  import multiprocessing as mp
7
7
  import warnings
8
8
 
9
- from numba.core.config import IS_WIN32, IS_OSX
9
+
10
+ from numba.cuda.core.config import IS_WIN32
10
11
  from numba.core.errors import NumbaWarning
11
12
  from numba.cuda.cudadrv import nvvm
12
13
  from numba.cuda.testing import (
@@ -19,6 +20,7 @@ from numba.cuda.cuda_paths import (
19
20
  _get_nvvm_path_decision,
20
21
  _get_cudalib_dir_path_decision,
21
22
  get_system_ctk,
23
+ get_system_ctk_libdir,
22
24
  )
23
25
 
24
26
 
@@ -102,10 +104,12 @@ class TestLibDeviceLookUp(LibraryLookupBase):
102
104
  # Check that CUDA_HOME works by removing conda-env
103
105
  by, info, warns = self.remote_do(self.do_set_cuda_home)
104
106
  self.assertEqual(by, "CUDA_HOME")
105
- self.assertEqual(info, os.path.join("mycudahome", "nvvm", "libdevice"))
107
+ self.assertTrue(
108
+ info.startswith(os.path.join("mycudahome", "nvvm", "libdevice"))
109
+ )
106
110
  self.assertFalse(warns)
107
111
 
108
- if get_system_ctk() is None:
112
+ if get_system_ctk("nvvm", "libdevice") is None:
109
113
  # Fake remove conda environment so no cudatoolkit is available
110
114
  by, info, warns = self.remote_do(self.do_clear_envs)
111
115
  self.assertEqual(by, "<unknown>")
@@ -148,13 +152,16 @@ class TestNvvmLookUp(LibraryLookupBase):
148
152
  self.assertEqual(by, "CUDA_HOME")
149
153
  self.assertFalse(warns)
150
154
  if IS_WIN32:
151
- self.assertEqual(info, os.path.join("mycudahome", "nvvm", "bin"))
152
- elif IS_OSX:
153
- self.assertEqual(info, os.path.join("mycudahome", "nvvm", "lib"))
155
+ self.assertEqual(
156
+ os.path.dirname(info), os.path.join("mycudahome", "nvvm", "bin")
157
+ )
154
158
  else:
155
- self.assertEqual(info, os.path.join("mycudahome", "nvvm", "lib64"))
159
+ self.assertEqual(
160
+ os.path.dirname(info),
161
+ os.path.join("mycudahome", "nvvm", "lib64"),
162
+ )
156
163
 
157
- if get_system_ctk() is None:
164
+ if get_system_ctk("nvvm") is None:
158
165
  # Fake remove conda environment so no cudatoolkit is available
159
166
  by, info, warns = self.remote_do(self.do_clear_envs)
160
167
  self.assertEqual(by, "<unknown>")
@@ -199,12 +206,17 @@ class TestCudaLibLookUp(LibraryLookupBase):
199
206
  self.assertEqual(by, "CUDA_HOME")
200
207
  self.assertFalse(warns)
201
208
  if IS_WIN32:
202
- self.assertEqual(info, os.path.join("mycudahome", "bin"))
203
- elif IS_OSX:
204
- self.assertEqual(info, os.path.join("mycudahome", "lib"))
209
+ # I think only wheels don't have the "Library" directory?
210
+ self.assertTrue(
211
+ info
212
+ in (
213
+ os.path.join("mycudahome", "bin"),
214
+ os.path.join("mycudahome", "Library", "bin"),
215
+ )
216
+ )
205
217
  else:
206
218
  self.assertEqual(info, os.path.join("mycudahome", "lib64"))
207
- if get_system_ctk() is None:
219
+ if get_system_ctk_libdir() is None:
208
220
  # Fake remove conda environment so no cudatoolkit is available
209
221
  by, info, warns = self.remote_do(self.do_clear_envs)
210
222
  self.assertEqual(by, "<unknown>")
@@ -10,9 +10,10 @@ from numba.cuda.testing import CUDATestCase, skip_on_cudasim
10
10
  from numba.cuda.tests.support import run_in_subprocess, override_config
11
11
  from numba.cuda import get_current_device
12
12
  from numba.cuda.cudadrv.nvrtc import compile
13
- from numba import config, types
13
+ from numba import types
14
14
  from numba.core.typing import signature
15
15
  from numba import cuda
16
+ from numba.cuda import config
16
17
  from numba.cuda.typing.templates import AbstractTemplate
17
18
  from numba.cuda.cudadrv.linkable_code import (
18
19
  CUSource,