numba-cuda 0.8.1__py3-none-any.whl → 0.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.py +17 -13
- numba_cuda/VERSION +1 -1
- numba_cuda/_version.py +4 -1
- numba_cuda/numba/cuda/__init__.py +6 -2
- numba_cuda/numba/cuda/api.py +129 -86
- numba_cuda/numba/cuda/api_util.py +3 -3
- numba_cuda/numba/cuda/args.py +12 -16
- numba_cuda/numba/cuda/cg.py +6 -6
- numba_cuda/numba/cuda/codegen.py +74 -43
- numba_cuda/numba/cuda/compiler.py +246 -114
- numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
- numba_cuda/numba/cuda/cuda_bf16.py +5155 -0
- numba_cuda/numba/cuda/cuda_paths.py +293 -99
- numba_cuda/numba/cuda/cudadecl.py +93 -79
- numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
- numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
- numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
- numba_cuda/numba/cuda/cudadrv/driver.py +460 -297
- numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
- numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
- numba_cuda/numba/cuda/cudadrv/error.py +6 -2
- numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +27 -3
- numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +146 -30
- numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
- numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
- numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
- numba_cuda/numba/cuda/cudaimpl.py +296 -275
- numba_cuda/numba/cuda/cudamath.py +1 -1
- numba_cuda/numba/cuda/debuginfo.py +99 -7
- numba_cuda/numba/cuda/decorators.py +87 -45
- numba_cuda/numba/cuda/descriptor.py +1 -1
- numba_cuda/numba/cuda/device_init.py +68 -18
- numba_cuda/numba/cuda/deviceufunc.py +143 -98
- numba_cuda/numba/cuda/dispatcher.py +300 -213
- numba_cuda/numba/cuda/errors.py +13 -10
- numba_cuda/numba/cuda/extending.py +55 -1
- numba_cuda/numba/cuda/include/11/cuda_bf16.h +3749 -0
- numba_cuda/numba/cuda/include/11/cuda_bf16.hpp +2683 -0
- numba_cuda/numba/cuda/{cuda_fp16.h → include/11/cuda_fp16.h} +1090 -927
- numba_cuda/numba/cuda/{cuda_fp16.hpp → include/11/cuda_fp16.hpp} +468 -319
- numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/initialize.py +5 -3
- numba_cuda/numba/cuda/intrinsic_wrapper.py +0 -39
- numba_cuda/numba/cuda/intrinsics.py +203 -28
- numba_cuda/numba/cuda/kernels/reduction.py +13 -13
- numba_cuda/numba/cuda/kernels/transpose.py +3 -6
- numba_cuda/numba/cuda/libdevice.py +317 -317
- numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
- numba_cuda/numba/cuda/locks.py +16 -0
- numba_cuda/numba/cuda/lowering.py +43 -0
- numba_cuda/numba/cuda/mathimpl.py +62 -57
- numba_cuda/numba/cuda/models.py +1 -5
- numba_cuda/numba/cuda/nvvmutils.py +103 -88
- numba_cuda/numba/cuda/printimpl.py +9 -5
- numba_cuda/numba/cuda/random.py +46 -36
- numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
- numba_cuda/numba/cuda/runtime/__init__.py +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
- numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
- numba_cuda/numba/cuda/runtime/nrt.py +48 -43
- numba_cuda/numba/cuda/simulator/__init__.py +22 -12
- numba_cuda/numba/cuda/simulator/api.py +38 -22
- numba_cuda/numba/cuda/simulator/compiler.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
- numba_cuda/numba/cuda/simulator/kernel.py +43 -34
- numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
- numba_cuda/numba/cuda/simulator/reduction.py +1 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
- numba_cuda/numba/cuda/simulator_init.py +2 -4
- numba_cuda/numba/cuda/stubs.py +134 -108
- numba_cuda/numba/cuda/target.py +92 -47
- numba_cuda/numba/cuda/testing.py +24 -19
- numba_cuda/numba/cuda/tests/__init__.py +14 -12
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +10 -7
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +257 -0
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +59 -23
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +77 -28
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +24 -7
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +21 -12
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +59 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +81 -30
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
- numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
- numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
- numba_cuda/numba/cuda/types.py +5 -2
- numba_cuda/numba/cuda/ufuncs.py +382 -362
- numba_cuda/numba/cuda/utils.py +2 -2
- numba_cuda/numba/cuda/vector_types.py +5 -3
- numba_cuda/numba/cuda/vectorizers.py +38 -33
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/METADATA +1 -1
- numba_cuda-0.10.0.dist-info/RECORD +263 -0
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/WHEEL +1 -1
- numba_cuda-0.8.1.dist-info/RECORD +0 -251
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/top_level.txt +0 -0
@@ -1,14 +1,18 @@
|
|
1
1
|
import unittest
|
2
2
|
|
3
|
-
from numba.cuda.testing import (
|
4
|
-
|
5
|
-
|
3
|
+
from numba.cuda.testing import (
|
4
|
+
CUDATestCase,
|
5
|
+
skip_if_cudadevrt_missing,
|
6
|
+
skip_on_cudasim,
|
7
|
+
skip_unless_cc_60,
|
8
|
+
skip_if_mvc_enabled,
|
9
|
+
)
|
6
10
|
from numba.tests.support import captured_stdout
|
7
11
|
|
8
12
|
|
9
13
|
@skip_if_cudadevrt_missing
|
10
14
|
@skip_unless_cc_60
|
11
|
-
@skip_if_mvc_enabled(
|
15
|
+
@skip_if_mvc_enabled("CG not supported with MVC")
|
12
16
|
@skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
|
13
17
|
class TestSessionization(CUDATestCase):
|
14
18
|
"""
|
@@ -40,26 +44,71 @@ class TestSessionization(CUDATestCase):
|
|
40
44
|
ids = cuda.to_device(
|
41
45
|
np.array(
|
42
46
|
[
|
43
|
-
1,
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
+
1,
|
48
|
+
1,
|
49
|
+
1,
|
50
|
+
1,
|
51
|
+
1,
|
52
|
+
1,
|
53
|
+
2,
|
54
|
+
2,
|
55
|
+
2,
|
56
|
+
3,
|
57
|
+
3,
|
58
|
+
3,
|
59
|
+
3,
|
60
|
+
3,
|
61
|
+
3,
|
62
|
+
3,
|
63
|
+
3,
|
64
|
+
3,
|
65
|
+
3,
|
66
|
+
4,
|
67
|
+
4,
|
68
|
+
4,
|
69
|
+
4,
|
70
|
+
4,
|
71
|
+
4,
|
72
|
+
4,
|
73
|
+
4,
|
74
|
+
4,
|
47
75
|
]
|
48
76
|
)
|
49
77
|
)
|
50
78
|
sec = cuda.to_device(
|
51
79
|
np.array(
|
52
80
|
[
|
53
|
-
1,
|
54
|
-
2,
|
55
|
-
|
56
|
-
|
57
|
-
|
81
|
+
1,
|
82
|
+
2,
|
83
|
+
3,
|
84
|
+
5000,
|
85
|
+
5001,
|
86
|
+
5002,
|
87
|
+
1,
|
88
|
+
2,
|
89
|
+
3,
|
90
|
+
1,
|
91
|
+
2,
|
92
|
+
5000,
|
93
|
+
5001,
|
94
|
+
10000,
|
95
|
+
10001,
|
96
|
+
10002,
|
97
|
+
10003,
|
98
|
+
15000,
|
99
|
+
150001,
|
100
|
+
1,
|
101
|
+
5000,
|
102
|
+
50001,
|
103
|
+
15000,
|
104
|
+
20000,
|
105
|
+
25000,
|
106
|
+
25001,
|
107
|
+
25002,
|
108
|
+
25003,
|
58
109
|
],
|
59
110
|
dtype="datetime64[ns]",
|
60
|
-
).astype(
|
61
|
-
"int64"
|
62
|
-
) # Cast to int64 for compatibility
|
111
|
+
).astype("int64") # Cast to int64 for compatibility
|
63
112
|
)
|
64
113
|
# Create a vector to hold the results
|
65
114
|
results = cuda.to_device(np.zeros(len(ids)))
|
@@ -105,6 +154,7 @@ class TestSessionization(CUDATestCase):
|
|
105
154
|
if gid + look_ahead == size - 1:
|
106
155
|
results[gid + look_ahead] = gid
|
107
156
|
break
|
157
|
+
|
108
158
|
# ex_sessionize.kernel.end
|
109
159
|
|
110
160
|
# ex_sessionize.launch.begin
|
@@ -119,9 +169,34 @@ class TestSessionization(CUDATestCase):
|
|
119
169
|
# ex_sessionize.launch.end
|
120
170
|
|
121
171
|
expect = [
|
122
|
-
0,
|
123
|
-
|
124
|
-
|
172
|
+
0,
|
173
|
+
0,
|
174
|
+
0,
|
175
|
+
3,
|
176
|
+
3,
|
177
|
+
3,
|
178
|
+
6,
|
179
|
+
6,
|
180
|
+
6,
|
181
|
+
9,
|
182
|
+
9,
|
183
|
+
11,
|
184
|
+
11,
|
185
|
+
13,
|
186
|
+
13,
|
187
|
+
13,
|
188
|
+
13,
|
189
|
+
17,
|
190
|
+
18,
|
191
|
+
19,
|
192
|
+
20,
|
193
|
+
21,
|
194
|
+
21,
|
195
|
+
23,
|
196
|
+
24,
|
197
|
+
24,
|
198
|
+
24,
|
199
|
+
24,
|
125
200
|
]
|
126
201
|
np.testing.assert_equal(expect, results.copy_to_host())
|
127
202
|
|
@@ -37,6 +37,7 @@ class TestVecAdd(CUDATestCase):
|
|
37
37
|
|
38
38
|
if tid < size:
|
39
39
|
c[tid] = a[tid] + b[tid]
|
40
|
+
|
40
41
|
# ex_vecadd.kernel.end
|
41
42
|
|
42
43
|
# Seed RNG for test repeatability
|
@@ -64,8 +65,7 @@ class TestVecAdd(CUDATestCase):
|
|
64
65
|
# ex_vecadd.launch.end
|
65
66
|
|
66
67
|
np.testing.assert_equal(
|
67
|
-
c.copy_to_host(),
|
68
|
-
a.copy_to_host() + b.copy_to_host()
|
68
|
+
c.copy_to_host(), a.copy_to_host() + b.copy_to_host()
|
69
69
|
)
|
70
70
|
|
71
71
|
|
@@ -7,9 +7,8 @@ from numba.cuda.testing import skip_on_cudasim
|
|
7
7
|
|
8
8
|
@skip_on_cudasim("Tests internals of the CUDA driver device array")
|
9
9
|
class TestSlicing(unittest.TestCase):
|
10
|
-
|
11
10
|
def assertSameContig(self, arr, nparr):
|
12
|
-
attrs =
|
11
|
+
attrs = "C_CONTIGUOUS", "F_CONTIGUOUS"
|
13
12
|
for attr in attrs:
|
14
13
|
if arr.flags[attr] != nparr.flags[attr]:
|
15
14
|
if arr.size == 0 and nparr.size == 0:
|
@@ -17,15 +16,18 @@ class TestSlicing(unittest.TestCase):
|
|
17
16
|
# some are not
|
18
17
|
pass
|
19
18
|
else:
|
20
|
-
self.fail(
|
21
|
-
|
19
|
+
self.fail(
|
20
|
+
"contiguous flag mismatch:\ngot=%s\nexpect=%s"
|
21
|
+
% (arr.flags, nparr.flags)
|
22
|
+
)
|
22
23
|
|
23
24
|
#### 1D
|
24
25
|
|
25
26
|
def test_slice0_1d(self):
|
26
27
|
nparr = np.empty(4)
|
27
|
-
arr = Array.from_desc(
|
28
|
-
|
28
|
+
arr = Array.from_desc(
|
29
|
+
0, nparr.shape, nparr.strides, nparr.dtype.itemsize
|
30
|
+
)
|
29
31
|
self.assertSameContig(arr, nparr)
|
30
32
|
xx = -2, -1, 0, 1, 2
|
31
33
|
for x in xx:
|
@@ -37,8 +39,9 @@ class TestSlicing(unittest.TestCase):
|
|
37
39
|
|
38
40
|
def test_slice1_1d(self):
|
39
41
|
nparr = np.empty(4)
|
40
|
-
arr = Array.from_desc(
|
41
|
-
|
42
|
+
arr = Array.from_desc(
|
43
|
+
0, nparr.shape, nparr.strides, nparr.dtype.itemsize
|
44
|
+
)
|
42
45
|
xx = -2, -1, 0, 1, 2
|
43
46
|
for x in xx:
|
44
47
|
expect = nparr[:x]
|
@@ -49,8 +52,9 @@ class TestSlicing(unittest.TestCase):
|
|
49
52
|
|
50
53
|
def test_slice2_1d(self):
|
51
54
|
nparr = np.empty(4)
|
52
|
-
arr = Array.from_desc(
|
53
|
-
|
55
|
+
arr = Array.from_desc(
|
56
|
+
0, nparr.shape, nparr.strides, nparr.dtype.itemsize
|
57
|
+
)
|
54
58
|
xx = -2, -1, 0, 1, 2
|
55
59
|
for x, y in itertools.product(xx, xx):
|
56
60
|
expect = nparr[x:y]
|
@@ -63,8 +67,9 @@ class TestSlicing(unittest.TestCase):
|
|
63
67
|
|
64
68
|
def test_slice0_2d(self):
|
65
69
|
nparr = np.empty((4, 5))
|
66
|
-
arr = Array.from_desc(
|
67
|
-
|
70
|
+
arr = Array.from_desc(
|
71
|
+
0, nparr.shape, nparr.strides, nparr.dtype.itemsize
|
72
|
+
)
|
68
73
|
xx = -2, 0, 1, 2
|
69
74
|
for x in xx:
|
70
75
|
expect = nparr[x:]
|
@@ -82,8 +87,9 @@ class TestSlicing(unittest.TestCase):
|
|
82
87
|
|
83
88
|
def test_slice1_2d(self):
|
84
89
|
nparr = np.empty((4, 5))
|
85
|
-
arr = Array.from_desc(
|
86
|
-
|
90
|
+
arr = Array.from_desc(
|
91
|
+
0, nparr.shape, nparr.strides, nparr.dtype.itemsize
|
92
|
+
)
|
87
93
|
xx = -2, 0, 2
|
88
94
|
for x in xx:
|
89
95
|
expect = nparr[:x]
|
@@ -101,8 +107,9 @@ class TestSlicing(unittest.TestCase):
|
|
101
107
|
|
102
108
|
def test_slice2_2d(self):
|
103
109
|
nparr = np.empty((4, 5))
|
104
|
-
arr = Array.from_desc(
|
105
|
-
|
110
|
+
arr = Array.from_desc(
|
111
|
+
0, nparr.shape, nparr.strides, nparr.dtype.itemsize
|
112
|
+
)
|
106
113
|
xx = -2, 0, 2
|
107
114
|
for s, t, u, v in itertools.product(xx, xx, xx, xx):
|
108
115
|
expect = nparr[s:t, u:v]
|
@@ -122,8 +129,9 @@ class TestSlicing(unittest.TestCase):
|
|
122
129
|
|
123
130
|
def test_strided_1d(self):
|
124
131
|
nparr = np.empty(4)
|
125
|
-
arr = Array.from_desc(
|
126
|
-
|
132
|
+
arr = Array.from_desc(
|
133
|
+
0, nparr.shape, nparr.strides, nparr.dtype.itemsize
|
134
|
+
)
|
127
135
|
xx = -2, -1, 1, 2
|
128
136
|
for x in xx:
|
129
137
|
expect = nparr[::x]
|
@@ -134,8 +142,9 @@ class TestSlicing(unittest.TestCase):
|
|
134
142
|
|
135
143
|
def test_strided_2d(self):
|
136
144
|
nparr = np.empty((4, 5))
|
137
|
-
arr = Array.from_desc(
|
138
|
-
|
145
|
+
arr = Array.from_desc(
|
146
|
+
0, nparr.shape, nparr.strides, nparr.dtype.itemsize
|
147
|
+
)
|
139
148
|
xx = -2, -1, 1, 2
|
140
149
|
for a, b in itertools.product(xx, xx):
|
141
150
|
expect = nparr[::a, ::b]
|
@@ -146,8 +155,9 @@ class TestSlicing(unittest.TestCase):
|
|
146
155
|
|
147
156
|
def test_strided_3d(self):
|
148
157
|
nparr = np.empty((4, 5, 6))
|
149
|
-
arr = Array.from_desc(
|
150
|
-
|
158
|
+
arr = Array.from_desc(
|
159
|
+
0, nparr.shape, nparr.strides, nparr.dtype.itemsize
|
160
|
+
)
|
151
161
|
xx = -2, -1, 1, 2
|
152
162
|
for a, b, c in itertools.product(xx, xx, xx):
|
153
163
|
expect = nparr[::a, ::b, ::c]
|
@@ -160,16 +170,17 @@ class TestSlicing(unittest.TestCase):
|
|
160
170
|
z = np.empty((1, 2, 3))
|
161
171
|
z = np.transpose(z, axes=(2, 0, 1))
|
162
172
|
arr = Array.from_desc(0, z.shape, z.strides, z.itemsize)
|
163
|
-
self.assertEqual(z.flags[
|
164
|
-
self.assertEqual(z.flags[
|
173
|
+
self.assertEqual(z.flags["C_CONTIGUOUS"], arr.flags["C_CONTIGUOUS"])
|
174
|
+
self.assertEqual(z.flags["F_CONTIGUOUS"], arr.flags["F_CONTIGUOUS"])
|
165
175
|
|
166
176
|
|
167
177
|
@skip_on_cudasim("Tests internals of the CUDA driver device array")
|
168
178
|
class TestReshape(unittest.TestCase):
|
169
179
|
def test_reshape_2d2d(self):
|
170
180
|
nparr = np.empty((4, 5))
|
171
|
-
arr = Array.from_desc(
|
172
|
-
|
181
|
+
arr = Array.from_desc(
|
182
|
+
0, nparr.shape, nparr.strides, nparr.dtype.itemsize
|
183
|
+
)
|
173
184
|
expect = nparr.reshape(5, 4)
|
174
185
|
got = arr.reshape(5, 4)[0]
|
175
186
|
self.assertEqual(got.shape, expect.shape)
|
@@ -177,8 +188,9 @@ class TestReshape(unittest.TestCase):
|
|
177
188
|
|
178
189
|
def test_reshape_2d1d(self):
|
179
190
|
nparr = np.empty((4, 5))
|
180
|
-
arr = Array.from_desc(
|
181
|
-
|
191
|
+
arr = Array.from_desc(
|
192
|
+
0, nparr.shape, nparr.strides, nparr.dtype.itemsize
|
193
|
+
)
|
182
194
|
expect = nparr.reshape(5 * 4)
|
183
195
|
got = arr.reshape(5 * 4)[0]
|
184
196
|
self.assertEqual(got.shape, expect.shape)
|
@@ -186,8 +198,9 @@ class TestReshape(unittest.TestCase):
|
|
186
198
|
|
187
199
|
def test_reshape_3d3d(self):
|
188
200
|
nparr = np.empty((3, 4, 5))
|
189
|
-
arr = Array.from_desc(
|
190
|
-
|
201
|
+
arr = Array.from_desc(
|
202
|
+
0, nparr.shape, nparr.strides, nparr.dtype.itemsize
|
203
|
+
)
|
191
204
|
expect = nparr.reshape(5, 3, 4)
|
192
205
|
got = arr.reshape(5, 3, 4)[0]
|
193
206
|
self.assertEqual(got.shape, expect.shape)
|
@@ -195,8 +208,9 @@ class TestReshape(unittest.TestCase):
|
|
195
208
|
|
196
209
|
def test_reshape_3d2d(self):
|
197
210
|
nparr = np.empty((3, 4, 5))
|
198
|
-
arr = Array.from_desc(
|
199
|
-
|
211
|
+
arr = Array.from_desc(
|
212
|
+
0, nparr.shape, nparr.strides, nparr.dtype.itemsize
|
213
|
+
)
|
200
214
|
expect = nparr.reshape(3 * 4, 5)
|
201
215
|
got = arr.reshape(3 * 4, 5)[0]
|
202
216
|
self.assertEqual(got.shape, expect.shape)
|
@@ -204,8 +218,9 @@ class TestReshape(unittest.TestCase):
|
|
204
218
|
|
205
219
|
def test_reshape_3d1d(self):
|
206
220
|
nparr = np.empty((3, 4, 5))
|
207
|
-
arr = Array.from_desc(
|
208
|
-
|
221
|
+
arr = Array.from_desc(
|
222
|
+
0, nparr.shape, nparr.strides, nparr.dtype.itemsize
|
223
|
+
)
|
209
224
|
expect = nparr.reshape(3 * 4 * 5)
|
210
225
|
got = arr.reshape(3 * 4 * 5)[0]
|
211
226
|
self.assertEqual(got.shape, expect.shape)
|
@@ -213,8 +228,9 @@ class TestReshape(unittest.TestCase):
|
|
213
228
|
|
214
229
|
def test_reshape_infer2d2d(self):
|
215
230
|
nparr = np.empty((4, 5))
|
216
|
-
arr = Array.from_desc(
|
217
|
-
|
231
|
+
arr = Array.from_desc(
|
232
|
+
0, nparr.shape, nparr.strides, nparr.dtype.itemsize
|
233
|
+
)
|
218
234
|
expect = nparr.reshape(-1, 4)
|
219
235
|
got = arr.reshape(-1, 4)[0]
|
220
236
|
self.assertEqual(got.shape, expect.shape)
|
@@ -222,8 +238,9 @@ class TestReshape(unittest.TestCase):
|
|
222
238
|
|
223
239
|
def test_reshape_infer2d1d(self):
|
224
240
|
nparr = np.empty((4, 5))
|
225
|
-
arr = Array.from_desc(
|
226
|
-
|
241
|
+
arr = Array.from_desc(
|
242
|
+
0, nparr.shape, nparr.strides, nparr.dtype.itemsize
|
243
|
+
)
|
227
244
|
expect = nparr.reshape(-1)
|
228
245
|
got = arr.reshape(-1)[0]
|
229
246
|
self.assertEqual(got.shape, expect.shape)
|
@@ -231,8 +248,9 @@ class TestReshape(unittest.TestCase):
|
|
231
248
|
|
232
249
|
def test_reshape_infer3d3d(self):
|
233
250
|
nparr = np.empty((3, 4, 5))
|
234
|
-
arr = Array.from_desc(
|
235
|
-
|
251
|
+
arr = Array.from_desc(
|
252
|
+
0, nparr.shape, nparr.strides, nparr.dtype.itemsize
|
253
|
+
)
|
236
254
|
expect = nparr.reshape(5, -1, 4)
|
237
255
|
got = arr.reshape(5, -1, 4)[0]
|
238
256
|
self.assertEqual(got.shape, expect.shape)
|
@@ -240,8 +258,9 @@ class TestReshape(unittest.TestCase):
|
|
240
258
|
|
241
259
|
def test_reshape_infer3d2d(self):
|
242
260
|
nparr = np.empty((3, 4, 5))
|
243
|
-
arr = Array.from_desc(
|
244
|
-
|
261
|
+
arr = Array.from_desc(
|
262
|
+
0, nparr.shape, nparr.strides, nparr.dtype.itemsize
|
263
|
+
)
|
245
264
|
expect = nparr.reshape(3, -1)
|
246
265
|
got = arr.reshape(3, -1)[0]
|
247
266
|
self.assertEqual(got.shape, expect.shape)
|
@@ -249,8 +268,9 @@ class TestReshape(unittest.TestCase):
|
|
249
268
|
|
250
269
|
def test_reshape_infer3d1d(self):
|
251
270
|
nparr = np.empty((3, 4, 5))
|
252
|
-
arr = Array.from_desc(
|
253
|
-
|
271
|
+
arr = Array.from_desc(
|
272
|
+
0, nparr.shape, nparr.strides, nparr.dtype.itemsize
|
273
|
+
)
|
254
274
|
expect = nparr.reshape(-1)
|
255
275
|
got = arr.reshape(-1)[0]
|
256
276
|
self.assertEqual(got.shape, expect.shape)
|
@@ -258,23 +278,26 @@ class TestReshape(unittest.TestCase):
|
|
258
278
|
|
259
279
|
def test_reshape_infer_two_unknowns(self):
|
260
280
|
nparr = np.empty((3, 4, 5))
|
261
|
-
arr = Array.from_desc(
|
262
|
-
|
281
|
+
arr = Array.from_desc(
|
282
|
+
0, nparr.shape, nparr.strides, nparr.dtype.itemsize
|
283
|
+
)
|
263
284
|
|
264
285
|
with self.assertRaises(ValueError) as raises:
|
265
286
|
arr.reshape(-1, -1, 3)
|
266
|
-
self.assertIn(
|
267
|
-
|
287
|
+
self.assertIn(
|
288
|
+
"can only specify one unknown dimension", str(raises.exception)
|
289
|
+
)
|
268
290
|
|
269
291
|
def test_reshape_infer_invalid_shape(self):
|
270
292
|
nparr = np.empty((3, 4, 5))
|
271
|
-
arr = Array.from_desc(
|
272
|
-
|
293
|
+
arr = Array.from_desc(
|
294
|
+
0, nparr.shape, nparr.strides, nparr.dtype.itemsize
|
295
|
+
)
|
273
296
|
|
274
297
|
with self.assertRaises(ValueError) as raises:
|
275
298
|
arr.reshape(-1, 7)
|
276
299
|
|
277
|
-
expected_message =
|
300
|
+
expected_message = "cannot infer valid shape for unknown dimension"
|
278
301
|
self.assertIn(expected_message, str(raises.exception))
|
279
302
|
|
280
303
|
|
@@ -289,6 +312,7 @@ class TestSqueeze(unittest.TestCase):
|
|
289
312
|
def _assert_equal_shape_strides(arr1, arr2):
|
290
313
|
self.assertEqual(arr1.shape, arr2.shape)
|
291
314
|
self.assertEqual(arr1.strides, arr2.strides)
|
315
|
+
|
292
316
|
_assert_equal_shape_strides(arr, nparr)
|
293
317
|
_assert_equal_shape_strides(arr.squeeze()[0], nparr.squeeze())
|
294
318
|
for axis in (0, 2, 4, (0, 2), (0, 4), (2, 4), (0, 2, 4)):
|
@@ -311,29 +335,33 @@ class TestSqueeze(unittest.TestCase):
|
|
311
335
|
class TestExtent(unittest.TestCase):
|
312
336
|
def test_extent_1d(self):
|
313
337
|
nparr = np.empty(4)
|
314
|
-
arr = Array.from_desc(
|
315
|
-
|
338
|
+
arr = Array.from_desc(
|
339
|
+
0, nparr.shape, nparr.strides, nparr.dtype.itemsize
|
340
|
+
)
|
316
341
|
s, e = arr.extent
|
317
342
|
self.assertEqual(e - s, nparr.size * nparr.dtype.itemsize)
|
318
343
|
|
319
344
|
def test_extent_2d(self):
|
320
345
|
nparr = np.empty((4, 5))
|
321
|
-
arr = Array.from_desc(
|
322
|
-
|
346
|
+
arr = Array.from_desc(
|
347
|
+
0, nparr.shape, nparr.strides, nparr.dtype.itemsize
|
348
|
+
)
|
323
349
|
s, e = arr.extent
|
324
350
|
self.assertEqual(e - s, nparr.size * nparr.dtype.itemsize)
|
325
351
|
|
326
352
|
def test_extent_iter_1d(self):
|
327
353
|
nparr = np.empty(4)
|
328
|
-
arr = Array.from_desc(
|
329
|
-
|
354
|
+
arr = Array.from_desc(
|
355
|
+
0, nparr.shape, nparr.strides, nparr.dtype.itemsize
|
356
|
+
)
|
330
357
|
[ext] = list(arr.iter_contiguous_extent())
|
331
358
|
self.assertEqual(ext, arr.extent)
|
332
359
|
|
333
360
|
def test_extent_iter_2d(self):
|
334
361
|
nparr = np.empty((4, 5))
|
335
|
-
arr = Array.from_desc(
|
336
|
-
|
362
|
+
arr = Array.from_desc(
|
363
|
+
0, nparr.shape, nparr.strides, nparr.dtype.itemsize
|
364
|
+
)
|
337
365
|
[ext] = list(arr.iter_contiguous_extent())
|
338
366
|
self.assertEqual(ext, arr.extent)
|
339
367
|
|
@@ -346,8 +374,9 @@ class TestIterate(unittest.TestCase):
|
|
346
374
|
# for #4201
|
347
375
|
N = 5
|
348
376
|
nparr = np.empty(N)
|
349
|
-
arr = Array.from_desc(
|
350
|
-
|
377
|
+
arr = Array.from_desc(
|
378
|
+
0, nparr.shape, nparr.strides, nparr.dtype.itemsize
|
379
|
+
)
|
351
380
|
|
352
381
|
x = 0 # just a placeholder
|
353
382
|
# this loop should not raise AssertionError
|
@@ -355,5 +384,5 @@ class TestIterate(unittest.TestCase):
|
|
355
384
|
x = val # noqa: F841
|
356
385
|
|
357
386
|
|
358
|
-
if __name__ ==
|
387
|
+
if __name__ == "__main__":
|
359
388
|
unittest.main()
|
@@ -9,19 +9,28 @@ from numba.cuda.cudadrv import nvvm
|
|
9
9
|
class TestFunctionResolution(unittest.TestCase):
|
10
10
|
def test_fp16_binary_operators(self):
|
11
11
|
from numba.cuda.descriptor import cuda_target
|
12
|
-
|
13
|
-
|
12
|
+
|
13
|
+
ops = (
|
14
|
+
operator.add,
|
15
|
+
operator.iadd,
|
16
|
+
operator.sub,
|
17
|
+
operator.isub,
|
18
|
+
operator.mul,
|
19
|
+
operator.imul,
|
20
|
+
)
|
14
21
|
for op in ops:
|
15
22
|
fp16 = types.float16
|
16
23
|
typingctx = cuda_target.typing_context
|
17
24
|
typingctx.refresh()
|
18
25
|
fnty = typingctx.resolve_value_type(op)
|
19
26
|
out = typingctx.resolve_function_type(fnty, (fp16, fp16), {})
|
20
|
-
self.assertEqual(
|
21
|
-
|
27
|
+
self.assertEqual(
|
28
|
+
out, typing.signature(fp16, fp16, fp16), msg=str(out)
|
29
|
+
)
|
22
30
|
|
23
31
|
def test_fp16_unary_operators(self):
|
24
32
|
from numba.cuda.descriptor import cuda_target
|
33
|
+
|
25
34
|
ops = (operator.neg, abs)
|
26
35
|
for op in ops:
|
27
36
|
fp16 = types.float16
|
@@ -32,5 +41,5 @@ class TestFunctionResolution(unittest.TestCase):
|
|
32
41
|
self.assertEqual(out, typing.signature(fp16, fp16), msg=str(out))
|
33
42
|
|
34
43
|
|
35
|
-
if __name__ ==
|
44
|
+
if __name__ == "__main__":
|
36
45
|
unittest.main()
|
@@ -11,30 +11,30 @@ class TestImport(unittest.TestCase):
|
|
11
11
|
"""
|
12
12
|
|
13
13
|
banlist = (
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
14
|
+
"numba.cpython.slicing",
|
15
|
+
"numba.cpython.tupleobj",
|
16
|
+
"numba.cpython.enumimpl",
|
17
|
+
"numba.cpython.hashing",
|
18
|
+
"numba.cpython.heapq",
|
19
|
+
"numba.cpython.iterators",
|
20
|
+
"numba.cpython.numbers",
|
21
|
+
"numba.cpython.rangeobj",
|
22
|
+
"numba.cpython.cmathimpl",
|
23
|
+
"numba.cpython.mathimpl",
|
24
|
+
"numba.cpython.printimpl",
|
25
|
+
"numba.cpython.randomimpl",
|
26
|
+
"numba.core.optional",
|
27
|
+
"numba.misc.gdb_hook",
|
28
|
+
"numba.misc.literal",
|
29
|
+
"numba.misc.cffiimpl",
|
30
|
+
"numba.np.linalg",
|
31
|
+
"numba.np.polynomial",
|
32
|
+
"numba.np.arraymath",
|
33
|
+
"numba.np.npdatetime",
|
34
|
+
"numba.np.npyimpl",
|
35
|
+
"numba.typed.typeddict",
|
36
|
+
"numba.typed.typedlist",
|
37
|
+
"numba.experimental.jitclass.base",
|
38
38
|
)
|
39
39
|
|
40
40
|
code = "import sys; from numba import cuda; print(list(sys.modules))"
|
@@ -45,5 +45,5 @@ class TestImport(unittest.TestCase):
|
|
45
45
|
self.assertFalse(unexpected, "some modules unexpectedly imported")
|
46
46
|
|
47
47
|
|
48
|
-
if __name__ ==
|
48
|
+
if __name__ == "__main__":
|
49
49
|
unittest.main()
|