numba-cuda 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.py +17 -13
- numba_cuda/VERSION +1 -1
- numba_cuda/_version.py +4 -1
- numba_cuda/numba/cuda/__init__.py +6 -2
- numba_cuda/numba/cuda/api.py +129 -86
- numba_cuda/numba/cuda/api_util.py +3 -3
- numba_cuda/numba/cuda/args.py +12 -16
- numba_cuda/numba/cuda/cg.py +6 -6
- numba_cuda/numba/cuda/codegen.py +74 -43
- numba_cuda/numba/cuda/compiler.py +232 -113
- numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
- numba_cuda/numba/cuda/cuda_fp16.h +661 -661
- numba_cuda/numba/cuda/cuda_fp16.hpp +3 -3
- numba_cuda/numba/cuda/cuda_paths.py +291 -99
- numba_cuda/numba/cuda/cudadecl.py +125 -69
- numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
- numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
- numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
- numba_cuda/numba/cuda/cudadrv/driver.py +463 -297
- numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
- numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
- numba_cuda/numba/cuda/cudadrv/error.py +6 -2
- numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +16 -1
- numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +138 -29
- numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
- numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
- numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
- numba_cuda/numba/cuda/cudaimpl.py +317 -233
- numba_cuda/numba/cuda/cudamath.py +1 -1
- numba_cuda/numba/cuda/debuginfo.py +8 -6
- numba_cuda/numba/cuda/decorators.py +75 -45
- numba_cuda/numba/cuda/descriptor.py +1 -1
- numba_cuda/numba/cuda/device_init.py +69 -18
- numba_cuda/numba/cuda/deviceufunc.py +143 -98
- numba_cuda/numba/cuda/dispatcher.py +300 -213
- numba_cuda/numba/cuda/errors.py +13 -10
- numba_cuda/numba/cuda/extending.py +1 -1
- numba_cuda/numba/cuda/initialize.py +5 -3
- numba_cuda/numba/cuda/intrinsic_wrapper.py +3 -3
- numba_cuda/numba/cuda/intrinsics.py +31 -27
- numba_cuda/numba/cuda/kernels/reduction.py +13 -13
- numba_cuda/numba/cuda/kernels/transpose.py +3 -6
- numba_cuda/numba/cuda/libdevice.py +317 -317
- numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
- numba_cuda/numba/cuda/locks.py +16 -0
- numba_cuda/numba/cuda/mathimpl.py +62 -57
- numba_cuda/numba/cuda/models.py +1 -5
- numba_cuda/numba/cuda/nvvmutils.py +103 -88
- numba_cuda/numba/cuda/printimpl.py +9 -5
- numba_cuda/numba/cuda/random.py +46 -36
- numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
- numba_cuda/numba/cuda/runtime/__init__.py +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
- numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
- numba_cuda/numba/cuda/runtime/nrt.py +48 -43
- numba_cuda/numba/cuda/simulator/__init__.py +22 -12
- numba_cuda/numba/cuda/simulator/api.py +38 -22
- numba_cuda/numba/cuda/simulator/compiler.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
- numba_cuda/numba/cuda/simulator/kernel.py +43 -34
- numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
- numba_cuda/numba/cuda/simulator/reduction.py +1 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
- numba_cuda/numba/cuda/simulator_init.py +2 -4
- numba_cuda/numba/cuda/stubs.py +139 -102
- numba_cuda/numba/cuda/target.py +64 -47
- numba_cuda/numba/cuda/testing.py +24 -19
- numba_cuda/numba/cuda/tests/__init__.py +14 -12
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +7 -6
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +57 -21
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +31 -28
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +6 -7
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +19 -12
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +6 -6
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +31 -25
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
- numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
- numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
- numba_cuda/numba/cuda/types.py +5 -2
- numba_cuda/numba/cuda/ufuncs.py +382 -362
- numba_cuda/numba/cuda/utils.py +2 -2
- numba_cuda/numba/cuda/vector_types.py +2 -2
- numba_cuda/numba/cuda/vectorizers.py +37 -32
- {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/METADATA +1 -1
- numba_cuda-0.9.0.dist-info/RECORD +253 -0
- {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/WHEEL +1 -1
- numba_cuda-0.8.0.dist-info/RECORD +0 -251
- {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/top_level.txt +0 -0
@@ -21,12 +21,12 @@ from numba.cuda.cuda_paths import (
|
|
21
21
|
|
22
22
|
|
23
23
|
has_cuda = nvvm.is_available()
|
24
|
-
has_mp_get_context = hasattr(mp,
|
24
|
+
has_mp_get_context = hasattr(mp, "get_context")
|
25
25
|
|
26
26
|
|
27
27
|
class LibraryLookupBase(SerialMixin, unittest.TestCase):
|
28
28
|
def setUp(self):
|
29
|
-
ctx = mp.get_context(
|
29
|
+
ctx = mp.get_context("spawn")
|
30
30
|
|
31
31
|
qrecv = ctx.Queue()
|
32
32
|
qsend = ctx.Queue()
|
@@ -84,108 +84,108 @@ def check_lib_lookup(qout, qin):
|
|
84
84
|
status = False
|
85
85
|
|
86
86
|
|
87
|
-
@skip_on_cudasim(
|
88
|
-
@unittest.skipUnless(has_mp_get_context,
|
89
|
-
@skip_unless_conda_cudatoolkit(
|
87
|
+
@skip_on_cudasim("Library detection unsupported in the simulator")
|
88
|
+
@unittest.skipUnless(has_mp_get_context, "mp.get_context not available")
|
89
|
+
@skip_unless_conda_cudatoolkit("test assumes conda installed cudatoolkit")
|
90
90
|
class TestLibDeviceLookUp(LibraryLookupBase):
|
91
91
|
def test_libdevice_path_decision(self):
|
92
92
|
# Check that the default is using conda environment
|
93
93
|
by, info, warns = self.remote_do(self.do_clear_envs)
|
94
94
|
if has_cuda:
|
95
|
-
self.assertEqual(by,
|
95
|
+
self.assertEqual(by, "Conda environment")
|
96
96
|
else:
|
97
97
|
self.assertEqual(by, "<unknown>")
|
98
98
|
self.assertIsNone(info)
|
99
99
|
self.assertFalse(warns)
|
100
100
|
# Check that CUDA_HOME works by removing conda-env
|
101
101
|
by, info, warns = self.remote_do(self.do_set_cuda_home)
|
102
|
-
self.assertEqual(by,
|
103
|
-
self.assertEqual(info, os.path.join(
|
102
|
+
self.assertEqual(by, "CUDA_HOME")
|
103
|
+
self.assertEqual(info, os.path.join("mycudahome", "nvvm", "libdevice"))
|
104
104
|
self.assertFalse(warns)
|
105
105
|
|
106
106
|
if get_system_ctk() is None:
|
107
107
|
# Fake remove conda environment so no cudatoolkit is available
|
108
108
|
by, info, warns = self.remote_do(self.do_clear_envs)
|
109
|
-
self.assertEqual(by,
|
109
|
+
self.assertEqual(by, "<unknown>")
|
110
110
|
self.assertIsNone(info)
|
111
111
|
self.assertFalse(warns)
|
112
112
|
else:
|
113
113
|
# Use system available cudatoolkit
|
114
114
|
by, info, warns = self.remote_do(self.do_clear_envs)
|
115
|
-
self.assertEqual(by,
|
115
|
+
self.assertEqual(by, "System")
|
116
116
|
self.assertFalse(warns)
|
117
117
|
|
118
118
|
@staticmethod
|
119
119
|
def do_clear_envs():
|
120
|
-
remove_env(
|
121
|
-
remove_env(
|
120
|
+
remove_env("CUDA_HOME")
|
121
|
+
remove_env("CUDA_PATH")
|
122
122
|
return True, _get_libdevice_path_decision()
|
123
123
|
|
124
124
|
@staticmethod
|
125
125
|
def do_set_cuda_home():
|
126
|
-
os.environ[
|
126
|
+
os.environ["CUDA_HOME"] = os.path.join("mycudahome")
|
127
127
|
_fake_non_conda_env()
|
128
128
|
return True, _get_libdevice_path_decision()
|
129
129
|
|
130
130
|
|
131
|
-
@skip_on_cudasim(
|
132
|
-
@unittest.skipUnless(has_mp_get_context,
|
133
|
-
@skip_unless_conda_cudatoolkit(
|
131
|
+
@skip_on_cudasim("Library detection unsupported in the simulator")
|
132
|
+
@unittest.skipUnless(has_mp_get_context, "mp.get_context not available")
|
133
|
+
@skip_unless_conda_cudatoolkit("test assumes conda installed cudatoolkit")
|
134
134
|
class TestNvvmLookUp(LibraryLookupBase):
|
135
135
|
def test_nvvm_path_decision(self):
|
136
136
|
# Check that the default is using conda environment
|
137
137
|
by, info, warns = self.remote_do(self.do_clear_envs)
|
138
138
|
if has_cuda:
|
139
|
-
self.assertEqual(by,
|
139
|
+
self.assertEqual(by, "Conda environment")
|
140
140
|
else:
|
141
141
|
self.assertEqual(by, "<unknown>")
|
142
142
|
self.assertIsNone(info)
|
143
143
|
self.assertFalse(warns)
|
144
144
|
# Check that CUDA_HOME works by removing conda-env
|
145
145
|
by, info, warns = self.remote_do(self.do_set_cuda_home)
|
146
|
-
self.assertEqual(by,
|
146
|
+
self.assertEqual(by, "CUDA_HOME")
|
147
147
|
self.assertFalse(warns)
|
148
148
|
if IS_WIN32:
|
149
|
-
self.assertEqual(info, os.path.join(
|
149
|
+
self.assertEqual(info, os.path.join("mycudahome", "nvvm", "bin"))
|
150
150
|
elif IS_OSX:
|
151
|
-
self.assertEqual(info, os.path.join(
|
151
|
+
self.assertEqual(info, os.path.join("mycudahome", "nvvm", "lib"))
|
152
152
|
else:
|
153
|
-
self.assertEqual(info, os.path.join(
|
153
|
+
self.assertEqual(info, os.path.join("mycudahome", "nvvm", "lib64"))
|
154
154
|
|
155
155
|
if get_system_ctk() is None:
|
156
156
|
# Fake remove conda environment so no cudatoolkit is available
|
157
157
|
by, info, warns = self.remote_do(self.do_clear_envs)
|
158
|
-
self.assertEqual(by,
|
158
|
+
self.assertEqual(by, "<unknown>")
|
159
159
|
self.assertIsNone(info)
|
160
160
|
self.assertFalse(warns)
|
161
161
|
else:
|
162
162
|
# Use system available cudatoolkit
|
163
163
|
by, info, warns = self.remote_do(self.do_clear_envs)
|
164
|
-
self.assertEqual(by,
|
164
|
+
self.assertEqual(by, "System")
|
165
165
|
self.assertFalse(warns)
|
166
166
|
|
167
167
|
@staticmethod
|
168
168
|
def do_clear_envs():
|
169
|
-
remove_env(
|
170
|
-
remove_env(
|
169
|
+
remove_env("CUDA_HOME")
|
170
|
+
remove_env("CUDA_PATH")
|
171
171
|
return True, _get_nvvm_path_decision()
|
172
172
|
|
173
173
|
@staticmethod
|
174
174
|
def do_set_cuda_home():
|
175
|
-
os.environ[
|
175
|
+
os.environ["CUDA_HOME"] = os.path.join("mycudahome")
|
176
176
|
_fake_non_conda_env()
|
177
177
|
return True, _get_nvvm_path_decision()
|
178
178
|
|
179
179
|
|
180
|
-
@skip_on_cudasim(
|
181
|
-
@unittest.skipUnless(has_mp_get_context,
|
182
|
-
@skip_unless_conda_cudatoolkit(
|
180
|
+
@skip_on_cudasim("Library detection unsupported in the simulator")
|
181
|
+
@unittest.skipUnless(has_mp_get_context, "mp.get_context not available")
|
182
|
+
@skip_unless_conda_cudatoolkit("test assumes conda installed cudatoolkit")
|
183
183
|
class TestCudaLibLookUp(LibraryLookupBase):
|
184
184
|
def test_cudalib_path_decision(self):
|
185
185
|
# Check that the default is using conda environment
|
186
186
|
by, info, warns = self.remote_do(self.do_clear_envs)
|
187
187
|
if has_cuda:
|
188
|
-
self.assertEqual(by,
|
188
|
+
self.assertEqual(by, "Conda environment")
|
189
189
|
else:
|
190
190
|
self.assertEqual(by, "<unknown>")
|
191
191
|
self.assertIsNone(info)
|
@@ -194,14 +194,14 @@ class TestCudaLibLookUp(LibraryLookupBase):
|
|
194
194
|
# Check that CUDA_HOME works by removing conda-env
|
195
195
|
self.remote_do(self.do_clear_envs)
|
196
196
|
by, info, warns = self.remote_do(self.do_set_cuda_home)
|
197
|
-
self.assertEqual(by,
|
197
|
+
self.assertEqual(by, "CUDA_HOME")
|
198
198
|
self.assertFalse(warns)
|
199
199
|
if IS_WIN32:
|
200
|
-
self.assertEqual(info, os.path.join(
|
200
|
+
self.assertEqual(info, os.path.join("mycudahome", "bin"))
|
201
201
|
elif IS_OSX:
|
202
|
-
self.assertEqual(info, os.path.join(
|
202
|
+
self.assertEqual(info, os.path.join("mycudahome", "lib"))
|
203
203
|
else:
|
204
|
-
self.assertEqual(info, os.path.join(
|
204
|
+
self.assertEqual(info, os.path.join("mycudahome", "lib64"))
|
205
205
|
if get_system_ctk() is None:
|
206
206
|
# Fake remove conda environment so no cudatoolkit is available
|
207
207
|
by, info, warns = self.remote_do(self.do_clear_envs)
|
@@ -211,18 +211,18 @@ class TestCudaLibLookUp(LibraryLookupBase):
|
|
211
211
|
else:
|
212
212
|
# Use system available cudatoolkit
|
213
213
|
by, info, warns = self.remote_do(self.do_clear_envs)
|
214
|
-
self.assertEqual(by,
|
214
|
+
self.assertEqual(by, "System")
|
215
215
|
self.assertFalse(warns)
|
216
216
|
|
217
217
|
@staticmethod
|
218
218
|
def do_clear_envs():
|
219
|
-
remove_env(
|
220
|
-
remove_env(
|
219
|
+
remove_env("CUDA_HOME")
|
220
|
+
remove_env("CUDA_PATH")
|
221
221
|
return True, _get_cudalib_dir_path_decision()
|
222
222
|
|
223
223
|
@staticmethod
|
224
224
|
def do_set_cuda_home():
|
225
|
-
os.environ[
|
225
|
+
os.environ["CUDA_HOME"] = os.path.join("mycudahome")
|
226
226
|
_fake_non_conda_env()
|
227
227
|
return True, _get_cudalib_dir_path_decision()
|
228
228
|
|
@@ -231,8 +231,8 @@ def _fake_non_conda_env():
|
|
231
231
|
"""
|
232
232
|
Monkeypatch sys.prefix to hide the fact we are in a conda-env
|
233
233
|
"""
|
234
|
-
sys.prefix =
|
234
|
+
sys.prefix = ""
|
235
235
|
|
236
236
|
|
237
|
-
if __name__ ==
|
237
|
+
if __name__ == "__main__":
|
238
238
|
unittest.main()
|
@@ -8,14 +8,17 @@ from llvmlite import binding as llvm
|
|
8
8
|
import unittest
|
9
9
|
|
10
10
|
|
11
|
-
original =
|
12
|
-
|
11
|
+
original = (
|
12
|
+
"call void @llvm.memset.p0i8.i64("
|
13
|
+
"i8* align 4 %arg.x.41, i8 0, i64 %0, i1 false)"
|
14
|
+
)
|
13
15
|
|
14
|
-
missing_align =
|
15
|
-
|
16
|
+
missing_align = (
|
17
|
+
"call void @llvm.memset.p0i8.i64(i8* %arg.x.41, i8 0, i64 %0, i1 false)"
|
18
|
+
)
|
16
19
|
|
17
20
|
|
18
|
-
@skip_on_cudasim(
|
21
|
+
@skip_on_cudasim("libNVVM not supported in simulator")
|
19
22
|
@unittest.skipIf(utils.MACHINE_BITS == 32, "CUDA not support for 32-bit")
|
20
23
|
@unittest.skipIf(not nvvm.is_available(), "No libNVVM")
|
21
24
|
class TestNvvmWithoutCuda(unittest.TestCase):
|
@@ -30,10 +33,9 @@ class TestNvvmWithoutCuda(unittest.TestCase):
|
|
30
33
|
# NVVM that it cannot parse correctly
|
31
34
|
|
32
35
|
# Create a module with a constant containing all 8-bit characters
|
33
|
-
c = ir.Constant(ir.ArrayType(ir.IntType(8), 256),
|
34
|
-
bytearray(range(256)))
|
36
|
+
c = ir.Constant(ir.ArrayType(ir.IntType(8), 256), bytearray(range(256)))
|
35
37
|
m = ir.Module()
|
36
|
-
m.triple =
|
38
|
+
m.triple = "nvptx64-nvidia-cuda"
|
37
39
|
nvvm.add_ir_version(m)
|
38
40
|
gv = ir.GlobalVariable(m, c.type, "myconstant")
|
39
41
|
gv.global_constant = True
|
@@ -46,9 +48,9 @@ class TestNvvmWithoutCuda(unittest.TestCase):
|
|
46
48
|
|
47
49
|
# Ensure all characters appear in the generated constant array.
|
48
50
|
elements = ", ".join([str(i) for i in range(256)])
|
49
|
-
myconstant = f"myconstant[256] = {{{elements}}}".encode(
|
51
|
+
myconstant = f"myconstant[256] = {{{elements}}}".encode("utf-8")
|
50
52
|
self.assertIn(myconstant, ptx)
|
51
53
|
|
52
54
|
|
53
|
-
if __name__ ==
|
55
|
+
if __name__ == "__main__":
|
54
56
|
unittest.main()
|
@@ -26,7 +26,7 @@ class TestNrtBasic(CUDATestCase):
|
|
26
26
|
x = np.empty(10, np.int64)
|
27
27
|
f(x)
|
28
28
|
|
29
|
-
g[1,1]()
|
29
|
+
g[1, 1]()
|
30
30
|
cuda.synchronize()
|
31
31
|
|
32
32
|
def test_nrt_ptx_contains_refcount(self):
|
@@ -39,7 +39,7 @@ class TestNrtBasic(CUDATestCase):
|
|
39
39
|
x = np.empty(10, np.int64)
|
40
40
|
f(x)
|
41
41
|
|
42
|
-
g[1,1]()
|
42
|
+
g[1, 1]()
|
43
43
|
|
44
44
|
ptx = next(iter(g.inspect_asm().values()))
|
45
45
|
|
@@ -72,13 +72,12 @@ class TestNrtBasic(CUDATestCase):
|
|
72
72
|
|
73
73
|
out_ary = np.zeros(1, dtype=np.int64)
|
74
74
|
|
75
|
-
g[1,1](out_ary)
|
75
|
+
g[1, 1](out_ary)
|
76
76
|
|
77
77
|
self.assertEqual(out_ary[0], 1)
|
78
78
|
|
79
79
|
|
80
80
|
class TestNrtStatistics(CUDATestCase):
|
81
|
-
|
82
81
|
def setUp(self):
|
83
82
|
self._stream = cuda.default_stream()
|
84
83
|
# Store the current stats state
|
@@ -126,12 +125,11 @@ class TestNrtStatistics(CUDATestCase):
|
|
126
125
|
|
127
126
|
# Check env var explicitly being set works
|
128
127
|
env = os.environ.copy()
|
129
|
-
env[
|
130
|
-
env[
|
128
|
+
env["NUMBA_CUDA_NRT_STATS"] = "1"
|
129
|
+
env["NUMBA_CUDA_ENABLE_NRT"] = "1"
|
131
130
|
run_in_subprocess(src, env=env)
|
132
131
|
|
133
132
|
def check_env_var_off(self, env):
|
134
|
-
|
135
133
|
src = """if 1:
|
136
134
|
from numba import cuda
|
137
135
|
import numpy as np
|
@@ -152,27 +150,26 @@ class TestNrtStatistics(CUDATestCase):
|
|
152
150
|
def test_stats_env_var_explicit_off(self):
|
153
151
|
# Checks that explicitly turning the stats off via the env var works.
|
154
152
|
env = os.environ.copy()
|
155
|
-
env[
|
153
|
+
env["NUMBA_CUDA_NRT_STATS"] = "0"
|
156
154
|
self.check_env_var_off(env)
|
157
155
|
|
158
156
|
def test_stats_env_var_default_off(self):
|
159
157
|
# Checks that the env var not being set is the same as "off", i.e.
|
160
158
|
# default for Numba is off.
|
161
159
|
env = os.environ.copy()
|
162
|
-
env.pop(
|
160
|
+
env.pop("NUMBA_CUDA_NRT_STATS", None)
|
163
161
|
self.check_env_var_off(env)
|
164
162
|
|
165
163
|
def test_stats_status_toggle(self):
|
166
|
-
|
167
164
|
@cuda.jit
|
168
165
|
def foo():
|
169
166
|
tmp = np.ones(3)
|
170
|
-
arr = np.arange(5 * tmp[0])
|
167
|
+
arr = np.arange(5 * tmp[0]) # noqa: F841
|
171
168
|
return None
|
172
169
|
|
173
170
|
with (
|
174
|
-
override_config(
|
175
|
-
override_config(
|
171
|
+
override_config("CUDA_ENABLE_NRT", True),
|
172
|
+
override_config("CUDA_NRT_STATS", True),
|
176
173
|
):
|
177
174
|
# Switch on stats
|
178
175
|
rtsys.memsys_enable_stats()
|
@@ -218,9 +215,9 @@ class TestNrtStatistics(CUDATestCase):
|
|
218
215
|
def test_nrt_explicit_stats_query_raises_exception_when_disabled(self):
|
219
216
|
# Checks the various memsys_get_stats functions raise if queried when
|
220
217
|
# the stats counters are disabled.
|
221
|
-
method_variations = (
|
218
|
+
method_variations = ("alloc", "free", "mi_alloc", "mi_free")
|
222
219
|
for meth in method_variations:
|
223
|
-
stats_func = getattr(rtsys, f
|
220
|
+
stats_func = getattr(rtsys, f"memsys_get_stats_{meth}")
|
224
221
|
with self.subTest(stats_func=stats_func):
|
225
222
|
# Turn stats off
|
226
223
|
rtsys.memsys_disable_stats()
|
@@ -233,14 +230,13 @@ class TestNrtStatistics(CUDATestCase):
|
|
233
230
|
@cuda.jit
|
234
231
|
def foo():
|
235
232
|
tmp = np.ones(3)
|
236
|
-
arr = np.arange(5 * tmp[0])
|
233
|
+
arr = np.arange(5 * tmp[0]) # noqa: F841
|
237
234
|
return None
|
238
235
|
|
239
236
|
with (
|
240
|
-
override_config(
|
241
|
-
override_config(
|
237
|
+
override_config("CUDA_ENABLE_NRT", True),
|
238
|
+
override_config("CUDA_NRT_STATS", True),
|
242
239
|
):
|
243
|
-
|
244
240
|
# Switch on stats
|
245
241
|
rtsys.memsys_enable_stats()
|
246
242
|
|
@@ -262,5 +258,5 @@ class TestNrtStatistics(CUDATestCase):
|
|
262
258
|
self.assertEqual(stats.mi_free, stats_mi_free)
|
263
259
|
|
264
260
|
|
265
|
-
if __name__ ==
|
261
|
+
if __name__ == "__main__":
|
266
262
|
unittest.main()
|
@@ -9,7 +9,6 @@ from numba import cuda
|
|
9
9
|
|
10
10
|
|
11
11
|
class TestNrtRefCt(EnableNRTStatsMixin, CUDATestCase):
|
12
|
-
|
13
12
|
def setUp(self):
|
14
13
|
super(TestNrtRefCt, self).setUp()
|
15
14
|
|
@@ -19,7 +18,7 @@ class TestNrtRefCt(EnableNRTStatsMixin, CUDATestCase):
|
|
19
18
|
def run(self, result=None):
|
20
19
|
with (
|
21
20
|
override_config("CUDA_ENABLE_NRT", True),
|
22
|
-
override_config(
|
21
|
+
override_config("CUDA_NRT_STATS", True),
|
23
22
|
):
|
24
23
|
super(TestNrtRefCt, self).run(result)
|
25
24
|
|
@@ -33,7 +32,7 @@ class TestNrtRefCt(EnableNRTStatsMixin, CUDATestCase):
|
|
33
32
|
@cuda.jit
|
34
33
|
def kernel():
|
35
34
|
for i in range(n):
|
36
|
-
temp = np.empty(2)
|
35
|
+
temp = np.empty(2) # noqa: F841
|
37
36
|
return None
|
38
37
|
|
39
38
|
init_stats = rtsys.get_allocation_stats()
|
@@ -49,14 +48,13 @@ class TestNrtRefCt(EnableNRTStatsMixin, CUDATestCase):
|
|
49
48
|
|
50
49
|
@cuda.jit
|
51
50
|
def g(n):
|
52
|
-
|
53
51
|
x = np.empty((n, 2))
|
54
52
|
|
55
53
|
for i in range(n):
|
56
54
|
y = x[i]
|
57
55
|
|
58
56
|
for i in range(n):
|
59
|
-
y = x[i]
|
57
|
+
y = x[i] # noqa: F841
|
60
58
|
|
61
59
|
return None
|
62
60
|
|
@@ -70,6 +68,7 @@ class TestNrtRefCt(EnableNRTStatsMixin, CUDATestCase):
|
|
70
68
|
"""
|
71
69
|
Test issue #1573
|
72
70
|
"""
|
71
|
+
|
73
72
|
@cuda.jit
|
74
73
|
def if_with_allocation_and_initialization(arr1, test1):
|
75
74
|
tmp_arr = np.empty_like(arr1)
|
@@ -85,13 +84,15 @@ class TestNrtRefCt(EnableNRTStatsMixin, CUDATestCase):
|
|
85
84
|
init_stats = rtsys.get_allocation_stats()
|
86
85
|
if_with_allocation_and_initialization[1, 1](arr, False)
|
87
86
|
cur_stats = rtsys.get_allocation_stats()
|
88
|
-
self.assertEqual(
|
89
|
-
|
87
|
+
self.assertEqual(
|
88
|
+
cur_stats.alloc - init_stats.alloc, cur_stats.free - init_stats.free
|
89
|
+
)
|
90
90
|
|
91
91
|
def test_del_at_beginning_of_loop(self):
|
92
92
|
"""
|
93
93
|
Test issue #1734
|
94
94
|
"""
|
95
|
+
|
95
96
|
@cuda.jit
|
96
97
|
def f(arr):
|
97
98
|
res = 0
|
@@ -108,9 +109,10 @@ class TestNrtRefCt(EnableNRTStatsMixin, CUDATestCase):
|
|
108
109
|
init_stats = rtsys.get_allocation_stats()
|
109
110
|
f[1, 1](arr)
|
110
111
|
cur_stats = rtsys.get_allocation_stats()
|
111
|
-
self.assertEqual(
|
112
|
-
|
112
|
+
self.assertEqual(
|
113
|
+
cur_stats.alloc - init_stats.alloc, cur_stats.free - init_stats.free
|
114
|
+
)
|
113
115
|
|
114
116
|
|
115
|
-
if __name__ ==
|
117
|
+
if __name__ == "__main__":
|
116
118
|
unittest.main()
|
@@ -58,7 +58,7 @@ def determine_include_flags():
|
|
58
58
|
return None
|
59
59
|
|
60
60
|
# NVCC writes to stdout on Windows and stderr on Linux
|
61
|
-
if platform.system() ==
|
61
|
+
if platform.system() == "Windows":
|
62
62
|
stream = cp.stdout
|
63
63
|
else:
|
64
64
|
stream = cp.stderr
|
@@ -157,7 +157,7 @@ if __name__ == "__main__":
|
|
157
157
|
parser.add_argument(
|
158
158
|
"-a",
|
159
159
|
"--arch",
|
160
|
-
help="compute arch to target (e.g. sm_87).
|
160
|
+
help="compute arch to target (e.g. sm_87). Defaults to sm_50.",
|
161
161
|
default="sm_50",
|
162
162
|
)
|
163
163
|
|
numba_cuda/numba/cuda/types.py
CHANGED
@@ -5,16 +5,18 @@ class Dim3(types.Type):
|
|
5
5
|
"""
|
6
6
|
A 3-tuple (x, y, z) representing the position of a block or thread.
|
7
7
|
"""
|
8
|
+
|
8
9
|
def __init__(self):
|
9
|
-
super().__init__(name=
|
10
|
+
super().__init__(name="Dim3")
|
10
11
|
|
11
12
|
|
12
13
|
class GridGroup(types.Type):
|
13
14
|
"""
|
14
15
|
The grid of all threads in a cooperative kernel launch.
|
15
16
|
"""
|
17
|
+
|
16
18
|
def __init__(self):
|
17
|
-
super().__init__(name=
|
19
|
+
super().__init__(name="GridGroup")
|
18
20
|
|
19
21
|
|
20
22
|
dim3 = Dim3()
|
@@ -23,6 +25,7 @@ grid_group = GridGroup()
|
|
23
25
|
|
24
26
|
class CUDADispatcher(types.Dispatcher):
|
25
27
|
"""The type of CUDA dispatchers"""
|
28
|
+
|
26
29
|
# This type exists (instead of using types.Dispatcher as the type of CUDA
|
27
30
|
# dispatchers) so that we can have an alternative lowering for them to the
|
28
31
|
# lowering of CPU dispatchers - the CPU target lowers all dispatchers as a
|