numba-cuda 0.0.1__py3-none-any.whl → 0.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (233)
  1. _numba_cuda_redirector.pth +1 -0
  2. _numba_cuda_redirector.py +74 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +5 -0
  5. numba_cuda/_version.py +19 -0
  6. numba_cuda/numba/cuda/__init__.py +22 -0
  7. numba_cuda/numba/cuda/api.py +526 -0
  8. numba_cuda/numba/cuda/api_util.py +30 -0
  9. numba_cuda/numba/cuda/args.py +77 -0
  10. numba_cuda/numba/cuda/cg.py +62 -0
  11. numba_cuda/numba/cuda/codegen.py +378 -0
  12. numba_cuda/numba/cuda/compiler.py +422 -0
  13. numba_cuda/numba/cuda/cpp_function_wrappers.cu +47 -0
  14. numba_cuda/numba/cuda/cuda_fp16.h +3631 -0
  15. numba_cuda/numba/cuda/cuda_fp16.hpp +2465 -0
  16. numba_cuda/numba/cuda/cuda_paths.py +258 -0
  17. numba_cuda/numba/cuda/cudadecl.py +806 -0
  18. numba_cuda/numba/cuda/cudadrv/__init__.py +9 -0
  19. numba_cuda/numba/cuda/cudadrv/devicearray.py +904 -0
  20. numba_cuda/numba/cuda/cudadrv/devices.py +248 -0
  21. numba_cuda/numba/cuda/cudadrv/driver.py +3201 -0
  22. numba_cuda/numba/cuda/cudadrv/drvapi.py +398 -0
  23. numba_cuda/numba/cuda/cudadrv/dummyarray.py +452 -0
  24. numba_cuda/numba/cuda/cudadrv/enums.py +607 -0
  25. numba_cuda/numba/cuda/cudadrv/error.py +36 -0
  26. numba_cuda/numba/cuda/cudadrv/libs.py +176 -0
  27. numba_cuda/numba/cuda/cudadrv/ndarray.py +20 -0
  28. numba_cuda/numba/cuda/cudadrv/nvrtc.py +260 -0
  29. numba_cuda/numba/cuda/cudadrv/nvvm.py +707 -0
  30. numba_cuda/numba/cuda/cudadrv/rtapi.py +10 -0
  31. numba_cuda/numba/cuda/cudadrv/runtime.py +142 -0
  32. numba_cuda/numba/cuda/cudaimpl.py +1055 -0
  33. numba_cuda/numba/cuda/cudamath.py +140 -0
  34. numba_cuda/numba/cuda/decorators.py +189 -0
  35. numba_cuda/numba/cuda/descriptor.py +33 -0
  36. numba_cuda/numba/cuda/device_init.py +89 -0
  37. numba_cuda/numba/cuda/deviceufunc.py +908 -0
  38. numba_cuda/numba/cuda/dispatcher.py +1057 -0
  39. numba_cuda/numba/cuda/errors.py +59 -0
  40. numba_cuda/numba/cuda/extending.py +7 -0
  41. numba_cuda/numba/cuda/initialize.py +13 -0
  42. numba_cuda/numba/cuda/intrinsic_wrapper.py +77 -0
  43. numba_cuda/numba/cuda/intrinsics.py +198 -0
  44. numba_cuda/numba/cuda/kernels/__init__.py +0 -0
  45. numba_cuda/numba/cuda/kernels/reduction.py +262 -0
  46. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  47. numba_cuda/numba/cuda/libdevice.py +3382 -0
  48. numba_cuda/numba/cuda/libdevicedecl.py +17 -0
  49. numba_cuda/numba/cuda/libdevicefuncs.py +1057 -0
  50. numba_cuda/numba/cuda/libdeviceimpl.py +83 -0
  51. numba_cuda/numba/cuda/mathimpl.py +448 -0
  52. numba_cuda/numba/cuda/models.py +48 -0
  53. numba_cuda/numba/cuda/nvvmutils.py +235 -0
  54. numba_cuda/numba/cuda/printimpl.py +86 -0
  55. numba_cuda/numba/cuda/random.py +292 -0
  56. numba_cuda/numba/cuda/simulator/__init__.py +38 -0
  57. numba_cuda/numba/cuda/simulator/api.py +110 -0
  58. numba_cuda/numba/cuda/simulator/compiler.py +9 -0
  59. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +2 -0
  60. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +432 -0
  61. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +117 -0
  62. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +62 -0
  63. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +4 -0
  64. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +4 -0
  65. numba_cuda/numba/cuda/simulator/cudadrv/error.py +6 -0
  66. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +2 -0
  67. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +29 -0
  68. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +19 -0
  69. numba_cuda/numba/cuda/simulator/kernel.py +308 -0
  70. numba_cuda/numba/cuda/simulator/kernelapi.py +495 -0
  71. numba_cuda/numba/cuda/simulator/reduction.py +15 -0
  72. numba_cuda/numba/cuda/simulator/vector_types.py +58 -0
  73. numba_cuda/numba/cuda/simulator_init.py +17 -0
  74. numba_cuda/numba/cuda/stubs.py +902 -0
  75. numba_cuda/numba/cuda/target.py +440 -0
  76. numba_cuda/numba/cuda/testing.py +202 -0
  77. numba_cuda/numba/cuda/tests/__init__.py +58 -0
  78. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +8 -0
  79. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +145 -0
  80. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +145 -0
  81. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +375 -0
  82. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +21 -0
  83. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +179 -0
  84. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +235 -0
  85. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +22 -0
  86. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +193 -0
  87. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +547 -0
  88. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +249 -0
  89. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +81 -0
  90. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +192 -0
  91. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +38 -0
  92. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +65 -0
  93. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +139 -0
  94. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +37 -0
  95. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +12 -0
  96. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +317 -0
  97. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +127 -0
  98. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +54 -0
  99. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +199 -0
  100. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +37 -0
  101. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +20 -0
  102. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +149 -0
  103. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +36 -0
  104. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +85 -0
  105. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +41 -0
  106. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +122 -0
  107. numba_cuda/numba/cuda/tests/cudapy/__init__.py +8 -0
  108. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +234 -0
  109. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +41 -0
  110. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +58 -0
  111. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +30 -0
  112. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +100 -0
  113. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +42 -0
  114. numba_cuda/numba/cuda/tests/cudapy/test_array.py +260 -0
  115. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +201 -0
  116. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +35 -0
  117. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1620 -0
  118. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +120 -0
  119. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +24 -0
  120. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +545 -0
  121. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +257 -0
  122. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +33 -0
  123. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +276 -0
  124. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +296 -0
  125. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +20 -0
  126. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +129 -0
  127. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +176 -0
  128. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +147 -0
  129. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +435 -0
  130. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +90 -0
  131. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +94 -0
  132. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +101 -0
  133. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +221 -0
  134. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +222 -0
  135. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +700 -0
  136. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +121 -0
  137. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +79 -0
  138. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +174 -0
  139. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +155 -0
  140. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +244 -0
  141. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +52 -0
  142. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +29 -0
  143. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +66 -0
  144. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +60 -0
  145. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +456 -0
  146. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +159 -0
  147. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +95 -0
  148. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +37 -0
  149. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +165 -0
  150. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1106 -0
  151. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +318 -0
  152. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +99 -0
  153. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +64 -0
  154. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +119 -0
  155. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +187 -0
  156. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +199 -0
  157. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +164 -0
  158. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +37 -0
  159. numba_cuda/numba/cuda/tests/cudapy/test_math.py +786 -0
  160. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +74 -0
  161. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +113 -0
  162. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +22 -0
  163. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +140 -0
  164. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +46 -0
  165. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +101 -0
  166. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +49 -0
  167. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +401 -0
  168. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +86 -0
  169. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +335 -0
  170. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +124 -0
  171. numba_cuda/numba/cuda/tests/cudapy/test_print.py +128 -0
  172. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +33 -0
  173. numba_cuda/numba/cuda/tests/cudapy/test_random.py +104 -0
  174. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +610 -0
  175. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +125 -0
  176. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +76 -0
  177. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  178. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +85 -0
  179. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +37 -0
  180. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +444 -0
  181. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +205 -0
  182. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +271 -0
  183. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +80 -0
  184. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +277 -0
  185. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +47 -0
  186. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +307 -0
  187. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +283 -0
  188. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +20 -0
  189. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +69 -0
  190. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +36 -0
  191. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +37 -0
  192. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +139 -0
  193. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +276 -0
  194. numba_cuda/numba/cuda/tests/cudasim/__init__.py +6 -0
  195. numba_cuda/numba/cuda/tests/cudasim/support.py +6 -0
  196. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +102 -0
  197. numba_cuda/numba/cuda/tests/data/__init__.py +0 -0
  198. numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
  199. numba_cuda/numba/cuda/tests/data/error.cu +7 -0
  200. numba_cuda/numba/cuda/tests/data/jitlink.cu +23 -0
  201. numba_cuda/numba/cuda/tests/data/jitlink.ptx +51 -0
  202. numba_cuda/numba/cuda/tests/data/warn.cu +7 -0
  203. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +6 -0
  204. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +0 -0
  205. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +49 -0
  206. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +77 -0
  207. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +76 -0
  208. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +82 -0
  209. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +155 -0
  210. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +173 -0
  211. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +109 -0
  212. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +59 -0
  213. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +76 -0
  214. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +130 -0
  215. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +50 -0
  216. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +73 -0
  217. numba_cuda/numba/cuda/tests/nocuda/__init__.py +8 -0
  218. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +359 -0
  219. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +36 -0
  220. numba_cuda/numba/cuda/tests/nocuda/test_import.py +49 -0
  221. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +238 -0
  222. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +54 -0
  223. numba_cuda/numba/cuda/types.py +37 -0
  224. numba_cuda/numba/cuda/ufuncs.py +662 -0
  225. numba_cuda/numba/cuda/vector_types.py +209 -0
  226. numba_cuda/numba/cuda/vectorizers.py +252 -0
  227. numba_cuda-0.0.12.dist-info/LICENSE +25 -0
  228. numba_cuda-0.0.12.dist-info/METADATA +68 -0
  229. numba_cuda-0.0.12.dist-info/RECORD +231 -0
  230. {numba_cuda-0.0.1.dist-info → numba_cuda-0.0.12.dist-info}/WHEEL +1 -1
  231. numba_cuda-0.0.1.dist-info/METADATA +0 -10
  232. numba_cuda-0.0.1.dist-info/RECORD +0 -5
  233. {numba_cuda-0.0.1.dist-info → numba_cuda-0.0.12.dist-info}/top_level.txt +0 -0
numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py
@@ -0,0 +1,199 @@
+ import warnings
+
+ from llvmlite import ir
+ from numba.cuda.cudadrv import nvvm, runtime
+ from numba.cuda.testing import unittest
+ from numba.cuda.cudadrv.nvvm import LibDevice, NvvmError, NVVM
+ from numba.cuda.testing import skip_on_cudasim
+
+
+ @skip_on_cudasim('NVVM Driver unsupported in the simulator')
+ class TestNvvmDriver(unittest.TestCase):
+     def get_nvvmir(self):
+         versions = NVVM().get_ir_version()
+         data_layout = NVVM().data_layout
+         return nvvmir_generic.format(data_layout=data_layout, v=versions)
+
+     def test_nvvm_compile_simple(self):
+         nvvmir = self.get_nvvmir()
+         ptx = nvvm.compile_ir(nvvmir).decode('utf8')
+         self.assertTrue('simple' in ptx)
+         self.assertTrue('ave' in ptx)
+
+     def test_nvvm_compile_nullary_option(self):
+         # Tests compilation with an option that doesn't take an argument
+         # ("-gen-lto") - all other NVVM options are of the form
+         # "-<name>=<value>"
+
+         # -gen-lto is not available prior to CUDA 11.5
+         if runtime.get_version() < (11, 5):
+             self.skipTest("-gen-lto unavailable in this toolkit version")
+
+         nvvmir = self.get_nvvmir()
+         ltoir = nvvm.compile_ir(nvvmir, opt=3, gen_lto=None, arch="compute_52")
+
+         # Verify we correctly passed the option by checking if we got LTOIR
+         # from NVVM (by looking for the expected magic number for LTOIR)
+         self.assertEqual(ltoir[:4], b'\xed\x43\x4e\x7f')
+
+     def test_nvvm_bad_option(self):
+         # Ensure that unsupported / non-existent options are reported as such
+         # to the user / caller
+         msg = "-made-up-option=2 is an unsupported option"
+         with self.assertRaisesRegex(NvvmError, msg):
+             nvvm.compile_ir("", made_up_option=2)
+
+     def test_nvvm_from_llvm(self):
+         m = ir.Module("test_nvvm_from_llvm")
+         m.triple = 'nvptx64-nvidia-cuda'
+         nvvm.add_ir_version(m)
+         fty = ir.FunctionType(ir.VoidType(), [ir.IntType(32)])
+         kernel = ir.Function(m, fty, name='mycudakernel')
+         bldr = ir.IRBuilder(kernel.append_basic_block('entry'))
+         bldr.ret_void()
+         nvvm.set_cuda_kernel(kernel)
+
+         m.data_layout = NVVM().data_layout
+         ptx = nvvm.compile_ir(str(m)).decode('utf8')
+         self.assertTrue('mycudakernel' in ptx)
+         self.assertTrue('.address_size 64' in ptx)
+
+     def test_used_list(self):
+         # Construct a module
+         m = ir.Module("test_used_list")
+         m.triple = 'nvptx64-nvidia-cuda'
+         m.data_layout = NVVM().data_layout
+         nvvm.add_ir_version(m)
+
+         # Add a function and mark it as a kernel
+         fty = ir.FunctionType(ir.VoidType(), [ir.IntType(32)])
+         kernel = ir.Function(m, fty, name='mycudakernel')
+         bldr = ir.IRBuilder(kernel.append_basic_block('entry'))
+         bldr.ret_void()
+         nvvm.set_cuda_kernel(kernel)
+
+         # Verify that the used list was correctly constructed
+         used_lines = [line for line in str(m).splitlines()
+                       if 'llvm.used' in line]
+         msg = 'Expected exactly one @"llvm.used" array'
+         self.assertEqual(len(used_lines), 1, msg)
+
+         used_line = used_lines[0]
+         # Kernel should be referenced in the used list
+         self.assertIn("mycudakernel", used_line)
+         # Check linkage of the used list
+         self.assertIn("appending global", used_line)
+         # Ensure used list is in the metadata section
+         self.assertIn('section "llvm.metadata"', used_line)
+
+     def test_nvvm_ir_verify_fail(self):
+         m = ir.Module("test_bad_ir")
+         m.triple = "unknown-unknown-unknown"
+         m.data_layout = NVVM().data_layout
+         nvvm.add_ir_version(m)
+         with self.assertRaisesRegex(NvvmError, 'Invalid target triple'):
+             nvvm.compile_ir(str(m))
+
+     def _test_nvvm_support(self, arch):
+         compute_xx = 'compute_{0}{1}'.format(*arch)
+         nvvmir = self.get_nvvmir()
+         ptx = nvvm.compile_ir(nvvmir, arch=compute_xx, ftz=1, prec_sqrt=0,
+                               prec_div=0).decode('utf8')
+         self.assertIn(".target sm_{0}{1}".format(*arch), ptx)
+         self.assertIn('simple', ptx)
+         self.assertIn('ave', ptx)
+
+     def test_nvvm_support(self):
+         """Test supported CC by NVVM
+         """
+         for arch in nvvm.get_supported_ccs():
+             self._test_nvvm_support(arch=arch)
+
+     def test_nvvm_warning(self):
+         m = ir.Module("test_nvvm_warning")
+         m.triple = 'nvptx64-nvidia-cuda'
+         m.data_layout = NVVM().data_layout
+         nvvm.add_ir_version(m)
+
+         fty = ir.FunctionType(ir.VoidType(), [])
+         kernel = ir.Function(m, fty, name='inlinekernel')
+         builder = ir.IRBuilder(kernel.append_basic_block('entry'))
+         builder.ret_void()
+         nvvm.set_cuda_kernel(kernel)
+
+         # Add the noinline attribute to trigger NVVM to generate a warning
+         kernel.attributes.add('noinline')
+
+         with warnings.catch_warnings(record=True) as w:
+             nvvm.compile_ir(str(m))
+
+         self.assertEqual(len(w), 1)
+         self.assertIn('overriding noinline attribute', str(w[0]))
+
+
+ @skip_on_cudasim('NVVM Driver unsupported in the simulator')
+ class TestArchOption(unittest.TestCase):
+     def test_get_arch_option(self):
+         # Test returning the nearest lowest arch.
+         self.assertEqual(nvvm.get_arch_option(5, 3), 'compute_53')
+         self.assertEqual(nvvm.get_arch_option(7, 5), 'compute_75')
+         self.assertEqual(nvvm.get_arch_option(7, 7), 'compute_75')
+         # Test known arch.
+         supported_cc = nvvm.get_supported_ccs()
+         for arch in supported_cc:
+             self.assertEqual(nvvm.get_arch_option(*arch), 'compute_%d%d' % arch)
+         self.assertEqual(nvvm.get_arch_option(1000, 0),
+                          'compute_%d%d' % supported_cc[-1])
+
+
+ @skip_on_cudasim('NVVM Driver unsupported in the simulator')
+ class TestLibDevice(unittest.TestCase):
+     def test_libdevice_load(self):
+         # Test that constructing LibDevice gives a bitcode file
+         libdevice = LibDevice()
+         self.assertEqual(libdevice.bc[:4], b'BC\xc0\xde')
+
+
+ nvvmir_generic = '''\
+ target triple="nvptx64-nvidia-cuda"
+ target datalayout = "{data_layout}"
+
+ define i32 @ave(i32 %a, i32 %b) {{
+ entry:
+ %add = add nsw i32 %a, %b
+ %div = sdiv i32 %add, 2
+ ret i32 %div
+ }}
+
+ define void @simple(i32* %data) {{
+ entry:
+ %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
+ %1 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+ %mul = mul i32 %0, %1
+ %2 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+ %add = add i32 %mul, %2
+ %call = call i32 @ave(i32 %add, i32 %add)
+ %idxprom = sext i32 %add to i64
+ %arrayidx = getelementptr inbounds i32, i32* %data, i64 %idxprom
+ store i32 %call, i32* %arrayidx, align 4
+ ret void
+ }}
+
+ declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() nounwind readnone
+
+ declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x() nounwind readnone
+
+ declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() nounwind readnone
+
+ !nvvmir.version = !{{!1}}
+ !1 = !{{i32 {v[0]}, i32 {v[1]}, i32 {v[2]}, i32 {v[3]}}}
+
+ !nvvm.annotations = !{{!2}}
+ !2 = !{{void (i32*)* @simple, !"kernel", i32 1}}
+
+ @"llvm.used" = appending global [1 x i8*] [i8* bitcast (void (i32*)* @simple to i8*)], section "llvm.metadata"
+ ''' # noqa: E501
+
+
+ if __name__ == '__main__':
+     unittest.main()
numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py
@@ -0,0 +1,37 @@
+ import numpy as np
+ import platform
+
+ from numba import cuda
+ from numba.cuda.testing import unittest, ContextResettingTestCase
+
+
+ class TestPinned(ContextResettingTestCase):
+
+     def _run_copies(self, A):
+         A0 = np.copy(A)
+
+         stream = cuda.stream()
+         ptr = cuda.to_device(A, copy=False, stream=stream)
+         ptr.copy_to_device(A, stream=stream)
+         ptr.copy_to_host(A, stream=stream)
+         stream.synchronize()
+
+         self.assertTrue(np.allclose(A, A0))
+
+     def test_pinned(self):
+         machine = platform.machine()
+         if machine.startswith('arm') or machine.startswith('aarch64'):
+             count = 262144  # 2MB
+         else:
+             count = 2097152  # 16MB
+         A = np.arange(count)
+         with cuda.pinned(A):
+             self._run_copies(A)
+
+     def test_unpinned(self):
+         A = np.arange(2 * 1024 * 1024)  # 16 MB
+         self._run_copies(A)
+
+
+ if __name__ == '__main__':
+     unittest.main()
numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py
@@ -0,0 +1,20 @@
+ import unittest
+ from numba.cuda.testing import ContextResettingTestCase
+ from numba import cuda
+ from numba.cuda.testing import skip_on_cudasim
+
+
+ @skip_on_cudasim('CUDA Profiler unsupported in the simulator')
+ class TestProfiler(ContextResettingTestCase):
+     def test_profiling(self):
+         with cuda.profiling():
+             a = cuda.device_array(10)
+             del a
+
+         with cuda.profiling():
+             a = cuda.device_array(100)
+             del a
+
+
+ if __name__ == '__main__':
+     unittest.main()
numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py
@@ -0,0 +1,149 @@
+ import multiprocessing as mp
+ import logging
+ import traceback
+ from numba.cuda.testing import unittest, CUDATestCase
+ from numba.cuda.testing import (skip_on_cudasim, skip_with_cuda_python,
+                                 skip_under_cuda_memcheck)
+ from numba.tests.support import linux_only
+
+
+ def child_test():
+     from numba import cuda, int32, void
+     from numba.core import config
+     import io
+     import numpy as np
+     import threading
+
+     # Enable PTDS before we make any CUDA driver calls. Enabling it first
+     # ensures that PTDS APIs are used because the CUDA driver looks up API
+     # functions on first use and memoizes them.
+     config.CUDA_PER_THREAD_DEFAULT_STREAM = 1
+
+     # Set up log capture for the Driver API so we can see what API calls were
+     # used.
+     logbuf = io.StringIO()
+     handler = logging.StreamHandler(logbuf)
+     cudadrv_logger = logging.getLogger('numba.cuda.cudadrv.driver')
+     cudadrv_logger.addHandler(handler)
+     cudadrv_logger.setLevel(logging.DEBUG)
+
+     # Set up data for our test, and copy over to the device
+     N = 2 ** 16
+     N_THREADS = 10
+     N_ADDITIONS = 4096
+
+     # Seed the RNG for repeatability
+     np.random.seed(1)
+     x = np.random.randint(low=0, high=1000, size=N, dtype=np.int32)
+     r = np.zeros_like(x)
+
+     # One input and output array for each thread
+     xs = [cuda.to_device(x) for _ in range(N_THREADS)]
+     rs = [cuda.to_device(r) for _ in range(N_THREADS)]
+
+     # Compute the grid size and get the [per-thread] default stream
+     n_threads = 256
+     n_blocks = N // n_threads
+     stream = cuda.default_stream()
+
+     # A simple multiplication-by-addition kernel. What it does exactly is not
+     # too important; only that we have a kernel that does something.
+     @cuda.jit(void(int32[::1], int32[::1]))
+     def f(r, x):
+         i = cuda.grid(1)
+
+         if i > len(r):
+             return
+
+         # Accumulate x into r
+         for j in range(N_ADDITIONS):
+             r[i] += x[i]
+
+     # This function will be used to launch the kernel from each thread on its
+     # own unique data.
+     def kernel_thread(n):
+         f[n_blocks, n_threads, stream](rs[n], xs[n])
+
+     # Create threads
+     threads = [threading.Thread(target=kernel_thread, args=(i,))
+                for i in range(N_THREADS)]
+
+     # Start all threads
+     for thread in threads:
+         thread.start()
+
+     # Wait for all threads to finish, to ensure that we don't synchronize with
+     # the device until all kernels are scheduled.
+     for thread in threads:
+         thread.join()
+
+     # Synchronize with the device
+     cuda.synchronize()
+
+     # Check output is as expected
+     expected = x * N_ADDITIONS
+     for i in range(N_THREADS):
+         np.testing.assert_equal(rs[i].copy_to_host(), expected)
+
+     # Return the driver log output to the calling process for checking
+     handler.flush()
+     return logbuf.getvalue()
+
+
+ def child_test_wrapper(result_queue):
+     try:
+         output = child_test()
+         success = True
+     # Catch anything raised so it can be propagated
+     except:  # noqa: E722
+         output = traceback.format_exc()
+         success = False
+
+     result_queue.put((success, output))
+
+
+ # Run on Linux only until the reason for test hangs on Windows (Issue #8635,
+ # https://github.com/numba/numba/issues/8635) is diagnosed
+ @linux_only
+ @skip_under_cuda_memcheck('Hangs cuda-memcheck')
+ @skip_on_cudasim('Streams not supported on the simulator')
+ class TestPTDS(CUDATestCase):
+     @skip_with_cuda_python('Function names unchanged for PTDS with NV Binding')
+     def test_ptds(self):
+         # Run a test with PTDS enabled in a child process
+         ctx = mp.get_context('spawn')
+         result_queue = ctx.Queue()
+         proc = ctx.Process(target=child_test_wrapper, args=(result_queue,))
+         proc.start()
+         proc.join()
+         success, output = result_queue.get()
+
+         # Ensure the child process ran to completion before checking its output
+         if not success:
+             self.fail(output)
+
+         # Functions with a per-thread default stream variant that we expect to
+         # see in the output
+         ptds_functions = ('cuMemcpyHtoD_v2_ptds', 'cuLaunchKernel_ptsz',
+                           'cuMemcpyDtoH_v2_ptds')
+
+         for fn in ptds_functions:
+             with self.subTest(fn=fn, expected=True):
+                 self.assertIn(fn, output)
+
+         # Non-PTDS versions of the functions that we should not see in the
+         # output:
+         legacy_functions = ('cuMemcpyHtoD_v2', 'cuLaunchKernel',
+                             'cuMemcpyDtoH_v2')
+
+         for fn in legacy_functions:
+             with self.subTest(fn=fn, expected=False):
+                 # Ensure we only spot these function names appearing without a
+                 # _ptds or _ptsz suffix by checking including the end of the
+                 # line in the log
+                 fn_at_end = f'{fn}\n'
+                 self.assertNotIn(fn_at_end, output)
+
+
+ if __name__ == '__main__':
+     unittest.main()
numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py
@@ -0,0 +1,36 @@
+ import threading
+ from numba import cuda
+ from numba.cuda.cudadrv.driver import driver
+ from numba.cuda.testing import unittest, ContextResettingTestCase
+ from queue import Queue
+
+
+ class TestResetDevice(ContextResettingTestCase):
+     def test_reset_device(self):
+
+         def newthread(exception_queue):
+             try:
+                 devices = range(driver.get_device_count())
+                 for _ in range(2):
+                     for d in devices:
+                         cuda.select_device(d)
+                         cuda.close()
+             except Exception as e:
+                 exception_queue.put(e)
+
+         # Do test on a separate thread so that we don't affect
+         # the current context in the main thread.
+
+         exception_queue = Queue()
+         t = threading.Thread(target=newthread, args=(exception_queue,))
+         t.start()
+         t.join()
+
+         exceptions = []
+         while not exception_queue.empty():
+             exceptions.append(exception_queue.get())
+         self.assertEqual(exceptions, [])
+
+
+ if __name__ == '__main__':
+     unittest.main()
numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py
@@ -0,0 +1,85 @@
+ import multiprocessing
+ import os
+ from numba.core import config
+ from numba.cuda.cudadrv.runtime import runtime
+ from numba.cuda.testing import unittest, SerialMixin, skip_on_cudasim
+ from unittest.mock import patch
+
+
+ def set_visible_devices_and_check(q):
+     try:
+         from numba import cuda
+         import os
+
+         os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+         q.put(len(cuda.gpus.lst))
+     except:  # noqa: E722
+         # Sentinel value for error executing test code
+         q.put(-1)
+
+
+ if config.ENABLE_CUDASIM:
+     SUPPORTED_VERSIONS = (-1, -1),
+ else:
+     SUPPORTED_VERSIONS = ((11, 0), (11, 1), (11, 2), (11, 3), (11, 4), (11, 5),
+                           (11, 6), (11, 7))
+
+
+ class TestRuntime(unittest.TestCase):
+     def test_is_supported_version_true(self):
+         for v in SUPPORTED_VERSIONS:
+             with patch.object(runtime, 'get_version', return_value=v):
+                 self.assertTrue(runtime.is_supported_version())
+
+     @skip_on_cudasim('The simulator always simulates a supported runtime')
+     def test_is_supported_version_false(self):
+         # Check with an old unsupported version and some potential future
+         # versions
+         for v in ((10, 2), (11, 8), (12, 0)):
+             with patch.object(runtime, 'get_version', return_value=v):
+                 self.assertFalse(runtime.is_supported_version())
+
+     def test_supported_versions(self):
+         self.assertEqual(SUPPORTED_VERSIONS, runtime.supported_versions)
+
+
+ class TestVisibleDevices(unittest.TestCase, SerialMixin):
+     def test_visible_devices_set_after_import(self):
+         # See Issue #6149. This test checks that we can set
+         # CUDA_VISIBLE_DEVICES after importing Numba and have the value
+         # reflected in the available list of GPUs. Prior to the fix for this
+         # issue, Numba made a call to runtime.get_version() on import that
+         # initialized the driver and froze the list of available devices before
+         # CUDA_VISIBLE_DEVICES could be set by the user.
+
+         # Avoid importing cuda at the top level so that
+         # set_visible_devices_and_check gets to import it first in its process
+         from numba import cuda
+
+         if len(cuda.gpus.lst) in (0, 1):
+             self.skipTest('This test requires multiple GPUs')
+
+         if os.environ.get('CUDA_VISIBLE_DEVICES'):
+             msg = 'Cannot test when CUDA_VISIBLE_DEVICES already set'
+             self.skipTest(msg)
+
+         ctx = multiprocessing.get_context('spawn')
+         q = ctx.Queue()
+         p = ctx.Process(target=set_visible_devices_and_check, args=(q,))
+         p.start()
+         try:
+             visible_gpu_count = q.get()
+         finally:
+             p.join()
+
+         # Make an obvious distinction between an error running the test code
+         # and an incorrect number of GPUs in the list
+         msg = 'Error running set_visible_devices_and_check'
+         self.assertNotEqual(visible_gpu_count, -1, msg=msg)
+
+         # The actual check that we see only one GPU
+         self.assertEqual(visible_gpu_count, 1)
+
+
+ if __name__ == '__main__':
+     unittest.main()
numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py
@@ -0,0 +1,41 @@
+ #
+ # Test does not work on some cards.
+ #
+ import threading
+ from queue import Queue
+
+ import numpy as np
+ from numba import cuda
+ from numba.cuda.testing import unittest, ContextResettingTestCase
+
+
+ def newthread(exception_queue):
+     try:
+         cuda.select_device(0)
+         stream = cuda.stream()
+         A = np.arange(100)
+         dA = cuda.to_device(A, stream=stream)
+         stream.synchronize()
+         del dA
+         del stream
+         cuda.close()
+     except Exception as e:
+         exception_queue.put(e)
+
+
+ class TestSelectDevice(ContextResettingTestCase):
+     def test_select_device(self):
+         exception_queue = Queue()
+         for i in range(10):
+             t = threading.Thread(target=newthread, args=(exception_queue,))
+             t.start()
+             t.join()
+
+         exceptions = []
+         while not exception_queue.empty():
+             exceptions.append(exception_queue.get())
+         self.assertEqual(exceptions, [])
+
+
+ if __name__ == '__main__':
+     unittest.main()
numba_cuda/numba/cuda/tests/cudadrv/test_streams.py
@@ -0,0 +1,122 @@
+ import asyncio
+ import functools
+ import threading
+ import numpy as np
+ from numba import cuda
+ from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
+
+
+ def with_asyncio_loop(f):
+     @functools.wraps(f)
+     def runner(*args, **kwds):
+         loop = asyncio.new_event_loop()
+         loop.set_debug(True)
+         try:
+             return loop.run_until_complete(f(*args, **kwds))
+         finally:
+             loop.close()
+     return runner
+
+
+ @skip_on_cudasim('CUDA Driver API unsupported in the simulator')
+ class TestCudaStream(CUDATestCase):
+     def test_add_callback(self):
+         def callback(stream, status, event):
+             event.set()
+
+         stream = cuda.stream()
+         callback_event = threading.Event()
+         stream.add_callback(callback, callback_event)
+         self.assertTrue(callback_event.wait(1.0))
+
+     def test_add_callback_with_default_arg(self):
+         callback_event = threading.Event()
+
+         def callback(stream, status, arg):
+             self.assertIsNone(arg)
+             callback_event.set()
+
+         stream = cuda.stream()
+         stream.add_callback(callback)
+         self.assertTrue(callback_event.wait(1.0))
+
+     @with_asyncio_loop
+     async def test_async_done(self):
+         stream = cuda.stream()
+         await stream.async_done()
+
+     @with_asyncio_loop
+     async def test_parallel_tasks(self):
+         async def async_cuda_fn(value_in: float) -> float:
+             stream = cuda.stream()
+             h_src, h_dst = cuda.pinned_array(8), cuda.pinned_array(8)
+             h_src[:] = value_in
+             d_ary = cuda.to_device(h_src, stream=stream)
+             d_ary.copy_to_host(h_dst, stream=stream)
+             done_result = await stream.async_done()
+             self.assertEqual(done_result, stream)
+             return h_dst.mean()
+
+         values_in = [1, 2, 3, 4]
+         tasks = [asyncio.create_task(async_cuda_fn(v)) for v in values_in]
+         values_out = await asyncio.gather(*tasks)
+         self.assertTrue(np.allclose(values_in, values_out))
+
+     @with_asyncio_loop
+     async def test_multiple_async_done(self):
+         stream = cuda.stream()
+         done_aws = [stream.async_done() for _ in range(4)]
+         done = await asyncio.gather(*done_aws)
+         for d in done:
+             self.assertEqual(d, stream)
+
+     @with_asyncio_loop
+     async def test_multiple_async_done_multiple_streams(self):
+         streams = [cuda.stream() for _ in range(4)]
+         done_aws = [stream.async_done() for stream in streams]
+         done = await asyncio.gather(*done_aws)
+
+         # Ensure we got the four original streams in done
+         self.assertSetEqual(set(done), set(streams))
+
+     @with_asyncio_loop
+     async def test_cancelled_future(self):
+         stream = cuda.stream()
+         done1, done2 = stream.async_done(), stream.async_done()
+         done1.cancel()
+         await done2
+         self.assertTrue(done1.cancelled())
+         self.assertTrue(done2.done())
+
+
+ @skip_on_cudasim('CUDA Driver API unsupported in the simulator')
+ class TestFailingStream(CUDATestCase):
+     # This test can only be run in isolation because it corrupts the CUDA
+     # context, which cannot be recovered from within the same process. It is
+     # left here so that it can be run manually for debugging / testing purposes
+     # - or may be re-enabled if in future there is infrastructure added for
+     # running tests in a separate process (a subprocess cannot be used because
+     # CUDA will have been initialized before the fork, so it cannot be used in
+     # the child process).
+     @unittest.skip
+     @with_asyncio_loop
+     async def test_failed_stream(self):
+         ctx = cuda.current_context()
+         module = ctx.create_module_ptx("""
+             .version 6.5
+             .target sm_30
+             .address_size 64
+             .visible .entry failing_kernel() { trap; }
+         """)
+         failing_kernel = module.get_function("failing_kernel")
+
+         stream = cuda.stream()
+         failing_kernel.configure((1,), (1,), stream=stream).__call__()
+         done = stream.async_done()
+         with self.assertRaises(Exception):
+             await done
+         self.assertIsNotNone(done.exception())
+
+
+ if __name__ == '__main__':
+     unittest.main()