numba-cuda 0.0.0__py3-none-any.whl → 0.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (233) hide show
  1. _numba_cuda_redirector.pth +1 -0
  2. _numba_cuda_redirector.py +74 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +5 -0
  5. numba_cuda/_version.py +19 -0
  6. numba_cuda/numba/cuda/__init__.py +22 -0
  7. numba_cuda/numba/cuda/api.py +526 -0
  8. numba_cuda/numba/cuda/api_util.py +30 -0
  9. numba_cuda/numba/cuda/args.py +77 -0
  10. numba_cuda/numba/cuda/cg.py +62 -0
  11. numba_cuda/numba/cuda/codegen.py +378 -0
  12. numba_cuda/numba/cuda/compiler.py +422 -0
  13. numba_cuda/numba/cuda/cpp_function_wrappers.cu +47 -0
  14. numba_cuda/numba/cuda/cuda_fp16.h +3631 -0
  15. numba_cuda/numba/cuda/cuda_fp16.hpp +2465 -0
  16. numba_cuda/numba/cuda/cuda_paths.py +258 -0
  17. numba_cuda/numba/cuda/cudadecl.py +806 -0
  18. numba_cuda/numba/cuda/cudadrv/__init__.py +9 -0
  19. numba_cuda/numba/cuda/cudadrv/devicearray.py +904 -0
  20. numba_cuda/numba/cuda/cudadrv/devices.py +248 -0
  21. numba_cuda/numba/cuda/cudadrv/driver.py +3201 -0
  22. numba_cuda/numba/cuda/cudadrv/drvapi.py +398 -0
  23. numba_cuda/numba/cuda/cudadrv/dummyarray.py +452 -0
  24. numba_cuda/numba/cuda/cudadrv/enums.py +607 -0
  25. numba_cuda/numba/cuda/cudadrv/error.py +36 -0
  26. numba_cuda/numba/cuda/cudadrv/libs.py +176 -0
  27. numba_cuda/numba/cuda/cudadrv/ndarray.py +20 -0
  28. numba_cuda/numba/cuda/cudadrv/nvrtc.py +260 -0
  29. numba_cuda/numba/cuda/cudadrv/nvvm.py +707 -0
  30. numba_cuda/numba/cuda/cudadrv/rtapi.py +10 -0
  31. numba_cuda/numba/cuda/cudadrv/runtime.py +142 -0
  32. numba_cuda/numba/cuda/cudaimpl.py +1055 -0
  33. numba_cuda/numba/cuda/cudamath.py +140 -0
  34. numba_cuda/numba/cuda/decorators.py +189 -0
  35. numba_cuda/numba/cuda/descriptor.py +33 -0
  36. numba_cuda/numba/cuda/device_init.py +89 -0
  37. numba_cuda/numba/cuda/deviceufunc.py +908 -0
  38. numba_cuda/numba/cuda/dispatcher.py +1057 -0
  39. numba_cuda/numba/cuda/errors.py +59 -0
  40. numba_cuda/numba/cuda/extending.py +7 -0
  41. numba_cuda/numba/cuda/initialize.py +13 -0
  42. numba_cuda/numba/cuda/intrinsic_wrapper.py +77 -0
  43. numba_cuda/numba/cuda/intrinsics.py +198 -0
  44. numba_cuda/numba/cuda/kernels/__init__.py +0 -0
  45. numba_cuda/numba/cuda/kernels/reduction.py +262 -0
  46. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  47. numba_cuda/numba/cuda/libdevice.py +3382 -0
  48. numba_cuda/numba/cuda/libdevicedecl.py +17 -0
  49. numba_cuda/numba/cuda/libdevicefuncs.py +1057 -0
  50. numba_cuda/numba/cuda/libdeviceimpl.py +83 -0
  51. numba_cuda/numba/cuda/mathimpl.py +448 -0
  52. numba_cuda/numba/cuda/models.py +48 -0
  53. numba_cuda/numba/cuda/nvvmutils.py +235 -0
  54. numba_cuda/numba/cuda/printimpl.py +86 -0
  55. numba_cuda/numba/cuda/random.py +292 -0
  56. numba_cuda/numba/cuda/simulator/__init__.py +38 -0
  57. numba_cuda/numba/cuda/simulator/api.py +110 -0
  58. numba_cuda/numba/cuda/simulator/compiler.py +9 -0
  59. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +2 -0
  60. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +432 -0
  61. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +117 -0
  62. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +62 -0
  63. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +4 -0
  64. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +4 -0
  65. numba_cuda/numba/cuda/simulator/cudadrv/error.py +6 -0
  66. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +2 -0
  67. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +29 -0
  68. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +19 -0
  69. numba_cuda/numba/cuda/simulator/kernel.py +308 -0
  70. numba_cuda/numba/cuda/simulator/kernelapi.py +495 -0
  71. numba_cuda/numba/cuda/simulator/reduction.py +15 -0
  72. numba_cuda/numba/cuda/simulator/vector_types.py +58 -0
  73. numba_cuda/numba/cuda/simulator_init.py +17 -0
  74. numba_cuda/numba/cuda/stubs.py +902 -0
  75. numba_cuda/numba/cuda/target.py +440 -0
  76. numba_cuda/numba/cuda/testing.py +202 -0
  77. numba_cuda/numba/cuda/tests/__init__.py +58 -0
  78. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +8 -0
  79. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +145 -0
  80. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +145 -0
  81. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +375 -0
  82. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +21 -0
  83. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +179 -0
  84. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +235 -0
  85. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +22 -0
  86. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +193 -0
  87. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +547 -0
  88. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +249 -0
  89. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +81 -0
  90. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +192 -0
  91. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +38 -0
  92. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +65 -0
  93. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +139 -0
  94. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +37 -0
  95. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +12 -0
  96. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +317 -0
  97. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +127 -0
  98. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +54 -0
  99. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +199 -0
  100. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +37 -0
  101. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +20 -0
  102. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +149 -0
  103. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +36 -0
  104. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +85 -0
  105. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +41 -0
  106. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +122 -0
  107. numba_cuda/numba/cuda/tests/cudapy/__init__.py +8 -0
  108. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +234 -0
  109. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +41 -0
  110. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +58 -0
  111. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +30 -0
  112. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +100 -0
  113. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +42 -0
  114. numba_cuda/numba/cuda/tests/cudapy/test_array.py +260 -0
  115. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +201 -0
  116. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +35 -0
  117. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1620 -0
  118. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +120 -0
  119. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +24 -0
  120. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +545 -0
  121. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +257 -0
  122. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +33 -0
  123. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +276 -0
  124. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +296 -0
  125. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +20 -0
  126. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +129 -0
  127. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +176 -0
  128. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +147 -0
  129. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +435 -0
  130. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +90 -0
  131. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +94 -0
  132. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +101 -0
  133. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +221 -0
  134. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +222 -0
  135. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +700 -0
  136. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +121 -0
  137. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +79 -0
  138. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +174 -0
  139. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +155 -0
  140. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +244 -0
  141. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +52 -0
  142. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +29 -0
  143. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +66 -0
  144. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +60 -0
  145. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +456 -0
  146. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +159 -0
  147. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +95 -0
  148. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +37 -0
  149. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +165 -0
  150. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1106 -0
  151. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +318 -0
  152. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +99 -0
  153. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +64 -0
  154. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +119 -0
  155. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +187 -0
  156. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +199 -0
  157. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +164 -0
  158. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +37 -0
  159. numba_cuda/numba/cuda/tests/cudapy/test_math.py +786 -0
  160. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +74 -0
  161. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +113 -0
  162. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +22 -0
  163. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +140 -0
  164. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +46 -0
  165. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +101 -0
  166. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +49 -0
  167. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +401 -0
  168. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +86 -0
  169. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +335 -0
  170. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +124 -0
  171. numba_cuda/numba/cuda/tests/cudapy/test_print.py +128 -0
  172. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +33 -0
  173. numba_cuda/numba/cuda/tests/cudapy/test_random.py +104 -0
  174. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +610 -0
  175. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +125 -0
  176. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +76 -0
  177. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  178. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +85 -0
  179. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +37 -0
  180. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +444 -0
  181. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +205 -0
  182. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +271 -0
  183. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +80 -0
  184. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +277 -0
  185. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +47 -0
  186. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +307 -0
  187. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +283 -0
  188. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +20 -0
  189. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +69 -0
  190. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +36 -0
  191. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +37 -0
  192. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +139 -0
  193. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +276 -0
  194. numba_cuda/numba/cuda/tests/cudasim/__init__.py +6 -0
  195. numba_cuda/numba/cuda/tests/cudasim/support.py +6 -0
  196. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +102 -0
  197. numba_cuda/numba/cuda/tests/data/__init__.py +0 -0
  198. numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
  199. numba_cuda/numba/cuda/tests/data/error.cu +7 -0
  200. numba_cuda/numba/cuda/tests/data/jitlink.cu +23 -0
  201. numba_cuda/numba/cuda/tests/data/jitlink.ptx +51 -0
  202. numba_cuda/numba/cuda/tests/data/warn.cu +7 -0
  203. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +6 -0
  204. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +0 -0
  205. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +49 -0
  206. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +77 -0
  207. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +76 -0
  208. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +82 -0
  209. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +155 -0
  210. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +173 -0
  211. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +109 -0
  212. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +59 -0
  213. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +76 -0
  214. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +130 -0
  215. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +50 -0
  216. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +73 -0
  217. numba_cuda/numba/cuda/tests/nocuda/__init__.py +8 -0
  218. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +359 -0
  219. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +36 -0
  220. numba_cuda/numba/cuda/tests/nocuda/test_import.py +49 -0
  221. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +238 -0
  222. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +54 -0
  223. numba_cuda/numba/cuda/types.py +37 -0
  224. numba_cuda/numba/cuda/ufuncs.py +662 -0
  225. numba_cuda/numba/cuda/vector_types.py +209 -0
  226. numba_cuda/numba/cuda/vectorizers.py +252 -0
  227. numba_cuda-0.0.12.dist-info/LICENSE +25 -0
  228. numba_cuda-0.0.12.dist-info/METADATA +68 -0
  229. numba_cuda-0.0.12.dist-info/RECORD +231 -0
  230. {numba_cuda-0.0.0.dist-info → numba_cuda-0.0.12.dist-info}/WHEEL +1 -1
  231. numba_cuda-0.0.0.dist-info/METADATA +0 -6
  232. numba_cuda-0.0.0.dist-info/RECORD +0 -5
  233. {numba_cuda-0.0.0.dist-info → numba_cuda-0.0.12.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,179 @@
1
+ import numpy as np
2
+ import ctypes
3
+ from numba.cuda.cudadrv.devicearray import (DeviceRecord, from_record_like,
4
+ auto_device)
5
+ from numba.cuda.testing import unittest, CUDATestCase
6
+ from numba.cuda.testing import skip_on_cudasim
7
+ from numba.np import numpy_support
8
+ from numba import cuda
9
+
10
# Width, in characters, of the fixed-length string field 'd' of ``recordtype``.
N_CHARS = 5

# Aligned record mixing float, integer, complex and fixed-width string fields.
recordtype = np.dtype(
    [('a', np.float64),
     ('b', np.int32),
     ('c', np.complex64),
     ('d', (np.str_, N_CHARS))],
    align=True,
)

# Aligned record holding a scalar plus a 2-element sub-array field.
recordwitharray = np.dtype(
    [('g', np.int32),
     ('h', np.float32, 2)],
    align=True,
)

# Record (default, unaligned layout) holding a scalar plus a 3x3 matrix field.
recwithmat = np.dtype(
    [('i', np.int32),
     ('j', np.float32, (3, 3))]
)

# Record that nests ``recwithmat`` as its field 'y'.
recwithrecwithmat = np.dtype(
    [('x', np.int32),
     ('y', recwithmat)]
)
34
+
35
+
36
@skip_on_cudasim('Device Record API unsupported in the simulator')
class TestCudaDeviceRecord(CUDATestCase):
    """
    Tests the DeviceRecord class with np.void host types.
    """
    def setUp(self):
        super().setUp()
        self._create_data(np.zeros)

    def _create_data(self, array_ctor):
        """
        Build the host-side fixtures shared by the tests.

        ``array_ctor`` is called as ``array_ctor(1, dtype)`` and element 0 of
        the result is used as a host record. Two records are created:
        ``self.hostz`` with every field set to zero, and ``self.hostnz`` with
        ``a == 10`` and ``b == 11.0``.
        """
        self.dtype = np.dtype([('a', np.int32), ('b', np.float32)], align=True)

        self.hostz = array_ctor(1, self.dtype)[0]
        # Zero the fields explicitly instead of trusting the constructor:
        # np.recarray (passed in by a subclass) returns uninitialised memory,
        # which could otherwise hold arbitrary values, including NaN, and a
        # NaN field would make the equality checks in the tests fail.
        for field in self.dtype.names:
            self.hostz[field] = 0

        self.hostnz = array_ctor(1, self.dtype)[0]
        self.hostnz['a'] = 10
        self.hostnz['b'] = 11.0

    def _check_device_record(self, reference, rec):
        """
        Assert that ``rec`` is a scalar device record whose dtype, allocation
        size and Numba type agree with ``reference``.
        """
        # A record is zero-dimensional: no shape, no strides.
        self.assertEqual(rec.shape, tuple())
        self.assertEqual(rec.strides, tuple())
        self.assertEqual(rec.dtype, reference.dtype)
        self.assertEqual(rec.alloc_size, reference.dtype.itemsize)
        self.assertIsNotNone(rec.gpu_data)
        self.assertNotEqual(rec.device_ctypes_pointer, ctypes.c_void_p(0))

        numba_type = numpy_support.from_dtype(reference.dtype)
        self.assertEqual(rec._numba_type_, numba_type)

    def test_device_record_interface(self):
        hostrec = self.hostz.copy()
        devrec = DeviceRecord(self.dtype)
        self._check_device_record(hostrec, devrec)

    def test_device_record_copy(self):
        hostrec = self.hostz.copy()
        devrec = DeviceRecord(self.dtype)
        devrec.copy_to_device(hostrec)

        # Copy back and check values are all zeros
        hostrec2 = self.hostnz.copy()
        devrec.copy_to_host(hostrec2)
        np.testing.assert_equal(self.hostz, hostrec2)

        # Copy non-zero values to GPU and back and check values
        hostrec3 = self.hostnz.copy()
        devrec.copy_to_device(hostrec3)

        hostrec4 = self.hostz.copy()
        devrec.copy_to_host(hostrec4)
        np.testing.assert_equal(hostrec4, self.hostnz)

    def test_from_record_like(self):
        # Create record from host record
        hostrec = self.hostz.copy()
        devrec = from_record_like(hostrec)
        self._check_device_record(hostrec, devrec)

        # Create record from device record and check for distinct data
        devrec2 = from_record_like(devrec)
        self._check_device_record(devrec, devrec2)
        self.assertNotEqual(devrec.gpu_data, devrec2.gpu_data)

    def test_auto_device(self):
        # Create record from host record
        hostrec = self.hostnz.copy()
        devrec, new_gpu_obj = auto_device(hostrec)
        self._check_device_record(hostrec, devrec)
        self.assertTrue(new_gpu_obj)

        # Copy data back and check it is equal to auto_device arg
        hostrec2 = self.hostz.copy()
        devrec.copy_to_host(hostrec2)
        np.testing.assert_equal(hostrec2, hostrec)
108
+
109
+
110
class TestCudaDeviceRecordWithRecord(TestCudaDeviceRecord):
    """
    Re-runs the inherited DeviceRecord tests with np.record host types.
    """
    def setUp(self):
        # Deliberately skip TestCudaDeviceRecord.setUp (which would build
        # np.zeros based fixtures) and go straight to the next setUp in the
        # MRO, then build the fixtures from np.recarray instead.
        super(TestCudaDeviceRecord, self).setUp()
        self._create_data(np.recarray)
117
+
118
+
119
@skip_on_cudasim('Structured array attr access not supported in simulator')
class TestRecordDtypeWithStructArrays(CUDATestCase):
    '''
    Exercise field reads and writes on device arrays with structured dtypes.
    '''

    @staticmethod
    def _field_values(x):
        # Value written to, and later expected from, each field of the
        # record associated with the 1-based number ``x``.
        return {
            'a': x / 2,
            'b': x,
            'c': x * 1j,
            'd': str(x) * N_CHARS,
        }

    def _createSampleArrays(self):
        # One 3-element record array, plus two single records taken as
        # element 0 of 1-element device arrays.
        self.sample1d = cuda.device_array(3, dtype=recordtype)
        self.samplerec1darr = cuda.device_array(1, dtype=recordwitharray)[0]
        self.samplerecmat = cuda.device_array(1, dtype=recwithmat)[0]

    def setUp(self):
        super().setUp()
        self._createSampleArrays()

        # Populate every field of every record in the 1D sample array.
        records = self.sample1d
        for idx in range(records.size):
            for name, value in self._field_values(idx + 1).items():
                records[idx][name] = value

    def test_structured_array1(self):
        # The values written in setUp must read back unchanged.
        records = self.sample1d
        for idx in range(self.sample1d.size):
            for name, value in self._field_values(idx + 1).items():
                self.assertEqual(records[idx][name], value)

    def test_structured_array2(self):
        # Scalar field and individual elements of a sub-array field.
        rec = self.samplerec1darr
        rec['g'] = 2
        rec['h'][0] = 3.0
        rec['h'][1] = 4.0
        self.assertEqual(rec['g'], 2)
        self.assertEqual(rec['h'][0], 3.0)
        self.assertEqual(rec['h'][1], 4.0)

    def test_structured_array3(self):
        # Whole-matrix assignment into a 2D sub-array field.
        rec = self.samplerecmat
        rows = [[5.0, 10.0, 15.0],
                [20.0, 25.0, 30.0],
                [35.0, 40.0, 45.0]]
        mat = np.array(rows, dtype=np.float32).reshape(3, 3)
        rec['j'][:] = mat
        np.testing.assert_equal(rec['j'], mat)

    def test_structured_array4(self):
        # Field access through a record nested inside another record.
        host = np.zeros(1, dtype=recwithrecwithmat)
        d_arr = cuda.to_device(host)
        d_arr[0]['y']['i'] = 1
        self.assertEqual(d_arr[0]['y']['i'], 1)
        d_arr[0]['y']['j'][0, 0] = 2.0
        self.assertEqual(d_arr[0]['y']['j'][0, 0], 2.0)
176
+
177
+
178
# Run the tests in this module when it is executed directly as a script.
+ if __name__ == '__main__':
179
+ unittest.main()
@@ -0,0 +1,235 @@
1
+ from ctypes import byref, c_int, c_void_p, sizeof
2
+
3
+ from numba.cuda.cudadrv.driver import (host_to_device, device_to_host, driver,
4
+ launch_kernel)
5
+ from numba.cuda.cudadrv import devices, drvapi, driver as _driver
6
+ from numba.cuda.testing import unittest, CUDATestCase
7
+ from numba.cuda.testing import skip_on_cudasim
8
+
9
+
10
# PTX source (ISA version 1.4, target sm_10) defining the entry point
# `_Z10helloworldPi`: each thread stores its %tid.x value into the array
# passed as the single parameter, at element index %tid.x (4-byte elements).
+ ptx1 = '''
11
+ .version 1.4
12
+ .target sm_10, map_f64_to_f32
13
+
14
+ .entry _Z10helloworldPi (
15
+ .param .u64 __cudaparm__Z10helloworldPi_A)
16
+ {
17
+ .reg .u32 %r<3>;
18
+ .reg .u64 %rd<6>;
19
+ .loc 14 4 0
20
+ $LDWbegin__Z10helloworldPi:
21
+ .loc 14 6 0
22
+ cvt.s32.u16 %r1, %tid.x;
23
+ ld.param.u64 %rd1, [__cudaparm__Z10helloworldPi_A];
24
+ cvt.u64.u16 %rd2, %tid.x;
25
+ mul.lo.u64 %rd3, %rd2, 4;
26
+ add.u64 %rd4, %rd1, %rd3;
27
+ st.global.s32 [%rd4+0], %r1;
28
+ .loc 14 7 0
29
+ exit;
30
+ $LDWend__Z10helloworldPi:
31
+ } // _Z10helloworldPi
32
+ '''
33
+
34
# PTX source (ISA version 3.0, target sm_20, 64-bit addresses) defining the
# same entry point `_Z10helloworldPi`: each thread stores its %tid.x value
# into the array passed as the single parameter, at element index %tid.x.
+ ptx2 = '''
35
+ .version 3.0
36
+ .target sm_20
37
+ .address_size 64
38
+
39
+ .file 1 "/tmp/tmpxft_000012c7_00000000-9_testcuda.cpp3.i"
40
+ .file 2 "testcuda.cu"
41
+
42
+ .entry _Z10helloworldPi(
43
+ .param .u64 _Z10helloworldPi_param_0
44
+ )
45
+ {
46
+ .reg .s32 %r<3>;
47
+ .reg .s64 %rl<5>;
48
+
49
+
50
+ ld.param.u64 %rl1, [_Z10helloworldPi_param_0];
51
+ cvta.to.global.u64 %rl2, %rl1;
52
+ .loc 2 6 1
53
+ mov.u32 %r1, %tid.x;
54
+ mul.wide.u32 %rl3, %r1, 4;
55
+ add.s64 %rl4, %rl2, %rl3;
56
+ st.global.u32 [%rl4], %r1;
57
+ .loc 2 7 2
58
+ ret;
59
+ }
60
+ '''
61
+
62
+
63
@skip_on_cudasim('CUDA Driver API unsupported in the simulator')
class TestCudaDriver(CUDATestCase):
    """
    Launch a hand-written PTX kernel and inspect stream objects through the
    context / driver interface.
    """

    def setUp(self):
        super().setUp()
        self.assertTrue(len(devices.gpus) > 0)
        self.context = devices.get_context()
        ccmajor, _ = self.context.device.compute_capability
        # ptx2 targets sm_20; ptx1 (sm_10) is the fallback for older devices.
        self.ptx = ptx2 if ccmajor >= 2 else ptx1

    def tearDown(self):
        super().tearDown()
        del self.context

    def _load_helloworld(self):
        # Build a module from the selected PTX and look up its entry point.
        module = self.context.create_module_ptx(self.ptx)
        return module, module.get_function('_Z10helloworldPi')

    @staticmethod
    def _kernel_argument(memory):
        # Device pointer in the form launch_kernel expects for the active
        # driver binding.
        ptr = memory.device_ctypes_pointer
        if _driver.USE_NV_BINDING:
            ptr = c_void_p(int(ptr))
        return ptr

    @staticmethod
    def _launch_one_block(function, stream, ptr):
        launch_kernel(function.handle,  # Kernel
                      1, 1, 1,          # gx, gy, gz
                      100, 1, 1,        # bx, by, bz
                      0,                # dynamic shared mem
                      stream,           # stream
                      [ptr])            # arguments

    def test_cuda_driver_basic(self):
        module, function = self._load_helloworld()

        array = (c_int * 100)()

        memory = self.context.memalloc(sizeof(array))
        host_to_device(memory, array, sizeof(array))

        ptr = self._kernel_argument(memory)
        stream = 0
        if _driver.USE_NV_BINDING:
            stream = _driver.binding.CUstream(stream)

        self._launch_one_block(function, stream, ptr)

        device_to_host(array, memory, sizeof(array))
        # Every thread wrote its own index into its slot.
        for i, v in enumerate(array):
            self.assertEqual(i, v)

        module.unload()

    def test_cuda_driver_stream_operations(self):
        module, function = self._load_helloworld()

        array = (c_int * 100)()

        stream = self.context.create_stream()

        with stream.auto_synchronize():
            memory = self.context.memalloc(sizeof(array))
            host_to_device(memory, array, sizeof(array), stream=stream)

            ptr = self._kernel_argument(memory)
            self._launch_one_block(function, stream.handle, ptr)

        device_to_host(array, memory, sizeof(array), stream=stream)

        for i, v in enumerate(array):
            self.assertEqual(i, v)

    def test_cuda_driver_default_stream(self):
        # Test properties of the default stream
        default = self.context.get_default_stream()
        self.assertIn("Default CUDA stream", repr(default))
        self.assertEqual(0, int(default))
        # bool(stream) is the check that is done in memcpy to decide if async
        # version should be used. So the default (0) stream should be true-ish
        # even though 0 is usually false-ish in Python.
        self.assertTrue(default)
        self.assertFalse(default.external)

    def test_cuda_driver_legacy_default_stream(self):
        # Test properties of the legacy default stream
        legacy = self.context.get_legacy_default_stream()
        self.assertIn("Legacy default CUDA stream", repr(legacy))
        self.assertEqual(1, int(legacy))
        self.assertTrue(legacy)
        self.assertFalse(legacy.external)

    def test_cuda_driver_per_thread_default_stream(self):
        # Test properties of the per-thread default stream
        per_thread = self.context.get_per_thread_default_stream()
        self.assertIn("Per-thread default CUDA stream", repr(per_thread))
        self.assertEqual(2, int(per_thread))
        self.assertTrue(per_thread)
        self.assertFalse(per_thread.external)

    def test_cuda_driver_stream(self):
        # Test properties of non-default streams
        created = self.context.create_stream()
        self.assertIn("CUDA stream", repr(created))
        self.assertNotIn("Default", repr(created))
        self.assertNotIn("External", repr(created))
        self.assertNotEqual(0, int(created))
        self.assertTrue(created)
        self.assertFalse(created.external)

    def test_cuda_driver_external_stream(self):
        # Test properties of a stream created from an external stream object.
        # We use the driver API directly to create a stream, to emulate an
        # external library creating a stream
        if _driver.USE_NV_BINDING:
            handle = driver.cuStreamCreate(0)
            ptr = int(handle)
        else:
            handle = drvapi.cu_stream()
            driver.cuStreamCreate(byref(handle), 0)
            ptr = handle.value
        wrapped = self.context.create_external_stream(ptr)

        self.assertIn("External CUDA stream", repr(wrapped))
        # Ensure neither "Default" nor "default"
        self.assertNotIn("efault", repr(wrapped))
        self.assertEqual(ptr, int(wrapped))
        self.assertTrue(wrapped)
        self.assertTrue(wrapped.external)

    def test_cuda_driver_occupancy(self):
        module, function = self._load_helloworld()

        value = self.context.get_active_blocks_per_multiprocessor(function,
                                                                  128, 128)
        self.assertTrue(value > 0)

        def blocksize_passthrough(bs):
            # Callback handed to get_max_potential_block_size; returns the
            # block size it receives, unchanged.
            return bs

        grid, block = self.context.get_max_potential_block_size(
            function, blocksize_passthrough, 128, 128)
        self.assertTrue(grid > 0)
        self.assertTrue(block > 0)
209
+
210
+
211
class TestDevice(CUDATestCase):
    def test_device_get_uuid(self):
        # A device UUID looks like:
        #
        #     GPU-e6489c45-5b68-3b03-bab7-0e7c8e809643
        #
        # i.e. "GPU-" followed by lowercase hex groups of 8, 4, 4, 4 and 12
        # digits. We build an RE for exactly that shape and check the
        # reported UUID against it.
        #
        # Device UUIDs may not conform to parts of the UUID specification (RFC
        # 4122) pertaining to versions and variants, so we do not extract and
        # validate the values of these bits.

        hex_groups = '-'.join('[0-9a-f]{%d}' % width
                              for width in (8, 4, 4, 4, 12))
        uuid_format = f'^GPU-{hex_groups}$'

        dev = devices.get_context().device
        self.assertRegex(dev.uuid, uuid_format)
232
+
233
+
234
# Run the tests in this module when it is executed directly as a script.
+ if __name__ == '__main__':
235
+ unittest.main()
@@ -0,0 +1,22 @@
1
+ from numba.cuda.testing import unittest
2
+ from numba.cuda.testing import skip_on_cudasim, skip_unless_conda_cudatoolkit
3
+ from numba.misc.findlib import find_lib
4
+
5
+
6
@skip_on_cudasim('Library detection unsupported in the simulator')
@skip_unless_conda_cudatoolkit
class TestLibraryDetection(unittest.TestCase):
    def test_detect(self):
        """
        This test is solely present to ensure that shipped cudatoolkits have
        additional core libraries in locations that Numba scans by default.
        PyCulib (and potentially others) rely on Numba's library finding
        capacity to find and subsequently load these libraries.
        """
        required_libs = ('nvvm',)
        for libname in required_libs:
            # find_lib must report at least one match for each library.
            self.assertNotEqual(find_lib(libname), [])
19
+
20
+
21
# Run the tests in this module when it is executed directly as a script.
+ if __name__ == '__main__':
22
+ unittest.main()
@@ -0,0 +1,193 @@
1
+ import ctypes
2
+
3
+ import numpy as np
4
+
5
+ from numba.cuda.cudadrv import driver, drvapi, devices
6
+ from numba.cuda.testing import unittest, ContextResettingTestCase
7
+ from numba.cuda.testing import skip_on_cudasim
8
+
9
+
10
@skip_on_cudasim('CUDA Memory API unsupported in the simulator')
class TestCudaMemory(ContextResettingTestCase):
    """
    Checks that the memory objects returned by the context are recognised as
    device memory, and that derived pointers and finalizers behave correctly.
    """

    def setUp(self):
        super().setUp()
        self.context = devices.get_context()

    def tearDown(self):
        del self.context
        super().tearDown()

    def _template(self, obj):
        """
        Assert that ``obj`` is accepted as device memory and exposes a device
        pointer of the class used by the active driver binding.
        """
        self.assertTrue(driver.is_device_memory(obj))
        driver.require_device_memory(obj)
        if driver.USE_NV_BINDING:
            expected_class = driver.binding.CUdeviceptr
        else:
            expected_class = drvapi.cu_device_ptr
        self.assertIsInstance(obj.device_ctypes_pointer, expected_class)

    def test_device_memory(self):
        devmem = self.context.memalloc(1024)
        self._template(devmem)

    def test_device_view(self):
        devmem = self.context.memalloc(1024)
        self._template(devmem.view(10))

    def test_host_alloc(self):
        devmem = self.context.memhostalloc(1024, mapped=True)
        self._template(devmem)

    def test_pinned_memory(self):
        ary = np.arange(10)
        devmem = self.context.mempin(ary, ary.ctypes.data,
                                     ary.size * ary.dtype.itemsize,
                                     mapped=True)
        self._template(devmem)

    def test_managed_memory(self):
        devmem = self.context.memallocmanaged(1024)
        self._template(devmem)

    def test_derived_pointer(self):
        # Use MemoryPointer.view to create derived pointer

        def handle_val(mem):
            # Integer address held by ``mem``, for either driver binding.
            if driver.USE_NV_BINDING:
                return int(mem.handle)
            else:
                return mem.handle.value

        def check(m, offset):
            # create view
            v1 = m.view(offset)
            self.assertEqual(handle_val(v1.owner), handle_val(m))
            self.assertEqual(m.refct, 2)
            self.assertEqual(handle_val(v1) - offset, handle_val(v1.owner))
            # create a view of the view; it must still be owned by ``m``
            v2 = v1.view(offset)
            self.assertEqual(handle_val(v2.owner), handle_val(m))
            self.assertEqual(handle_val(v2) - offset * 2,
                             handle_val(v2.owner))
            self.assertEqual(m.refct, 3)
            # Dropping each view must release its reference on the owner.
            del v2
            self.assertEqual(m.refct, 2)
            del v1
            self.assertEqual(m.refct, 1)

        m = self.context.memalloc(1024)
        check(m=m, offset=0)
        check(m=m, offset=1)

    def test_user_extension(self):
        # User can use MemoryPointer to wrap externally defined pointers.
        # This test checks if the finalizer is invoked at the correct time
        fake_ptr = ctypes.c_void_p(0xdeadbeef)
        dtor_invoked = [0]

        def dtor():
            dtor_invoked[0] += 1

        # Ensure finalizer is called when pointer is deleted
        ptr = driver.MemoryPointer(context=self.context, pointer=fake_ptr,
                                   size=40, finalizer=dtor)
        self.assertEqual(dtor_invoked[0], 0)
        del ptr
        self.assertEqual(dtor_invoked[0], 1)

        # Ensure removing derived pointer doesn't call finalizer
        ptr = driver.MemoryPointer(context=self.context, pointer=fake_ptr,
                                   size=40, finalizer=dtor)
        owned = ptr.own()
        del owned
        self.assertEqual(dtor_invoked[0], 1)
        del ptr
        self.assertEqual(dtor_invoked[0], 2)
108
+
109
+
110
class TestCudaMemoryFunctions(ContextResettingTestCase):
    """
    Round-trip checks for the driver-level copy and memset helpers.
    """

    def setUp(self):
        super().setUp()
        self.context = devices.get_context()

    def tearDown(self):
        del self.context
        super().tearDown()

    def test_memcpy(self):
        # Host -> device -> host must reproduce the original data.
        hstary = np.arange(100, dtype=np.uint32)
        hstary2 = np.arange(100, dtype=np.uint32)
        sz = hstary.size * hstary.dtype.itemsize
        devary = self.context.memalloc(sz)

        driver.host_to_device(devary, hstary, sz)
        driver.device_to_host(hstary2, devary, sz)

        self.assertTrue(np.all(hstary == hstary2))

    def test_memset(self):
        dtype = np.dtype('uint32')
        n = 10
        # Derive the byte size from ``n`` so the allocation, the memset and
        # the host arrays cannot drift apart.
        sz = dtype.itemsize * n
        devary = self.context.memalloc(sz)
        driver.device_memset(devary, 0xab, sz)

        hstary = np.empty(n, dtype=dtype)
        driver.device_to_host(hstary, devary, sz)

        # Every byte was set to 0xab, so each uint32 reads 0xabababab.
        hstary2 = np.array([0xabababab] * n, dtype=dtype)
        self.assertTrue(np.all(hstary == hstary2))

    def test_d2d(self):
        # Host -> dev1 -> dev2 -> host must reproduce the original data.
        hst = np.arange(100, dtype=np.uint32)
        hst2 = np.empty_like(hst)
        sz = hst.size * hst.dtype.itemsize
        dev1 = self.context.memalloc(sz)
        dev2 = self.context.memalloc(sz)
        driver.host_to_device(dev1, hst, sz)
        driver.device_to_device(dev2, dev1, sz)
        driver.device_to_host(hst2, dev2, sz)
        self.assertTrue(np.all(hst == hst2))
153
+
154
+
155
@skip_on_cudasim('CUDA Memory API unsupported in the simulator')
class TestMVExtent(ContextResettingTestCase):
    """
    Checks driver.host_memory_extents / driver.host_memory_size against the
    start address and byte size computed directly from the host object.
    """

    def test_c_contiguous_array(self):
        ary = np.arange(100)
        arysz = ary.dtype.itemsize * ary.size
        # Only the start of the extent is checked; the end is unused here.
        start, _ = driver.host_memory_extents(ary)
        self.assertEqual(ary.ctypes.data, start)
        self.assertEqual(arysz, driver.host_memory_size(ary))

    def test_f_contiguous_array(self):
        ary = np.asfortranarray(np.arange(100).reshape(2, 50))
        arysz = ary.dtype.itemsize * np.prod(ary.shape)
        start, _ = driver.host_memory_extents(ary)
        self.assertEqual(ary.ctypes.data, start)
        self.assertEqual(arysz, driver.host_memory_size(ary))

    def test_single_element_array(self):
        # Zero-dimensional array wrapping a single scalar.
        ary = np.asarray(np.uint32(1234))
        arysz = ary.dtype.itemsize
        start, _ = driver.host_memory_extents(ary)
        self.assertEqual(ary.ctypes.data, start)
        self.assertEqual(arysz, driver.host_memory_size(ary))

    def test_ctypes_struct(self):
        class mystruct(ctypes.Structure):
            _fields_ = [('x', ctypes.c_int), ('y', ctypes.c_int)]

        data = mystruct(x=123, y=432)
        sz = driver.host_memory_size(data)
        self.assertEqual(ctypes.sizeof(data), sz)

    def test_ctypes_double(self):
        data = ctypes.c_double(1.234)
        sz = driver.host_memory_size(data)
        self.assertEqual(ctypes.sizeof(data), sz)
190
+
191
+
192
# Run the tests in this module when it is executed directly as a script.
+ if __name__ == '__main__':
193
+ unittest.main()