numba-cuda 0.0.0__py3-none-any.whl → 0.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (233) hide show
  1. _numba_cuda_redirector.pth +1 -0
  2. _numba_cuda_redirector.py +74 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +5 -0
  5. numba_cuda/_version.py +19 -0
  6. numba_cuda/numba/cuda/__init__.py +22 -0
  7. numba_cuda/numba/cuda/api.py +526 -0
  8. numba_cuda/numba/cuda/api_util.py +30 -0
  9. numba_cuda/numba/cuda/args.py +77 -0
  10. numba_cuda/numba/cuda/cg.py +62 -0
  11. numba_cuda/numba/cuda/codegen.py +378 -0
  12. numba_cuda/numba/cuda/compiler.py +422 -0
  13. numba_cuda/numba/cuda/cpp_function_wrappers.cu +47 -0
  14. numba_cuda/numba/cuda/cuda_fp16.h +3631 -0
  15. numba_cuda/numba/cuda/cuda_fp16.hpp +2465 -0
  16. numba_cuda/numba/cuda/cuda_paths.py +258 -0
  17. numba_cuda/numba/cuda/cudadecl.py +806 -0
  18. numba_cuda/numba/cuda/cudadrv/__init__.py +9 -0
  19. numba_cuda/numba/cuda/cudadrv/devicearray.py +904 -0
  20. numba_cuda/numba/cuda/cudadrv/devices.py +248 -0
  21. numba_cuda/numba/cuda/cudadrv/driver.py +3201 -0
  22. numba_cuda/numba/cuda/cudadrv/drvapi.py +398 -0
  23. numba_cuda/numba/cuda/cudadrv/dummyarray.py +452 -0
  24. numba_cuda/numba/cuda/cudadrv/enums.py +607 -0
  25. numba_cuda/numba/cuda/cudadrv/error.py +36 -0
  26. numba_cuda/numba/cuda/cudadrv/libs.py +176 -0
  27. numba_cuda/numba/cuda/cudadrv/ndarray.py +20 -0
  28. numba_cuda/numba/cuda/cudadrv/nvrtc.py +260 -0
  29. numba_cuda/numba/cuda/cudadrv/nvvm.py +707 -0
  30. numba_cuda/numba/cuda/cudadrv/rtapi.py +10 -0
  31. numba_cuda/numba/cuda/cudadrv/runtime.py +142 -0
  32. numba_cuda/numba/cuda/cudaimpl.py +1055 -0
  33. numba_cuda/numba/cuda/cudamath.py +140 -0
  34. numba_cuda/numba/cuda/decorators.py +189 -0
  35. numba_cuda/numba/cuda/descriptor.py +33 -0
  36. numba_cuda/numba/cuda/device_init.py +89 -0
  37. numba_cuda/numba/cuda/deviceufunc.py +908 -0
  38. numba_cuda/numba/cuda/dispatcher.py +1057 -0
  39. numba_cuda/numba/cuda/errors.py +59 -0
  40. numba_cuda/numba/cuda/extending.py +7 -0
  41. numba_cuda/numba/cuda/initialize.py +13 -0
  42. numba_cuda/numba/cuda/intrinsic_wrapper.py +77 -0
  43. numba_cuda/numba/cuda/intrinsics.py +198 -0
  44. numba_cuda/numba/cuda/kernels/__init__.py +0 -0
  45. numba_cuda/numba/cuda/kernels/reduction.py +262 -0
  46. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  47. numba_cuda/numba/cuda/libdevice.py +3382 -0
  48. numba_cuda/numba/cuda/libdevicedecl.py +17 -0
  49. numba_cuda/numba/cuda/libdevicefuncs.py +1057 -0
  50. numba_cuda/numba/cuda/libdeviceimpl.py +83 -0
  51. numba_cuda/numba/cuda/mathimpl.py +448 -0
  52. numba_cuda/numba/cuda/models.py +48 -0
  53. numba_cuda/numba/cuda/nvvmutils.py +235 -0
  54. numba_cuda/numba/cuda/printimpl.py +86 -0
  55. numba_cuda/numba/cuda/random.py +292 -0
  56. numba_cuda/numba/cuda/simulator/__init__.py +38 -0
  57. numba_cuda/numba/cuda/simulator/api.py +110 -0
  58. numba_cuda/numba/cuda/simulator/compiler.py +9 -0
  59. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +2 -0
  60. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +432 -0
  61. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +117 -0
  62. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +62 -0
  63. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +4 -0
  64. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +4 -0
  65. numba_cuda/numba/cuda/simulator/cudadrv/error.py +6 -0
  66. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +2 -0
  67. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +29 -0
  68. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +19 -0
  69. numba_cuda/numba/cuda/simulator/kernel.py +308 -0
  70. numba_cuda/numba/cuda/simulator/kernelapi.py +495 -0
  71. numba_cuda/numba/cuda/simulator/reduction.py +15 -0
  72. numba_cuda/numba/cuda/simulator/vector_types.py +58 -0
  73. numba_cuda/numba/cuda/simulator_init.py +17 -0
  74. numba_cuda/numba/cuda/stubs.py +902 -0
  75. numba_cuda/numba/cuda/target.py +440 -0
  76. numba_cuda/numba/cuda/testing.py +202 -0
  77. numba_cuda/numba/cuda/tests/__init__.py +58 -0
  78. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +8 -0
  79. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +145 -0
  80. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +145 -0
  81. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +375 -0
  82. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +21 -0
  83. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +179 -0
  84. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +235 -0
  85. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +22 -0
  86. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +193 -0
  87. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +547 -0
  88. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +249 -0
  89. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +81 -0
  90. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +192 -0
  91. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +38 -0
  92. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +65 -0
  93. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +139 -0
  94. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +37 -0
  95. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +12 -0
  96. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +317 -0
  97. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +127 -0
  98. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +54 -0
  99. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +199 -0
  100. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +37 -0
  101. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +20 -0
  102. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +149 -0
  103. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +36 -0
  104. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +85 -0
  105. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +41 -0
  106. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +122 -0
  107. numba_cuda/numba/cuda/tests/cudapy/__init__.py +8 -0
  108. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +234 -0
  109. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +41 -0
  110. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +58 -0
  111. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +30 -0
  112. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +100 -0
  113. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +42 -0
  114. numba_cuda/numba/cuda/tests/cudapy/test_array.py +260 -0
  115. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +201 -0
  116. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +35 -0
  117. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1620 -0
  118. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +120 -0
  119. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +24 -0
  120. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +545 -0
  121. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +257 -0
  122. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +33 -0
  123. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +276 -0
  124. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +296 -0
  125. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +20 -0
  126. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +129 -0
  127. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +176 -0
  128. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +147 -0
  129. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +435 -0
  130. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +90 -0
  131. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +94 -0
  132. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +101 -0
  133. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +221 -0
  134. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +222 -0
  135. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +700 -0
  136. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +121 -0
  137. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +79 -0
  138. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +174 -0
  139. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +155 -0
  140. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +244 -0
  141. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +52 -0
  142. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +29 -0
  143. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +66 -0
  144. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +60 -0
  145. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +456 -0
  146. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +159 -0
  147. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +95 -0
  148. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +37 -0
  149. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +165 -0
  150. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1106 -0
  151. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +318 -0
  152. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +99 -0
  153. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +64 -0
  154. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +119 -0
  155. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +187 -0
  156. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +199 -0
  157. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +164 -0
  158. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +37 -0
  159. numba_cuda/numba/cuda/tests/cudapy/test_math.py +786 -0
  160. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +74 -0
  161. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +113 -0
  162. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +22 -0
  163. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +140 -0
  164. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +46 -0
  165. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +101 -0
  166. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +49 -0
  167. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +401 -0
  168. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +86 -0
  169. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +335 -0
  170. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +124 -0
  171. numba_cuda/numba/cuda/tests/cudapy/test_print.py +128 -0
  172. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +33 -0
  173. numba_cuda/numba/cuda/tests/cudapy/test_random.py +104 -0
  174. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +610 -0
  175. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +125 -0
  176. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +76 -0
  177. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  178. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +85 -0
  179. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +37 -0
  180. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +444 -0
  181. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +205 -0
  182. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +271 -0
  183. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +80 -0
  184. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +277 -0
  185. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +47 -0
  186. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +307 -0
  187. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +283 -0
  188. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +20 -0
  189. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +69 -0
  190. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +36 -0
  191. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +37 -0
  192. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +139 -0
  193. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +276 -0
  194. numba_cuda/numba/cuda/tests/cudasim/__init__.py +6 -0
  195. numba_cuda/numba/cuda/tests/cudasim/support.py +6 -0
  196. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +102 -0
  197. numba_cuda/numba/cuda/tests/data/__init__.py +0 -0
  198. numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
  199. numba_cuda/numba/cuda/tests/data/error.cu +7 -0
  200. numba_cuda/numba/cuda/tests/data/jitlink.cu +23 -0
  201. numba_cuda/numba/cuda/tests/data/jitlink.ptx +51 -0
  202. numba_cuda/numba/cuda/tests/data/warn.cu +7 -0
  203. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +6 -0
  204. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +0 -0
  205. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +49 -0
  206. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +77 -0
  207. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +76 -0
  208. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +82 -0
  209. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +155 -0
  210. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +173 -0
  211. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +109 -0
  212. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +59 -0
  213. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +76 -0
  214. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +130 -0
  215. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +50 -0
  216. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +73 -0
  217. numba_cuda/numba/cuda/tests/nocuda/__init__.py +8 -0
  218. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +359 -0
  219. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +36 -0
  220. numba_cuda/numba/cuda/tests/nocuda/test_import.py +49 -0
  221. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +238 -0
  222. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +54 -0
  223. numba_cuda/numba/cuda/types.py +37 -0
  224. numba_cuda/numba/cuda/ufuncs.py +662 -0
  225. numba_cuda/numba/cuda/vector_types.py +209 -0
  226. numba_cuda/numba/cuda/vectorizers.py +252 -0
  227. numba_cuda-0.0.12.dist-info/LICENSE +25 -0
  228. numba_cuda-0.0.12.dist-info/METADATA +68 -0
  229. numba_cuda-0.0.12.dist-info/RECORD +231 -0
  230. {numba_cuda-0.0.0.dist-info → numba_cuda-0.0.12.dist-info}/WHEEL +1 -1
  231. numba_cuda-0.0.0.dist-info/METADATA +0 -6
  232. numba_cuda-0.0.0.dist-info/RECORD +0 -5
  233. {numba_cuda-0.0.0.dist-info → numba_cuda-0.0.12.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,249 @@
1
+ from contextlib import contextmanager
2
+
3
+ import numpy as np
4
+
5
+ from numba import cuda
6
+ from numba.cuda.testing import (unittest, skip_on_cudasim,
7
+ skip_if_external_memmgr, CUDATestCase)
8
+ from numba.tests.support import captured_stderr
9
+ from numba.core import config
10
+
11
+
12
@skip_on_cudasim('not supported on CUDASIM')
@skip_if_external_memmgr('Deallocation specific to Numba memory management')
class TestDeallocation(CUDATestCase):
    """Checks on when the pending-deallocation queue gets flushed."""

    def test_max_pending_count(self):
        """Exceeding CUDA_DEALLOCS_COUNT entries empties the queue."""
        # Begin with an empty pending-deallocation queue.
        pending = cuda.current_context().memory_manager.deallocations
        pending.clear()
        self.assertEqual(len(pending), 0)

        # Every discarded device array adds one entry, up to the limit.
        for expected in range(1, config.CUDA_DEALLOCS_COUNT + 1):
            cuda.to_device(np.arange(1))
            self.assertEqual(len(pending), expected)

        # One entry beyond the limit is expected to trigger .clear()
        cuda.to_device(np.arange(1))
        self.assertEqual(len(pending), 0)

    def test_max_pending_bytes(self):
        """Exceeding the pending byte budget empties the queue."""
        # Begin with an empty pending-deallocation queue.
        context = cuda.current_context()
        pending = context.memory_manager.deallocations
        pending.clear()
        self.assertEqual(len(pending), 0)

        mem_info = context.get_memory_info()

        budget = 10**6  # 1MB
        saved_ratio = config.CUDA_DEALLOCS_RATIO
        try:
            # Pick a ratio that makes the byte budget roughly `budget`.
            config.CUDA_DEALLOCS_RATIO = budget / mem_info.total
            # A tolerance of 1 is allowed; the original test attributes
            # this to flooring when _max_pending_bytes is computed.
            self.assertAlmostEqual(pending._max_pending_bytes, budget,
                                   delta=1)

            # Half of the budget: must not trigger a flush.
            cuda.to_device(np.ones(budget // 2, dtype=np.int8))
            self.assertEqual(len(pending), 1)

            # Use up whatever is left of the budget: still no flush.
            remaining = pending._max_pending_bytes - pending._size
            cuda.to_device(np.ones(remaining, dtype=np.int8))
            self.assertEqual(len(pending), 2)

            # A single additional byte is expected to trigger .clear()
            cuda.to_device(np.ones(1, dtype=np.int8))
            self.assertEqual(len(pending), 0)
        finally:
            # Put the global setting back regardless of the outcome.
            config.CUDA_DEALLOCS_RATIO = saved_ratio
64
+
65
+
66
@skip_on_cudasim("defer_cleanup has no effect in CUDASIM")
@skip_if_external_memmgr('Deallocation specific to Numba memory management')
class TestDeferCleanup(CUDATestCase):
    """Checks that `cuda.defer_cleanup()` holds back pending deallocations."""

    def test_basic(self):
        """Inside a single defer block, clear() leaves the queue intact."""
        host = np.arange(5)
        first = cuda.to_device(host)
        pending = cuda.current_context().memory_manager.deallocations
        pending.clear()
        self.assertEqual(len(pending), 0)
        with cuda.defer_cleanup():
            second = cuda.to_device(host)
            # Dropping each device array queues one deallocation.
            del first
            self.assertEqual(len(pending), 1)
            del second
            self.assertEqual(len(pending), 2)
            # While deferred, clearing must not remove anything.
            pending.clear()
            self.assertEqual(len(pending), 2)

        # Outside the block the queue can be emptied again.
        pending.clear()
        self.assertEqual(len(pending), 0)

    def test_nested(self):
        """Deferral stays active until the outermost block is left."""
        host = np.arange(5)
        first = cuda.to_device(host)
        pending = cuda.current_context().memory_manager.deallocations
        pending.clear()
        self.assertEqual(len(pending), 0)
        with cuda.defer_cleanup():
            with cuda.defer_cleanup():
                second = cuda.to_device(host)
                del first
                self.assertEqual(len(pending), 1)
                del second
                self.assertEqual(len(pending), 2)
                pending.clear()
                self.assertEqual(len(pending), 2)
            # The inner block has exited, the outer one still defers.
            pending.clear()
            self.assertEqual(len(pending), 2)

        pending.clear()
        self.assertEqual(len(pending), 0)

    def test_exception(self):
        """Deferral ends when the block is left through an exception."""
        host = np.arange(5)
        first = cuda.to_device(host)
        pending = cuda.current_context().memory_manager.deallocations
        pending.clear()
        self.assertEqual(len(pending), 0)

        class CustomError(Exception):
            pass

        with self.assertRaises(CustomError):
            with cuda.defer_cleanup():
                second = cuda.to_device(host)
                del second
                self.assertEqual(len(pending), 1)
                pending.clear()
                self.assertEqual(len(pending), 1)
                raise CustomError
        # After the exception has propagated, clearing works again.
        pending.clear()
        self.assertEqual(len(pending), 0)
        del first
        self.assertEqual(len(pending), 1)
        pending.clear()
        self.assertEqual(len(pending), 0)
132
+
133
+
134
class TestDeferCleanupAvail(CUDATestCase):
    """Smoke test for `cuda.defer_cleanup`; carries no skip decorators."""

    def test_context_manager(self):
        """Entering and leaving the context manager must not raise."""
        # Only the availability of the API is being checked here.
        with cuda.defer_cleanup():
            pass
139
+
140
+
141
@skip_on_cudasim('not supported on CUDASIM')
class TestDel(CUDATestCase):
    """
    Verify that releasing resources writes nothing to stderr, i.e. that
    no exception is raised and ignored during cleanup.
    """

    @contextmanager
    def check_ignored_exception(self, ctx):
        """
        Capture stderr around the managed block, flush the deallocations of
        *ctx* afterwards, and assert that nothing was written to stderr.
        """
        with captured_stderr() as captured:
            yield
            ctx.deallocations.clear()
        self.assertFalse(captured.getvalue())

    def test_stream(self):
        """Deleting a stream is silent."""
        context = cuda.current_context()
        resource = context.create_stream()
        with self.check_ignored_exception(context):
            del resource

    def test_event(self):
        """Deleting an event is silent."""
        context = cuda.current_context()
        resource = context.create_event()
        with self.check_ignored_exception(context):
            del resource

    def test_pinned_memory(self):
        """Deleting memory obtained from memhostalloc is silent."""
        context = cuda.current_context()
        resource = context.memhostalloc(32)
        with self.check_ignored_exception(context):
            del resource

    def test_mapped_memory(self):
        """Deleting memory from memhostalloc(mapped=True) is silent."""
        context = cuda.current_context()
        resource = context.memhostalloc(32, mapped=True)
        with self.check_ignored_exception(context):
            del resource

    def test_device_memory(self):
        """Deleting memory obtained from memalloc is silent."""
        context = cuda.current_context()
        resource = context.memalloc(32)
        with self.check_ignored_exception(context):
            del resource

    def test_managed_memory(self):
        """Deleting memory obtained from memallocmanaged is silent."""
        context = cuda.current_context()
        resource = context.memallocmanaged(32)
        with self.check_ignored_exception(context):
            del resource

    def test_pinned_contextmanager(self):
        """The same array can be pinned again right after being unpinned."""
        class PinnedException(Exception):
            pass

        host = np.zeros(1)
        context = cuda.current_context()
        context.deallocations.clear()
        with self.check_ignored_exception(context):
            # Pin the same array twice in a row.
            for _ in range(2):
                with cuda.pinned(host):
                    pass
            # The same must hold inside a `defer_cleanup` block.
            with cuda.defer_cleanup():
                for _ in range(2):
                    with cuda.pinned(host):
                        pass
            # ... and after the pinned block was left via an exception.
            try:
                with cuda.pinned(host):
                    raise PinnedException
            except PinnedException:
                with cuda.pinned(host):
                    pass

    def test_mapped_contextmanager(self):
        """The same array can be mapped again right after being unmapped."""
        class MappedException(Exception):
            pass

        host = np.zeros(1)
        context = cuda.current_context()
        context.deallocations.clear()
        with self.check_ignored_exception(context):
            # Map the same array twice in a row.
            for _ in range(2):
                with cuda.mapped(host):
                    pass
            # The same must hold inside a `defer_cleanup` block.
            with cuda.defer_cleanup():
                for _ in range(2):
                    with cuda.mapped(host):
                        pass
            # ... and after the mapped block was left via an exception.
            try:
                with cuda.mapped(host):
                    raise MappedException
            except MappedException:
                with cuda.mapped(host):
                    pass
246
+
247
+
248
+ if __name__ == '__main__':
249
+ unittest.main()
@@ -0,0 +1,81 @@
1
+ import os
2
+ import sys
3
+ import subprocess
4
+ import threading
5
+ from numba import cuda
6
+ from numba.cuda.testing import (unittest, CUDATestCase, skip_on_cudasim,
7
+ skip_under_cuda_memcheck)
8
+ from numba.tests.support import captured_stdout
9
+
10
+
11
class TestCudaDetect(CUDATestCase):
    """Smoke test for `cuda.detect()`."""

    def test_cuda_detect(self):
        """The text printed by detect() mentions the devices found."""
        # Run detect() purely to exercise it, capturing what it prints.
        with captured_stdout() as stream:
            cuda.detect()
        printed = stream.getvalue()
        for fragment in ('Found', 'CUDA devices'):
            self.assertIn(fragment, printed)
19
+
20
+
21
@skip_under_cuda_memcheck('Hangs cuda-memcheck')
class TestCUDAFindLibs(CUDATestCase):
    """Runs a small CUDA program in a subprocess with an altered environment."""

    def run_cmd(self, cmdline, env):
        """
        Run *cmdline* with environment *env* and return its decoded
        ``(stdout, stderr)``. The child is killed if it has not finished
        within five minutes.
        """
        popen = subprocess.Popen(cmdline,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 env=env)

        # finish in 5 minutes or kill it
        timeout = threading.Timer(5 * 60., popen.kill)
        try:
            timeout.start()
            out, err = popen.communicate()
        finally:
            # Always stop the watchdog so it cannot fire after we are done.
            timeout.cancel()
        # The exit status is deliberately not checked; callers only
        # inspect the captured output.
        return out.decode(), err.decode()

    def run_test_in_separate_process(self, envvar, envvar_value):
        """
        Launch a trivial kernel in a fresh interpreter whose environment has
        *envvar* set to ``str(envvar_value)``; return ``(stdout, stderr)``.
        """
        env_copy = os.environ.copy()
        env_copy[envvar] = str(envvar_value)
        # The leading "if 1:" lets the snippet stay indented in this file.
        code = """if 1:
            from numba import cuda
            @cuda.jit('(int64,)')
            def kernel(x):
                pass
            kernel(1,)
            """
        cmdline = [sys.executable, "-c", code]
        return self.run_cmd(cmdline, env_copy)

    @skip_on_cudasim('Simulator does not hit device library search code path')
    @unittest.skipIf(not sys.platform.startswith('linux'), "linux only")
    def test_cuda_find_lib_errors(self):
        """
        This tests that the find_libs works as expected in the case of an
        environment variable being used to set the path.
        """
        # one of these is likely to exist on linux, it's also unlikely that
        # someone has extracted the contents of libdevice into here!
        candidates = (os.path.join(os.path.sep, name)
                      for name in ('lib', 'lib64'))

        # First candidate that exists, or None when neither does. The
        # previous loop left the last candidate assigned even when it did
        # not exist, so the guard below could never be False.
        looking_for = next(
            (path for path in candidates if os.path.exists(path)), None)

        # This is the testing part, the test will only run if there's a valid
        # path in which to look
        if looking_for is not None:
            out, err = self.run_test_in_separate_process("NUMBA_CUDA_DRIVER",
                                                         looking_for)
            self.assertIsNotNone(out)
            self.assertIsNotNone(err)
78
+
79
+
80
+ if __name__ == '__main__':
81
+ unittest.main()
@@ -0,0 +1,192 @@
1
+ import ctypes
2
+ import numpy as np
3
+ import weakref
4
+
5
+ from numba import cuda
6
+ from numba.core import config
7
+ from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
8
+ from numba.tests.support import linux_only
9
+
10
if not config.ENABLE_CUDASIM:
    class DeviceOnlyEMMPlugin(cuda.HostOnlyCUDAMemoryManager):
        """
        Test double for an EMM Plugin. It hands out fake allocations and
        records which plugin API methods were invoked, so tests can assert
        that Numba called into the plugin as expected.
        """

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)

            # Bookkeeping for the fake allocations: id -> requested size
            self.allocations = {}
            self.count = 0

            # Flags recording which API methods have been invoked
            self.initialized = False
            self.memalloc_called = False
            self.reset_called = False
            self.get_memory_info_called = False
            self.get_ipc_handle_called = False

        def memalloc(self, size):
            """
            Record a fake allocation of *size* bytes and return an
            AutoFreePointer whose finalizer removes that record again.
            """
            # initialize() is expected to have run before any allocation.
            if not self.initialized:
                raise RuntimeError("memalloc called before initialize")
            self.memalloc_called = True

            # The running counter doubles as the fake device pointer value.
            self.count += 1
            alloc_id = self.count
            self.allocations[alloc_id] = size

            # The finalizer closes over the dict and the id, not over self.
            live_allocs = self.allocations

            def finalizer():
                del live_allocs[alloc_id]

            # Wrapping in an AutoFreePointer lets the tests observe the
            # finalizer running once the returned object is released.
            weak_ctx = weakref.proxy(self.context)
            fake_ptr = ctypes.c_void_p(alloc_id)
            return cuda.cudadrv.driver.AutoFreePointer(weak_ctx, fake_ptr,
                                                       size,
                                                       finalizer=finalizer)

        def initialize(self):
            """Mark the plugin as initialized; nothing else is required."""
            self.initialized = True

        def reset(self):
            """Record that a reset was requested."""
            # Only the flag is set; the allocation table is left untouched.
            # NOTE(review): the previous comment stated that allocations
            # are removed on reset, but no removal happens here - confirm
            # which of the two is intended.
            self.reset_called = True

        def get_memory_info(self):
            """Return fixed dummy figures and record the call."""
            self.get_memory_info_called = True
            return cuda.MemoryInfo(free=32, total=64)

        def get_ipc_handle(self, memory):
            """Return a string stand-in for an IPC handle."""
            # The result is only a string, so tests must not try to open or
            # close it like a real IPC handle.
            self.get_ipc_handle_called = True
            pointer_value = memory.device_pointer.value
            return "Dummy IPC handle for alloc %s" % pointer_value

        @property
        def interface_version(self):
            """Interface version reported by this plugin."""
            return 1

    class BadVersionEMMPlugin(DeviceOnlyEMMPlugin):
        """Same plugin, but reporting a different interface version."""

        @property
        def interface_version(self):
            return 2
98
+
99
+
100
@skip_on_cudasim('EMM Plugins not supported on CUDA simulator')
class TestDeviceOnlyEMMPlugin(CUDATestCase):
    """
    Checks that Numba drives a device-allocation-only EMM Plugin through
    the expected API calls.
    """

    def setUp(self):
        super().setUp()
        # Close the current context first so that every test gets a new
        # context together with a new memory manager instance.
        cuda.close()
        cuda.set_memory_manager(DeviceOnlyEMMPlugin)

    def tearDown(self):
        super().tearDown()
        # Remove the plugin again so later tests are unaffected.
        cuda.close()
        cuda.cudadrv.driver._memory_manager = None

    def test_memalloc(self):
        """Allocations are recorded and their finalizers run on delete."""
        manager = cuda.current_context().memory_manager

        # First allocation: memalloc must be called with the array's size.
        host_a = np.arange(10)
        dev_a = cuda.device_array_like(host_a)
        self.assertTrue(manager.memalloc_called)
        self.assertEqual(manager.count, 1)
        self.assertEqual(manager.allocations[1], host_a.nbytes)

        # Second allocation of a different size is recorded separately.
        host_b = np.arange(5)
        dev_b = cuda.device_array_like(host_b)
        self.assertEqual(manager.count, 2)
        self.assertEqual(manager.allocations[2], host_b.nbytes)

        # Dropping the first array finalizes only the first record.
        del dev_a
        self.assertNotIn(1, manager.allocations)
        self.assertIn(2, manager.allocations)

        # Dropping the second array finalizes the remaining record.
        del dev_b
        self.assertNotIn(2, manager.allocations)

    def test_initialized_in_context(self):
        """A context's memory manager is already initialized."""
        manager = cuda.current_context().memory_manager
        self.assertTrue(manager.initialized)

    def test_reset(self):
        """Resetting the context calls the plugin's reset()."""
        context = cuda.current_context()
        context.reset()
        self.assertTrue(context.memory_manager.reset_called)

    def test_get_memory_info(self):
        """Memory info is obtained from the plugin."""
        context = cuda.current_context()
        info = context.get_memory_info()
        self.assertTrue(context.memory_manager.get_memory_info_called)
        self.assertEqual(info.free, 32)
        self.assertEqual(info.total, 64)

    @linux_only
    def test_get_ipc_handle(self):
        """IPC handles are obtained from the plugin."""
        # The handle is never closed here: the plugin returns a plain
        # string rather than a real IpcHandle, and closing it would cause
        # problems.
        host = np.arange(2)
        device = cuda.device_array_like(host)
        handle = device.get_ipc_handle()
        context = cuda.current_context()
        self.assertTrue(context.memory_manager.get_ipc_handle_called)
        self.assertIn("Dummy IPC handle for alloc 1", handle._ipc_handle)
176
+
177
+
178
@skip_on_cudasim('EMM Plugins not supported on CUDA simulator')
class TestBadEMMPluginVersion(CUDATestCase):
    """
    Checks that an EMM Plugin reporting an unsupported interface version
    is refused.
    """

    def test_bad_plugin_version(self):
        """set_memory_manager raises and names the required version."""
        with self.assertRaises(RuntimeError) as caught:
            cuda.set_memory_manager(BadVersionEMMPlugin)
        message = str(caught.exception)
        self.assertIn('version 1 required', message)
189
+
190
+
191
+ if __name__ == '__main__':
192
+ unittest.main()
@@ -0,0 +1,38 @@
1
+ import numpy as np
2
+ from numba import cuda
3
+ from numba.cuda.testing import unittest, CUDATestCase
4
+
5
+
6
class TestCudaEvent(CUDATestCase):
    """Exercises recording, waiting on and timing CUDA events."""

    def test_event_elapsed(self):
        """Time a host-to-device copy without naming a stream."""
        size = 32
        target = cuda.device_array(size, dtype=np.double)
        begin = cuda.event()
        finish = cuda.event()

        begin.record()
        cuda.to_device(np.arange(size, dtype=np.double), to=target)
        finish.record()
        finish.wait()
        finish.synchronize()
        # Called only to exercise the code path; the value is not checked.
        begin.elapsed_time(finish)

    def test_event_elapsed_stream(self):
        """Time a host-to-device copy issued on an explicit stream."""
        size = 32
        stream = cuda.stream()
        target = cuda.device_array(size, dtype=np.double)
        begin = cuda.event()
        finish = cuda.event()

        begin.record(stream=stream)
        source = np.arange(size, dtype=np.double)
        cuda.to_device(source, to=target, stream=stream)
        finish.record(stream=stream)
        finish.wait(stream=stream)
        finish.synchronize()
        # Called only to exercise the code path; the value is not checked.
        begin.elapsed_time(finish)
35
+
36
+
37
+ if __name__ == '__main__':
38
+ unittest.main()
@@ -0,0 +1,65 @@
1
+ import numpy as np
2
+ from numba.cuda.cudadrv import driver
3
+ from numba import cuda
4
+ from numba.cuda.testing import unittest, ContextResettingTestCase
5
+
6
+
7
class TestHostAlloc(ContextResettingTestCase):
    """Tests for host memory allocated through the CUDA driver and API."""

    def test_host_alloc_driver(self):
        """Mapped host memory is visible through both host and device."""
        n = 32
        mem = cuda.current_context().memhostalloc(n, mapped=True)

        # View the allocation as a uint8 array on the host side.
        dtype = np.dtype(np.uint8)
        ary = np.ndarray(shape=n // dtype.itemsize, dtype=dtype,
                         buffer=mem)

        # A memset through the driver must show up in the host view.
        magic = 0xab
        driver.device_memset(mem, magic, n)

        self.assertTrue(np.all(ary == magic))

        # A host-side write must be returned by a device-to-host copy.
        ary.fill(n)

        recv = np.empty_like(ary)

        driver.device_to_host(recv, mem, ary.size)

        self.assertTrue(np.all(ary == recv))
        self.assertTrue(np.all(recv == n))

    def test_host_alloc_pinned(self):
        """A pinned array only changes once data is copied back into it."""
        ary = cuda.pinned_array(10, dtype=np.uint32)
        ary.fill(123)
        self.assertTrue(all(ary == 123))
        devary = cuda.to_device(ary)
        driver.device_memset(devary, 0, driver.device_memory_size(devary))
        # Zeroing the device copy leaves the host array untouched ...
        self.assertTrue(all(ary == 123))
        devary.copy_to_host(ary)
        # ... until the device contents are copied back.
        self.assertTrue(all(ary == 0))

    def test_host_alloc_mapped(self):
        """A memset on a mapped array is visible from the host directly."""
        ary = cuda.mapped_array(10, dtype=np.uint32)
        ary.fill(123)
        self.assertTrue(all(ary == 123))
        driver.device_memset(ary, 0, driver.device_memory_size(ary))
        self.assertTrue(all(ary == 0))
        self.assertEqual(sum(ary != 0), 0)

    def test_host_operators(self):
        """Arithmetic and comparison operators work on host arrays."""
        for ary in [cuda.mapped_array(10, dtype=np.uint32),
                    cuda.pinned_array(10, dtype=np.uint32)]:
            ary[:] = range(10)
            # assertEqual is used so that a failure reports both values.
            self.assertEqual(sum(ary + 1), 55)
            self.assertEqual(sum((ary + 1) * 2 - 1), 100)
            self.assertEqual(sum(ary < 5), 5)
            self.assertEqual(sum(ary <= 5), 6)
            self.assertEqual(sum(ary > 6), 3)
            self.assertEqual(sum(ary >= 6), 4)
            self.assertEqual(sum(ary ** 2), 285)
            self.assertEqual(sum(ary // 2), 20)
            self.assertEqual(sum(ary / 2.0), 22.5)
            self.assertEqual(sum(ary % 2), 5)
62
+
63
+
64
+ if __name__ == '__main__':
65
+ unittest.main()