numba-cuda 0.0.1__py3-none-any.whl → 0.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (233) hide show
  1. _numba_cuda_redirector.pth +1 -0
  2. _numba_cuda_redirector.py +74 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +5 -0
  5. numba_cuda/_version.py +19 -0
  6. numba_cuda/numba/cuda/__init__.py +22 -0
  7. numba_cuda/numba/cuda/api.py +526 -0
  8. numba_cuda/numba/cuda/api_util.py +30 -0
  9. numba_cuda/numba/cuda/args.py +77 -0
  10. numba_cuda/numba/cuda/cg.py +62 -0
  11. numba_cuda/numba/cuda/codegen.py +378 -0
  12. numba_cuda/numba/cuda/compiler.py +422 -0
  13. numba_cuda/numba/cuda/cpp_function_wrappers.cu +47 -0
  14. numba_cuda/numba/cuda/cuda_fp16.h +3631 -0
  15. numba_cuda/numba/cuda/cuda_fp16.hpp +2465 -0
  16. numba_cuda/numba/cuda/cuda_paths.py +258 -0
  17. numba_cuda/numba/cuda/cudadecl.py +806 -0
  18. numba_cuda/numba/cuda/cudadrv/__init__.py +9 -0
  19. numba_cuda/numba/cuda/cudadrv/devicearray.py +904 -0
  20. numba_cuda/numba/cuda/cudadrv/devices.py +248 -0
  21. numba_cuda/numba/cuda/cudadrv/driver.py +3201 -0
  22. numba_cuda/numba/cuda/cudadrv/drvapi.py +398 -0
  23. numba_cuda/numba/cuda/cudadrv/dummyarray.py +452 -0
  24. numba_cuda/numba/cuda/cudadrv/enums.py +607 -0
  25. numba_cuda/numba/cuda/cudadrv/error.py +36 -0
  26. numba_cuda/numba/cuda/cudadrv/libs.py +176 -0
  27. numba_cuda/numba/cuda/cudadrv/ndarray.py +20 -0
  28. numba_cuda/numba/cuda/cudadrv/nvrtc.py +260 -0
  29. numba_cuda/numba/cuda/cudadrv/nvvm.py +707 -0
  30. numba_cuda/numba/cuda/cudadrv/rtapi.py +10 -0
  31. numba_cuda/numba/cuda/cudadrv/runtime.py +142 -0
  32. numba_cuda/numba/cuda/cudaimpl.py +1055 -0
  33. numba_cuda/numba/cuda/cudamath.py +140 -0
  34. numba_cuda/numba/cuda/decorators.py +189 -0
  35. numba_cuda/numba/cuda/descriptor.py +33 -0
  36. numba_cuda/numba/cuda/device_init.py +89 -0
  37. numba_cuda/numba/cuda/deviceufunc.py +908 -0
  38. numba_cuda/numba/cuda/dispatcher.py +1057 -0
  39. numba_cuda/numba/cuda/errors.py +59 -0
  40. numba_cuda/numba/cuda/extending.py +7 -0
  41. numba_cuda/numba/cuda/initialize.py +13 -0
  42. numba_cuda/numba/cuda/intrinsic_wrapper.py +77 -0
  43. numba_cuda/numba/cuda/intrinsics.py +198 -0
  44. numba_cuda/numba/cuda/kernels/__init__.py +0 -0
  45. numba_cuda/numba/cuda/kernels/reduction.py +262 -0
  46. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  47. numba_cuda/numba/cuda/libdevice.py +3382 -0
  48. numba_cuda/numba/cuda/libdevicedecl.py +17 -0
  49. numba_cuda/numba/cuda/libdevicefuncs.py +1057 -0
  50. numba_cuda/numba/cuda/libdeviceimpl.py +83 -0
  51. numba_cuda/numba/cuda/mathimpl.py +448 -0
  52. numba_cuda/numba/cuda/models.py +48 -0
  53. numba_cuda/numba/cuda/nvvmutils.py +235 -0
  54. numba_cuda/numba/cuda/printimpl.py +86 -0
  55. numba_cuda/numba/cuda/random.py +292 -0
  56. numba_cuda/numba/cuda/simulator/__init__.py +38 -0
  57. numba_cuda/numba/cuda/simulator/api.py +110 -0
  58. numba_cuda/numba/cuda/simulator/compiler.py +9 -0
  59. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +2 -0
  60. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +432 -0
  61. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +117 -0
  62. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +62 -0
  63. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +4 -0
  64. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +4 -0
  65. numba_cuda/numba/cuda/simulator/cudadrv/error.py +6 -0
  66. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +2 -0
  67. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +29 -0
  68. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +19 -0
  69. numba_cuda/numba/cuda/simulator/kernel.py +308 -0
  70. numba_cuda/numba/cuda/simulator/kernelapi.py +495 -0
  71. numba_cuda/numba/cuda/simulator/reduction.py +15 -0
  72. numba_cuda/numba/cuda/simulator/vector_types.py +58 -0
  73. numba_cuda/numba/cuda/simulator_init.py +17 -0
  74. numba_cuda/numba/cuda/stubs.py +902 -0
  75. numba_cuda/numba/cuda/target.py +440 -0
  76. numba_cuda/numba/cuda/testing.py +202 -0
  77. numba_cuda/numba/cuda/tests/__init__.py +58 -0
  78. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +8 -0
  79. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +145 -0
  80. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +145 -0
  81. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +375 -0
  82. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +21 -0
  83. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +179 -0
  84. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +235 -0
  85. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +22 -0
  86. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +193 -0
  87. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +547 -0
  88. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +249 -0
  89. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +81 -0
  90. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +192 -0
  91. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +38 -0
  92. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +65 -0
  93. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +139 -0
  94. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +37 -0
  95. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +12 -0
  96. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +317 -0
  97. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +127 -0
  98. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +54 -0
  99. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +199 -0
  100. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +37 -0
  101. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +20 -0
  102. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +149 -0
  103. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +36 -0
  104. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +85 -0
  105. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +41 -0
  106. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +122 -0
  107. numba_cuda/numba/cuda/tests/cudapy/__init__.py +8 -0
  108. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +234 -0
  109. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +41 -0
  110. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +58 -0
  111. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +30 -0
  112. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +100 -0
  113. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +42 -0
  114. numba_cuda/numba/cuda/tests/cudapy/test_array.py +260 -0
  115. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +201 -0
  116. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +35 -0
  117. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1620 -0
  118. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +120 -0
  119. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +24 -0
  120. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +545 -0
  121. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +257 -0
  122. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +33 -0
  123. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +276 -0
  124. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +296 -0
  125. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +20 -0
  126. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +129 -0
  127. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +176 -0
  128. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +147 -0
  129. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +435 -0
  130. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +90 -0
  131. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +94 -0
  132. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +101 -0
  133. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +221 -0
  134. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +222 -0
  135. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +700 -0
  136. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +121 -0
  137. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +79 -0
  138. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +174 -0
  139. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +155 -0
  140. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +244 -0
  141. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +52 -0
  142. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +29 -0
  143. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +66 -0
  144. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +60 -0
  145. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +456 -0
  146. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +159 -0
  147. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +95 -0
  148. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +37 -0
  149. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +165 -0
  150. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1106 -0
  151. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +318 -0
  152. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +99 -0
  153. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +64 -0
  154. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +119 -0
  155. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +187 -0
  156. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +199 -0
  157. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +164 -0
  158. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +37 -0
  159. numba_cuda/numba/cuda/tests/cudapy/test_math.py +786 -0
  160. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +74 -0
  161. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +113 -0
  162. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +22 -0
  163. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +140 -0
  164. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +46 -0
  165. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +101 -0
  166. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +49 -0
  167. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +401 -0
  168. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +86 -0
  169. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +335 -0
  170. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +124 -0
  171. numba_cuda/numba/cuda/tests/cudapy/test_print.py +128 -0
  172. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +33 -0
  173. numba_cuda/numba/cuda/tests/cudapy/test_random.py +104 -0
  174. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +610 -0
  175. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +125 -0
  176. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +76 -0
  177. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  178. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +85 -0
  179. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +37 -0
  180. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +444 -0
  181. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +205 -0
  182. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +271 -0
  183. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +80 -0
  184. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +277 -0
  185. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +47 -0
  186. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +307 -0
  187. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +283 -0
  188. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +20 -0
  189. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +69 -0
  190. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +36 -0
  191. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +37 -0
  192. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +139 -0
  193. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +276 -0
  194. numba_cuda/numba/cuda/tests/cudasim/__init__.py +6 -0
  195. numba_cuda/numba/cuda/tests/cudasim/support.py +6 -0
  196. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +102 -0
  197. numba_cuda/numba/cuda/tests/data/__init__.py +0 -0
  198. numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
  199. numba_cuda/numba/cuda/tests/data/error.cu +7 -0
  200. numba_cuda/numba/cuda/tests/data/jitlink.cu +23 -0
  201. numba_cuda/numba/cuda/tests/data/jitlink.ptx +51 -0
  202. numba_cuda/numba/cuda/tests/data/warn.cu +7 -0
  203. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +6 -0
  204. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +0 -0
  205. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +49 -0
  206. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +77 -0
  207. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +76 -0
  208. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +82 -0
  209. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +155 -0
  210. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +173 -0
  211. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +109 -0
  212. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +59 -0
  213. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +76 -0
  214. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +130 -0
  215. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +50 -0
  216. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +73 -0
  217. numba_cuda/numba/cuda/tests/nocuda/__init__.py +8 -0
  218. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +359 -0
  219. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +36 -0
  220. numba_cuda/numba/cuda/tests/nocuda/test_import.py +49 -0
  221. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +238 -0
  222. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +54 -0
  223. numba_cuda/numba/cuda/types.py +37 -0
  224. numba_cuda/numba/cuda/ufuncs.py +662 -0
  225. numba_cuda/numba/cuda/vector_types.py +209 -0
  226. numba_cuda/numba/cuda/vectorizers.py +252 -0
  227. numba_cuda-0.0.12.dist-info/LICENSE +25 -0
  228. numba_cuda-0.0.12.dist-info/METADATA +68 -0
  229. numba_cuda-0.0.12.dist-info/RECORD +231 -0
  230. {numba_cuda-0.0.1.dist-info → numba_cuda-0.0.12.dist-info}/WHEEL +1 -1
  231. numba_cuda-0.0.1.dist-info/METADATA +0 -10
  232. numba_cuda-0.0.1.dist-info/RECORD +0 -5
  233. {numba_cuda-0.0.1.dist-info → numba_cuda-0.0.12.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,94 @@
1
+ import numpy as np
2
+
3
+ from numba import cuda, vectorize, guvectorize
4
+ from numba.np.numpy_support import from_dtype
5
+ from numba.cuda.testing import CUDATestCase, skip_on_cudasim
6
+ import unittest
7
+
8
+
9
class TestCudaDateTime(CUDATestCase):
    """Tests for NumPy datetime64 / timedelta64 values on the CUDA target."""

    def test_basic_datetime_kernel(self):
        """A kernel subtracts two datetime arrays into a timedelta array."""
        @cuda.jit
        def foo(start, end, delta):
            # Start at this thread's grid index and step by the grid size.
            for i in range(cuda.grid(1), delta.size, cuda.gridsize(1)):
                delta[i] = end[i] - start[i]

        arr1 = np.arange('2005-02', '2006-02', dtype='datetime64[D]')
        arr2 = arr1 + np.random.randint(0, 10000, arr1.size)
        delta = np.zeros_like(arr1, dtype='timedelta64[D]')

        foo[1, 32](arr1, arr2, delta)

        self.assertPreciseEqual(delta, arr2 - arr1)

    def test_scalar_datetime_kernel(self):
        """A kernel compares against a datetime scalar and subtracts a
        timedelta scalar."""
        @cuda.jit
        def foo(dates, target, delta, matches, outdelta):
            for i in range(cuda.grid(1), matches.size, cuda.gridsize(1)):
                matches[i] = dates[i] == target
                outdelta[i] = dates[i] - delta

        arr1 = np.arange('2005-02', '2006-02', dtype='datetime64[D]')
        target = arr1[5]    # datetime
        delta = arr1[6] - arr1[5]   # timedelta
        matches = np.zeros_like(arr1, dtype=np.bool_)
        outdelta = np.zeros_like(arr1, dtype='datetime64[D]')

        foo[1, 32](arr1, target, delta, matches, outdelta)

        # nonzero() returns a tuple with one index array per dimension.
        # Compare the indices themselves rather than the tuple: a list that
        # wraps the index array only compares equal to [5] by virtue of a
        # one-element array being truthy, and raises for longer arrays.
        where = matches.nonzero()

        self.assertEqual(where[0].tolist(), [5])
        self.assertPreciseEqual(outdelta, arr1 - delta)

    @skip_on_cudasim('ufunc API unsupported in the simulator')
    def test_ufunc(self):
        """A CUDA-target ufunc subtracts two datetime arrays."""
        datetime_t = from_dtype(np.dtype('datetime64[D]'))

        @vectorize([(datetime_t, datetime_t)], target='cuda')
        def timediff(start, end):
            return end - start

        arr1 = np.arange('2005-02', '2006-02', dtype='datetime64[D]')
        arr2 = arr1 + np.random.randint(0, 10000, arr1.size)

        delta = timediff(arr1, arr2)

        self.assertPreciseEqual(delta, arr2 - arr1)

    @skip_on_cudasim('ufunc API unsupported in the simulator')
    def test_gufunc(self):
        """A CUDA-target gufunc with layout '(),()->()' subtracts two
        datetime arrays."""
        datetime_t = from_dtype(np.dtype('datetime64[D]'))
        timedelta_t = from_dtype(np.dtype('timedelta64[D]'))

        @guvectorize([(datetime_t, datetime_t, timedelta_t[:])], '(),()->()',
                     target='cuda')
        def timediff(start, end, out):
            out[0] = end - start

        arr1 = np.arange('2005-02', '2006-02', dtype='datetime64[D]')
        arr2 = arr1 + np.random.randint(0, 10000, arr1.size)

        delta = timediff(arr1, arr2)

        self.assertPreciseEqual(delta, arr2 - arr1)

    @skip_on_cudasim('no .copy_to_host() in the simulator')
    def test_datetime_view_as_int64(self):
        """Viewing a datetime device array as int64 matches the host view
        and keeps the same gpu_data."""
        arr = np.arange('2005-02', '2006-02', dtype='datetime64[D]')
        darr = cuda.to_device(arr)
        viewed = darr.view(np.int64)
        self.assertPreciseEqual(arr.view(np.int64), viewed.copy_to_host())
        self.assertEqual(viewed.gpu_data, darr.gpu_data)

    @skip_on_cudasim('no .copy_to_host() in the simulator')
    def test_timedelta_view_as_int64(self):
        """Viewing a timedelta device array as int64 matches the host view
        and keeps the same gpu_data."""
        arr = np.arange('2005-02', '2006-02', dtype='datetime64[D]')
        # Subtracting datetimes yields a timedelta array; checked below.
        arr = arr - (arr - 1)
        self.assertEqual(arr.dtype, np.dtype('timedelta64[D]'))
        darr = cuda.to_device(arr)
        viewed = darr.view(np.int64)
        self.assertPreciseEqual(arr.view(np.int64), viewed.copy_to_host())
        self.assertEqual(viewed.gpu_data, darr.gpu_data)
91
+
92
+
93
+ if __name__ == '__main__':
94
+ unittest.main()
@@ -0,0 +1,101 @@
1
+ import numpy as np
2
+
3
+ from numba.core.utils import PYVERSION
4
+ from numba.cuda.testing import skip_on_cudasim, CUDATestCase
5
+ from numba.tests.support import (override_config, captured_stderr,
6
+ captured_stdout)
7
+ from numba import cuda, float64
8
+ import unittest
9
+
10
+
11
def simple_cuda(A, B):
    # Kernel body: the thread at 1D grid position `idx` writes A[idx] + 1.5
    # into B[idx]. The addition and the 1.5 constant are what the dump checks
    # in this module look for, so keep them as written.
    idx = cuda.grid(1)
    B[idx] = A[idx] + 1.5
14
+
15
+
16
@skip_on_cudasim('Simulator does not produce debug dumps')
class TestDebugOutput(CUDATestCase):
    """Check that each DUMP_* config option emits its own dump on stdout
    and none of the others."""

    def compile_simple_cuda(self):
        """Compile and launch ``simple_cuda``; return the captured stdout.

        Also asserts that the kernel result is correct and that nothing was
        written to stderr.
        """
        with captured_stderr() as err:
            with captured_stdout() as out:
                cfunc = cuda.jit((float64[:], float64[:]))(simple_cuda)
                # Call compiled function (to ensure PTX is generated)
                # and sanity-check results.
                A = np.linspace(0, 1, 10).astype(np.float64)
                B = np.zeros_like(A)
                cfunc[1, 10](A, B)
                self.assertTrue(np.allclose(A + 1.5, B))
        # stderr shouldn't be affected by debug output
        self.assertFalse(err.getvalue())
        return out.getvalue()

    def assert_fails(self, *args, **kwargs):
        """Assert that calling ``args[0](*args[1:], **kwargs)`` raises
        AssertionError."""
        self.assertRaises(AssertionError, *args, **kwargs)

    def check_debug_output(self, out, enabled_dumps):
        """Verify that ``out`` contains exactly the dumps in
        ``enabled_dumps``.

        Every known dump kind has a ``_check_dump_<name>`` method; it must
        pass for enabled kinds and fail for all others.
        """
        all_dumps = dict.fromkeys(['bytecode', 'cfg', 'ir', 'llvm',
                                   'assembly'],
                                  False)
        for name in enabled_dumps:
            # A unittest assertion (not a bare ``assert``) so the check is
            # still performed when Python runs with -O.
            self.assertIn(name, all_dumps)
            all_dumps[name] = True
        for name, enabled in sorted(all_dumps.items()):
            check_meth = getattr(self, '_check_dump_%s' % name)
            if enabled:
                check_meth(out)
            else:
                self.assert_fails(check_meth, out)

    def _check_dump_bytecode(self, out):
        """Assert the bytecode dump shows the add for this Python version."""
        if PYVERSION in ((3, 11), (3, 12)):
            # binop with arg=0 is binary add, see CPython dis.py and opcode.py
            self.assertIn('BINARY_OP(arg=0', out)
        elif PYVERSION in ((3, 9), (3, 10)):
            self.assertIn('BINARY_ADD', out)
        else:
            # Unknown interpreter version: fail loudly rather than guess.
            raise NotImplementedError(PYVERSION)

    def _check_dump_cfg(self, out):
        """Assert the control-flow-graph dump is present."""
        self.assertIn('CFG dominators', out)

    def _check_dump_ir(self, out):
        """Assert the Numba IR dump for ``simple_cuda`` is present."""
        self.assertIn('--IR DUMP: simple_cuda--', out)
        self.assertIn('const(float, 1.5)', out)

    def _check_dump_llvm(self, out):
        """Assert the LLVM dump is present and marks a kernel."""
        self.assertIn('--LLVM DUMP', out)
        self.assertIn('!"kernel", i32 1', out)

    def _check_dump_assembly(self, out):
        """Assert the assembly dump for ``simple_cuda`` is present."""
        self.assertIn('--ASSEMBLY simple_cuda', out)
        self.assertIn('Generated by NVIDIA NVVM Compiler', out)

    def test_dump_bytecode(self):
        with override_config('DUMP_BYTECODE', True):
            out = self.compile_simple_cuda()
        self.check_debug_output(out, ['bytecode'])

    def test_dump_ir(self):
        with override_config('DUMP_IR', True):
            out = self.compile_simple_cuda()
        self.check_debug_output(out, ['ir'])

    def test_dump_cfg(self):
        with override_config('DUMP_CFG', True):
            out = self.compile_simple_cuda()
        self.check_debug_output(out, ['cfg'])

    def test_dump_llvm(self):
        with override_config('DUMP_LLVM', True):
            out = self.compile_simple_cuda()
        self.check_debug_output(out, ['llvm'])

    def test_dump_assembly(self):
        with override_config('DUMP_ASSEMBLY', True):
            out = self.compile_simple_cuda()
        self.check_debug_output(out, ['assembly'])
98
+
99
+
100
+ if __name__ == '__main__':
101
+ unittest.main()
@@ -0,0 +1,221 @@
1
+ from numba.tests.support import override_config
2
+ from numba.cuda.testing import skip_on_cudasim
3
+ from numba import cuda
4
+ from numba.core import types
5
+ from numba.cuda.testing import CUDATestCase
6
+ import itertools
7
+ import re
8
+ import unittest
9
+
10
+
11
@skip_on_cudasim('Simulator does not produce debug dumps')
class TestCudaDebugInfo(CUDATestCase):
    """
    These tests only check the compiled PTX for the debuginfo section
    """

    def setUp(self):
        super().setUp()
        # If we're using LTO then we can't check the PTX in these tests,
        # because we produce LTO-IR, which is opaque to the user.
        # Additionally, LTO optimizes away the exception status due to an
        # oversight in the way we generate it (it is not added to the used
        # list).
        self.skip_if_lto("Exceptions not supported with LTO")

    def _getasm(self, fn, sig):
        """Compile ``fn`` for ``sig`` and return its assembly for ``sig``."""
        fn.compile(sig)
        asm = fn.inspect_asm(sig)
        return asm

    def _check(self, fn, sig, expect):
        """Assert that a ``.debug_info`` section is present in the assembly
        of ``fn`` when ``expect`` is true, and absent otherwise."""
        ptx = self._getasm(fn, sig=sig)
        found = re.search(r"\.section\s+\.debug_info\s+{", ptx)
        if expect:
            self.assertIsNotNone(found, msg=ptx)
        else:
            self.assertIsNone(found, msg=ptx)

    def test_no_debuginfo_in_asm(self):
        @cuda.jit(debug=False)
        def foo(x):
            x[0] = 1

        signature = (types.int32[:],)
        self._check(foo, sig=signature, expect=False)

    def test_debuginfo_in_asm(self):
        @cuda.jit(debug=True, opt=False)
        def foo(x):
            x[0] = 1

        signature = (types.int32[:],)
        self._check(foo, sig=signature, expect=True)

    def test_environment_override(self):
        with override_config('CUDA_DEBUGINFO_DEFAULT', 1):
            # No debug argument: the configured default applies.
            @cuda.jit(opt=False)
            def foo(x):
                x[0] = 1

            self._check(foo, sig=(types.int32[:],), expect=True)

            # An explicit debug=False from the user overrides the default.
            @cuda.jit(debug=False)
            def bar(x):
                x[0] = 1

            self._check(bar, sig=(types.int32[:],), expect=False)

    def test_issue_5835(self):
        # Invalid debug metadata would segfault NVVM when any function was
        # compiled with debug turned on and optimization off. This eager
        # compilation should not crash anything.
        @cuda.jit((types.int32[::1],), debug=True, opt=False)
        def f(x):
            x[0] = 0

    def test_wrapper_has_debuginfo(self):
        sig = (types.int32[::1],)

        @cuda.jit(sig, debug=True, opt=0)
        def f(x):
            x[0] = 1

        llvm_ir = f.inspect_llvm(sig)

        # Collect every line that defines a void function with the cudapy
        # mangling prefix.
        defines = []
        for line in llvm_ir.splitlines():
            if 'define void @"_ZN6cudapy' in line:
                defines.append(line)

        # Make sure we only found one definition
        self.assertEqual(len(defines), 1)

        (wrapper_define,) = defines
        self.assertIn('!dbg', wrapper_define)

    def test_debug_function_calls_internal_impl(self):
        # Calling a function in a module generated from an implementation
        # internal to Numba requires multiple modules to be compiled with NVVM -
        # the internal implementation, and the caller. This example uses two
        # modules because the `in (2, 3)` is implemented with:
        #
        # numba::cpython::listobj::in_seq::$3clocals$3e::seq_contains_impl$242(
        #     UniTuple<long long, 2>,
        #     int
        # )
        #
        # This is condensed from this reproducer in Issue 5311:
        # https://github.com/numba/numba/issues/5311#issuecomment-674206587

        @cuda.jit((types.int32[:], types.int32[:]), debug=True, opt=False)
        def f(inp, outp):
            outp[0] = 1 if inp[0] in (2, 3) else 3

    def test_debug_function_calls_device_function(self):
        # Calling a device function requires compilation of multiple modules
        # with NVVM - one for the caller and one for the callee. This checks
        # that we don't cause an NVVM error in this case.

        @cuda.jit(device=True, debug=True, opt=0)
        def threadid():
            return cuda.blockDim.x * cuda.blockIdx.x + cuda.threadIdx.x

        @cuda.jit((types.int32[:],), debug=True, opt=0)
        def kernel(arr):
            i = cuda.grid(1)
            if i < len(arr):
                arr[i] = threadid()

    def _test_chained_device_function(self, kernel_debug, f1_debug, f2_debug):
        """Launch kernel -> f1 -> f2 with the given debug flag on each."""
        @cuda.jit(device=True, debug=f2_debug, opt=False)
        def f2(x):
            return x + 1

        @cuda.jit(device=True, debug=f1_debug, opt=False)
        def f1(x, y):
            return x - f2(y)

        @cuda.jit((types.int32, types.int32), debug=kernel_debug, opt=False)
        def kernel(x, y):
            f1(x, y)

        kernel[1, 1](1, 2)

    def test_chained_device_function(self):
        # Calling a device function that calls another device function from a
        # kernel should succeed regardless of which jit decorators have
        # debug=True. See Issue #7159.

        for kernel_debug, f1_debug, f2_debug in itertools.product(
                (True, False), repeat=3):
            with self.subTest(kernel_debug=kernel_debug,
                              f1_debug=f1_debug,
                              f2_debug=f2_debug):
                self._test_chained_device_function(kernel_debug,
                                                   f1_debug,
                                                   f2_debug)

    def _test_chained_device_function_two_calls(self, kernel_debug, f1_debug,
                                                f2_debug):
        """Launch a kernel that calls f1 (which calls f2) and also calls f2
        directly, with the given debug flag on each function."""

        @cuda.jit(device=True, debug=f2_debug, opt=False)
        def f2(x):
            return x + 1

        @cuda.jit(device=True, debug=f1_debug, opt=False)
        def f1(x, y):
            return x - f2(y)

        @cuda.jit(debug=kernel_debug, opt=False)
        def kernel(x, y):
            f1(x, y)
            f2(x)

        kernel[1, 1](1, 2)

    def test_chained_device_function_two_calls(self):
        # Calling a device function that calls a leaf device function from a
        # kernel, and calling the leaf device function from the kernel should
        # succeed, regardless of which jit decorators have debug=True. See
        # Issue #7159.

        for kernel_debug, f1_debug, f2_debug in itertools.product(
                (True, False), repeat=3):
            with self.subTest(kernel_debug=kernel_debug,
                              f1_debug=f1_debug,
                              f2_debug=f2_debug):
                self._test_chained_device_function_two_calls(kernel_debug,
                                                             f1_debug,
                                                             f2_debug)

    def test_chained_device_three_functions(self):
        # Like test_chained_device_function, but with enough functions (three)
        # to ensure that the recursion visits all the way down the call tree
        # when fixing linkage of functions for debug.
        def three_device_fns(kernel_debug, leaf_debug):
            @cuda.jit(device=True, debug=leaf_debug, opt=False)
            def f3(x):
                return x * x

            @cuda.jit(device=True)
            def f2(x):
                return f3(x) + 1

            @cuda.jit(device=True)
            def f1(x, y):
                return x - f2(y)

            @cuda.jit(debug=kernel_debug, opt=False)
            def kernel(x, y):
                f1(x, y)

            kernel[1, 1](1, 2)

        # Check every combination of debug on the kernel and on the leaf:
        # (True, True), (True, False), (False, True), (False, False).
        for on_kernel, on_leaf in itertools.product((True, False), repeat=2):
            three_device_fns(kernel_debug=on_kernel, leaf_debug=on_leaf)
218
+
219
+
220
+ if __name__ == '__main__':
221
+ unittest.main()
@@ -0,0 +1,222 @@
1
+ import re
2
+ import types
3
+
4
+ import numpy as np
5
+
6
+ from numba.cuda.testing import unittest, skip_on_cudasim, CUDATestCase
7
+ from numba import cuda, jit, float32, int32
8
+ from numba.core.errors import TypingError
9
+
10
+
11
class TestDeviceFunc(CUDATestCase):
    """Tests for CUDA device functions: calling them from kernels, using
    CPU dispatchers on the device, inspection, and declarations."""

    def test_use_add2f(self):
        """A kernel calls a device function with an explicit signature."""

        @cuda.jit("float32(float32, float32)", device=True)
        def add2f(a, b):
            return a + b

        def use_add2f(ary):
            i = cuda.grid(1)
            ary[i] = add2f(ary[i], ary[i])

        compiled = cuda.jit("void(float32[:])")(use_add2f)

        nelem = 10
        ary = np.arange(nelem, dtype=np.float32)
        exp = ary + ary
        compiled[1, nelem](ary)

        # Same array comparison helper family as _check_cpu_dispatcher; it
        # reports the mismatching elements on failure.
        np.testing.assert_array_equal(ary, exp)

    def test_indirect_add2f(self):
        """A kernel calls a device function that calls another one."""

        @cuda.jit("float32(float32, float32)", device=True)
        def add2f(a, b):
            return a + b

        @cuda.jit("float32(float32, float32)", device=True)
        def indirect(a, b):
            return add2f(a, b)

        def indirect_add2f(ary):
            i = cuda.grid(1)
            ary[i] = indirect(ary[i], ary[i])

        compiled = cuda.jit("void(float32[:])")(indirect_add2f)

        nelem = 10
        ary = np.arange(nelem, dtype=np.float32)
        exp = ary + ary
        compiled[1, nelem](ary)

        np.testing.assert_array_equal(ary, exp)

    def _check_cpu_dispatcher(self, add):
        """Call ``add(x, 1)`` from a kernel for each element and check that
        the array was incremented by one."""
        @cuda.jit
        def add_kernel(ary):
            i = cuda.grid(1)
            ary[i] = add(ary[i], 1)

        ary = np.arange(10)
        expect = ary + 1
        add_kernel[1, ary.size](ary)
        np.testing.assert_equal(expect, ary)

    def test_cpu_dispatcher(self):
        # Test correct usage
        @jit
        def add(a, b):
            return a + b

        self._check_cpu_dispatcher(add)

    @skip_on_cudasim('not supported in cudasim')
    def test_cpu_dispatcher_invalid(self):
        # Test invalid usage
        # Explicit signature disables compilation, which also disables
        # compiling on CUDA.
        @jit('(i4, i4)')
        def add(a, b):
            return a + b

        # Check that the right error message is provided.
        with self.assertRaises(TypingError) as raises:
            self._check_cpu_dispatcher(add)
        msg = "Untyped global name 'add':.*using cpu function on device"
        # assertRegex searches like re.search and shows the text on failure.
        self.assertRegex(str(raises.exception), msg)

    def test_cpu_dispatcher_other_module(self):
        """A CPU dispatcher reached through a module attribute is usable
        from a kernel."""
        @jit
        def add(a, b):
            return a + b

        mymod = types.ModuleType(name='mymod')
        mymod.add = add
        # Drop the local name so the kernel can only reach it via mymod.
        del add

        @cuda.jit
        def add_kernel(ary):
            i = cuda.grid(1)
            ary[i] = mymod.add(ary[i], 1)

        ary = np.arange(10)
        expect = ary + 1
        add_kernel[1, ary.size](ary)
        np.testing.assert_equal(expect, ary)

    @skip_on_cudasim('not supported in cudasim')
    def test_inspect_llvm(self):
        @cuda.jit(device=True)
        def foo(x, y):
            return x + y

        args = (int32, int32)
        cres = foo.compile_device(args)

        fname = cres.fndesc.mangled_name
        # Verify that the function name has "foo" in it as in the python name
        self.assertIn('foo', fname)

        llvm = foo.inspect_llvm(args)
        # Check that the compiled function name is in the LLVM.
        self.assertIn(fname, llvm)

    @skip_on_cudasim('not supported in cudasim')
    def test_inspect_asm(self):
        @cuda.jit(device=True)
        def foo(x, y):
            return x + y

        args = (int32, int32)
        cres = foo.compile_device(args)

        fname = cres.fndesc.mangled_name
        # Verify that the function name has "foo" in it as in the python name
        self.assertIn('foo', fname)

        ptx = foo.inspect_asm(args)
        # Check that the compiled function name is in the PTX
        self.assertIn(fname, ptx)

    @skip_on_cudasim('not supported in cudasim')
    def test_inspect_sass_disallowed(self):
        @cuda.jit(device=True)
        def foo(x, y):
            return x + y

        with self.assertRaises(RuntimeError) as raises:
            foo.inspect_sass((int32, int32))

        self.assertIn('Cannot inspect SASS of a device function',
                      str(raises.exception))

    @skip_on_cudasim('cudasim will allow calling any function')
    def test_device_func_as_kernel_disallowed(self):
        @cuda.jit(device=True)
        def f():
            pass

        with self.assertRaises(RuntimeError) as raises:
            f[1, 1]()

        self.assertIn('Cannot compile a device function as a kernel',
                      str(raises.exception))

    @skip_on_cudasim('cudasim ignores casting by jit decorator signature')
    def test_device_casting(self):
        # Ensure that casts to the correct type are forced when calling a
        # device function with a signature. This test ensures that:
        #
        # - We don't compile a new specialization of rgba for float32 when we
        #   shouldn't
        # - We insert a cast when calling rgba, as opposed to failing to type.

        @cuda.jit('int32(int32, int32, int32, int32)', device=True)
        def rgba(r, g, b, a):
            return (((r & 0xFF) << 16) |
                    ((g & 0xFF) << 8) |
                    ((b & 0xFF) << 0) |
                    ((a & 0xFF) << 24))

        @cuda.jit
        def rgba_caller(x, channels):
            x[0] = rgba(channels[0], channels[1], channels[2], channels[3])

        x = cuda.device_array(1, dtype=np.int32)
        channels = cuda.to_device(np.asarray([1.0, 2.0, 3.0, 4.0],
                                             dtype=np.float32))

        rgba_caller[1, 1](x, channels)

        self.assertEqual(0x04010203, x[0])

    def _test_declare_device(self, decl):
        """Check the name and signature recorded on a declared device
        function."""
        self.assertEqual(decl.name, 'f1')
        self.assertEqual(decl.sig.args, (float32[:],))
        self.assertEqual(decl.sig.return_type, int32)

    @skip_on_cudasim('cudasim does not check signatures')
    def test_declare_device_signature(self):
        f1 = cuda.declare_device('f1', int32(float32[:]))
        self._test_declare_device(f1)

    @skip_on_cudasim('cudasim does not check signatures')
    def test_declare_device_string(self):
        f1 = cuda.declare_device('f1', 'int32(float32[:])')
        self._test_declare_device(f1)

    @skip_on_cudasim('cudasim does not check signatures')
    def test_bad_declare_device_tuple(self):
        with self.assertRaisesRegex(TypeError, 'Return type'):
            cuda.declare_device('f1', (float32[:],))

    @skip_on_cudasim('cudasim does not check signatures')
    def test_bad_declare_device_string(self):
        with self.assertRaisesRegex(TypeError, 'Return type'):
            cuda.declare_device('f1', '(float32[:],)')
219
+
220
+
221
+ if __name__ == '__main__':
222
+ unittest.main()