numba-cuda 0.0.1__py3-none-any.whl → 0.0.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (233) hide show
  1. _numba_cuda_redirector.pth +1 -0
  2. _numba_cuda_redirector.py +74 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +5 -0
  5. numba_cuda/_version.py +19 -0
  6. numba_cuda/numba/cuda/__init__.py +22 -0
  7. numba_cuda/numba/cuda/api.py +526 -0
  8. numba_cuda/numba/cuda/api_util.py +30 -0
  9. numba_cuda/numba/cuda/args.py +77 -0
  10. numba_cuda/numba/cuda/cg.py +62 -0
  11. numba_cuda/numba/cuda/codegen.py +378 -0
  12. numba_cuda/numba/cuda/compiler.py +422 -0
  13. numba_cuda/numba/cuda/cpp_function_wrappers.cu +47 -0
  14. numba_cuda/numba/cuda/cuda_fp16.h +3631 -0
  15. numba_cuda/numba/cuda/cuda_fp16.hpp +2465 -0
  16. numba_cuda/numba/cuda/cuda_paths.py +258 -0
  17. numba_cuda/numba/cuda/cudadecl.py +806 -0
  18. numba_cuda/numba/cuda/cudadrv/__init__.py +9 -0
  19. numba_cuda/numba/cuda/cudadrv/devicearray.py +904 -0
  20. numba_cuda/numba/cuda/cudadrv/devices.py +248 -0
  21. numba_cuda/numba/cuda/cudadrv/driver.py +3201 -0
  22. numba_cuda/numba/cuda/cudadrv/drvapi.py +398 -0
  23. numba_cuda/numba/cuda/cudadrv/dummyarray.py +452 -0
  24. numba_cuda/numba/cuda/cudadrv/enums.py +607 -0
  25. numba_cuda/numba/cuda/cudadrv/error.py +36 -0
  26. numba_cuda/numba/cuda/cudadrv/libs.py +176 -0
  27. numba_cuda/numba/cuda/cudadrv/ndarray.py +20 -0
  28. numba_cuda/numba/cuda/cudadrv/nvrtc.py +260 -0
  29. numba_cuda/numba/cuda/cudadrv/nvvm.py +707 -0
  30. numba_cuda/numba/cuda/cudadrv/rtapi.py +10 -0
  31. numba_cuda/numba/cuda/cudadrv/runtime.py +142 -0
  32. numba_cuda/numba/cuda/cudaimpl.py +1055 -0
  33. numba_cuda/numba/cuda/cudamath.py +140 -0
  34. numba_cuda/numba/cuda/decorators.py +189 -0
  35. numba_cuda/numba/cuda/descriptor.py +33 -0
  36. numba_cuda/numba/cuda/device_init.py +89 -0
  37. numba_cuda/numba/cuda/deviceufunc.py +908 -0
  38. numba_cuda/numba/cuda/dispatcher.py +1057 -0
  39. numba_cuda/numba/cuda/errors.py +59 -0
  40. numba_cuda/numba/cuda/extending.py +7 -0
  41. numba_cuda/numba/cuda/initialize.py +13 -0
  42. numba_cuda/numba/cuda/intrinsic_wrapper.py +77 -0
  43. numba_cuda/numba/cuda/intrinsics.py +198 -0
  44. numba_cuda/numba/cuda/kernels/__init__.py +0 -0
  45. numba_cuda/numba/cuda/kernels/reduction.py +262 -0
  46. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  47. numba_cuda/numba/cuda/libdevice.py +3382 -0
  48. numba_cuda/numba/cuda/libdevicedecl.py +17 -0
  49. numba_cuda/numba/cuda/libdevicefuncs.py +1057 -0
  50. numba_cuda/numba/cuda/libdeviceimpl.py +83 -0
  51. numba_cuda/numba/cuda/mathimpl.py +448 -0
  52. numba_cuda/numba/cuda/models.py +48 -0
  53. numba_cuda/numba/cuda/nvvmutils.py +235 -0
  54. numba_cuda/numba/cuda/printimpl.py +86 -0
  55. numba_cuda/numba/cuda/random.py +292 -0
  56. numba_cuda/numba/cuda/simulator/__init__.py +38 -0
  57. numba_cuda/numba/cuda/simulator/api.py +110 -0
  58. numba_cuda/numba/cuda/simulator/compiler.py +9 -0
  59. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +2 -0
  60. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +432 -0
  61. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +117 -0
  62. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +62 -0
  63. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +4 -0
  64. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +4 -0
  65. numba_cuda/numba/cuda/simulator/cudadrv/error.py +6 -0
  66. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +2 -0
  67. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +29 -0
  68. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +19 -0
  69. numba_cuda/numba/cuda/simulator/kernel.py +308 -0
  70. numba_cuda/numba/cuda/simulator/kernelapi.py +495 -0
  71. numba_cuda/numba/cuda/simulator/reduction.py +15 -0
  72. numba_cuda/numba/cuda/simulator/vector_types.py +58 -0
  73. numba_cuda/numba/cuda/simulator_init.py +17 -0
  74. numba_cuda/numba/cuda/stubs.py +902 -0
  75. numba_cuda/numba/cuda/target.py +440 -0
  76. numba_cuda/numba/cuda/testing.py +202 -0
  77. numba_cuda/numba/cuda/tests/__init__.py +58 -0
  78. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +8 -0
  79. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +145 -0
  80. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +145 -0
  81. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +375 -0
  82. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +21 -0
  83. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +179 -0
  84. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +235 -0
  85. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +22 -0
  86. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +193 -0
  87. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +547 -0
  88. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +249 -0
  89. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +81 -0
  90. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +192 -0
  91. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +38 -0
  92. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +65 -0
  93. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +139 -0
  94. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +37 -0
  95. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +12 -0
  96. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +317 -0
  97. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +127 -0
  98. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +54 -0
  99. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +199 -0
  100. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +37 -0
  101. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +20 -0
  102. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +149 -0
  103. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +36 -0
  104. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +85 -0
  105. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +41 -0
  106. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +122 -0
  107. numba_cuda/numba/cuda/tests/cudapy/__init__.py +8 -0
  108. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +234 -0
  109. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +41 -0
  110. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +58 -0
  111. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +30 -0
  112. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +100 -0
  113. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +42 -0
  114. numba_cuda/numba/cuda/tests/cudapy/test_array.py +260 -0
  115. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +201 -0
  116. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +35 -0
  117. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1620 -0
  118. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +120 -0
  119. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +24 -0
  120. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +545 -0
  121. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +257 -0
  122. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +33 -0
  123. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +276 -0
  124. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +296 -0
  125. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +20 -0
  126. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +129 -0
  127. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +176 -0
  128. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +147 -0
  129. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +435 -0
  130. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +90 -0
  131. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +94 -0
  132. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +101 -0
  133. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +221 -0
  134. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +222 -0
  135. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +700 -0
  136. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +121 -0
  137. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +79 -0
  138. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +174 -0
  139. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +155 -0
  140. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +244 -0
  141. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +52 -0
  142. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +29 -0
  143. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +66 -0
  144. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +60 -0
  145. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +456 -0
  146. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +159 -0
  147. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +95 -0
  148. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +37 -0
  149. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +165 -0
  150. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1106 -0
  151. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +318 -0
  152. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +99 -0
  153. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +64 -0
  154. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +119 -0
  155. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +187 -0
  156. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +199 -0
  157. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +164 -0
  158. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +37 -0
  159. numba_cuda/numba/cuda/tests/cudapy/test_math.py +786 -0
  160. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +74 -0
  161. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +113 -0
  162. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +22 -0
  163. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +140 -0
  164. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +46 -0
  165. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +101 -0
  166. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +49 -0
  167. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +401 -0
  168. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +86 -0
  169. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +335 -0
  170. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +124 -0
  171. numba_cuda/numba/cuda/tests/cudapy/test_print.py +128 -0
  172. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +33 -0
  173. numba_cuda/numba/cuda/tests/cudapy/test_random.py +104 -0
  174. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +610 -0
  175. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +125 -0
  176. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +76 -0
  177. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  178. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +85 -0
  179. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +37 -0
  180. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +444 -0
  181. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +205 -0
  182. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +271 -0
  183. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +80 -0
  184. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +277 -0
  185. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +47 -0
  186. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +307 -0
  187. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +283 -0
  188. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +20 -0
  189. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +69 -0
  190. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +36 -0
  191. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +37 -0
  192. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +139 -0
  193. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +276 -0
  194. numba_cuda/numba/cuda/tests/cudasim/__init__.py +6 -0
  195. numba_cuda/numba/cuda/tests/cudasim/support.py +6 -0
  196. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +102 -0
  197. numba_cuda/numba/cuda/tests/data/__init__.py +0 -0
  198. numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
  199. numba_cuda/numba/cuda/tests/data/error.cu +7 -0
  200. numba_cuda/numba/cuda/tests/data/jitlink.cu +23 -0
  201. numba_cuda/numba/cuda/tests/data/jitlink.ptx +51 -0
  202. numba_cuda/numba/cuda/tests/data/warn.cu +7 -0
  203. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +6 -0
  204. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +0 -0
  205. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +49 -0
  206. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +77 -0
  207. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +76 -0
  208. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +82 -0
  209. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +155 -0
  210. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +173 -0
  211. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +109 -0
  212. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +59 -0
  213. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +76 -0
  214. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +130 -0
  215. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +50 -0
  216. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +73 -0
  217. numba_cuda/numba/cuda/tests/nocuda/__init__.py +8 -0
  218. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +359 -0
  219. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +36 -0
  220. numba_cuda/numba/cuda/tests/nocuda/test_import.py +49 -0
  221. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +238 -0
  222. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +54 -0
  223. numba_cuda/numba/cuda/types.py +37 -0
  224. numba_cuda/numba/cuda/ufuncs.py +662 -0
  225. numba_cuda/numba/cuda/vector_types.py +209 -0
  226. numba_cuda/numba/cuda/vectorizers.py +252 -0
  227. numba_cuda-0.0.13.dist-info/LICENSE +25 -0
  228. numba_cuda-0.0.13.dist-info/METADATA +69 -0
  229. numba_cuda-0.0.13.dist-info/RECORD +231 -0
  230. {numba_cuda-0.0.1.dist-info → numba_cuda-0.0.13.dist-info}/WHEEL +1 -1
  231. numba_cuda-0.0.1.dist-info/METADATA +0 -10
  232. numba_cuda-0.0.1.dist-info/RECORD +0 -5
  233. {numba_cuda-0.0.1.dist-info → numba_cuda-0.0.13.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,8 @@
1
+ from numba.cuda.testing import ensure_supported_ccs_initialized
2
+ from numba.cuda.tests import load_testsuite
3
+ import os
4
+
5
+
6
def load_tests(loader, tests, pattern):
    """
    Hook with the ``load_tests(loader, tests, pattern)`` signature.

    Initializes the supported compute capabilities, then delegates to
    ``load_testsuite`` for the directory that contains this file.
    """
    ensure_supported_ccs_initialized()
    package_dir = os.path.dirname(__file__)
    return load_testsuite(loader, package_dir)
@@ -0,0 +1,234 @@
1
+ from numba import cuda
2
+ from numba.cuda.testing import CUDATestCase
3
+ import numpy as np
4
+ import sys
5
+
6
+
7
class UseCase:
    """
    Provide a way to call a kernel as if it were a function.

    This allows the CUDA cache tests to closely match the CPU cache tests, and
    also to support calling cache use cases as njitted functions. The class
    wraps a function that takes an array for the return value and arguments,
    and provides an interface that accepts arguments, launches the kernel
    appropriately, and returns the stored return value.

    The return type is inferred from the type of the first argument, unless it
    is explicitly overridden by the ``retty`` kwarg.

    Subclasses implement ``_call(ret, *args)`` to perform the actual launch.
    """
    def __init__(self, func, retty=None):
        self._func = func
        self._retty = retty

    def __call__(self, *args):
        # Every argument is handed to the wrapped function as an array
        array_args = [np.asarray(arg) for arg in args]
        # Compare with None explicitly rather than truth-testing: a
        # non-structured np.dtype instance has length 0 and is falsy, which
        # would make an explicit scalar ``retty`` be silently ignored.
        if self._retty is not None:
            array_return = np.ndarray((), dtype=self._retty)
        else:
            array_return = np.zeros_like(array_args[0])

        self._call(array_return, *array_args)
        # Unwrap the 0-d return array into a scalar / record
        return array_return[()]

    def _call(self, ret, *args):
        """Launch the wrapped function; must be provided by subclasses."""
        raise NotImplementedError

    @property
    def func(self):
        """The wrapped function, as passed to the constructor."""
        return self._func
37
+
38
+
39
class CUDAUseCase(UseCase):
    """Use case whose wrapped function is indexed with ``[1, 1]`` to launch."""

    def _call(self, ret, *args):
        # Configure the wrapped kernel with [1, 1], then invoke the result
        configured = self._func[1, 1]
        configured(ret, *args)
42
+
43
+
44
@cuda.jit(cache=True)
def add_usecase_kernel(r, x, y):
    # Compiled with cache=True. Stores x + y + Z into r, where Z is a
    # module-level global; all three arguments are indexed with an empty
    # tuple.
    r[()] = x[()] + y[()] + Z
47
+
48
+
49
@cuda.jit(cache=False)
def add_nocache_usecase_kernel(r, x, y):
    # Same body as add_usecase_kernel, but compiled with cache=False.
    r[()] = x[()] + y[()] + Z
52
+
53
+
54
# Function-style wrappers around the cached and uncached add kernels
add_usecase = CUDAUseCase(add_usecase_kernel)
add_nocache_usecase = CUDAUseCase(add_nocache_usecase_kernel)

# Global read by the add kernels above and by inner() below
Z = 1
58
+
59
+
60
+ # Inner / outer cached / uncached cases
61
+
62
@cuda.jit(cache=True)
def inner(x, y):
    # Returns x + y plus the module-level global Z; called from
    # outer_kernel and outer_uncached_kernel.
    return x + y + Z
65
+
66
+
67
@cuda.jit(cache=True)
def outer_kernel(r, x, y):
    # Compiled with cache=True. Calls inner() with the negated y first and
    # x second, and stores the result in r.
    r[()] = inner(-y[()], x[()])
70
+
71
+
72
@cuda.jit(cache=False)
def outer_uncached_kernel(r, x, y):
    # Same body as outer_kernel, but compiled with cache=False; the inner()
    # it calls is still declared with cache=True.
    r[()] = inner(-y[()], x[()])
75
+
76
+
77
# Function-style wrappers around the cached and uncached outer kernels
outer = CUDAUseCase(outer_kernel)
outer_uncached = CUDAUseCase(outer_uncached_kernel)
79
+
80
+
81
+ # Exercise returning a record instance. This used to hardcode the dtype
82
+ # pointer's value in the bitcode.
83
+
84
# Two record dtypes with identical fields: one packed, one with align=True
packed_record_type = np.dtype([('a', np.int8), ('b', np.float64)])
aligned_record_type = np.dtype([('a', np.int8), ('b', np.float64)], align=True)

# Element i holds a = i + 1 and b = i + 42.5
packed_arr = np.empty(2, dtype=packed_record_type)
for i in range(packed_arr.size):
    packed_arr[i] = (i + 1, i + 42.5)

# Same contents, converted to the aligned dtype
aligned_arr = np.array(packed_arr, dtype=aligned_record_type)
93
+
94
+
95
@cuda.jit(cache=True)
def record_return(r, ary, i):
    # Copies element i of ary into the return slot r.
    r[()] = ary[i]
98
+
99
+
100
# The same kernel wrapped twice, once per record dtype used for the return
# value
record_return_packed = CUDAUseCase(record_return, retty=packed_record_type)
record_return_aligned = CUDAUseCase(record_return, retty=aligned_record_type)
102
+
103
+
104
+ # Closure test cases
105
+
106
def make_closure(x):
    # Builds a kernel declared with cache=True that captures x from this
    # call, and returns it wrapped in a CUDAUseCase.
    @cuda.jit(cache=True)
    def closure(r, y):
        # Stores the captured x plus y into r
        r[()] = x + y[()]

    return CUDAUseCase(closure)
112
+
113
+
114
# Four closures produced by the same factory, differing only in the captured
# value
closure1 = make_closure(3)
closure2 = make_closure(5)
closure3 = make_closure(7)
closure4 = make_closure(9)
118
+
119
+
120
+ # Ambiguous / renamed functions
121
+
122
@cuda.jit(cache=True)
def ambiguous_function(r, x):
    # First definition under this name: stores x + 2 into r. The name is
    # defined again further down with a different body.
    r[()] = x[()] + 2


# Keeps a handle on the first definition before the name is rebound
renamed_function1 = CUDAUseCase(ambiguous_function)
128
+
129
+
130
@cuda.jit(cache=True)
def ambiguous_function(r, x):
    # Second definition under the same name: stores x + 6 into r.
    r[()] = x[()] + 6


# Wraps the second definition
renamed_function2 = CUDAUseCase(ambiguous_function)
136
+
137
+
138
@cuda.jit(cache=True)
def many_locals():
    # Kernel with no arguments that declares 24 local float64 arrays of
    # shape (1, 1) and then zero-fills each of them.
    aa = cuda.local.array((1, 1), np.float64)
    ab = cuda.local.array((1, 1), np.float64)
    ac = cuda.local.array((1, 1), np.float64)
    ad = cuda.local.array((1, 1), np.float64)
    ae = cuda.local.array((1, 1), np.float64)
    af = cuda.local.array((1, 1), np.float64)
    ag = cuda.local.array((1, 1), np.float64)
    ah = cuda.local.array((1, 1), np.float64)
    ai = cuda.local.array((1, 1), np.float64)
    aj = cuda.local.array((1, 1), np.float64)
    ak = cuda.local.array((1, 1), np.float64)
    al = cuda.local.array((1, 1), np.float64)
    am = cuda.local.array((1, 1), np.float64)
    an = cuda.local.array((1, 1), np.float64)
    ao = cuda.local.array((1, 1), np.float64)
    ap = cuda.local.array((1, 1), np.float64)
    ar = cuda.local.array((1, 1), np.float64)
    at = cuda.local.array((1, 1), np.float64)
    au = cuda.local.array((1, 1), np.float64)
    av = cuda.local.array((1, 1), np.float64)
    aw = cuda.local.array((1, 1), np.float64)
    ax = cuda.local.array((1, 1), np.float64)
    ay = cuda.local.array((1, 1), np.float64)
    az = cuda.local.array((1, 1), np.float64)

    # Write to every array declared above
    aa[:] = 0
    ab[:] = 0
    ac[:] = 0
    ad[:] = 0
    ae[:] = 0
    af[:] = 0
    ag[:] = 0
    ah[:] = 0
    ai[:] = 0
    aj[:] = 0
    ak[:] = 0
    al[:] = 0
    am[:] = 0
    an[:] = 0
    ao[:] = 0
    ap[:] = 0
    ar[:] = 0
    at[:] = 0
    au[:] = 0
    av[:] = 0
    aw[:] = 0
    ax[:] = 0
    ay[:] = 0
    az[:] = 0
189
+
190
+
191
+ # Simple use case for multiprocessing test
192
+
193
@cuda.jit(cache=True)
def simple_usecase_kernel(r, x):
    # Copies x into r unchanged.
    r[()] = x[()]


# Function-style wrapper around the kernel
simple_usecase_caller = CUDAUseCase(simple_usecase_kernel)
199
+
200
+
201
+ # Usecase with cooperative groups
202
+
203
@cuda.jit(cache=True)
def cg_usecase_kernel(r, x):
    # Obtains the grid group and synchronizes on it; neither r nor x is
    # read or written.
    grid = cuda.cg.this_grid()
    grid.sync()


# Function-style wrapper around the kernel
cg_usecase = CUDAUseCase(cg_usecase_kernel)
210
+
211
+
212
class _TestModule(CUDATestCase):
    """
    Checks that the use cases exposed by a module behave as expected.

    No "test_*" method is defined on purpose: callers invoke check_module()
    explicitly with the module to verify.
    """

    def check_module(self, mod):
        check = self.assertPreciseEqual

        # Scalar use cases
        check(mod.add_usecase(2, 3), 6)
        check(mod.outer_uncached(3, 2), 2)
        check(mod.outer(3, 2), 2)

        # Record-returning use cases, packed then aligned
        expected_record = (2, 43.5)
        packed = mod.record_return_packed(mod.packed_arr, 1)
        check(tuple(packed), expected_record)
        aligned = mod.record_return_aligned(mod.aligned_arr, 1)
        check(tuple(aligned), expected_record)

        # Only needs to run; the result is not inspected
        mod.simple_usecase_caller(2)
230
+
231
+
232
def self_test():
    """Run ``_TestModule.check_module`` against this very module."""
    this_module = sys.modules[__name__]
    checker = _TestModule()
    checker.check_module(this_module)
@@ -0,0 +1,41 @@
1
+ import sys
2
+
3
+ from numba import cuda, njit
4
+ from numba.cuda.testing import CUDATestCase
5
+ from numba.cuda.tests.cudapy.cache_usecases import CUDAUseCase, UseCase
6
+
7
+
8
class CPUUseCase(UseCase):
    """Use case whose wrapped function is invoked with a plain call."""

    def _call(self, ret, *args):
        target = self._func
        target(ret, *args)
11
+
12
+
13
+ # Using the same function as a cached CPU and CUDA-jitted function
14
+
15
def target_shared_assign(r, x):
    # Plain Python function that copies x into r; it is passed to both
    # cuda.jit and njit below.
    r[()] = x[()]
17
+
18
+
19
# CUDA variant: the shared function jitted with cache=True and wrapped
assign_cuda_kernel = cuda.jit(cache=True)(target_shared_assign)
assign_cuda = CUDAUseCase(assign_cuda_kernel)
# CPU variant: the very same function njitted with cache=True and wrapped
assign_cpu_jitted = njit(cache=True)(target_shared_assign)
assign_cpu = CPUUseCase(assign_cpu_jitted)
23
+
24
+
25
class _TestModule(CUDATestCase):
    """
    Checks that the use cases exposed by a module behave as expected.

    No "test_*" method is defined on purpose: callers invoke check_module()
    explicitly with the module to verify.
    """

    def check_module(self, mod):
        # CPU use case first, then CUDA; each with an int and a float
        for usecase_name in ('assign_cpu', 'assign_cuda'):
            for value in (5, 5.5):
                usecase = getattr(mod, usecase_name)
                self.assertPreciseEqual(usecase(value), value)
37
+
38
+
39
def self_test():
    """Run ``_TestModule.check_module`` against this very module."""
    this_module = sys.modules[__name__]
    checker = _TestModule()
    checker.check_module(this_module)
@@ -0,0 +1,58 @@
1
+ from numba import types
2
+ from numba.core import config
3
+
4
+
5
class TestStruct:
    """Plain Python object holding two attributes, ``x`` and ``y``."""

    def __init__(self, x, y):
        self.x, self.y = x, y
9
+
10
+
11
class TestStructModelType(types.Type):
    # Numba type that takes no parameters; it is always constructed with the
    # fixed name "TestStructModelType".
    def __init__(self):
        super().__init__(name="TestStructModelType")


# Single module-level instance of the type
test_struct_model_type = TestStructModelType()
17
+
18
+
19
# The registrations below are only performed when the CUDA simulator is
# disabled.
if not config.ENABLE_CUDASIM:
    from numba import int32
    from numba.core.extending import (
        models,
        register_model,
        make_attribute_wrapper,
        typeof_impl,
        type_callable
    )
    from numba.cuda.cudaimpl import lower
    from numba.core import cgutils

    @typeof_impl.register(TestStruct)
    def typeof_teststruct(val, c):
        # Every TestStruct value maps to the single shared type instance
        return test_struct_model_type

    @register_model(TestStructModelType)
    class TestStructModel(models.StructModel):
        def __init__(self, dmm, fe_type):
            # Data model: two int32 members named x and y
            members = [("x", int32), ("y", int32)]
            super().__init__(dmm, fe_type, members)

    # Expose struct members x and y as attributes of the same names
    make_attribute_wrapper(TestStructModelType, 'x', 'x')
    make_attribute_wrapper(TestStructModelType, 'y', 'y')

    @type_callable(TestStruct)
    def type_test_struct(context):
        def typer(x, y):
            # Only a pair of integer types yields a result; for any other
            # combination the function falls through and returns None.
            if isinstance(x, types.Integer) and isinstance(y, types.Integer):
                return test_struct_model_type
        return typer

    @lower(TestStruct, types.Integer, types.Integer)
    def lower_test_type_ctor(context, builder, sig, args):
        # Create a struct proxy for the type and fill x and y from the two
        # constructor arguments, in order.
        obj = cgutils.create_struct_proxy(
            test_struct_model_type
        )(context, builder)
        obj.x = args[0]
        obj.y = args[1]
        return obj._getvalue()
@@ -0,0 +1,30 @@
1
+ //
2
+ // Generated by NVIDIA NVVM Compiler
3
+ // Compiler built on Tue Apr 1 03:34:02 2014 (1396341242)
4
+ // Cuda compilation tools, release 6.0, V6.0.1
5
+ //
6
+
7
+ .version 4.0
8
+ .target sm_20
9
+ .address_size 64
10
+
11
+
12
// bar(bar_param_0, bar_param_1): stores (bar_param_1 << 1) as a 32-bit value
// at the address given by bar_param_0 and returns 0.
.visible .func (.param .b32 func_retval0) bar(
	.param .b64 bar_param_0,
	.param .b32 bar_param_1
)
{
	.reg .s32 %r<4>;
	.reg .s64 %rd<2>;


	ld.param.u64 %rd1, [bar_param_0];	// %rd1 = first parameter, used as the store address
	ld.param.u32 %r1, [bar_param_1];	// %r1 = second parameter
	shl.b32 %r2, %r1, 1;			// %r2 = %r1 shifted left by one bit
	st.u32 [%rd1], %r2;			// write %r2 to the address in %rd1
	mov.u32 %r3, 0;
	st.param.b32 [func_retval0+0], %r3;	// return value is 0
	ret;
}
29
+
30
+
@@ -0,0 +1,100 @@
1
+ """
2
+ Usecases of recursive functions in the CUDA target, many derived from
3
+ numba/tests/recursion_usecases.py.
4
+
5
+ Some functions are compiled at import time, hence a separate module.
6
+ """
7
+
8
+ from numba import cuda
9
+
10
+
11
@cuda.jit("i8(i8)", device=True)
def fib1(n):
    # Self-recursive device function declared with the explicit signature
    # "i8(i8)".
    if n < 2:
        return n
    # Note the second call does not use a named argument, unlike the CPU target
    # usecase
    return fib1(n - 1) + fib1(n - 2)
18
+
19
+
20
def make_fib2():
    # Defines the self-recursive device function inside a factory, so the
    # recursive reference to fib2 is a variable of this enclosing function
    # rather than a module global.
    @cuda.jit("i8(i8)", device=True)
    def fib2(n):
        if n < 2:
            return n
        return fib2(n - 1) + fib2(n - 2)

    return fib2


# Module-level handle on the function built by the factory
fib2 = make_fib2()
31
+
32
+
33
@cuda.jit
def type_change_self(x, y):
    # Self-recursive function decorated with a bare @cuda.jit (no signature,
    # no device=True). Returns x plus the recursive result while x > 1 and
    # y > 0, otherwise returns y.
    # NOTE(review): the name suggests the inferred return type is meant to
    # vary with the argument types -- confirm against the test using it.
    if x > 1 and y > 0:
        return x + type_change_self(x - y, y)
    else:
        return y
39
+
40
+
41
+ # Implicit signature
42
@cuda.jit(device=True)
def fib3(n):
    # Same recursion as fib1, but declared without an explicit signature.
    if n < 2:
        return n

    return fib3(n - 1) + fib3(n - 2)
48
+
49
+
50
+ # Run-away self recursion
51
@cuda.jit(device=True)
def runaway_self(x):
    # No base case: unconditionally calls itself with the same argument.
    return runaway_self(x)
54
+
55
+
56
@cuda.jit(device=True)
def raise_self(x):
    # Raises ValueError when x == 1, recurses on x - 1 while x > 0, and
    # returns 1 otherwise.
    if x == 1:
        raise ValueError("raise_self")
    elif x > 0:
        return raise_self(x - 1)
    else:
        return 1
64
+
65
+
66
@cuda.jit(debug=True, opt=False)
def raise_self_kernel(x):
    # Kernel compiled with debug=True and opt=False that calls raise_self
    # and discards its result.
    raise_self(x)
69
+
70
+
71
def make_optional_return_case(jit=lambda x: x):
    # Builds and returns ``bar``, decorating both inner functions with the
    # supplied ``jit`` (by default the identity, leaving them plain Python).
    @jit
    def foo(x):
        # Returns x - 1 for x > 5; otherwise a bare return, i.e. None
        if x > 5:
            return x - 1
        else:
            return

    @jit
    def bar(x):
        # Passes a None result from foo straight through, returns results
        # below 8 as-is, and otherwise recurses on the result.
        out = foo(x)
        if out is None:
            return out
        elif out < 8:
            return out
        else:
            return x * bar(out)

    return bar
90
+
91
+
92
def make_growing_tuple_case(jit=lambda x: x):
    # Builds and returns ``make_list``, decorated with the supplied ``jit``
    # (by default the identity, leaving it plain Python).
    # From issue #4387
    @jit
    def make_list(n):
        # Returns None for n <= 0; otherwise a pair (n, <result for n - 1>),
        # so the nesting depth of the result grows with n.
        if n <= 0:
            return None

        return (n, make_list(n - 1))
    return make_list
@@ -0,0 +1,42 @@
1
+ import numpy as np
2
+ from numba import from_dtype, cuda
3
+ from numba.cuda.testing import skip_on_cudasim, CUDATestCase
4
+ import unittest
5
+
6
+
7
class TestAlignment(CUDATestCase):
    """Kernels taking record arrays built from aligned and packed dtypes."""

    def test_record_alignment(self):
        # A dtype created with align=True is accepted in a kernel signature
        rec_dtype = np.dtype([('a', 'int32'), ('b', 'float64')], align=True)
        rec = from_dtype(rec_dtype)

        @cuda.jit((rec[:],))
        def foo(a):
            # Each thread copies field b into field a of its own record
            i = cuda.grid(1)
            a[i].a = a[i].b

        a_recarray = np.recarray(3, dtype=rec_dtype)
        for i in range(a_recarray.size):
            a_rec = a_recarray[i]
            a_rec.a = 0
            a_rec.b = (i + 1) * 123

        foo[1, 3](a_recarray)

        # Compares by value and reports the mismatching elements on failure
        np.testing.assert_array_equal(a_recarray.a, a_recarray.b)

    @skip_on_cudasim('Simulator does not check alignment')
    def test_record_alignment_error(self):
        # The same fields without align=True
        rec_dtype = np.dtype([('a', 'int32'), ('b', 'float64')])
        rec = from_dtype(rec_dtype)

        with self.assertRaises(Exception) as raises:
            @cuda.jit((rec[:],))
            def foo(a):
                i = cuda.grid(1)
                a[i].a = a[i].b

        # assertIn shows the actual message when the expected text is missing
        self.assertIn('type float64 is not aligned', str(raises.exception))
39
+
40
+
41
+ if __name__ == '__main__':
42
+ unittest.main()