numba-cuda 0.0.1__py3-none-any.whl → 0.0.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (233) hide show
  1. _numba_cuda_redirector.pth +1 -0
  2. _numba_cuda_redirector.py +74 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +5 -0
  5. numba_cuda/_version.py +19 -0
  6. numba_cuda/numba/cuda/__init__.py +22 -0
  7. numba_cuda/numba/cuda/api.py +526 -0
  8. numba_cuda/numba/cuda/api_util.py +30 -0
  9. numba_cuda/numba/cuda/args.py +77 -0
  10. numba_cuda/numba/cuda/cg.py +62 -0
  11. numba_cuda/numba/cuda/codegen.py +378 -0
  12. numba_cuda/numba/cuda/compiler.py +422 -0
  13. numba_cuda/numba/cuda/cpp_function_wrappers.cu +47 -0
  14. numba_cuda/numba/cuda/cuda_fp16.h +3631 -0
  15. numba_cuda/numba/cuda/cuda_fp16.hpp +2465 -0
  16. numba_cuda/numba/cuda/cuda_paths.py +258 -0
  17. numba_cuda/numba/cuda/cudadecl.py +806 -0
  18. numba_cuda/numba/cuda/cudadrv/__init__.py +9 -0
  19. numba_cuda/numba/cuda/cudadrv/devicearray.py +904 -0
  20. numba_cuda/numba/cuda/cudadrv/devices.py +248 -0
  21. numba_cuda/numba/cuda/cudadrv/driver.py +3201 -0
  22. numba_cuda/numba/cuda/cudadrv/drvapi.py +398 -0
  23. numba_cuda/numba/cuda/cudadrv/dummyarray.py +452 -0
  24. numba_cuda/numba/cuda/cudadrv/enums.py +607 -0
  25. numba_cuda/numba/cuda/cudadrv/error.py +36 -0
  26. numba_cuda/numba/cuda/cudadrv/libs.py +176 -0
  27. numba_cuda/numba/cuda/cudadrv/ndarray.py +20 -0
  28. numba_cuda/numba/cuda/cudadrv/nvrtc.py +260 -0
  29. numba_cuda/numba/cuda/cudadrv/nvvm.py +707 -0
  30. numba_cuda/numba/cuda/cudadrv/rtapi.py +10 -0
  31. numba_cuda/numba/cuda/cudadrv/runtime.py +142 -0
  32. numba_cuda/numba/cuda/cudaimpl.py +1055 -0
  33. numba_cuda/numba/cuda/cudamath.py +140 -0
  34. numba_cuda/numba/cuda/decorators.py +189 -0
  35. numba_cuda/numba/cuda/descriptor.py +33 -0
  36. numba_cuda/numba/cuda/device_init.py +89 -0
  37. numba_cuda/numba/cuda/deviceufunc.py +908 -0
  38. numba_cuda/numba/cuda/dispatcher.py +1057 -0
  39. numba_cuda/numba/cuda/errors.py +59 -0
  40. numba_cuda/numba/cuda/extending.py +7 -0
  41. numba_cuda/numba/cuda/initialize.py +13 -0
  42. numba_cuda/numba/cuda/intrinsic_wrapper.py +77 -0
  43. numba_cuda/numba/cuda/intrinsics.py +198 -0
  44. numba_cuda/numba/cuda/kernels/__init__.py +0 -0
  45. numba_cuda/numba/cuda/kernels/reduction.py +262 -0
  46. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  47. numba_cuda/numba/cuda/libdevice.py +3382 -0
  48. numba_cuda/numba/cuda/libdevicedecl.py +17 -0
  49. numba_cuda/numba/cuda/libdevicefuncs.py +1057 -0
  50. numba_cuda/numba/cuda/libdeviceimpl.py +83 -0
  51. numba_cuda/numba/cuda/mathimpl.py +448 -0
  52. numba_cuda/numba/cuda/models.py +48 -0
  53. numba_cuda/numba/cuda/nvvmutils.py +235 -0
  54. numba_cuda/numba/cuda/printimpl.py +86 -0
  55. numba_cuda/numba/cuda/random.py +292 -0
  56. numba_cuda/numba/cuda/simulator/__init__.py +38 -0
  57. numba_cuda/numba/cuda/simulator/api.py +110 -0
  58. numba_cuda/numba/cuda/simulator/compiler.py +9 -0
  59. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +2 -0
  60. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +432 -0
  61. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +117 -0
  62. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +62 -0
  63. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +4 -0
  64. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +4 -0
  65. numba_cuda/numba/cuda/simulator/cudadrv/error.py +6 -0
  66. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +2 -0
  67. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +29 -0
  68. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +19 -0
  69. numba_cuda/numba/cuda/simulator/kernel.py +308 -0
  70. numba_cuda/numba/cuda/simulator/kernelapi.py +495 -0
  71. numba_cuda/numba/cuda/simulator/reduction.py +15 -0
  72. numba_cuda/numba/cuda/simulator/vector_types.py +58 -0
  73. numba_cuda/numba/cuda/simulator_init.py +17 -0
  74. numba_cuda/numba/cuda/stubs.py +902 -0
  75. numba_cuda/numba/cuda/target.py +440 -0
  76. numba_cuda/numba/cuda/testing.py +202 -0
  77. numba_cuda/numba/cuda/tests/__init__.py +58 -0
  78. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +8 -0
  79. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +145 -0
  80. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +145 -0
  81. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +375 -0
  82. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +21 -0
  83. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +179 -0
  84. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +235 -0
  85. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +22 -0
  86. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +193 -0
  87. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +547 -0
  88. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +249 -0
  89. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +81 -0
  90. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +192 -0
  91. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +38 -0
  92. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +65 -0
  93. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +139 -0
  94. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +37 -0
  95. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +12 -0
  96. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +317 -0
  97. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +127 -0
  98. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +54 -0
  99. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +199 -0
  100. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +37 -0
  101. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +20 -0
  102. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +149 -0
  103. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +36 -0
  104. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +85 -0
  105. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +41 -0
  106. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +122 -0
  107. numba_cuda/numba/cuda/tests/cudapy/__init__.py +8 -0
  108. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +234 -0
  109. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +41 -0
  110. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +58 -0
  111. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +30 -0
  112. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +100 -0
  113. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +42 -0
  114. numba_cuda/numba/cuda/tests/cudapy/test_array.py +260 -0
  115. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +201 -0
  116. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +35 -0
  117. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1620 -0
  118. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +120 -0
  119. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +24 -0
  120. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +545 -0
  121. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +257 -0
  122. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +33 -0
  123. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +276 -0
  124. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +296 -0
  125. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +20 -0
  126. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +129 -0
  127. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +176 -0
  128. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +147 -0
  129. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +435 -0
  130. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +90 -0
  131. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +94 -0
  132. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +101 -0
  133. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +221 -0
  134. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +222 -0
  135. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +700 -0
  136. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +121 -0
  137. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +79 -0
  138. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +174 -0
  139. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +155 -0
  140. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +244 -0
  141. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +52 -0
  142. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +29 -0
  143. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +66 -0
  144. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +60 -0
  145. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +456 -0
  146. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +159 -0
  147. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +95 -0
  148. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +37 -0
  149. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +165 -0
  150. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1106 -0
  151. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +318 -0
  152. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +99 -0
  153. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +64 -0
  154. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +119 -0
  155. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +187 -0
  156. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +199 -0
  157. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +164 -0
  158. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +37 -0
  159. numba_cuda/numba/cuda/tests/cudapy/test_math.py +786 -0
  160. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +74 -0
  161. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +113 -0
  162. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +22 -0
  163. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +140 -0
  164. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +46 -0
  165. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +101 -0
  166. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +49 -0
  167. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +401 -0
  168. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +86 -0
  169. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +335 -0
  170. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +124 -0
  171. numba_cuda/numba/cuda/tests/cudapy/test_print.py +128 -0
  172. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +33 -0
  173. numba_cuda/numba/cuda/tests/cudapy/test_random.py +104 -0
  174. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +610 -0
  175. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +125 -0
  176. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +76 -0
  177. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  178. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +85 -0
  179. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +37 -0
  180. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +444 -0
  181. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +205 -0
  182. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +271 -0
  183. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +80 -0
  184. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +277 -0
  185. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +47 -0
  186. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +307 -0
  187. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +283 -0
  188. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +20 -0
  189. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +69 -0
  190. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +36 -0
  191. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +37 -0
  192. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +139 -0
  193. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +276 -0
  194. numba_cuda/numba/cuda/tests/cudasim/__init__.py +6 -0
  195. numba_cuda/numba/cuda/tests/cudasim/support.py +6 -0
  196. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +102 -0
  197. numba_cuda/numba/cuda/tests/data/__init__.py +0 -0
  198. numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
  199. numba_cuda/numba/cuda/tests/data/error.cu +7 -0
  200. numba_cuda/numba/cuda/tests/data/jitlink.cu +23 -0
  201. numba_cuda/numba/cuda/tests/data/jitlink.ptx +51 -0
  202. numba_cuda/numba/cuda/tests/data/warn.cu +7 -0
  203. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +6 -0
  204. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +0 -0
  205. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +49 -0
  206. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +77 -0
  207. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +76 -0
  208. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +82 -0
  209. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +155 -0
  210. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +173 -0
  211. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +109 -0
  212. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +59 -0
  213. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +76 -0
  214. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +130 -0
  215. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +50 -0
  216. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +73 -0
  217. numba_cuda/numba/cuda/tests/nocuda/__init__.py +8 -0
  218. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +359 -0
  219. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +36 -0
  220. numba_cuda/numba/cuda/tests/nocuda/test_import.py +49 -0
  221. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +238 -0
  222. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +54 -0
  223. numba_cuda/numba/cuda/types.py +37 -0
  224. numba_cuda/numba/cuda/ufuncs.py +662 -0
  225. numba_cuda/numba/cuda/vector_types.py +209 -0
  226. numba_cuda/numba/cuda/vectorizers.py +252 -0
  227. numba_cuda-0.0.13.dist-info/LICENSE +25 -0
  228. numba_cuda-0.0.13.dist-info/METADATA +69 -0
  229. numba_cuda-0.0.13.dist-info/RECORD +231 -0
  230. {numba_cuda-0.0.1.dist-info → numba_cuda-0.0.13.dist-info}/WHEEL +1 -1
  231. numba_cuda-0.0.1.dist-info/METADATA +0 -10
  232. numba_cuda-0.0.1.dist-info/RECORD +0 -5
  233. {numba_cuda-0.0.1.dist-info → numba_cuda-0.0.13.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,109 @@
1
+ import unittest
2
+
3
+ from numba.cuda.testing import CUDATestCase, skip_on_cudasim
4
+ from numba.tests.support import captured_stdout
5
+
6
+
7
@skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
class TestMonteCarlo(CUDATestCase):
    """
    Test monte-carlo integration
    """

    def setUp(self):
        # Prevent output from this test showing up when running the test
        # suite. Restoring stdout is registered as a cleanup instead of being
        # done in tearDown(): unittest does not call tearDown() when setUp()
        # raises, but it always runs registered cleanups, so stdout cannot be
        # left captured if the base-class setUp() fails.
        self._captured_stdout = captured_stdout()
        self._captured_stdout.__enter__()
        # No exception type, value, or traceback
        self.addCleanup(self._captured_stdout.__exit__, None, None, None)
        super().setUp()

    def test_ex_montecarlo(self):
        # ex_montecarlo.import.begin
        import numba
        import numpy as np
        from numba import cuda
        from numba.cuda.random import (
            create_xoroshiro128p_states,
            xoroshiro128p_uniform_float32,
        )
        # ex_montecarlo.import.end

        # ex_montecarlo.define.begin
        # number of samples, higher will lead to a more accurate answer
        nsamps = 1000000
        # ex_montecarlo.define.end

        # ex_montecarlo.kernel.begin
        @cuda.jit
        def mc_integrator_kernel(out, rng_states, lower_lim, upper_lim):
            """
            kernel to draw random samples and evaluate the function to
            be integrated at those sample values
            """
            size = len(out)

            gid = cuda.grid(1)
            if gid < size:
                # draw a sample between 0 and 1 on this thread
                samp = xoroshiro128p_uniform_float32(rng_states, gid)

                # normalize this sample to the limit range
                samp = samp * (upper_lim - lower_lim) + lower_lim

                # evaluate the function to be
                # integrated at the normalized
                # value of the sample
                y = func(samp)
                out[gid] = y
        # ex_montecarlo.kernel.end

        # ex_montecarlo.callfunc.begin
        @cuda.reduce
        def sum_reduce(a, b):
            return a + b

        def mc_integrate(lower_lim, upper_lim, nsamps):
            """
            approximate the definite integral of `func` from
            `lower_lim` to `upper_lim`
            """
            out = cuda.to_device(np.zeros(nsamps, dtype="float32"))
            rng_states = create_xoroshiro128p_states(nsamps, seed=42)

            # evaluate `func` at one random sample per output element
            mc_integrator_kernel.forall(nsamps)(
                out, rng_states, lower_lim, upper_lim
            )
            # normalization factor to convert the sum of the samples
            # to the integral estimate: (b - a) / N
            factor = (upper_lim - lower_lim) / nsamps

            return sum_reduce(out) * factor
        # ex_montecarlo.callfunc.end

        # ex_montecarlo.launch.begin
        # define a function to integrate
        @numba.jit
        def func(x):
            return 1.0 / x

        mc_integrate(1, 2, nsamps)  # approximately 0.693 (exact: ln 2)
        mc_integrate(2, 3, nsamps)  # approximately 0.405 (exact: ln 1.5)
        # ex_montecarlo.launch.end

        # values computed independently using maple
        np.testing.assert_allclose(
            mc_integrate(1, 2, nsamps), 0.69315, atol=0.001
        )
        np.testing.assert_allclose(
            mc_integrate(2, 3, nsamps), 0.4055, atol=0.001
        )


if __name__ == "__main__":
    unittest.main()
@@ -0,0 +1,59 @@
1
+ # The contents of this file are referenced from the sphinx-generated docs.
2
+ # "magictoken" markers denote the beginning and end of the example text.
3
+
4
+ import unittest
5
+ from numba.cuda.testing import CUDATestCase, skip_on_cudasim
6
+
7
+
8
@skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
class TestRandom(CUDATestCase):
    """
    Test filling a 3D device array with random numbers on a 3D grid
    """

    def test_ex_3d_grid(self):
        """Run the 3D-grid RNG example and sanity-check the values."""
        # magictoken.ex_3d_grid.begin
        from numba import cuda
        from numba.cuda.random import (create_xoroshiro128p_states,
                                       xoroshiro128p_uniform_float32)
        import numpy as np

        @cuda.jit
        def random_3d(arr, rng_states):
            # Starting position of this thread and the step taken by the
            # whole grid, along each dimension
            startx, starty, startz = cuda.grid(3)
            stridex, stridey, stridez = cuda.gridsize(3)

            # Flatten the 3D thread position into one index; it selects
            # the RNG state used by this thread
            tid = startx + stridex * (starty + stridey * startz)

            # Walk the array in grid-sized steps so that every entry
            # receives a random value
            for plane in range(startz, arr.shape[0], stridez):
                for row in range(starty, arr.shape[1], stridey):
                    for col in range(startx, arr.shape[2], stridex):
                        arr[plane, row, col] = xoroshiro128p_uniform_float32(
                            rng_states, tid
                        )

        # Array dimensions
        shape = (701, 900, 719)

        # Block and grid dimensions
        block_dim = (8, 8, 8)
        grid_dim = (16, 16, 16)

        # Total number of threads
        nthreads = 1
        for extent in block_dim + grid_dim:
            nthreads *= extent

        # Initialize a state for each thread
        rng_states = create_xoroshiro128p_states(nthreads, seed=1)

        # Generate random numbers
        arr = cuda.device_array(shape, dtype=np.float32)
        random_3d[grid_dim, block_dim](arr, rng_states)
        # magictoken.ex_3d_grid.end

        # Some basic tests of the randomly-generated numbers
        host_arr = arr.copy_to_host()
        mean = np.mean(host_arr)
        self.assertGreater(mean, 0.49)
        self.assertLess(mean, 0.51)
        self.assertTrue((host_arr <= 1.0).all())
        self.assertTrue((host_arr >= 0.0).all())


if __name__ == '__main__':
    unittest.main()
@@ -0,0 +1,76 @@
1
+ import unittest
2
+
3
+ from numba.cuda.testing import CUDATestCase, skip_on_cudasim
4
+ from numba.tests.support import captured_stdout
5
+
6
+
7
@skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
class TestReduction(CUDATestCase):
    """
    Test shared memory reduction
    """

    def setUp(self):
        # Prevent output from this test showing up when running the test
        # suite. Restoring stdout is registered as a cleanup instead of being
        # done in tearDown(): unittest does not call tearDown() when setUp()
        # raises, but it always runs registered cleanups, so stdout cannot be
        # left captured if the base-class setUp() fails.
        self._captured_stdout = captured_stdout()
        self._captured_stdout.__enter__()
        # No exception type, value, or traceback
        self.addCleanup(self._captured_stdout.__exit__, None, None, None)
        super().setUp()

    def test_ex_reduction(self):
        # ex_reduction.import.begin
        import numpy as np
        from numba import cuda
        from numba.types import int32
        # ex_reduction.import.end

        # ex_reduction.allocate.begin
        # generate data
        a = cuda.to_device(np.arange(1024))
        nelem = len(a)
        # ex_reduction.allocate.end

        # ex_reduction.kernel.begin
        @cuda.jit
        def array_sum(data):
            tid = cuda.threadIdx.x
            size = len(data)
            if tid < size:
                i = cuda.grid(1)

                # Declare an array in shared memory
                shr = cuda.shared.array(nelem, int32)
                shr[tid] = data[i]

                # Ensure writes to shared memory are visible
                # to all threads before reducing
                cuda.syncthreads()

                s = 1
                while s < cuda.blockDim.x:
                    if tid % (2 * s) == 0:
                        # Stride by `s` and add
                        shr[tid] += shr[tid + s]
                    s *= 2
                    cuda.syncthreads()

                # After the loop, the zeroth element contains the sum
                if tid == 0:
                    data[tid] = shr[tid]
        # ex_reduction.kernel.end

        # ex_reduction.launch.begin
        # A single block with one thread per element
        array_sum[1, nelem](a)
        print(a[0])                  # 523776
        print(sum(np.arange(1024)))  # 523776
        # ex_reduction.launch.end

        np.testing.assert_equal(a[0], sum(np.arange(1024)))


if __name__ == "__main__":
    unittest.main()
@@ -0,0 +1,130 @@
1
+ import unittest
2
+
3
+ from numba.cuda.testing import (CUDATestCase, skip_if_cudadevrt_missing,
4
+ skip_on_cudasim, skip_unless_cc_60,
5
+ skip_if_mvc_enabled)
6
+ from numba.tests.support import captured_stdout
7
+
8
+
9
@skip_if_cudadevrt_missing
@skip_unless_cc_60
@skip_if_mvc_enabled('CG not supported with MVC')
@skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
class TestSessionization(CUDATestCase):
    """
    Test click stream sessionization
    """

    def setUp(self):
        # Prevent output from this test showing up when running the test
        # suite. Restoring stdout is registered as a cleanup instead of being
        # done in tearDown(): unittest does not call tearDown() when setUp()
        # raises, but it always runs registered cleanups, so stdout cannot be
        # left captured if the base-class setUp() fails.
        self._captured_stdout = captured_stdout()
        self._captured_stdout.__enter__()
        # No exception type, value, or traceback
        self.addCleanup(self._captured_stdout.__exit__, None, None, None)
        super().setUp()

    def test_ex_sessionize(self):
        # ex_sessionize.import.begin
        import numpy as np
        from numba import cuda

        # Set the timeout to one hour
        session_timeout = np.int64(np.timedelta64("3600", "s"))
        # ex_sessionize.import.end

        # ex_sessionize.allocate.begin
        # Generate data
        ids = cuda.to_device(
            np.array(
                [
                    1, 1, 1, 1, 1, 1,
                    2, 2, 2,
                    3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
                    4, 4, 4, 4, 4, 4, 4, 4, 4,
                ]
            )
        )
        sec = cuda.to_device(
            np.array(
                [
                    1, 2, 3, 5000, 5001, 5002, 1,
                    2, 3, 1, 2, 5000, 5001, 10000,
                    10001, 10002, 10003, 15000, 150001,
                    1, 5000, 50001, 15000, 20000,
                    25000, 25001, 25002, 25003,
                ],
                dtype="datetime64[ns]",
            ).astype(
                "int64"
            )  # Cast to int64 for compatibility
        )
        # Create a vector to hold the results
        results = cuda.to_device(np.zeros(len(ids)))
        # ex_sessionize.allocate.end

        # ex_sessionize.kernel.begin
        @cuda.jit
        def sessionize(user_id, timestamp, results):
            gid = cuda.grid(1)
            size = len(user_id)

            # Determine session boundaries. Threads beyond the end of the
            # data own no element and never mark a boundary, but they do
            # not return early: every thread of the grid has to reach the
            # grid-wide synchronization below.
            is_sess_boundary = False
            if gid < size:
                is_first_datapoint = gid == 0
                if not is_first_datapoint:
                    new_user = user_id[gid] != user_id[gid - 1]
                    timed_out = (
                        timestamp[gid] - timestamp[gid - 1] > session_timeout
                    )
                    is_sess_boundary = new_user or timed_out
                else:
                    is_sess_boundary = True

            if is_sess_boundary:
                # This thread marks the start of a session
                results[gid] = gid

            # Make sure all session boundaries are written
            # before populating the session id
            grid = cuda.cg.this_grid()
            grid.sync()

            # Determine session labels
            if is_sess_boundary:
                # Check elements 'forward' of this one until a new
                # session boundary is found. The index is tested against
                # `size` before it is used, so the scan never reads past
                # the end and never overwrites a boundary stored in the
                # last element.
                look_ahead = gid + 1
                while look_ahead < size and results[look_ahead] == 0:
                    results[look_ahead] = gid
                    look_ahead += 1
        # ex_sessionize.kernel.end

        # ex_sessionize.launch.begin
        sessionize.forall(len(ids))(ids, sec, results)

        print(results.copy_to_host())
        # array([ 0.,  0.,  0.,  3.,  3.,  3.,
        #         6.,  6.,  6.,  9.,  9., 11.,
        #        11., 13., 13., 13., 13., 17.,
        #        18., 19., 20., 21., 21., 23.,
        #        24., 24., 24., 24.])
        # ex_sessionize.launch.end

        expect = [
            0, 0, 0, 3, 3, 3, 6, 6, 6, 9, 9,
            11, 11, 13, 13, 13, 13, 17, 18, 19, 20, 21,
            21, 23, 24, 24, 24, 24
        ]
        np.testing.assert_equal(expect, results.copy_to_host())


if __name__ == "__main__":
    unittest.main()
@@ -0,0 +1,50 @@
1
+ import unittest
2
+
3
+ from numba.cuda.testing import CUDATestCase, skip_on_cudasim
4
+ from numba.tests.support import captured_stdout
5
+
6
+
7
@skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
class TestUFunc(CUDATestCase):
    """
    Test calling a UFunc
    """

    def setUp(self):
        # Prevent output from this test showing up when running the test
        # suite. Restoring stdout is registered as a cleanup instead of being
        # done in tearDown(): unittest does not call tearDown() when setUp()
        # raises, but it always runs registered cleanups, so stdout cannot be
        # left captured if the base-class setUp() fails.
        self._captured_stdout = captured_stdout()
        self._captured_stdout.__enter__()
        # No exception type, value, or traceback
        self.addCleanup(self._captured_stdout.__exit__, None, None, None)
        super().setUp()

    def test_ex_cuda_ufunc_call(self):
        # ex_cuda_ufunc.begin
        import numpy as np
        from numba import cuda

        # A kernel calling a ufunc (sin, in this case)
        @cuda.jit
        def f(r, x):
            # Compute sin(x) with result written to r
            np.sin(x, r)

        # Declare input and output arrays
        x = np.arange(10, dtype=np.float32) - 5
        r = np.zeros_like(x)

        # Launch kernel that calls the ufunc
        f[1, 1](r, x)

        # A quick sanity check demonstrating equality of the sine computed by
        # the sin ufunc inside the kernel, and NumPy's sin ufunc
        np.testing.assert_allclose(r, np.sin(x))
        # ex_cuda_ufunc.end


if __name__ == "__main__":
    unittest.main()
@@ -0,0 +1,73 @@
1
+ import unittest
2
+
3
+ from numba.cuda.testing import CUDATestCase, skip_on_cudasim
4
+ from numba.tests.support import captured_stdout
5
+
6
+
7
@skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
class TestVecAdd(CUDATestCase):
    """
    Test simple vector addition
    """

    def setUp(self):
        # Prevent output from this test showing up when running the test
        # suite. Restoring stdout is registered as a cleanup instead of being
        # done in tearDown(): unittest does not call tearDown() when setUp()
        # raises, but it always runs registered cleanups, so stdout cannot be
        # left captured if the base-class setUp() fails.
        self._captured_stdout = captured_stdout()
        self._captured_stdout.__enter__()
        # No exception type, value, or traceback
        self.addCleanup(self._captured_stdout.__exit__, None, None, None)
        super().setUp()

    def test_ex_vecadd(self):
        # ex_vecadd.import.begin
        import numpy as np
        from numba import cuda
        # ex_vecadd.import.end

        # ex_vecadd.kernel.begin
        @cuda.jit
        def f(a, b, c):
            # like threadIdx.x + (blockIdx.x * blockDim.x)
            tid = cuda.grid(1)
            size = len(c)

            if tid < size:
                c[tid] = a[tid] + b[tid]
        # ex_vecadd.kernel.end

        # Seed RNG for test repeatability
        np.random.seed(1)

        # ex_vecadd.allocate.begin
        N = 100000
        a = cuda.to_device(np.random.random(N))
        b = cuda.to_device(np.random.random(N))
        c = cuda.device_array_like(a)
        # ex_vecadd.allocate.end

        # ex_vecadd.forall.begin
        f.forall(len(a))(a, b, c)
        print(c.copy_to_host())
        # ex_vecadd.forall.end

        # ex_vecadd.launch.begin
        # Enough threads per block for several warps per block
        nthreads = 256
        # Enough blocks to cover the entire vector depending on its length
        # (rounded up, so no surplus block is launched when the length is
        # an exact multiple of the block size)
        nblocks = (len(a) + nthreads - 1) // nthreads
        f[nblocks, nthreads](a, b, c)
        print(c.copy_to_host())
        # ex_vecadd.launch.end

        np.testing.assert_equal(
            c.copy_to_host(),
            a.copy_to_host() + b.copy_to_host()
        )


if __name__ == "__main__":
    unittest.main()
@@ -0,0 +1,8 @@
1
+ from numba.cuda.testing import ensure_supported_ccs_initialized
2
+ from numba.cuda.tests import load_testsuite
3
+ import os
4
+
5
+
6
def load_tests(loader, tests, pattern):
    """
    Build the test suite for the directory containing this file.

    Calls ensure_supported_ccs_initialized() first, then hands `loader` and
    this file's directory to load_testsuite() and returns its result. The
    `tests` and `pattern` arguments are accepted but not used.
    """
    ensure_supported_ccs_initialized()
    this_dir = os.path.dirname(__file__)
    suite = load_testsuite(loader, this_dir)
    return suite