numba-cuda 0.0.1__py3-none-any.whl → 0.0.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (233) hide show
  1. _numba_cuda_redirector.pth +1 -0
  2. _numba_cuda_redirector.py +74 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +5 -0
  5. numba_cuda/_version.py +19 -0
  6. numba_cuda/numba/cuda/__init__.py +22 -0
  7. numba_cuda/numba/cuda/api.py +526 -0
  8. numba_cuda/numba/cuda/api_util.py +30 -0
  9. numba_cuda/numba/cuda/args.py +77 -0
  10. numba_cuda/numba/cuda/cg.py +62 -0
  11. numba_cuda/numba/cuda/codegen.py +378 -0
  12. numba_cuda/numba/cuda/compiler.py +422 -0
  13. numba_cuda/numba/cuda/cpp_function_wrappers.cu +47 -0
  14. numba_cuda/numba/cuda/cuda_fp16.h +3631 -0
  15. numba_cuda/numba/cuda/cuda_fp16.hpp +2465 -0
  16. numba_cuda/numba/cuda/cuda_paths.py +258 -0
  17. numba_cuda/numba/cuda/cudadecl.py +806 -0
  18. numba_cuda/numba/cuda/cudadrv/__init__.py +9 -0
  19. numba_cuda/numba/cuda/cudadrv/devicearray.py +904 -0
  20. numba_cuda/numba/cuda/cudadrv/devices.py +248 -0
  21. numba_cuda/numba/cuda/cudadrv/driver.py +3201 -0
  22. numba_cuda/numba/cuda/cudadrv/drvapi.py +398 -0
  23. numba_cuda/numba/cuda/cudadrv/dummyarray.py +452 -0
  24. numba_cuda/numba/cuda/cudadrv/enums.py +607 -0
  25. numba_cuda/numba/cuda/cudadrv/error.py +36 -0
  26. numba_cuda/numba/cuda/cudadrv/libs.py +176 -0
  27. numba_cuda/numba/cuda/cudadrv/ndarray.py +20 -0
  28. numba_cuda/numba/cuda/cudadrv/nvrtc.py +260 -0
  29. numba_cuda/numba/cuda/cudadrv/nvvm.py +707 -0
  30. numba_cuda/numba/cuda/cudadrv/rtapi.py +10 -0
  31. numba_cuda/numba/cuda/cudadrv/runtime.py +142 -0
  32. numba_cuda/numba/cuda/cudaimpl.py +1055 -0
  33. numba_cuda/numba/cuda/cudamath.py +140 -0
  34. numba_cuda/numba/cuda/decorators.py +189 -0
  35. numba_cuda/numba/cuda/descriptor.py +33 -0
  36. numba_cuda/numba/cuda/device_init.py +89 -0
  37. numba_cuda/numba/cuda/deviceufunc.py +908 -0
  38. numba_cuda/numba/cuda/dispatcher.py +1057 -0
  39. numba_cuda/numba/cuda/errors.py +59 -0
  40. numba_cuda/numba/cuda/extending.py +7 -0
  41. numba_cuda/numba/cuda/initialize.py +13 -0
  42. numba_cuda/numba/cuda/intrinsic_wrapper.py +77 -0
  43. numba_cuda/numba/cuda/intrinsics.py +198 -0
  44. numba_cuda/numba/cuda/kernels/__init__.py +0 -0
  45. numba_cuda/numba/cuda/kernels/reduction.py +262 -0
  46. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  47. numba_cuda/numba/cuda/libdevice.py +3382 -0
  48. numba_cuda/numba/cuda/libdevicedecl.py +17 -0
  49. numba_cuda/numba/cuda/libdevicefuncs.py +1057 -0
  50. numba_cuda/numba/cuda/libdeviceimpl.py +83 -0
  51. numba_cuda/numba/cuda/mathimpl.py +448 -0
  52. numba_cuda/numba/cuda/models.py +48 -0
  53. numba_cuda/numba/cuda/nvvmutils.py +235 -0
  54. numba_cuda/numba/cuda/printimpl.py +86 -0
  55. numba_cuda/numba/cuda/random.py +292 -0
  56. numba_cuda/numba/cuda/simulator/__init__.py +38 -0
  57. numba_cuda/numba/cuda/simulator/api.py +110 -0
  58. numba_cuda/numba/cuda/simulator/compiler.py +9 -0
  59. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +2 -0
  60. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +432 -0
  61. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +117 -0
  62. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +62 -0
  63. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +4 -0
  64. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +4 -0
  65. numba_cuda/numba/cuda/simulator/cudadrv/error.py +6 -0
  66. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +2 -0
  67. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +29 -0
  68. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +19 -0
  69. numba_cuda/numba/cuda/simulator/kernel.py +308 -0
  70. numba_cuda/numba/cuda/simulator/kernelapi.py +495 -0
  71. numba_cuda/numba/cuda/simulator/reduction.py +15 -0
  72. numba_cuda/numba/cuda/simulator/vector_types.py +58 -0
  73. numba_cuda/numba/cuda/simulator_init.py +17 -0
  74. numba_cuda/numba/cuda/stubs.py +902 -0
  75. numba_cuda/numba/cuda/target.py +440 -0
  76. numba_cuda/numba/cuda/testing.py +202 -0
  77. numba_cuda/numba/cuda/tests/__init__.py +58 -0
  78. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +8 -0
  79. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +145 -0
  80. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +145 -0
  81. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +375 -0
  82. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +21 -0
  83. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +179 -0
  84. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +235 -0
  85. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +22 -0
  86. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +193 -0
  87. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +547 -0
  88. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +249 -0
  89. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +81 -0
  90. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +192 -0
  91. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +38 -0
  92. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +65 -0
  93. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +139 -0
  94. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +37 -0
  95. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +12 -0
  96. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +317 -0
  97. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +127 -0
  98. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +54 -0
  99. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +199 -0
  100. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +37 -0
  101. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +20 -0
  102. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +149 -0
  103. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +36 -0
  104. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +85 -0
  105. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +41 -0
  106. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +122 -0
  107. numba_cuda/numba/cuda/tests/cudapy/__init__.py +8 -0
  108. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +234 -0
  109. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +41 -0
  110. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +58 -0
  111. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +30 -0
  112. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +100 -0
  113. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +42 -0
  114. numba_cuda/numba/cuda/tests/cudapy/test_array.py +260 -0
  115. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +201 -0
  116. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +35 -0
  117. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1620 -0
  118. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +120 -0
  119. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +24 -0
  120. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +545 -0
  121. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +257 -0
  122. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +33 -0
  123. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +276 -0
  124. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +296 -0
  125. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +20 -0
  126. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +129 -0
  127. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +176 -0
  128. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +147 -0
  129. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +435 -0
  130. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +90 -0
  131. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +94 -0
  132. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +101 -0
  133. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +221 -0
  134. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +222 -0
  135. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +700 -0
  136. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +121 -0
  137. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +79 -0
  138. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +174 -0
  139. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +155 -0
  140. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +244 -0
  141. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +52 -0
  142. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +29 -0
  143. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +66 -0
  144. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +60 -0
  145. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +456 -0
  146. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +159 -0
  147. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +95 -0
  148. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +37 -0
  149. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +165 -0
  150. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1106 -0
  151. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +318 -0
  152. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +99 -0
  153. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +64 -0
  154. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +119 -0
  155. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +187 -0
  156. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +199 -0
  157. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +164 -0
  158. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +37 -0
  159. numba_cuda/numba/cuda/tests/cudapy/test_math.py +786 -0
  160. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +74 -0
  161. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +113 -0
  162. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +22 -0
  163. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +140 -0
  164. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +46 -0
  165. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +101 -0
  166. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +49 -0
  167. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +401 -0
  168. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +86 -0
  169. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +335 -0
  170. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +124 -0
  171. numba_cuda/numba/cuda/tests/cudapy/test_print.py +128 -0
  172. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +33 -0
  173. numba_cuda/numba/cuda/tests/cudapy/test_random.py +104 -0
  174. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +610 -0
  175. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +125 -0
  176. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +76 -0
  177. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  178. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +85 -0
  179. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +37 -0
  180. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +444 -0
  181. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +205 -0
  182. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +271 -0
  183. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +80 -0
  184. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +277 -0
  185. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +47 -0
  186. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +307 -0
  187. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +283 -0
  188. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +20 -0
  189. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +69 -0
  190. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +36 -0
  191. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +37 -0
  192. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +139 -0
  193. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +276 -0
  194. numba_cuda/numba/cuda/tests/cudasim/__init__.py +6 -0
  195. numba_cuda/numba/cuda/tests/cudasim/support.py +6 -0
  196. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +102 -0
  197. numba_cuda/numba/cuda/tests/data/__init__.py +0 -0
  198. numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
  199. numba_cuda/numba/cuda/tests/data/error.cu +7 -0
  200. numba_cuda/numba/cuda/tests/data/jitlink.cu +23 -0
  201. numba_cuda/numba/cuda/tests/data/jitlink.ptx +51 -0
  202. numba_cuda/numba/cuda/tests/data/warn.cu +7 -0
  203. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +6 -0
  204. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +0 -0
  205. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +49 -0
  206. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +77 -0
  207. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +76 -0
  208. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +82 -0
  209. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +155 -0
  210. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +173 -0
  211. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +109 -0
  212. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +59 -0
  213. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +76 -0
  214. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +130 -0
  215. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +50 -0
  216. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +73 -0
  217. numba_cuda/numba/cuda/tests/nocuda/__init__.py +8 -0
  218. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +359 -0
  219. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +36 -0
  220. numba_cuda/numba/cuda/tests/nocuda/test_import.py +49 -0
  221. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +238 -0
  222. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +54 -0
  223. numba_cuda/numba/cuda/types.py +37 -0
  224. numba_cuda/numba/cuda/ufuncs.py +662 -0
  225. numba_cuda/numba/cuda/vector_types.py +209 -0
  226. numba_cuda/numba/cuda/vectorizers.py +252 -0
  227. numba_cuda-0.0.13.dist-info/LICENSE +25 -0
  228. numba_cuda-0.0.13.dist-info/METADATA +69 -0
  229. numba_cuda-0.0.13.dist-info/RECORD +231 -0
  230. {numba_cuda-0.0.1.dist-info → numba_cuda-0.0.13.dist-info}/WHEEL +1 -1
  231. numba_cuda-0.0.1.dist-info/METADATA +0 -10
  232. numba_cuda-0.0.1.dist-info/RECORD +0 -5
  233. {numba_cuda-0.0.1.dist-info → numba_cuda-0.0.13.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,244 @@
1
+ from typing import List
2
+ from dataclasses import dataclass, field
3
+ from numba import cuda, float32
4
+ from numba.cuda.compiler import compile_ptx_for_current_device, compile_ptx
5
+ from math import cos, sin, tan, exp, log, log10, log2, pow, tanh
6
+ from operator import truediv
7
+ import numpy as np
8
+ from numba.cuda.testing import (CUDATestCase, skip_on_cudasim,
9
+ skip_unless_cc_75)
10
+ import unittest
11
+
12
+
13
@dataclass
class FastMathCriterion:
    """Substrings that must, or must not, appear in two PTX listings.

    One listing comes from a ``fastmath=True`` build ("fast") and the other
    from a default build ("prec"). Every field defaults to an empty list, so
    a criterion only has to name the checks it cares about.
    """

    # Substrings that have to occur in the fastmath PTX.
    fast_expected: List[str] = field(default_factory=list)
    # Substrings that must not occur in the fastmath PTX.
    fast_unexpected: List[str] = field(default_factory=list)
    # Substrings that have to occur in the precise PTX.
    prec_expected: List[str] = field(default_factory=list)
    # Substrings that must not occur in the precise PTX.
    prec_unexpected: List[str] = field(default_factory=list)

    def check(self, test: unittest.TestCase, fast: str, prec: str):
        """Assert that ``fast`` and ``prec`` satisfy this criterion.

        :param test: test case used to report failures; only its
            ``assertTrue`` method is called.
        :param fast: PTX text of the fastmath build.
        :param prec: PTX text of the precise build.
        :raises AssertionError: (via ``test``) on the first violated group,
            with a message naming every offending substring in that group.
        """
        # Same evaluation order as the four fields above; the first failing
        # group aborts the check.
        groups = (
            ("fastmath", fast, self.fast_expected, True),
            ("fastmath", fast, self.fast_unexpected, False),
            ("precise", prec, self.prec_expected, True),
            ("precise", prec, self.prec_unexpected, False),
        )
        for label, ptx, needles, must_contain in groups:
            offending = [s for s in needles if (s in ptx) != must_contain]
            problem = "missing from" if must_contain else "unexpected in"
            # Name the offending substrings instead of a bare
            # "False is not true" so a failure is diagnosable.
            test.assertTrue(
                not offending,
                f"{problem} {label} PTX: {offending!r}",
            )
25
+
26
+
27
@skip_on_cudasim('Fastmath and PTX inspection not available on cudasim')
class TestFastMathOption(CUDATestCase):
    """Compare PTX generated with and without ``fastmath=True``.

    Each test builds the same Python function twice and hands both PTX
    listings to a ``FastMathCriterion`` that lists the substrings expected
    or forbidden in each.
    """

    def _test_fast_math_common(self, pyfunc, sig, device, criterion):
        """Run ``criterion`` against ``pyfunc`` via two compilation routes.

        :param pyfunc: Python function to compile.
        :param sig: argument-type tuple used for compilation.
        :param device: forwarded as the ``device`` option of both routes.
        :param criterion: object whose ``check(test, fast, prec)`` is called.
        """
        # Route 1: cuda.jit, then read the assembly back from the result.
        jit_fast = cuda.jit(sig, device=device, fastmath=True)(pyfunc)
        jit_prec = cuda.jit(sig, device=device)(pyfunc)
        fast_asm = jit_fast.inspect_asm(sig)
        prec_asm = jit_prec.inspect_asm(sig)
        criterion.check(self, fast_asm, prec_asm)

        # Route 2: compile_ptx_for_current_device, which returns the PTX
        # as the first element of a pair.
        fast_ptx, _ = compile_ptx_for_current_device(
            pyfunc, sig, device=device, fastmath=True
        )
        prec_ptx, _ = compile_ptx_for_current_device(
            pyfunc, sig, device=device
        )
        criterion.check(self, fast_ptx, prec_ptx)

    def _test_fast_math_unary(self, op, criterion: FastMathCriterion):
        """Check a one-argument ``op`` as a kernel and as a device function."""
        def kernel(r, x):
            r[0] = op(x)

        def device_function(x):
            return op(x)

        variants = (
            (kernel, (float32[::1], float32), False),
            (device_function, (float32,), True),
        )
        for pyfunc, sig, is_device in variants:
            self._test_fast_math_common(
                pyfunc, sig, device=is_device, criterion=criterion
            )

    def _test_fast_math_binary(self, op, criterion: FastMathCriterion):
        """Check a two-argument ``op`` as a kernel and as a device function."""
        def kernel(r, x, y):
            r[0] = op(x, y)

        def device(x, y):
            return op(x, y)

        variants = (
            (kernel, (float32[::1], float32, float32), False),
            (device, (float32, float32), True),
        )
        for pyfunc, sig, is_device in variants:
            self._test_fast_math_common(
                pyfunc, sig, device=is_device, criterion=criterion
            )

    def test_cosf(self):
        approx_cos = 'cos.approx.ftz.f32 '
        criterion = FastMathCriterion(
            fast_expected=[approx_cos],
            prec_unexpected=[approx_cos],
        )
        self._test_fast_math_unary(cos, criterion)

    def test_sinf(self):
        approx_sin = 'sin.approx.ftz.f32 '
        criterion = FastMathCriterion(
            fast_expected=[approx_sin],
            prec_unexpected=[approx_sin],
        )
        self._test_fast_math_unary(sin, criterion)

    def test_tanf(self):
        # The fast build is expected to contain approximate sin, cos and
        # div instructions.
        criterion = FastMathCriterion(
            fast_expected=[
                'sin.approx.ftz.f32 ',
                'cos.approx.ftz.f32 ',
                'div.approx.ftz.f32 ',
            ],
            prec_unexpected=['sin.approx.ftz.f32 '],
        )
        self._test_fast_math_unary(tan, criterion)

    @skip_unless_cc_75
    def test_tanhf(self):
        approx_tanh = 'tanh.approx.f32 '
        criterion = FastMathCriterion(
            fast_expected=[approx_tanh],
            prec_unexpected=[approx_tanh],
        )
        self._test_fast_math_unary(tanh, criterion)

    def test_tanhf_compile_ptx(self):
        """Check the tanh PTX produced for two explicit compute capabilities."""
        def tanh_kernel(r, x):
            r[0] = tanh(x)

        sig = (float32[::1], float32)

        def check_for_cc(cc, criterion):
            # compile_ptx targets the given cc rather than the current device.
            fast_ptx, _ = compile_ptx(tanh_kernel, sig, fastmath=True, cc=cc)
            prec_ptx, _ = compile_ptx(tanh_kernel, sig, cc=cc)
            criterion.check(self, fast_ptx, prec_ptx)

        # For cc 7.5 the fast build is expected to contain tanh.approx.
        cc75_criterion = FastMathCriterion(
            fast_expected=['tanh.approx.f32 '],
            prec_unexpected=['tanh.approx.f32 '],
        )
        check_for_cc(cc=(7, 5), criterion=cc75_criterion)

        # For cc 7.0 the fast build is expected to contain ex2/rcp instead.
        cc70_criterion = FastMathCriterion(
            fast_expected=['ex2.approx.ftz.f32 ', 'rcp.approx.ftz.f32 '],
            prec_unexpected=['tanh.approx.f32 '],
        )
        check_for_cc(cc=(7, 0), criterion=cc70_criterion)

    def test_expf(self):
        rounded_fma = 'fma.rn.f32 '
        criterion = FastMathCriterion(
            fast_unexpected=[rounded_fma],
            prec_expected=[rounded_fma],
        )
        self._test_fast_math_unary(exp, criterion)

    def test_logf(self):
        # Look for constant used to convert from log base 2 to log base e
        criterion = FastMathCriterion(
            fast_expected=['lg2.approx.ftz.f32 ', '0f3F317218'],
            prec_unexpected=['lg2.approx.ftz.f32 '],
        )
        self._test_fast_math_unary(log, criterion)

    def test_log10f(self):
        # Look for constant used to convert from log base 2 to log base 10
        criterion = FastMathCriterion(
            fast_expected=['lg2.approx.ftz.f32 ', '0f3E9A209B'],
            prec_unexpected=['lg2.approx.ftz.f32 '],
        )
        self._test_fast_math_unary(log10, criterion)

    def test_log2f(self):
        approx_lg2 = 'lg2.approx.ftz.f32 '
        criterion = FastMathCriterion(
            fast_expected=[approx_lg2],
            prec_unexpected=[approx_lg2],
        )
        self._test_fast_math_unary(log2, criterion)

    def test_powf(self):
        approx_lg2 = 'lg2.approx.ftz.f32 '
        criterion = FastMathCriterion(
            fast_expected=[approx_lg2],
            prec_unexpected=[approx_lg2],
        )
        self._test_fast_math_binary(pow, criterion)

    def test_divf(self):
        approx_div = 'div.approx.ftz.f32 '
        rounded_div = 'div.rn.f32'
        criterion = FastMathCriterion(
            fast_expected=[approx_div],
            fast_unexpected=[rounded_div],
            prec_expected=[rounded_div],
            prec_unexpected=[approx_div],
        )
        self._test_fast_math_binary(truediv, criterion)

    def test_divf_exception(self):
        """Division by zero raises in the precise debug build only."""
        # LTO optimizes away the exception status due to an oversight
        # in the way we generate it (it is not added to the used list).
        self.skip_if_lto("Exceptions not supported with LTO")

        def f10(r, x, y):
            r[0] = x / y

        sig = (float32[::1], float32, float32)
        fast_kernel = cuda.jit(sig, fastmath=True, debug=True)(f10)
        prec_kernel = cuda.jit(sig, debug=True)(f10)

        nelem = 10
        out = np.empty(nelem, dtype=np.float32)

        with self.assertRaises(ZeroDivisionError):
            prec_kernel[1, nelem](out, 10.0, 0.0)

        try:
            fast_kernel[1, nelem](out, 10.0, 0.0)
        except ZeroDivisionError:
            self.fail("Divide in fastmath should not throw ZeroDivisionError")

    @unittest.expectedFailure
    def test_device_fastmath_propagation(self):
        # The fastmath option doesn't presently propagate to device functions
        # from their callers - arguably it should do, so this test is
        # presently an xfail.
        @cuda.jit("float32(float32, float32)", device=True)
        def foo(a, b):
            return a / b

        def bar(arr, val):
            i = cuda.grid(1)
            if i < arr.size:
                arr[i] = foo(i, val)

        sig = (float32[::1], float32)
        fast_kernel = cuda.jit(sig, fastmath=True)(bar)
        prec_kernel = cuda.jit(sig)(bar)

        # Variants of the div instruction are further documented at:
        # https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-div

        # The fast version should use the "fast, approximate divide" variant
        fast_asm = fast_kernel.inspect_asm(sig)
        self.assertIn('div.approx.f32', fast_asm)

        # The precise version should use the "IEEE 754 compliant rounding"
        # variant, and neither of the "approximate divide" variants.
        prec_asm = prec_kernel.inspect_asm(sig)
        self.assertIn('div.rn.f32', prec_asm)
        for approximate in ('div.approx.f32', 'div.full.f32'):
            self.assertNotIn(approximate, prec_asm)
241
+
242
+
243
+ if __name__ == '__main__':
244
+ unittest.main()
@@ -0,0 +1,52 @@
1
+ import numpy as np
2
+
3
+ from numba import cuda
4
+ import unittest
5
+ from numba.cuda.testing import CUDATestCase
6
+
7
+
8
@cuda.jit
def foo(x):
    """Kernel: add one to ``x[idx]`` for each thread index inside ``x``."""
    idx = cuda.grid(1)
    # Threads whose index falls past the end of the array do nothing.
    if idx < x.size:
        x[idx] = x[idx] + 1
13
+
14
+
15
class TestForAll(CUDATestCase):
    """Tests that launch kernels through their ``forall`` method."""

    def test_forall_1(self):
        """Launch the module-level ``foo`` kernel over an 11-element array."""
        data = np.arange(11)
        expected = data + 1
        launcher = foo.forall(data.size)
        launcher(data)
        np.testing.assert_array_almost_equal(data, expected)

    def test_forall_2(self):
        """Launch a locally defined ``a * x + y`` kernel over 13 elements."""
        @cuda.jit("void(float32, float32[:], float32[:])")
        def bar(a, x, y):
            i = cuda.grid(1)
            if i < x.size:
                y[i] = a * x[i] + y[i]

        a = 1.234
        x = np.arange(13, dtype=np.float32)
        y = np.arange(13, dtype=np.float32)
        # Reference result, computed before the kernel overwrites y.
        expected = a * x + y
        bar.forall(y.size)(a, x, y)
        np.testing.assert_array_almost_equal(y, expected, decimal=3)

    def test_forall_no_work(self):
        # Ensure that forall doesn't launch a kernel with no blocks when called
        # with 0 elements. See Issue #5017.
        data = np.arange(11)
        launcher = foo.forall(0)
        launcher(data)

    def test_forall_negative_work(self):
        # Ensure that forall doesn't allow the creation of a forall with a
        # negative element count.
        with self.assertRaises(ValueError) as caught:
            foo.forall(-1)
        message = str(caught.exception)
        self.assertIn("Can't create ForAll with negative task count", message)
49
+
50
+
51
+ if __name__ == '__main__':
52
+ unittest.main()
@@ -0,0 +1,29 @@
1
+ import numpy as np
2
+
3
+ from numba import cuda
4
+ from numba.cuda.testing import unittest, CUDATestCase
5
+
6
+
7
class TestFreeVar(CUDATestCase):
    """Compilation test for kernels that close over enclosing-scope values."""

    def test_freevar(self):
        """Make sure we can compile the following kernel with freevar reference
        in arguments to shared.array
        """
        from numba import float32

        # Both names below are captured by the kernel as free variables.
        nbtype = float32
        size = 1024

        @cuda.jit("(float32[::1], intp)")
        def foo(A, i):
            "Dummy function"
            sdata = cuda.shared.array(
                size,           # free variable from the enclosing method
                dtype=nbtype,   # free variable from the enclosing method
            )
            A[i] = sdata[i]

        host = np.arange(2, dtype="float32")
        launch = foo[1, 1]
        launch(host, 0)
26
+
27
+
28
+ if __name__ == '__main__':
29
+ unittest.main()
@@ -0,0 +1,66 @@
1
+ import numpy as np
2
+ import math
3
+ from numba import cuda
4
+ from numba.types import float32, float64, int32, void
5
+ from numba.cuda.testing import unittest, CUDATestCase
6
+
7
+
8
def simple_frexp(aryx, aryexp, arg):
    """Store the two results of ``math.frexp(arg)`` in the output arrays.

    The first result goes to ``aryx[0]`` and the second to ``aryexp[0]``.
    """
    mantissa, exponent = math.frexp(arg)
    aryx[0] = mantissa
    aryexp[0] = exponent
10
+
11
+
12
def simple_ldexp(aryx, arg, exp):
    """Store ``math.ldexp(arg, exp)`` in ``aryx[0]``."""
    scaled = math.ldexp(arg, exp)
    aryx[0] = scaled
14
+
15
+
16
class TestCudaFrexpLdexp(CUDATestCase):
    """Run ``simple_frexp`` / ``simple_ldexp`` as kernels for two float types."""

    def template_test_frexp(self, nptype, nbtype):
        """Compile ``simple_frexp`` for ``nbtype`` and check three inputs.

        :param nptype: NumPy scalar type used for the output array and the
            expected values.
        :param nbtype: Numba type used in the kernel signature.
        """
        signature = void(nbtype[:], int32[:], nbtype)
        compiled = cuda.jit(signature)(simple_frexp)
        aryx = np.zeros(1, dtype=nptype)
        aryexp = np.zeros(1, dtype=np.int32)

        # (input, expected first output, expected second output)
        cases = (
            (3.1415, 0.785375, 2),
            (np.inf, np.inf, 0),  # np.frexp gives -1
            (np.nan, np.nan, 0),  # np.frexp gives -1
        )
        for arg, want_x, want_exp in cases:
            compiled[1, 1](aryx, aryexp, arg)
            np.testing.assert_array_equal(aryx, nptype(want_x))
            self.assertEqual(aryexp, want_exp)

    def template_test_ldexp(self, nptype, nbtype):
        """Compile ``simple_ldexp`` for ``nbtype`` and check three inputs.

        :param nptype: NumPy scalar type used for the output array and the
            expected values.
        :param nbtype: Numba type used in the kernel signature.
        """
        signature = void(nbtype[:], nbtype, int32)
        compiled = cuda.jit(signature)(simple_ldexp)
        exp = 2
        aryx = np.zeros(1, dtype=nptype)

        # (input, expected output) for the fixed exponent above
        cases = (
            (0.785375, 3.1415),
            (np.inf, np.inf),
            (np.nan, np.nan),
        )
        for arg, want in cases:
            compiled[1, 1](aryx, arg, exp)
            np.testing.assert_array_equal(aryx, nptype(want))

    def test_frexp_f4(self):
        self.template_test_frexp(nptype=np.float32, nbtype=float32)

    def test_ldexp_f4(self):
        self.template_test_ldexp(nptype=np.float32, nbtype=float32)

    def test_frexp_f8(self):
        self.template_test_frexp(nptype=np.float64, nbtype=float64)

    def test_ldexp_f8(self):
        self.template_test_ldexp(nptype=np.float64, nbtype=float64)
63
+
64
+
65
+ if __name__ == '__main__':
66
+ unittest.main()
@@ -0,0 +1,60 @@
1
+ import numpy as np
2
+ from numba import cuda, int32, float32
3
+ from numba.cuda.testing import unittest, CUDATestCase
4
+
5
+ N = 100
6
+
7
+
8
def simple_smem(ary):
    """Fill a shared array of ``N`` int32 values and copy it out to ``ary``.

    The thread with grid index 0 writes ``0 .. N-1`` into the shared array;
    after ``cuda.syncthreads()`` each thread copies the element at its own
    index into ``ary``.
    """
    shared = cuda.shared.array(N, int32)
    tid = cuda.grid(1)
    if tid == 0:
        for k in range(N):
            shared[k] = k
    # Make the values written by thread 0 visible before they are read.
    cuda.syncthreads()
    ary[tid] = shared[tid]
16
+
17
+
18
+ S0 = 10
19
+ S1 = 20
20
+
21
+
22
def coop_smem2d(ary):
    """Write ``(i + 1) / (j + 1)`` through a shared ``(S0, S1)`` float32 array.

    Each thread stores its value at its 2D grid index in the shared array
    and, after ``cuda.syncthreads()``, copies that element into ``ary``.
    """
    row, col = cuda.grid(2)
    shared = cuda.shared.array((S0, S1), float32)
    shared[row, col] = (row + 1) / (col + 1)
    cuda.syncthreads()
    ary[row, col] = shared[row, col]
28
+
29
+
30
class TestCudaTestGlobal(CUDATestCase):
    """Tests for kernels whose shared-array shapes are module-level globals."""

    def test_global_int_const(self):
        """Test simple_smem
        """
        compiled = cuda.jit("void(int32[:])")(simple_smem)

        nelem = 100
        ary = np.empty(nelem, dtype=np.int32)
        # One block of nelem threads.
        compiled[1, nelem](ary)

        self.assertTrue(np.all(ary == np.arange(nelem, dtype=np.int32)))

    # ``unittest.SkipTest`` is an exception class, not a decorator: applying
    # it replaced the method with a non-callable exception instance, so the
    # test silently vanished from collection. ``unittest.skip`` keeps it
    # disabled but reports it as skipped.
    @unittest.skip("disabled in the original source; reason not recorded")
    def test_global_tuple_const(self):
        """Test coop_smem2d
        """
        compiled = cuda.jit("void(float32[:,:])")(coop_smem2d)

        shape = 10, 20
        ary = np.empty(shape, dtype=np.float32)
        compiled[1, shape](ary)

        # Host-side reference for the value each thread writes.
        exp = np.empty_like(ary)
        for i in range(ary.shape[0]):
            for j in range(ary.shape[1]):
                exp[i, j] = float(i + 1) / (j + 1)
        self.assertTrue(np.allclose(ary, exp))
57
+
58
+
59
+ if __name__ == '__main__':
60
+ unittest.main()