numba-cuda 0.0.0__py3-none-any.whl → 0.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (233) hide show
  1. _numba_cuda_redirector.pth +1 -0
  2. _numba_cuda_redirector.py +74 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +5 -0
  5. numba_cuda/_version.py +19 -0
  6. numba_cuda/numba/cuda/__init__.py +22 -0
  7. numba_cuda/numba/cuda/api.py +526 -0
  8. numba_cuda/numba/cuda/api_util.py +30 -0
  9. numba_cuda/numba/cuda/args.py +77 -0
  10. numba_cuda/numba/cuda/cg.py +62 -0
  11. numba_cuda/numba/cuda/codegen.py +378 -0
  12. numba_cuda/numba/cuda/compiler.py +422 -0
  13. numba_cuda/numba/cuda/cpp_function_wrappers.cu +47 -0
  14. numba_cuda/numba/cuda/cuda_fp16.h +3631 -0
  15. numba_cuda/numba/cuda/cuda_fp16.hpp +2465 -0
  16. numba_cuda/numba/cuda/cuda_paths.py +258 -0
  17. numba_cuda/numba/cuda/cudadecl.py +806 -0
  18. numba_cuda/numba/cuda/cudadrv/__init__.py +9 -0
  19. numba_cuda/numba/cuda/cudadrv/devicearray.py +904 -0
  20. numba_cuda/numba/cuda/cudadrv/devices.py +248 -0
  21. numba_cuda/numba/cuda/cudadrv/driver.py +3201 -0
  22. numba_cuda/numba/cuda/cudadrv/drvapi.py +398 -0
  23. numba_cuda/numba/cuda/cudadrv/dummyarray.py +452 -0
  24. numba_cuda/numba/cuda/cudadrv/enums.py +607 -0
  25. numba_cuda/numba/cuda/cudadrv/error.py +36 -0
  26. numba_cuda/numba/cuda/cudadrv/libs.py +176 -0
  27. numba_cuda/numba/cuda/cudadrv/ndarray.py +20 -0
  28. numba_cuda/numba/cuda/cudadrv/nvrtc.py +260 -0
  29. numba_cuda/numba/cuda/cudadrv/nvvm.py +707 -0
  30. numba_cuda/numba/cuda/cudadrv/rtapi.py +10 -0
  31. numba_cuda/numba/cuda/cudadrv/runtime.py +142 -0
  32. numba_cuda/numba/cuda/cudaimpl.py +1055 -0
  33. numba_cuda/numba/cuda/cudamath.py +140 -0
  34. numba_cuda/numba/cuda/decorators.py +189 -0
  35. numba_cuda/numba/cuda/descriptor.py +33 -0
  36. numba_cuda/numba/cuda/device_init.py +89 -0
  37. numba_cuda/numba/cuda/deviceufunc.py +908 -0
  38. numba_cuda/numba/cuda/dispatcher.py +1057 -0
  39. numba_cuda/numba/cuda/errors.py +59 -0
  40. numba_cuda/numba/cuda/extending.py +7 -0
  41. numba_cuda/numba/cuda/initialize.py +13 -0
  42. numba_cuda/numba/cuda/intrinsic_wrapper.py +77 -0
  43. numba_cuda/numba/cuda/intrinsics.py +198 -0
  44. numba_cuda/numba/cuda/kernels/__init__.py +0 -0
  45. numba_cuda/numba/cuda/kernels/reduction.py +262 -0
  46. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  47. numba_cuda/numba/cuda/libdevice.py +3382 -0
  48. numba_cuda/numba/cuda/libdevicedecl.py +17 -0
  49. numba_cuda/numba/cuda/libdevicefuncs.py +1057 -0
  50. numba_cuda/numba/cuda/libdeviceimpl.py +83 -0
  51. numba_cuda/numba/cuda/mathimpl.py +448 -0
  52. numba_cuda/numba/cuda/models.py +48 -0
  53. numba_cuda/numba/cuda/nvvmutils.py +235 -0
  54. numba_cuda/numba/cuda/printimpl.py +86 -0
  55. numba_cuda/numba/cuda/random.py +292 -0
  56. numba_cuda/numba/cuda/simulator/__init__.py +38 -0
  57. numba_cuda/numba/cuda/simulator/api.py +110 -0
  58. numba_cuda/numba/cuda/simulator/compiler.py +9 -0
  59. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +2 -0
  60. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +432 -0
  61. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +117 -0
  62. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +62 -0
  63. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +4 -0
  64. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +4 -0
  65. numba_cuda/numba/cuda/simulator/cudadrv/error.py +6 -0
  66. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +2 -0
  67. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +29 -0
  68. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +19 -0
  69. numba_cuda/numba/cuda/simulator/kernel.py +308 -0
  70. numba_cuda/numba/cuda/simulator/kernelapi.py +495 -0
  71. numba_cuda/numba/cuda/simulator/reduction.py +15 -0
  72. numba_cuda/numba/cuda/simulator/vector_types.py +58 -0
  73. numba_cuda/numba/cuda/simulator_init.py +17 -0
  74. numba_cuda/numba/cuda/stubs.py +902 -0
  75. numba_cuda/numba/cuda/target.py +440 -0
  76. numba_cuda/numba/cuda/testing.py +202 -0
  77. numba_cuda/numba/cuda/tests/__init__.py +58 -0
  78. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +8 -0
  79. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +145 -0
  80. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +145 -0
  81. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +375 -0
  82. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +21 -0
  83. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +179 -0
  84. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +235 -0
  85. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +22 -0
  86. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +193 -0
  87. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +547 -0
  88. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +249 -0
  89. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +81 -0
  90. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +192 -0
  91. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +38 -0
  92. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +65 -0
  93. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +139 -0
  94. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +37 -0
  95. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +12 -0
  96. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +317 -0
  97. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +127 -0
  98. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +54 -0
  99. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +199 -0
  100. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +37 -0
  101. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +20 -0
  102. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +149 -0
  103. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +36 -0
  104. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +85 -0
  105. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +41 -0
  106. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +122 -0
  107. numba_cuda/numba/cuda/tests/cudapy/__init__.py +8 -0
  108. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +234 -0
  109. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +41 -0
  110. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +58 -0
  111. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +30 -0
  112. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +100 -0
  113. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +42 -0
  114. numba_cuda/numba/cuda/tests/cudapy/test_array.py +260 -0
  115. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +201 -0
  116. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +35 -0
  117. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1620 -0
  118. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +120 -0
  119. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +24 -0
  120. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +545 -0
  121. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +257 -0
  122. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +33 -0
  123. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +276 -0
  124. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +296 -0
  125. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +20 -0
  126. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +129 -0
  127. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +176 -0
  128. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +147 -0
  129. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +435 -0
  130. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +90 -0
  131. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +94 -0
  132. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +101 -0
  133. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +221 -0
  134. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +222 -0
  135. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +700 -0
  136. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +121 -0
  137. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +79 -0
  138. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +174 -0
  139. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +155 -0
  140. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +244 -0
  141. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +52 -0
  142. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +29 -0
  143. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +66 -0
  144. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +60 -0
  145. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +456 -0
  146. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +159 -0
  147. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +95 -0
  148. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +37 -0
  149. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +165 -0
  150. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1106 -0
  151. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +318 -0
  152. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +99 -0
  153. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +64 -0
  154. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +119 -0
  155. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +187 -0
  156. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +199 -0
  157. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +164 -0
  158. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +37 -0
  159. numba_cuda/numba/cuda/tests/cudapy/test_math.py +786 -0
  160. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +74 -0
  161. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +113 -0
  162. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +22 -0
  163. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +140 -0
  164. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +46 -0
  165. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +101 -0
  166. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +49 -0
  167. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +401 -0
  168. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +86 -0
  169. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +335 -0
  170. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +124 -0
  171. numba_cuda/numba/cuda/tests/cudapy/test_print.py +128 -0
  172. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +33 -0
  173. numba_cuda/numba/cuda/tests/cudapy/test_random.py +104 -0
  174. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +610 -0
  175. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +125 -0
  176. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +76 -0
  177. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  178. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +85 -0
  179. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +37 -0
  180. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +444 -0
  181. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +205 -0
  182. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +271 -0
  183. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +80 -0
  184. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +277 -0
  185. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +47 -0
  186. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +307 -0
  187. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +283 -0
  188. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +20 -0
  189. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +69 -0
  190. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +36 -0
  191. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +37 -0
  192. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +139 -0
  193. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +276 -0
  194. numba_cuda/numba/cuda/tests/cudasim/__init__.py +6 -0
  195. numba_cuda/numba/cuda/tests/cudasim/support.py +6 -0
  196. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +102 -0
  197. numba_cuda/numba/cuda/tests/data/__init__.py +0 -0
  198. numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
  199. numba_cuda/numba/cuda/tests/data/error.cu +7 -0
  200. numba_cuda/numba/cuda/tests/data/jitlink.cu +23 -0
  201. numba_cuda/numba/cuda/tests/data/jitlink.ptx +51 -0
  202. numba_cuda/numba/cuda/tests/data/warn.cu +7 -0
  203. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +6 -0
  204. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +0 -0
  205. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +49 -0
  206. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +77 -0
  207. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +76 -0
  208. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +82 -0
  209. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +155 -0
  210. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +173 -0
  211. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +109 -0
  212. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +59 -0
  213. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +76 -0
  214. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +130 -0
  215. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +50 -0
  216. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +73 -0
  217. numba_cuda/numba/cuda/tests/nocuda/__init__.py +8 -0
  218. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +359 -0
  219. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +36 -0
  220. numba_cuda/numba/cuda/tests/nocuda/test_import.py +49 -0
  221. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +238 -0
  222. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +54 -0
  223. numba_cuda/numba/cuda/types.py +37 -0
  224. numba_cuda/numba/cuda/ufuncs.py +662 -0
  225. numba_cuda/numba/cuda/vector_types.py +209 -0
  226. numba_cuda/numba/cuda/vectorizers.py +252 -0
  227. numba_cuda-0.0.12.dist-info/LICENSE +25 -0
  228. numba_cuda-0.0.12.dist-info/METADATA +68 -0
  229. numba_cuda-0.0.12.dist-info/RECORD +231 -0
  230. {numba_cuda-0.0.0.dist-info → numba_cuda-0.0.12.dist-info}/WHEEL +1 -1
  231. numba_cuda-0.0.0.dist-info/METADATA +0 -6
  232. numba_cuda-0.0.0.dist-info/RECORD +0 -5
  233. {numba_cuda-0.0.0.dist-info → numba_cuda-0.0.12.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,140 @@
1
+ import math
2
+ from numba.core import types
3
+ from numba.core.typing.templates import ConcreteTemplate, signature, Registry
4
+
5
+
6
# Module-local registry that collects every typing template declared below.
registry = Registry()

# Shorthand used as a class decorator to associate a template with a global
# function object (here, functions of the ``math`` module).
infer_global = registry.register_global
8
+
9
+
10
@infer_global(math.acos)
@infer_global(math.acosh)
@infer_global(math.asin)
@infer_global(math.asinh)
@infer_global(math.atan)
@infer_global(math.atanh)
@infer_global(math.cosh)
@infer_global(math.degrees)
@infer_global(math.erf)
@infer_global(math.erfc)
@infer_global(math.expm1)
@infer_global(math.gamma)
@infer_global(math.lgamma)
@infer_global(math.log1p)
@infer_global(math.radians)
@infer_global(math.sinh)
@infer_global(math.tanh)
@infer_global(math.tan)
class Math_unary(ConcreteTemplate):
    """Signatures for one-argument ``math`` functions without a float16 case.

    int64 and uint64 arguments produce float64; float32 and float64
    arguments produce a result of the same type as the argument.
    """

    # Each pair is (return type, argument type), in lookup order.
    cases = [
        signature(result, argument)
        for result, argument in (
            (types.float64, types.int64),
            (types.float64, types.uint64),
            (types.float32, types.float32),
            (types.float64, types.float64),
        )
    ]
35
+
36
+
37
@infer_global(math.sin)
@infer_global(math.cos)
@infer_global(math.ceil)
@infer_global(math.floor)
@infer_global(math.sqrt)
@infer_global(math.log)
@infer_global(math.log2)
@infer_global(math.log10)
@infer_global(math.exp)
@infer_global(math.fabs)
@infer_global(math.trunc)
class Math_unary_with_fp16(ConcreteTemplate):
    """Signatures for one-argument ``math`` functions that also take float16.

    Same cases as the plain unary template (int64/uint64 -> float64,
    float32 -> float32, float64 -> float64) plus float16 -> float16.
    """

    # Each pair is (return type, argument type), in lookup order.
    cases = [
        signature(result, argument)
        for result, argument in (
            (types.float64, types.int64),
            (types.float64, types.uint64),
            (types.float32, types.float32),
            (types.float64, types.float64),
            (types.float16, types.float16),
        )
    ]
56
+
57
+
58
@infer_global(math.atan2)
class Math_atan2(ConcreteTemplate):
    """Signatures for ``math.atan2``; both arguments share one type.

    int64 and uint64 operands produce float64; float32 and float64 operands
    produce a result of the operand type.
    """

    key = math.atan2
    # Each pair is (return type, type of both operands).
    cases = [
        signature(result, operand, operand)
        for result, operand in (
            (types.float64, types.int64),
            (types.float64, types.uint64),
            (types.float32, types.float32),
            (types.float64, types.float64),
        )
    ]
67
+
68
+
69
@infer_global(math.hypot)
class Math_hypot(ConcreteTemplate):
    """Signatures for ``math.hypot``; both arguments share one type.

    int64 and uint64 operands produce float64; float32 and float64 operands
    produce a result of the operand type.
    """

    key = math.hypot
    # Each pair is (return type, type of both operands).
    cases = [
        signature(result, operand, operand)
        for result, operand in (
            (types.float64, types.int64),
            (types.float64, types.uint64),
            (types.float32, types.float32),
            (types.float64, types.float64),
        )
    ]
78
+
79
+
80
@infer_global(math.copysign)
@infer_global(math.fmod)
class Math_binary(ConcreteTemplate):
    """Signatures for two-argument ``math`` functions on matching floats.

    Only float32 and float64 are listed; arguments and result all have the
    same type.
    """

    cases = [
        signature(fp, fp, fp)
        for fp in (types.float32, types.float64)
    ]
87
+
88
+
89
@infer_global(math.remainder)
class Math_remainder(ConcreteTemplate):
    """Signatures for ``math.remainder`` on float32 and float64.

    Both arguments and the result have the same floating-point type.
    """

    cases = [
        signature(fp, fp, fp)
        for fp in (types.float32, types.float64)
    ]
95
+
96
+
97
@infer_global(math.pow)
class Math_pow(ConcreteTemplate):
    """Signatures for ``math.pow``.

    The result always has the type of the base (float32 or float64). The
    exponent is either the same floating-point type as the base or int32.
    """

    # Each pair is (base type, exponent type); the base type is also the
    # return type.
    cases = [
        signature(base, base, exponent)
        for base, exponent in (
            (types.float32, types.float32),
            (types.float64, types.float64),
            (types.float32, types.int32),
            (types.float64, types.int32),
        )
    ]
105
+
106
+
107
@infer_global(math.frexp)
class Math_frexp(ConcreteTemplate):
    """Signatures for ``math.frexp`` on float32 and float64.

    The result is a two-element tuple: a float of the argument's type
    followed by an int32.
    """

    cases = [
        signature(types.Tuple([fp, types.int32]), fp)
        for fp in (types.float32, types.float64)
    ]
113
+
114
+
115
@infer_global(math.ldexp)
class Math_ldexp(ConcreteTemplate):
    """Signatures for ``math.ldexp``.

    Takes a float32 or float64 value plus an int32 and returns a float of
    the same type as the first argument.
    """

    cases = [
        signature(fp, fp, types.int32)
        for fp in (types.float32, types.float64)
    ]
121
+
122
+
123
@infer_global(math.isinf)
@infer_global(math.isnan)
@infer_global(math.isfinite)
class Math_isnan(ConcreteTemplate):
    """Signatures for the ``math`` classification predicates.

    Shared by ``isinf``, ``isnan`` and ``isfinite``: each accepts int64,
    uint64, float32 or float64 and returns a boolean.
    """

    cases = [
        signature(types.boolean, argument)
        for argument in (
            types.int64,
            types.uint64,
            types.float32,
            types.float64,
        )
    ]
133
+
134
+
135
@infer_global(math.modf)
class Math_modf(ConcreteTemplate):
    """Signatures for ``math.modf`` on float64 and float32.

    The result is a homogeneous pair of floats with the argument's type.
    """

    # float64 is listed before float32, matching the original case order.
    cases = [
        signature(types.UniTuple(fp, 2), fp)
        for fp in (types.float64, types.float32)
    ]
@@ -0,0 +1,189 @@
1
+ from warnings import warn
2
+ from numba.core import types, config, sigutils
3
+ from numba.core.errors import DeprecationError, NumbaInvalidConfigWarning
4
+ from numba.cuda.compiler import declare_device_function
5
+ from numba.cuda.dispatcher import CUDADispatcher
6
+ from numba.cuda.simulator.kernel import FakeCUDAKernel
7
+
8
+
9
+ _msg_deprecated_signature_arg = ("Deprecated keyword argument `{0}`. "
10
+ "Signatures should be passed as the first "
11
+ "positional argument.")
12
+
13
+
14
def jit(func_or_sig=None, device=False, inline=False, link=None, debug=None,
        opt=True, lineinfo=False, cache=False, **kws):
    """
    JIT compile a Python function for CUDA GPUs.

    :param func_or_sig: A function to JIT compile, or *signatures* of a
       function to compile. If a function is supplied, then a
       :class:`Dispatcher <numba.cuda.dispatcher.CUDADispatcher>` is returned.
       Otherwise, ``func_or_sig`` may be a signature or a list of signatures,
       and a function is returned. The returned function accepts another
       function, which it will compile and then return a :class:`Dispatcher
       <numba.cuda.dispatcher.CUDADispatcher>`. See :ref:`jit-decorator` for
       more information about passing signatures.

       .. note:: A kernel cannot have any return value.
    :param device: Indicates whether this is a device function.
    :type device: bool
    :param inline: Accepted for interface compatibility; this function does
       not read it.
    :param link: A list of files containing PTX or CUDA C/C++ source to link
       with the function. ``None`` (the default) means no files.
    :type link: list
    :param debug: If True, check for exceptions thrown when executing the
       kernel. Since this degrades performance, this should only be used for
       debugging purposes. If set to True, then ``opt`` should be set to False.
       Defaults to False. (The default value can be overridden by setting
       environment variable ``NUMBA_CUDA_DEBUGINFO=1``.)
    :param fastmath: When True, enables fastmath optimizations as outlined in
       the :ref:`CUDA Fast Math documentation <cuda-fast-math>`.
    :param max_registers: Request that the kernel is limited to using at most
       this number of registers per thread. The limit may not be respected if
       the ABI requires a greater number of registers than that requested.
       Useful for increasing occupancy.
    :param opt: Whether to compile from LLVM IR to PTX with optimization
                enabled. When ``True``, ``-opt=3`` is passed to NVVM. When
                ``False``, ``-opt=0`` is passed to NVVM. Defaults to ``True``.
    :type opt: bool
    :param lineinfo: If True, generate a line mapping between source code and
       assembly code. This enables inspection of the source code in NVIDIA
       profiling tools and correlation with program counter sampling.
    :type lineinfo: bool
    :param cache: If True, enables the file-based cache for this function.
    :type cache: bool
    :raises NotImplementedError: If ``link`` is non-empty while the simulator
       is enabled, or if a truthy ``boundscheck`` keyword is given.
    :raises DeprecationError: If ``argtypes``, ``restype`` or ``bind`` is
       passed as a keyword with a value other than ``None``.
    :raises TypeError: When compiling a kernel (not a device function) from a
       signature whose return type is neither empty nor ``void``.
    """

    # A fresh list per call: a literal ``[]`` default would be one object
    # shared by every call that omits ``link``.
    if link is None:
        link = []

    if link and config.ENABLE_CUDASIM:
        raise NotImplementedError('Cannot link PTX in the simulator')

    if kws.get('boundscheck'):
        raise NotImplementedError("bounds checking is not supported for CUDA")

    # Signature-related keywords are rejected, checked in this fixed order.
    for removed_kw in ('argtypes', 'restype', 'bind'):
        if kws.get(removed_kw) is not None:
            msg = _msg_deprecated_signature_arg.format(removed_kw)
            raise DeprecationError(msg)

    debug = config.CUDA_DEBUGINFO_DEFAULT if debug is None else debug
    fastmath = kws.get('fastmath', False)
    extensions = kws.get('extensions', [])

    if debug and opt:
        msg = ("debug=True with opt=True (the default) "
               "is not supported by CUDA. This may result in a crash"
               " - set debug=False or opt=False.")
        warn(NumbaInvalidConfigWarning(msg))

    if debug and lineinfo:
        msg = ("debug and lineinfo are mutually exclusive. Use debug to get "
               "full debug info (this disables some optimizations), or "
               "lineinfo for line info only with code generation unaffected.")
        warn(NumbaInvalidConfigWarning(msg))

    # NOTE(review): ``link`` is a named parameter of this function, so it can
    # never appear in ``kws`` and this check cannot trigger as written. It is
    # left unchanged because enforcing it would start rejecting calls that
    # are accepted today - confirm the intended behaviour before changing it.
    if device and kws.get('link'):
        raise ValueError("link keyword invalid for device function")

    def _new_dispatcher(func):
        # Build the dispatcher for ``func`` from the options resolved above;
        # explicit options override same-named entries forwarded in ``kws``.
        targetoptions = kws.copy()
        targetoptions.update(
            debug=debug,
            lineinfo=lineinfo,
            link=link,
            opt=opt,
            fastmath=fastmath,
            device=device,
            extensions=extensions,
        )
        disp = CUDADispatcher(func, targetoptions=targetoptions)

        if cache:
            disp.enable_caching()

        return disp

    if sigutils.is_signature(func_or_sig):
        signatures = [func_or_sig]
        specialized = True
    elif isinstance(func_or_sig, list):
        signatures = func_or_sig
        specialized = False
    else:
        signatures = None

    if signatures is not None:
        # One or more signatures were given: return a decorator that compiles
        # the decorated function for each of them up front.
        if config.ENABLE_CUDASIM:
            def jitwrapper(func):
                return FakeCUDAKernel(func, device=device, fastmath=fastmath)
            return jitwrapper

        def _jit(func):
            disp = _new_dispatcher(func)

            for sig in signatures:
                argtypes, restype = sigutils.normalize_signature(sig)

                if restype and not device and restype != types.void:
                    raise TypeError("CUDA kernel must have void return type.")

                if device:
                    from numba.core import typeinfer
                    with typeinfer.register_dispatcher(disp):
                        disp.compile_device(argtypes, restype)
                else:
                    disp.compile(argtypes)

            disp._specialized = specialized
            disp.disable_compile()

            return disp

        return _jit

    if func_or_sig is None:
        # Called with options only, e.g. ``@jit(debug=True)``: return a
        # decorator that applies those options to the decorated function.
        if config.ENABLE_CUDASIM:
            def autojitwrapper(func):
                return FakeCUDAKernel(func, device=device,
                                      fastmath=fastmath)
        else:
            def autojitwrapper(func):
                return jit(func, device=device, debug=debug, opt=opt,
                           lineinfo=lineinfo, link=link, cache=cache, **kws)

        return autojitwrapper

    # func_or_sig is a function
    if config.ENABLE_CUDASIM:
        return FakeCUDAKernel(func_or_sig, device=device,
                              fastmath=fastmath)

    return _new_dispatcher(func_or_sig)
173
+
174
+
175
def declare_device(name, sig):
    """
    Declare a foreign function by name and signature.

    The returned descriptor can be used to call the function from a Python
    kernel.

    :param name: The name of the foreign function.
    :type name: str
    :param sig: The Numba signature of the function.
    :raises TypeError: If the normalized signature has no return type.
    """
    argtypes, restype = sigutils.normalize_signature(sig)
    if restype is not None:
        return declare_device_function(name, restype, argtypes)

    raise TypeError('Return type must be provided for device declarations')
@@ -0,0 +1,33 @@
1
+ from numba.core.descriptors import TargetDescriptor
2
+ from numba.core.options import TargetOptions
3
+ from .target import CUDATargetContext, CUDATypingContext
4
+
5
+
6
class CUDATargetOptions(TargetOptions):
    """Options class for the CUDA target; adds nothing to ``TargetOptions``."""
8
+
9
+
10
class CUDATarget(TargetDescriptor):
    """Target descriptor for CUDA whose contexts are created on first use."""

    def __init__(self, name):
        """Record the options class and defer creation of both contexts.

        :param name: Target name, forwarded to ``TargetDescriptor``.
        """
        self.options = CUDATargetOptions
        # The typing and target contexts are initialized only when needed -
        # this prevents an attempt to load CUDA libraries at import time on
        # systems that might not have them present.
        self._typingctx = None
        self._targetctx = None
        super().__init__(name)

    @property
    def typing_context(self):
        """The ``CUDATypingContext``, created the first time it is read."""
        if self._typingctx is None:
            self._typingctx = CUDATypingContext()
        return self._typingctx

    @property
    def target_context(self):
        """The ``CUDATargetContext``, created the first time it is read."""
        if self._targetctx is None:
            # Go through the ``typing_context`` property rather than reading
            # ``_typingctx`` directly: if the target context is requested
            # first, the raw attribute is still None and would be handed to
            # ``CUDATargetContext`` as its typing context.
            self._targetctx = CUDATargetContext(self.typing_context)
        return self._targetctx
31
+
32
+
33
# Module-level target descriptor instance, registered under the name 'cuda'.
cuda_target = CUDATarget('cuda')
@@ -0,0 +1,89 @@
1
+ # Re export
2
+ import sys
3
+ from numba.cuda import cg
4
+ from .stubs import (threadIdx, blockIdx, blockDim, gridDim, laneid, warpsize,
5
+ syncwarp, shared, local, const, atomic,
6
+ shfl_sync_intrinsic, vote_sync_intrinsic, match_any_sync,
7
+ match_all_sync, threadfence_block, threadfence_system,
8
+ threadfence, selp, popc, brev, clz, ffs, fma, cbrt,
9
+ activemask, lanemask_lt, nanosleep, fp16,
10
+ _vector_type_stubs)
11
+ from .intrinsics import (grid, gridsize, syncthreads, syncthreads_and,
12
+ syncthreads_count, syncthreads_or)
13
+ from .cudadrv.error import CudaSupportError
14
+ from numba.cuda.cudadrv.driver import (BaseCUDAMemoryManager,
15
+ HostOnlyCUDAMemoryManager,
16
+ GetIpcHandleMixin, MemoryPointer,
17
+ MappedMemory, PinnedMemory, MemoryInfo,
18
+ IpcHandle, set_memory_manager)
19
+ from numba.cuda.cudadrv.runtime import runtime
20
+ from .cudadrv import nvvm
21
+ from numba.cuda import initialize
22
+ from .errors import KernelRuntimeError
23
+
24
+ from .decorators import jit, declare_device
25
+ from .api import *
26
+ from .api import _auto_device
27
+ from .args import In, Out, InOut
28
+
29
+ from .intrinsic_wrapper import (all_sync, any_sync, eq_sync, ballot_sync,
30
+ shfl_sync, shfl_up_sync, shfl_down_sync,
31
+ shfl_xor_sync)
32
+
33
+ from .kernels import reduction
34
+
35
# Publish the reduction class under both spellings.
Reduce = reduction.Reduce
reduce = Reduce

# Expose vector type constructors and aliases as module level attributes:
# each stub is bound under its own ``__name__`` and under every alias it
# lists.
for vector_type_stub in _vector_type_stubs:
    setattr(sys.modules[__name__], vector_type_stub.__name__,
            vector_type_stub)
    for alias in vector_type_stub.aliases:
        setattr(sys.modules[__name__], alias, vector_type_stub)
# Drop the loop variable and the stub list so they are not left behind as
# module attributes.
del vector_type_stub, _vector_type_stubs
43
+
44
+
45
def is_available():
    """Returns a boolean to indicate the availability of a CUDA GPU.

    This will initialize the driver if it hasn't been initialized.
    """
    # `driver.is_available` initializes the driver itself, and that
    # initialization may raise. Because `cuda.is_available` is commonly used
    # as a guard deciding whether to run a CUDA test, letting the error
    # escape would break test discovery/orchestration - so a failed
    # initialization is reported as "not available" instead.
    try:
        driver_is_available = driver.driver.is_available
    except CudaSupportError:
        return False

    return driver_is_available and nvvm.is_available()
62
+
63
+
64
def is_supported_version():
    """Returns True if the CUDA Runtime is a supported version.

    A version that is not supported (for instance one newer than those known
    to Numba) may still work. This function only reports whether the current
    Numba version is tested and known to work with the current runtime
    version, leaving the reaction to the caller. Possible reactions are:

    - Continuing silently,
    - Emitting a warning,
    - Generating an error or otherwise preventing the use of CUDA.
    """
    supported = runtime.is_supported_version()
    return supported
79
+
80
+
81
def cuda_error():
    """Report the CUDA driver initialization error, if any.

    Returns None if there was no error initializing the CUDA driver;
    otherwise returns a string describing the error.
    """
    init_error = driver.driver.initialization_error
    return init_error
87
+
88
+
89
# Run the package initialization hook as a side effect of importing this
# module.
initialize.initialize_all()