numba-cuda 0.0.1__py3-none-any.whl → 0.0.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (233) hide show
  1. _numba_cuda_redirector.pth +1 -0
  2. _numba_cuda_redirector.py +74 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +5 -0
  5. numba_cuda/_version.py +19 -0
  6. numba_cuda/numba/cuda/__init__.py +22 -0
  7. numba_cuda/numba/cuda/api.py +526 -0
  8. numba_cuda/numba/cuda/api_util.py +30 -0
  9. numba_cuda/numba/cuda/args.py +77 -0
  10. numba_cuda/numba/cuda/cg.py +62 -0
  11. numba_cuda/numba/cuda/codegen.py +378 -0
  12. numba_cuda/numba/cuda/compiler.py +422 -0
  13. numba_cuda/numba/cuda/cpp_function_wrappers.cu +47 -0
  14. numba_cuda/numba/cuda/cuda_fp16.h +3631 -0
  15. numba_cuda/numba/cuda/cuda_fp16.hpp +2465 -0
  16. numba_cuda/numba/cuda/cuda_paths.py +258 -0
  17. numba_cuda/numba/cuda/cudadecl.py +806 -0
  18. numba_cuda/numba/cuda/cudadrv/__init__.py +9 -0
  19. numba_cuda/numba/cuda/cudadrv/devicearray.py +904 -0
  20. numba_cuda/numba/cuda/cudadrv/devices.py +248 -0
  21. numba_cuda/numba/cuda/cudadrv/driver.py +3201 -0
  22. numba_cuda/numba/cuda/cudadrv/drvapi.py +398 -0
  23. numba_cuda/numba/cuda/cudadrv/dummyarray.py +452 -0
  24. numba_cuda/numba/cuda/cudadrv/enums.py +607 -0
  25. numba_cuda/numba/cuda/cudadrv/error.py +36 -0
  26. numba_cuda/numba/cuda/cudadrv/libs.py +176 -0
  27. numba_cuda/numba/cuda/cudadrv/ndarray.py +20 -0
  28. numba_cuda/numba/cuda/cudadrv/nvrtc.py +260 -0
  29. numba_cuda/numba/cuda/cudadrv/nvvm.py +707 -0
  30. numba_cuda/numba/cuda/cudadrv/rtapi.py +10 -0
  31. numba_cuda/numba/cuda/cudadrv/runtime.py +142 -0
  32. numba_cuda/numba/cuda/cudaimpl.py +1055 -0
  33. numba_cuda/numba/cuda/cudamath.py +140 -0
  34. numba_cuda/numba/cuda/decorators.py +189 -0
  35. numba_cuda/numba/cuda/descriptor.py +33 -0
  36. numba_cuda/numba/cuda/device_init.py +89 -0
  37. numba_cuda/numba/cuda/deviceufunc.py +908 -0
  38. numba_cuda/numba/cuda/dispatcher.py +1057 -0
  39. numba_cuda/numba/cuda/errors.py +59 -0
  40. numba_cuda/numba/cuda/extending.py +7 -0
  41. numba_cuda/numba/cuda/initialize.py +13 -0
  42. numba_cuda/numba/cuda/intrinsic_wrapper.py +77 -0
  43. numba_cuda/numba/cuda/intrinsics.py +198 -0
  44. numba_cuda/numba/cuda/kernels/__init__.py +0 -0
  45. numba_cuda/numba/cuda/kernels/reduction.py +262 -0
  46. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  47. numba_cuda/numba/cuda/libdevice.py +3382 -0
  48. numba_cuda/numba/cuda/libdevicedecl.py +17 -0
  49. numba_cuda/numba/cuda/libdevicefuncs.py +1057 -0
  50. numba_cuda/numba/cuda/libdeviceimpl.py +83 -0
  51. numba_cuda/numba/cuda/mathimpl.py +448 -0
  52. numba_cuda/numba/cuda/models.py +48 -0
  53. numba_cuda/numba/cuda/nvvmutils.py +235 -0
  54. numba_cuda/numba/cuda/printimpl.py +86 -0
  55. numba_cuda/numba/cuda/random.py +292 -0
  56. numba_cuda/numba/cuda/simulator/__init__.py +38 -0
  57. numba_cuda/numba/cuda/simulator/api.py +110 -0
  58. numba_cuda/numba/cuda/simulator/compiler.py +9 -0
  59. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +2 -0
  60. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +432 -0
  61. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +117 -0
  62. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +62 -0
  63. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +4 -0
  64. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +4 -0
  65. numba_cuda/numba/cuda/simulator/cudadrv/error.py +6 -0
  66. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +2 -0
  67. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +29 -0
  68. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +19 -0
  69. numba_cuda/numba/cuda/simulator/kernel.py +308 -0
  70. numba_cuda/numba/cuda/simulator/kernelapi.py +495 -0
  71. numba_cuda/numba/cuda/simulator/reduction.py +15 -0
  72. numba_cuda/numba/cuda/simulator/vector_types.py +58 -0
  73. numba_cuda/numba/cuda/simulator_init.py +17 -0
  74. numba_cuda/numba/cuda/stubs.py +902 -0
  75. numba_cuda/numba/cuda/target.py +440 -0
  76. numba_cuda/numba/cuda/testing.py +202 -0
  77. numba_cuda/numba/cuda/tests/__init__.py +58 -0
  78. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +8 -0
  79. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +145 -0
  80. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +145 -0
  81. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +375 -0
  82. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +21 -0
  83. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +179 -0
  84. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +235 -0
  85. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +22 -0
  86. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +193 -0
  87. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +547 -0
  88. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +249 -0
  89. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +81 -0
  90. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +192 -0
  91. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +38 -0
  92. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +65 -0
  93. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +139 -0
  94. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +37 -0
  95. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +12 -0
  96. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +317 -0
  97. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +127 -0
  98. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +54 -0
  99. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +199 -0
  100. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +37 -0
  101. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +20 -0
  102. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +149 -0
  103. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +36 -0
  104. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +85 -0
  105. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +41 -0
  106. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +122 -0
  107. numba_cuda/numba/cuda/tests/cudapy/__init__.py +8 -0
  108. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +234 -0
  109. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +41 -0
  110. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +58 -0
  111. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +30 -0
  112. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +100 -0
  113. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +42 -0
  114. numba_cuda/numba/cuda/tests/cudapy/test_array.py +260 -0
  115. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +201 -0
  116. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +35 -0
  117. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1620 -0
  118. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +120 -0
  119. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +24 -0
  120. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +545 -0
  121. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +257 -0
  122. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +33 -0
  123. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +276 -0
  124. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +296 -0
  125. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +20 -0
  126. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +129 -0
  127. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +176 -0
  128. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +147 -0
  129. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +435 -0
  130. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +90 -0
  131. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +94 -0
  132. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +101 -0
  133. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +221 -0
  134. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +222 -0
  135. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +700 -0
  136. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +121 -0
  137. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +79 -0
  138. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +174 -0
  139. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +155 -0
  140. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +244 -0
  141. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +52 -0
  142. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +29 -0
  143. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +66 -0
  144. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +60 -0
  145. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +456 -0
  146. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +159 -0
  147. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +95 -0
  148. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +37 -0
  149. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +165 -0
  150. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1106 -0
  151. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +318 -0
  152. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +99 -0
  153. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +64 -0
  154. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +119 -0
  155. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +187 -0
  156. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +199 -0
  157. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +164 -0
  158. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +37 -0
  159. numba_cuda/numba/cuda/tests/cudapy/test_math.py +786 -0
  160. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +74 -0
  161. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +113 -0
  162. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +22 -0
  163. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +140 -0
  164. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +46 -0
  165. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +101 -0
  166. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +49 -0
  167. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +401 -0
  168. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +86 -0
  169. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +335 -0
  170. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +124 -0
  171. numba_cuda/numba/cuda/tests/cudapy/test_print.py +128 -0
  172. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +33 -0
  173. numba_cuda/numba/cuda/tests/cudapy/test_random.py +104 -0
  174. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +610 -0
  175. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +125 -0
  176. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +76 -0
  177. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  178. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +85 -0
  179. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +37 -0
  180. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +444 -0
  181. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +205 -0
  182. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +271 -0
  183. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +80 -0
  184. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +277 -0
  185. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +47 -0
  186. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +307 -0
  187. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +283 -0
  188. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +20 -0
  189. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +69 -0
  190. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +36 -0
  191. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +37 -0
  192. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +139 -0
  193. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +276 -0
  194. numba_cuda/numba/cuda/tests/cudasim/__init__.py +6 -0
  195. numba_cuda/numba/cuda/tests/cudasim/support.py +6 -0
  196. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +102 -0
  197. numba_cuda/numba/cuda/tests/data/__init__.py +0 -0
  198. numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
  199. numba_cuda/numba/cuda/tests/data/error.cu +7 -0
  200. numba_cuda/numba/cuda/tests/data/jitlink.cu +23 -0
  201. numba_cuda/numba/cuda/tests/data/jitlink.ptx +51 -0
  202. numba_cuda/numba/cuda/tests/data/warn.cu +7 -0
  203. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +6 -0
  204. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +0 -0
  205. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +49 -0
  206. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +77 -0
  207. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +76 -0
  208. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +82 -0
  209. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +155 -0
  210. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +173 -0
  211. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +109 -0
  212. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +59 -0
  213. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +76 -0
  214. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +130 -0
  215. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +50 -0
  216. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +73 -0
  217. numba_cuda/numba/cuda/tests/nocuda/__init__.py +8 -0
  218. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +359 -0
  219. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +36 -0
  220. numba_cuda/numba/cuda/tests/nocuda/test_import.py +49 -0
  221. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +238 -0
  222. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +54 -0
  223. numba_cuda/numba/cuda/types.py +37 -0
  224. numba_cuda/numba/cuda/ufuncs.py +662 -0
  225. numba_cuda/numba/cuda/vector_types.py +209 -0
  226. numba_cuda/numba/cuda/vectorizers.py +252 -0
  227. numba_cuda-0.0.13.dist-info/LICENSE +25 -0
  228. numba_cuda-0.0.13.dist-info/METADATA +69 -0
  229. numba_cuda-0.0.13.dist-info/RECORD +231 -0
  230. {numba_cuda-0.0.1.dist-info → numba_cuda-0.0.13.dist-info}/WHEEL +1 -1
  231. numba_cuda-0.0.1.dist-info/METADATA +0 -10
  232. numba_cuda-0.0.1.dist-info/RECORD +0 -5
  233. {numba_cuda-0.0.1.dist-info → numba_cuda-0.0.13.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,235 @@
1
+ import itertools
2
+ from llvmlite import ir
3
+ from numba.core import cgutils, targetconfig
4
+ from .cudadrv import nvvm
5
+
6
+
7
def declare_atomic_cas_int(lmod, isize):
    """Return the ``___numba_atomic_i<isize>_cas_hack`` function of ``lmod``.

    The function is typed as ``iN (iN*, iN, iN)`` where ``N`` is ``isize``.
    It is fetched from the module, or inserted there if it is not present
    yet (see ``cgutils.get_or_insert_function``).
    """
    symbol = ''.join(('___numba_atomic_i', str(isize), '_cas_hack'))
    int_ty = ir.IntType(isize)
    signature = ir.FunctionType(int_ty,
                                (ir.PointerType(int_ty), int_ty, int_ty))
    return cgutils.get_or_insert_function(lmod, signature, symbol)
14
+
15
+
16
def atomic_cmpxchg(builder, lmod, isize, ptr, cmp, val):
    """Emit a ``cmpxchg`` instruction and return element 0 of its result.

    ``'monotonic'`` is passed for both ordering arguments of
    ``builder.cmpxchg``.  ``lmod`` and ``isize`` are accepted but are not
    used by this function.
    """
    cas_result = builder.cmpxchg(ptr, cmp, val, 'monotonic', 'monotonic')
    return builder.extract_value(cas_result, 0)
19
+
20
+
21
def declare_atomic_add_float32(lmod):
    """Return ``llvm.nvvm.atomic.load.add.f32.p0f32`` from ``lmod``.

    The function is typed as ``float (float*, float)`` with the pointer in
    address space 0, and is inserted into the module if not present yet.
    """
    f32 = ir.FloatType()
    signature = ir.FunctionType(f32, (ir.PointerType(f32, 0), f32))
    return cgutils.get_or_insert_function(
        lmod, signature, 'llvm.nvvm.atomic.load.add.f32.p0f32')
26
+
27
+
28
def declare_atomic_add_float64(lmod):
    """Return the float64 atomic-add function of ``lmod``.

    The symbol depends on the compute capability found on top of the
    target ``ConfigStack``: ``llvm.nvvm.atomic.load.add.f64.p0f64`` is used
    for (6, 0) and above, ``___numba_atomic_double_add`` otherwise.  Either
    way the type is ``double (double*, double)``.
    """
    compute_capability = targetconfig.ConfigStack().top().compute_capability
    symbol = ('llvm.nvvm.atomic.load.add.f64.p0f64'
              if compute_capability >= (6, 0)
              else '___numba_atomic_double_add')
    f64 = ir.DoubleType()
    signature = ir.FunctionType(f64, (ir.PointerType(f64), f64))
    return cgutils.get_or_insert_function(lmod, signature, symbol)
37
+
38
+
39
def declare_atomic_sub_float32(lmod):
    """Return ``___numba_atomic_float_sub`` from ``lmod``.

    Typed as ``float (float*, float)``; inserted if not present yet.
    """
    f32 = ir.FloatType()
    signature = ir.FunctionType(f32, (ir.PointerType(f32), f32))
    return cgutils.get_or_insert_function(
        lmod, signature, '___numba_atomic_float_sub')
44
+
45
+
46
def declare_atomic_sub_float64(lmod):
    """Return ``___numba_atomic_double_sub`` from ``lmod``.

    Typed as ``double (double*, double)``; inserted if not present yet.
    """
    f64 = ir.DoubleType()
    signature = ir.FunctionType(f64, (ir.PointerType(f64), f64))
    return cgutils.get_or_insert_function(
        lmod, signature, '___numba_atomic_double_sub')
51
+
52
+
53
def declare_atomic_inc_int32(lmod):
    """Return ``llvm.nvvm.atomic.load.inc.32.p0i32`` from ``lmod``.

    Typed as ``i32 (i32*, i32)``; inserted if not present yet.
    """
    i32 = ir.IntType(32)
    signature = ir.FunctionType(i32, (ir.PointerType(i32), i32))
    return cgutils.get_or_insert_function(
        lmod, signature, 'llvm.nvvm.atomic.load.inc.32.p0i32')
58
+
59
+
60
def declare_atomic_inc_int64(lmod):
    """Return ``___numba_atomic_u64_inc`` from ``lmod``.

    Typed as ``i64 (i64*, i64)``; inserted if not present yet.
    """
    i64 = ir.IntType(64)
    signature = ir.FunctionType(i64, (ir.PointerType(i64), i64))
    return cgutils.get_or_insert_function(
        lmod, signature, '___numba_atomic_u64_inc')
65
+
66
+
67
def declare_atomic_dec_int32(lmod):
    """Return ``llvm.nvvm.atomic.load.dec.32.p0i32`` from ``lmod``.

    Typed as ``i32 (i32*, i32)``; inserted if not present yet.
    """
    i32 = ir.IntType(32)
    signature = ir.FunctionType(i32, (ir.PointerType(i32), i32))
    return cgutils.get_or_insert_function(
        lmod, signature, 'llvm.nvvm.atomic.load.dec.32.p0i32')
72
+
73
+
74
def declare_atomic_dec_int64(lmod):
    """Return ``___numba_atomic_u64_dec`` from ``lmod``.

    Typed as ``i64 (i64*, i64)``; inserted if not present yet.
    """
    i64 = ir.IntType(64)
    signature = ir.FunctionType(i64, (ir.PointerType(i64), i64))
    return cgutils.get_or_insert_function(
        lmod, signature, '___numba_atomic_u64_dec')
79
+
80
+
81
def declare_atomic_max_float32(lmod):
    """Return ``___numba_atomic_float_max`` from ``lmod``.

    Typed as ``float (float*, float)``; inserted if not present yet.
    """
    f32 = ir.FloatType()
    signature = ir.FunctionType(f32, (ir.PointerType(f32), f32))
    return cgutils.get_or_insert_function(
        lmod, signature, '___numba_atomic_float_max')
86
+
87
+
88
def declare_atomic_max_float64(lmod):
    """Return ``___numba_atomic_double_max`` from ``lmod``.

    Typed as ``double (double*, double)``; inserted if not present yet.
    """
    f64 = ir.DoubleType()
    signature = ir.FunctionType(f64, (ir.PointerType(f64), f64))
    return cgutils.get_or_insert_function(
        lmod, signature, '___numba_atomic_double_max')
93
+
94
+
95
def declare_atomic_min_float32(lmod):
    """Return ``___numba_atomic_float_min`` from ``lmod``.

    Typed as ``float (float*, float)``; inserted if not present yet.
    """
    f32 = ir.FloatType()
    signature = ir.FunctionType(f32, (ir.PointerType(f32), f32))
    return cgutils.get_or_insert_function(
        lmod, signature, '___numba_atomic_float_min')
100
+
101
+
102
def declare_atomic_min_float64(lmod):
    """Return ``___numba_atomic_double_min`` from ``lmod``.

    Typed as ``double (double*, double)``; inserted if not present yet.
    """
    f64 = ir.DoubleType()
    signature = ir.FunctionType(f64, (ir.PointerType(f64), f64))
    return cgutils.get_or_insert_function(
        lmod, signature, '___numba_atomic_double_min')
107
+
108
+
109
def declare_atomic_nanmax_float32(lmod):
    """Return ``___numba_atomic_float_nanmax`` from ``lmod``.

    Typed as ``float (float*, float)``; inserted if not present yet.
    """
    f32 = ir.FloatType()
    signature = ir.FunctionType(f32, (ir.PointerType(f32), f32))
    return cgutils.get_or_insert_function(
        lmod, signature, '___numba_atomic_float_nanmax')
114
+
115
+
116
def declare_atomic_nanmax_float64(lmod):
    """Return ``___numba_atomic_double_nanmax`` from ``lmod``.

    Typed as ``double (double*, double)``; inserted if not present yet.
    """
    f64 = ir.DoubleType()
    signature = ir.FunctionType(f64, (ir.PointerType(f64), f64))
    return cgutils.get_or_insert_function(
        lmod, signature, '___numba_atomic_double_nanmax')
121
+
122
+
123
def declare_atomic_nanmin_float32(lmod):
    """Return ``___numba_atomic_float_nanmin`` from ``lmod``.

    Typed as ``float (float*, float)``; inserted if not present yet.
    """
    f32 = ir.FloatType()
    signature = ir.FunctionType(f32, (ir.PointerType(f32), f32))
    return cgutils.get_or_insert_function(
        lmod, signature, '___numba_atomic_float_nanmin')
128
+
129
+
130
def declare_atomic_nanmin_float64(lmod):
    """Return ``___numba_atomic_double_nanmin`` from ``lmod``.

    Typed as ``double (double*, double)``; inserted if not present yet.
    """
    f64 = ir.DoubleType()
    signature = ir.FunctionType(f64, (ir.PointerType(f64), f64))
    return cgutils.get_or_insert_function(
        lmod, signature, '___numba_atomic_double_nanmin')
135
+
136
+
137
def declare_cudaCGGetIntrinsicHandle(lmod):
    """Return ``cudaCGGetIntrinsicHandle`` from ``lmod``.

    Typed as ``i64 (i32)``; inserted if not present yet.
    """
    signature = ir.FunctionType(ir.IntType(64), (ir.IntType(32),))
    return cgutils.get_or_insert_function(
        lmod, signature, 'cudaCGGetIntrinsicHandle')
142
+
143
+
144
def declare_cudaCGSynchronize(lmod):
    """Return ``cudaCGSynchronize`` from ``lmod``.

    Typed as ``i32 (i64, i32)``; inserted if not present yet.
    """
    arg_types = (ir.IntType(64), ir.IntType(32))
    signature = ir.FunctionType(ir.IntType(32), arg_types)
    return cgutils.get_or_insert_function(
        lmod, signature, 'cudaCGSynchronize')
149
+
150
+
151
def declare_string(builder, value):
    """Store ``value`` as a global constant and return an ``i8*`` to it.

    ``value`` is UTF-8 encoded and NUL-terminated, then placed in a global
    named ``_str`` in ``nvvm.ADDRSPACE_CONSTANT`` with internal linkage.
    The global is cast with ``addrspacecast`` to an ``i8`` pointer (built
    with ``ir.PointerType``'s default address space) and that cast is
    returned.
    """
    module = builder.basic_block.function.module
    payload = cgutils.make_bytearray(value.encode("utf-8") + b"\x00")

    str_global = cgutils.add_global_variable(
        module, payload.type, name="_str",
        addrspace=nvvm.ADDRSPACE_CONSTANT)
    str_global.linkage = 'internal'
    str_global.global_constant = True
    str_global.initializer = payload

    i8_ptr = ir.PointerType(ir.IntType(8))
    return builder.addrspacecast(str_global, i8_ptr, 'generic')
161
+
162
+
163
def declare_vprint(lmod):
    """Return ``vprintf`` from ``lmod``, typed as ``i32 (i8*, i8*)``.

    The function is inserted into the module if not present yet.
    """
    i8_ptr = ir.PointerType(ir.IntType(8))
    # NOTE: the second argument to vprintf() points to the variable-length
    # array of arguments (after the format)
    signature = ir.FunctionType(ir.IntType(32), [i8_ptr, i8_ptr])
    return cgutils.get_or_insert_function(lmod, signature, "vprintf")
170
+
171
+
172
+ # -----------------------------------------------------------------------------
173
+
174
# Maps each short special-register name to the name of the NVVM intrinsic
# that reads it; every intrinsic name is the short name behind a common
# 'llvm.nvvm.read.ptx.sreg.' prefix.
SREG_MAPPING = {
    sreg: 'llvm.nvvm.read.ptx.sreg.' + sreg
    for sreg in (
        'tid.x', 'tid.y', 'tid.z',
        'ntid.x', 'ntid.y', 'ntid.z',
        'ctaid.x', 'ctaid.y', 'ctaid.z',
        'nctaid.x', 'nctaid.y', 'nctaid.z',
        'warpsize',
        'laneid',
    )
}
194
+
195
+
196
def call_sreg(builder, name):
    """Emit a call that reads the special register ``name``.

    ``name`` must be a key of ``SREG_MAPPING`` (a ``KeyError`` is raised
    otherwise).  The mapped intrinsic is fetched from, or inserted into,
    ``builder.module`` as a zero-argument function returning ``i32``, and
    the call instruction is returned.
    """
    intrinsic_ty = ir.FunctionType(ir.IntType(32), ())
    intrinsic = cgutils.get_or_insert_function(
        builder.module, intrinsic_ty, SREG_MAPPING[name])
    return builder.call(intrinsic, ())
201
+
202
+
203
class SRegBuilder:
    """Convenience wrapper emitting special-register reads on one builder.

    Each accessor takes the axis suffix ``xyz`` (formatted into the register
    name, e.g. ``'x'`` gives ``'tid.x'``) and returns the value produced by
    ``call_sreg``.
    """

    def __init__(self, builder):
        # IR builder on which all register reads are emitted
        self.builder = builder

    def tid(self, xyz):
        """Read the ``tid.<xyz>`` register."""
        return call_sreg(self.builder, 'tid.%s' % xyz)

    def ctaid(self, xyz):
        """Read the ``ctaid.<xyz>`` register."""
        return call_sreg(self.builder, 'ctaid.%s' % xyz)

    def ntid(self, xyz):
        """Read the ``ntid.<xyz>`` register."""
        return call_sreg(self.builder, 'ntid.%s' % xyz)

    def nctaid(self, xyz):
        """Read the ``nctaid.<xyz>`` register."""
        return call_sreg(self.builder, 'nctaid.%s' % xyz)

    def getdim(self, xyz):
        """Return ``ntid * ctaid + tid`` for axis ``xyz`` as a 64-bit value.

        The three 32-bit register values are sign-extended to ``i64``
        before the multiply and add.
        """
        builder = self.builder
        i64 = ir.IntType(64)
        thread_idx = builder.sext(self.tid(xyz), i64)
        block_dim = builder.sext(self.ntid(xyz), i64)
        # This is the ctaid register, not nctaid
        block_idx = builder.sext(self.ctaid(xyz), i64)
        block_offset = builder.mul(block_dim, block_idx)
        return builder.add(block_offset, thread_idx)
226
+
227
+
228
def get_global_id(builder, dim):
    """Return ``SRegBuilder.getdim`` values for the first ``dim`` axes.

    Axes are taken in the order x, y, z and only the requested ones are
    emitted.  For ``dim == 1`` the single value is returned directly;
    otherwise a list of values is returned.
    """
    sreg = SRegBuilder(builder)
    ids = [sreg.getdim(axis) for axis in itertools.islice('xyz', dim)]
    return ids[0] if dim == 1 else ids
@@ -0,0 +1,86 @@
1
+ from functools import singledispatch
2
+ from llvmlite import ir
3
+ from numba.core import types, cgutils
4
+ from numba.core.errors import NumbaWarning
5
+ from numba.core.imputils import Registry
6
+ from numba.cuda import nvvmutils
7
+ from warnings import warn
8
+
9
# Registry for the lowering implementations defined in this module;
# ``lower`` is its decorator for registering them.
registry = Registry()
lower = registry.lower

# Pointer-to-i8 type used when passing untyped pointers
voidptr = ir.IntType(8).as_pointer()
13
+
14
+
15
+ # NOTE: we don't use @lower here since print_item() doesn't return an LLVM value
16
+
17
@singledispatch
def print_item(ty, context, builder, val):
    """
    Fallback for Numba types that have no registered printing handler.

    Registered handlers return a (format string, [list of arguments]) pair
    from which the final printf()-like call is assembled.  This default
    implementation always raises NotImplementedError naming ``ty``.
    """
    message = "printing unimplemented for values of type %s" % (ty,)
    raise NotImplementedError(message)
26
+
27
+
28
@print_item.register(types.Integer)
@print_item.register(types.IntegerLiteral)
def int_print_impl(ty, context, builder, val):
    """Printing handler for integer and integer-literal types.

    Types found in ``types.unsigned_domain`` are cast to ``uint64`` and
    printed with ``%llu``; all others are cast to ``int64`` and printed
    with ``%lld``.  Returns ``(format, [cast value])``.
    """
    is_unsigned = ty in types.unsigned_domain
    rawfmt, dsttype = (("%llu", types.uint64) if is_unsigned
                       else ("%lld", types.int64))
    widened = context.cast(builder, val, ty, dsttype)
    return rawfmt, [widened]
39
+
40
+
41
@print_item.register(types.Float)
def real_print_impl(ty, context, builder, val):
    """Printing handler for float types.

    The value is cast to ``float64`` and printed with ``%f``.  Returns
    ``(format, [cast value])``.
    """
    as_double = context.cast(builder, val, ty, types.float64)
    return "%f", [as_double]
45
+
46
+
47
@print_item.register(types.StringLiteral)
def const_print_impl(ty, context, builder, sigval):
    """Printing handler for string literals.

    The literal text is taken from ``ty.literal_value`` (``sigval`` is not
    used), inserted with ``context.insert_string_const_addrspace`` and
    printed with ``%s``.  Returns ``(format, [string constant])``.
    """
    literal = ty.literal_value
    assert isinstance(literal, str)  # Ensured by lowering
    str_const = context.insert_string_const_addrspace(builder, literal)
    return "%s", [str_const]
54
+
55
+
56
@lower(print, types.VarArg(types.Any))
def print_varargs(context, builder, sig, args):
    """Lower a ``print()`` call with arbitrary arguments to ``vprintf``.

    Every argument is dispatched to ``print_item`` according to its type in
    ``sig.args``.  The returned format fragments are joined with single
    spaces and terminated with a newline; the returned values are packed
    into an anonymous struct whose address is passed to ``vprintf`` as its
    second argument.

    When more than 32 arguments are given, a ``NumbaWarning`` is issued
    during lowering and every ``%`` in the format is escaped, so the kernel
    emits the raw format string instead of the values.

    Returns the context's dummy value.
    """
    # Declared once up front and reused for the call below.
    vprint = nvvmutils.declare_vprint(builder.module)

    formats = []
    values = []
    for argtype, argval in zip(sig.args, args):
        argfmt, argvals = print_item(argtype, context, builder, argval)
        formats.append(argfmt)
        values.extend(argvals)

    rawfmt = " ".join(formats) + "\n"
    if len(args) > 32:
        msg = ('CUDA print() cannot print more than 32 items. '
               'The raw format string will be emitted by the kernel instead.')
        warn(msg, NumbaWarning)

        # Escape the conversions so the format text is printed verbatim
        rawfmt = rawfmt.replace('%', '%%')

    fmt = context.insert_string_const_addrspace(builder, rawfmt)
    array = cgutils.make_anonymous_struct(builder, values)
    arrayptr = cgutils.alloca_once_value(builder, array)

    builder.call(vprint, (fmt, builder.bitcast(arrayptr, voidptr)))

    return context.get_dummy_value()
@@ -0,0 +1,292 @@
1
+ import math
2
+
3
+ from numba import (config, cuda, float32, float64, uint32, int64, uint64,
4
+ from_dtype, jit)
5
+
6
+ import numpy as np
7
+
8
+ # This implementation is based upon the xoroshiro128+ and splitmix64 algorithms
9
+ # described at:
10
+ #
11
+ # http://xoroshiro.di.unimi.it/
12
+ #
13
+ # and originally implemented by David Blackman and Sebastiano Vigna.
14
+ #
15
+ # The implementations below are based on the C source code:
16
+ #
17
+ # * http://xoroshiro.di.unimi.it/xoroshiro128plus.c
18
+ # * http://xoroshiro.di.unimi.it/splitmix64.c
19
+ #
20
+ # Splitmix64 is used to generate the initial state of the xoroshiro128+
21
+ # generator to ensure that small seeds don't result in predictable output.
22
+
23
+ # **WARNING**: There is a lot of verbose casting in this file to ensure that
24
+ # NumPy casting conventions (which cast uint64 [op] int32 to float64) don't
25
+ # turn integers into floats when using these functions in the CUDA simulator.
26
+ #
27
+ # There are also no function type signatures to ensure that compilation is
28
+ # deferred so that import is quick, and Sphinx autodoc works. We are also
29
+ # using the CPU @jit decorator everywhere to create functions that work as
30
+ # both CPU and CUDA device functions.
31
+
32
# One generator state is an aligned record of two uint64 fields, 's0' and
# 's1'; xoroshiro128p_type is the Numba type derived from that dtype.
xoroshiro128p_dtype = np.dtype(
    [(field, np.uint64) for field in ('s0', 's1')],
    align=True,
)
xoroshiro128p_type = from_dtype(xoroshiro128p_dtype)
35
+
36
+ # When cudasim is enabled, Fake CUDA arrays are passed to some of the
37
+ # @jit-decorated functions. This required fallback to object mode. With
38
+ # Numba 0.59.0 object mode must be explicitly enabled.
39
+ # https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit
40
+ # In order to avoid the warning / future error, we explicitly specify that
41
+ # object mode with loop lifting is acceptable when using the simulator.
42
# Object mode and loop lifting are requested only under the simulator;
# nopython mode is requested otherwise.
_forceobj = config.ENABLE_CUDASIM
_looplift = _forceobj
_nopython = not _forceobj
44
+
45
+
46
@jit(forceobj=_forceobj, looplift=_looplift, nopython=_nopython)
def init_xoroshiro128p_state(states, index, seed):
    '''Initialise ``states[index]`` from a 64-bit seed using SplitMix64.

    Mixing the seed first ensures that manually chosen small seeds don't
    give a predictable initial sequence from the random number generator.
    Both state words, ``s0`` and ``s1``, are set to the same mixed value.

    :type states: 1D array, dtype=xoroshiro128p_dtype
    :param states: array of RNG states
    :type index: integer (converted to int64)
    :param index: offset in states to update
    :type seed: integer (converted to uint64)
    :param seed: seed value to use when initializing state
    '''
    slot = int64(index)

    mixed = uint64(seed) + uint64(0x9E3779B97F4A7C15)
    mixed = (mixed ^ (mixed >> uint32(30))) * uint64(0xBF58476D1CE4E5B9)
    mixed = (mixed ^ (mixed >> uint32(27))) * uint64(0x94D049BB133111EB)
    mixed = mixed ^ (mixed >> uint32(31))

    states[slot]['s0'] = mixed
    states[slot]['s1'] = mixed
70
+
71
+
72
@jit(forceobj=_forceobj, looplift=_looplift, nopython=_nopython)
def rotl(x, k):
    '''Rotate the 64-bit value x left by k bits.

    x is converted to uint64 and k to uint32 before shifting.
    '''
    value = uint64(x)
    shift = uint32(k)
    high_part = value << shift
    low_part = value >> uint32(64 - shift)
    return high_part | low_part
78
+
79
+
80
@jit(forceobj=_forceobj, looplift=_looplift, nopython=_nopython)
def xoroshiro128p_next(states, index):
    '''Advance the RNG in ``states[index]`` and return the next uint64.

    The returned value is the sum of the two state words as they were
    before the state was advanced.

    :type states: 1D array, dtype=xoroshiro128p_dtype
    :param states: array of RNG states
    :type index: int64
    :param index: offset in states to update
    :rtype: uint64
    '''
    slot = int64(index)
    word0 = states[slot]['s0']
    word1 = states[slot]['s1']
    output = word0 + word1

    word1 ^= word0
    next_word0 = (uint64(rotl(word0, uint32(55))) ^ word1
                  ^ (word1 << uint32(14)))
    next_word1 = uint64(rotl(word1, uint32(36)))
    states[slot]['s0'] = next_word0
    states[slot]['s1'] = next_word1

    return output
100
+
101
+
102
@jit(forceobj=_forceobj, looplift=_looplift, nopython=_nopython)
def xoroshiro128p_jump(states, index):
    '''Jump the RNG in ``states[index]`` ahead by 2**64 steps.

    For each of the 128 bits of the jump constant the generator is stepped
    once; whenever the bit is set, the state words prior to that step are
    XOR-ed into accumulators, which become the new state at the end.

    :type states: 1D array, dtype=xoroshiro128p_dtype
    :param states: array of RNG states
    :type index: int64
    :param index: offset in states to update
    '''
    slot = int64(index)

    jump = (uint64(0xbeac0467eba5facb), uint64(0xd86b048b86aa9922))

    acc0 = uint64(0)
    acc1 = uint64(0)

    for word in range(2):
        for bit in range(64):
            mask = uint64(1) << uint32(bit)
            if jump[word] & mask:
                acc0 ^= states[slot]['s0']
                acc1 ^= states[slot]['s1']
            # The generator is stepped for every bit, set or not
            xoroshiro128p_next(states, slot)

    states[slot]['s0'] = acc0
    states[slot]['s1'] = acc1
127
+
128
+
129
@jit(forceobj=_forceobj, looplift=_looplift, nopython=_nopython)
def uint64_to_unit_float64(x):
    '''Map a uint64 to a float64 value in the range [0.0, 1.0).

    The top 53 bits of x are kept and scaled by 1 / 2**53.
    '''
    top_bits = uint64(x) >> uint32(11)
    scale = float64(1) / (uint64(1) << uint32(53))
    return top_bits * scale
134
+
135
+
136
@jit(forceobj=_forceobj, looplift=_looplift, nopython=_nopython)
def uint64_to_unit_float32(x):
    '''Map a uint64 to a float32 value in the range [0.0, 1.0).

    The float64 result of ``uint64_to_unit_float64`` is converted to
    float32.
    '''
    as_float64 = uint64_to_unit_float64(uint64(x))
    return float32(as_float64)
141
+
142
+
143
@jit(forceobj=_forceobj, looplift=_looplift, nopython=_nopython)
def xoroshiro128p_uniform_float32(states, index):
    '''Advance ``states[index]`` and return a float32 in range [0.0, 1.0).

    :type states: 1D array, dtype=xoroshiro128p_dtype
    :param states: array of RNG states
    :type index: int64
    :param index: offset in states to update
    :rtype: float32
    '''
    raw_bits = xoroshiro128p_next(states, int64(index))
    return uint64_to_unit_float32(raw_bits)
155
+
156
+
157
@jit(forceobj=_forceobj, looplift=_looplift, nopython=_nopython)
def xoroshiro128p_uniform_float64(states, index):
    '''Advance ``states[index]`` and return a float64 in range [0.0, 1.0).

    :type states: 1D array, dtype=xoroshiro128p_dtype
    :param states: array of RNG states
    :type index: int64
    :param index: offset in states to update
    :rtype: float64
    '''
    raw_bits = xoroshiro128p_next(states, int64(index))
    return uint64_to_unit_float64(raw_bits)
169
+
170
+
171
# 2*pi at each precision, used as the angle scale in the Box-Muller transform.
TWO_PI_FLOAT32 = np.float32(math.tau)
TWO_PI_FLOAT64 = np.float64(math.tau)
173
+
174
+
175
@jit(forceobj=_forceobj, looplift=_looplift, nopython=_nopython)
def xoroshiro128p_normal_float32(states, index):
    '''Draw a normally distributed float32 and advance ``states[index]``.

    The value comes from a Gaussian with mean=0 and sigma=1, produced by the
    Box-Muller transform. Two uniform draws are consumed, so the RNG
    sequence advances by two steps.

    :type states: 1D array, dtype=xoroshiro128p_dtype
    :param states: array of RNG states
    :type index: int64
    :param index: offset in states to update
    :rtype: float32
    '''
    index = int64(index)

    u1 = xoroshiro128p_uniform_float32(states, index)
    u2 = xoroshiro128p_uniform_float32(states, index)

    radius = math.sqrt(-float32(2.0) * math.log(u1))
    angle = TWO_PI_FLOAT32 * u2
    # Only the cos() variate is returned; the matching sin() variate of the
    # Box-Muller pair is discarded.
    return radius * math.cos(angle)
198
+
199
+
200
@jit(forceobj=_forceobj, looplift=_looplift, nopython=_nopython)
def xoroshiro128p_normal_float64(states, index):
    '''Return a normally distributed float64 and advance ``states[index]``.

    The return value is drawn from a Gaussian of mean=0 and sigma=1 using the
    Box-Muller transform.  This advances the RNG sequence by two steps.

    :type states: 1D array, dtype=xoroshiro128p_dtype
    :param states: array of RNG states
    :type index: int64
    :param index: offset in states to update
    :rtype: float64
    '''
    index = int64(index)

    # Draw the uniforms at float64 resolution. Using the float32 uniform
    # here would limit the float64 result to float32 precision.
    u1 = xoroshiro128p_uniform_float64(states, index)
    u2 = xoroshiro128p_uniform_float64(states, index)

    radius = math.sqrt(-float64(2.0) * math.log(u1))
    z0 = radius * math.cos(TWO_PI_FLOAT64 * u2)
    # Only the cos() variate is returned; the matching sin() variate of the
    # Box-Muller pair is discarded.
    return z0
223
+
224
+
225
@jit(forceobj=_forceobj, looplift=_looplift, nopython=_nopython)
def init_xoroshiro128p_states_cpu(states, seed, subsequence_start):
    '''Fill ``states`` on the host with generators spaced 2**64 steps apart.

    ``states[0]`` is seeded from ``seed`` and jumped ``subsequence_start``
    times. Every later entry is a copy of its predecessor jumped once more.
    An empty array is left untouched.
    '''
    count = states.shape[0]
    seed = uint64(seed)
    subsequence_start = uint64(subsequence_start)

    if count < 1:
        return

    init_xoroshiro128p_state(states, 0, seed)

    # Move the first generator to the requested starting subsequence.
    for _ in range(subsequence_start):
        xoroshiro128p_jump(states, 0)

    # Each remaining generator starts from the previous one's state and is
    # then jumped forward 2**64 steps.
    for pos in range(1, count):
        states[pos] = states[pos - 1]
        xoroshiro128p_jump(states, pos)
242
+
243
+
244
def init_xoroshiro128p_states(states, seed, subsequence_start=0, stream=0):
    '''Initialize RNG states on the GPU for parallel generators.

    Each state in the array is set to the start of a subsequence, and
    consecutive subsequences are separated by 2**64 steps in the main
    sequence.  Therefore, as long as no CUDA thread requests more than 2**64
    random numbers, all of the RNG states produced by this function are
    guaranteed to be independent.

    The subsequence_start parameter can be used to advance the first RNG
    state by a multiple of 2**64 steps.

    :type states: 1D DeviceNDArray, dtype=xoroshiro128p_dtype
    :param states: array of RNG states
    :type seed: uint64
    :param seed: starting seed for list of generators
    :type subsequence_start: uint64
    :param subsequence_start: number of 2**64-step jumps applied to the
        first RNG state
    :type stream: CUDA stream
    :param stream: stream passed to the host-to-device copy
    '''
    # The states are computed on the host (much faster than on the GPU) and
    # then transferred to the device array in a single copy.
    host_states = np.empty(states.shape, dtype=xoroshiro128p_dtype)
    init_xoroshiro128p_states_cpu(host_states, seed, subsequence_start)
    states.copy_to_device(host_states, stream=stream)
267
+
268
+
269
def create_xoroshiro128p_states(n, seed, subsequence_start=0, stream=0):
    '''Return a new device array initialized for n random number generators.

    Each state in the array is set to the start of a subsequence, and
    consecutive subsequences are separated by 2**64 steps in the main
    sequence.  Therefore, as long as no CUDA thread requests more than 2**64
    random numbers, all of the RNG states produced by this function are
    guaranteed to be independent.

    The subsequence_start parameter can be used to advance the first RNG
    state by a multiple of 2**64 steps.

    :type n: int
    :param n: number of RNG states to create
    :type seed: uint64
    :param seed: starting seed for list of generators
    :type subsequence_start: uint64
    :param subsequence_start: number of 2**64-step jumps applied to the
        first RNG state
    :type stream: CUDA stream
    :param stream: stream passed to the device allocation and to the state
        initialization
    '''
    device_states = cuda.device_array(
        n, dtype=xoroshiro128p_dtype, stream=stream
    )
    init_xoroshiro128p_states(
        device_states, seed, subsequence_start, stream
    )
    return device_states
@@ -0,0 +1,38 @@
1
+ import sys
2
+
3
+ from .api import *
4
+ from .vector_types import vector_types
5
+ from .reduction import Reduce
6
+ from .cudadrv.devicearray import (device_array, device_array_like, pinned,
7
+ pinned_array, pinned_array_like,
8
+ mapped_array, to_device, auto_device)
9
+ from .cudadrv import devicearray
10
+ from .cudadrv.devices import require_context, gpus
11
+ from .cudadrv.devices import get_context as current_context
12
+ from .cudadrv.runtime import runtime
13
+ from numba.core import config
14
# ``reduce`` is exposed as an alias of the ``Reduce`` class.
reduce = Reduce

# Publish every simulated vector type, under its own name and under each of
# its aliases, as a module-level attribute.
_this_module = sys.modules[__name__]
for name, svty in vector_types.items():
    setattr(_this_module, name, svty)
    for alias in svty.aliases:
        setattr(_this_module, alias, svty)
# Drop the loop temporaries so they do not leak into the module namespace.
del vector_types, name, svty, alias, _this_module
22
+
23
# When the simulator is enabled, user code importing cudadrv (or one of its
# submodules) or the compiler must receive the simulator's version rather
# than the real one, so the entries in sys.modules are replaced.
if config.ENABLE_CUDASIM:
    from numba.cuda.simulator import cudadrv
    sys.modules.update({
        'numba.cuda.cudadrv': cudadrv,
        'numba.cuda.cudadrv.devicearray': cudadrv.devicearray,
        'numba.cuda.cudadrv.devices': cudadrv.devices,
        'numba.cuda.cudadrv.driver': cudadrv.driver,
        'numba.cuda.cudadrv.runtime': cudadrv.runtime,
        'numba.cuda.cudadrv.drvapi': cudadrv.drvapi,
        'numba.cuda.cudadrv.error': cudadrv.error,
        'numba.cuda.cudadrv.nvvm': cudadrv.nvvm,
    })

    # The compiler is imported only after the cudadrv redirection is in
    # place, and is then redirected as well.
    from . import compiler
    sys.modules['numba.cuda.compiler'] = compiler