numba-cuda 0.0.1__tar.gz → 0.0.16__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242)
  1. numba_cuda-0.0.16/LICENSE +25 -0
  2. numba_cuda-0.0.16/PKG-INFO +51 -0
  3. numba_cuda-0.0.16/README.md +36 -0
  4. numba_cuda-0.0.16/numba_cuda/VERSION +1 -0
  5. numba_cuda-0.0.16/numba_cuda/__init__.py +5 -0
  6. numba_cuda-0.0.16/numba_cuda/_version.py +19 -0
  7. numba_cuda-0.0.16/numba_cuda/numba/cuda/__init__.py +22 -0
  8. numba_cuda-0.0.16/numba_cuda/numba/cuda/api.py +526 -0
  9. numba_cuda-0.0.16/numba_cuda/numba/cuda/api_util.py +30 -0
  10. numba_cuda-0.0.16/numba_cuda/numba/cuda/args.py +77 -0
  11. numba_cuda-0.0.16/numba_cuda/numba/cuda/cg.py +62 -0
  12. numba_cuda-0.0.16/numba_cuda/numba/cuda/codegen.py +378 -0
  13. numba_cuda-0.0.16/numba_cuda/numba/cuda/compiler.py +425 -0
  14. numba_cuda-0.0.16/numba_cuda/numba/cuda/cpp_function_wrappers.cu +47 -0
  15. numba_cuda-0.0.16/numba_cuda/numba/cuda/cuda_fp16.h +3631 -0
  16. numba_cuda-0.0.16/numba_cuda/numba/cuda/cuda_fp16.hpp +2465 -0
  17. numba_cuda-0.0.16/numba_cuda/numba/cuda/cuda_paths.py +258 -0
  18. numba_cuda-0.0.16/numba_cuda/numba/cuda/cudadecl.py +806 -0
  19. numba_cuda-0.0.16/numba_cuda/numba/cuda/cudadrv/__init__.py +9 -0
  20. numba_cuda-0.0.16/numba_cuda/numba/cuda/cudadrv/devicearray.py +904 -0
  21. numba_cuda-0.0.16/numba_cuda/numba/cuda/cudadrv/devices.py +248 -0
  22. numba_cuda-0.0.16/numba_cuda/numba/cuda/cudadrv/driver.py +3201 -0
  23. numba_cuda-0.0.16/numba_cuda/numba/cuda/cudadrv/drvapi.py +398 -0
  24. numba_cuda-0.0.16/numba_cuda/numba/cuda/cudadrv/dummyarray.py +452 -0
  25. numba_cuda-0.0.16/numba_cuda/numba/cuda/cudadrv/enums.py +607 -0
  26. numba_cuda-0.0.16/numba_cuda/numba/cuda/cudadrv/error.py +36 -0
  27. numba_cuda-0.0.16/numba_cuda/numba/cuda/cudadrv/libs.py +176 -0
  28. numba_cuda-0.0.16/numba_cuda/numba/cuda/cudadrv/ndarray.py +20 -0
  29. numba_cuda-0.0.16/numba_cuda/numba/cuda/cudadrv/nvrtc.py +260 -0
  30. numba_cuda-0.0.16/numba_cuda/numba/cuda/cudadrv/nvvm.py +707 -0
  31. numba_cuda-0.0.16/numba_cuda/numba/cuda/cudadrv/rtapi.py +10 -0
  32. numba_cuda-0.0.16/numba_cuda/numba/cuda/cudadrv/runtime.py +142 -0
  33. numba_cuda-0.0.16/numba_cuda/numba/cuda/cudaimpl.py +1055 -0
  34. numba_cuda-0.0.16/numba_cuda/numba/cuda/cudamath.py +140 -0
  35. numba_cuda-0.0.16/numba_cuda/numba/cuda/decorators.py +190 -0
  36. numba_cuda-0.0.16/numba_cuda/numba/cuda/descriptor.py +33 -0
  37. numba_cuda-0.0.16/numba_cuda/numba/cuda/device_init.py +89 -0
  38. numba_cuda-0.0.16/numba_cuda/numba/cuda/deviceufunc.py +908 -0
  39. numba_cuda-0.0.16/numba_cuda/numba/cuda/dispatcher.py +1057 -0
  40. numba_cuda-0.0.16/numba_cuda/numba/cuda/errors.py +59 -0
  41. numba_cuda-0.0.16/numba_cuda/numba/cuda/extending.py +7 -0
  42. numba_cuda-0.0.16/numba_cuda/numba/cuda/initialize.py +13 -0
  43. numba_cuda-0.0.16/numba_cuda/numba/cuda/intrinsic_wrapper.py +77 -0
  44. numba_cuda-0.0.16/numba_cuda/numba/cuda/intrinsics.py +198 -0
  45. numba_cuda-0.0.16/numba_cuda/numba/cuda/kernels/reduction.py +262 -0
  46. numba_cuda-0.0.16/numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  47. numba_cuda-0.0.16/numba_cuda/numba/cuda/libdevice.py +3382 -0
  48. numba_cuda-0.0.16/numba_cuda/numba/cuda/libdevicedecl.py +17 -0
  49. numba_cuda-0.0.16/numba_cuda/numba/cuda/libdevicefuncs.py +1057 -0
  50. numba_cuda-0.0.16/numba_cuda/numba/cuda/libdeviceimpl.py +83 -0
  51. numba_cuda-0.0.16/numba_cuda/numba/cuda/mathimpl.py +448 -0
  52. numba_cuda-0.0.16/numba_cuda/numba/cuda/models.py +48 -0
  53. numba_cuda-0.0.16/numba_cuda/numba/cuda/nvvmutils.py +235 -0
  54. numba_cuda-0.0.16/numba_cuda/numba/cuda/printimpl.py +96 -0
  55. numba_cuda-0.0.16/numba_cuda/numba/cuda/random.py +292 -0
  56. numba_cuda-0.0.16/numba_cuda/numba/cuda/simulator/__init__.py +38 -0
  57. numba_cuda-0.0.16/numba_cuda/numba/cuda/simulator/api.py +113 -0
  58. numba_cuda-0.0.16/numba_cuda/numba/cuda/simulator/compiler.py +9 -0
  59. numba_cuda-0.0.16/numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +2 -0
  60. numba_cuda-0.0.16/numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +432 -0
  61. numba_cuda-0.0.16/numba_cuda/numba/cuda/simulator/cudadrv/devices.py +117 -0
  62. numba_cuda-0.0.16/numba_cuda/numba/cuda/simulator/cudadrv/driver.py +62 -0
  63. numba_cuda-0.0.16/numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +4 -0
  64. numba_cuda-0.0.16/numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +4 -0
  65. numba_cuda-0.0.16/numba_cuda/numba/cuda/simulator/cudadrv/error.py +6 -0
  66. numba_cuda-0.0.16/numba_cuda/numba/cuda/simulator/cudadrv/libs.py +2 -0
  67. numba_cuda-0.0.16/numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +29 -0
  68. numba_cuda-0.0.16/numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +19 -0
  69. numba_cuda-0.0.16/numba_cuda/numba/cuda/simulator/kernel.py +308 -0
  70. numba_cuda-0.0.16/numba_cuda/numba/cuda/simulator/kernelapi.py +495 -0
  71. numba_cuda-0.0.16/numba_cuda/numba/cuda/simulator/reduction.py +15 -0
  72. numba_cuda-0.0.16/numba_cuda/numba/cuda/simulator/vector_types.py +58 -0
  73. numba_cuda-0.0.16/numba_cuda/numba/cuda/simulator_init.py +17 -0
  74. numba_cuda-0.0.16/numba_cuda/numba/cuda/stubs.py +902 -0
  75. numba_cuda-0.0.16/numba_cuda/numba/cuda/target.py +440 -0
  76. numba_cuda-0.0.16/numba_cuda/numba/cuda/testing.py +202 -0
  77. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/__init__.py +58 -0
  78. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudadrv/__init__.py +8 -0
  79. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +145 -0
  80. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +145 -0
  81. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +375 -0
  82. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +21 -0
  83. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +179 -0
  84. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +235 -0
  85. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +22 -0
  86. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +193 -0
  87. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +547 -0
  88. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +249 -0
  89. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +81 -0
  90. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +192 -0
  91. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudadrv/test_events.py +38 -0
  92. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +65 -0
  93. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudadrv/test_init.py +139 -0
  94. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +37 -0
  95. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +12 -0
  96. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +317 -0
  97. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +127 -0
  98. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +54 -0
  99. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +201 -0
  100. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +37 -0
  101. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +20 -0
  102. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +149 -0
  103. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +36 -0
  104. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +85 -0
  105. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +41 -0
  106. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +122 -0
  107. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/__init__.py +8 -0
  108. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +234 -0
  109. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +41 -0
  110. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +58 -0
  111. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +30 -0
  112. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +100 -0
  113. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +42 -0
  114. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_array.py +260 -0
  115. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +201 -0
  116. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +35 -0
  117. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1614 -0
  118. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +120 -0
  119. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +24 -0
  120. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_caching.py +545 -0
  121. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_casting.py +257 -0
  122. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +33 -0
  123. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +276 -0
  124. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_complex.py +296 -0
  125. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +20 -0
  126. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +129 -0
  127. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +176 -0
  128. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +147 -0
  129. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +435 -0
  130. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +90 -0
  131. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +94 -0
  132. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_debug.py +101 -0
  133. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +221 -0
  134. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +222 -0
  135. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +700 -0
  136. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_enums.py +121 -0
  137. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_errors.py +79 -0
  138. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_exception.py +174 -0
  139. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_extending.py +155 -0
  140. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +244 -0
  141. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_forall.py +52 -0
  142. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +29 -0
  143. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +66 -0
  144. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_globals.py +60 -0
  145. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +456 -0
  146. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +159 -0
  147. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +95 -0
  148. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +37 -0
  149. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +165 -0
  150. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1106 -0
  151. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +318 -0
  152. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +99 -0
  153. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_lang.py +64 -0
  154. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +119 -0
  155. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +187 -0
  156. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +199 -0
  157. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +164 -0
  158. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +37 -0
  159. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_math.py +786 -0
  160. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +74 -0
  161. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +113 -0
  162. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +22 -0
  163. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +140 -0
  164. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +46 -0
  165. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +101 -0
  166. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +49 -0
  167. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_operator.py +401 -0
  168. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +86 -0
  169. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_overload.py +335 -0
  170. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_powi.py +124 -0
  171. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_print.py +148 -0
  172. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +33 -0
  173. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_random.py +104 -0
  174. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +610 -0
  175. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +125 -0
  176. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +76 -0
  177. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  178. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +85 -0
  179. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +37 -0
  180. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_sm.py +444 -0
  181. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +205 -0
  182. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_sync.py +271 -0
  183. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +80 -0
  184. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +277 -0
  185. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +47 -0
  186. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +307 -0
  187. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +283 -0
  188. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +20 -0
  189. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +69 -0
  190. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +36 -0
  191. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +37 -0
  192. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_warning.py +177 -0
  193. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +276 -0
  194. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudasim/__init__.py +6 -0
  195. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudasim/support.py +6 -0
  196. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +102 -0
  197. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/data/__init__.py +0 -0
  198. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
  199. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/data/error.cu +7 -0
  200. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/data/jitlink.cu +23 -0
  201. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/data/jitlink.ptx +51 -0
  202. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/data/warn.cu +7 -0
  203. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/doc_examples/__init__.py +6 -0
  204. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +0 -0
  205. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +49 -0
  206. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +77 -0
  207. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +76 -0
  208. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +82 -0
  209. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +155 -0
  210. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +173 -0
  211. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +109 -0
  212. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/doc_examples/test_random.py +59 -0
  213. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +76 -0
  214. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +130 -0
  215. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +50 -0
  216. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +73 -0
  217. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/nocuda/__init__.py +8 -0
  218. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +359 -0
  219. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +36 -0
  220. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/nocuda/test_import.py +49 -0
  221. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +238 -0
  222. numba_cuda-0.0.16/numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +54 -0
  223. numba_cuda-0.0.16/numba_cuda/numba/cuda/types.py +37 -0
  224. numba_cuda-0.0.16/numba_cuda/numba/cuda/ufuncs.py +662 -0
  225. numba_cuda-0.0.16/numba_cuda/numba/cuda/vector_types.py +209 -0
  226. numba_cuda-0.0.16/numba_cuda/numba/cuda/vectorizers.py +252 -0
  227. numba_cuda-0.0.16/numba_cuda.egg-info/PKG-INFO +51 -0
  228. numba_cuda-0.0.16/numba_cuda.egg-info/SOURCES.txt +235 -0
  229. numba_cuda-0.0.16/numba_cuda.egg-info/requires.txt +1 -0
  230. numba_cuda-0.0.16/pyproject.toml +39 -0
  231. numba_cuda-0.0.16/setup.py +70 -0
  232. numba_cuda-0.0.16/site-packages/_numba_cuda_redirector.pth +1 -0
  233. numba_cuda-0.0.16/site-packages/_numba_cuda_redirector.py +76 -0
  234. numba_cuda-0.0.1/PKG-INFO +0 -10
  235. numba_cuda-0.0.1/README.md +0 -3
  236. numba_cuda-0.0.1/numba_cuda.egg-info/PKG-INFO +0 -10
  237. numba_cuda-0.0.1/numba_cuda.egg-info/SOURCES.txt +0 -7
  238. numba_cuda-0.0.1/pyproject.toml +0 -8
  239. {numba_cuda-0.0.1/numba_cuda → numba_cuda-0.0.16/numba_cuda/numba/cuda/kernels}/__init__.py +0 -0
  240. {numba_cuda-0.0.1 → numba_cuda-0.0.16}/numba_cuda.egg-info/dependency_links.txt +0 -0
  241. {numba_cuda-0.0.1 → numba_cuda-0.0.16}/numba_cuda.egg-info/top_level.txt +0 -0
  242. {numba_cuda-0.0.1 → numba_cuda-0.0.16}/setup.cfg +0 -0
numba_cuda-0.0.16/LICENSE
@@ -0,0 +1,25 @@
+ Copyright (c) 2012, Anaconda, Inc.
+ Copyright (c) 2024, NVIDIA CORPORATION.
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
numba_cuda-0.0.16/PKG-INFO
@@ -0,0 +1,51 @@
+ Metadata-Version: 2.1
+ Name: numba-cuda
+ Version: 0.0.16
+ Summary: CUDA target for Numba
+ Author: Anaconda Inc., NVIDIA Corporation
+ License: BSD 2-clause
+ Project-URL: Homepage, https://github.com/rapidsai/numba-cuda
+ Project-URL: Documentation, https://github.com/rapidsai/numba-cuda/blob/main/README.md
+ Project-URL: Repository, https://github.com/rapidsai/numba-cuda
+ Project-URL: License, https://github.com/rapidsai/numba-cuda/blob/main/LICENSE
+ Requires-Python: >=3.9
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: numba>=0.59.1
+
+ # Numba CUDA Target
+
+ An out-of-tree CUDA target for Numba.
+
+ This contains an entire copy of Numba's CUDA target (the `numba.cuda` module),
+ and a mechanism to ensure the code from this module (`numba_cuda.numba.cuda`) is
+ used as the `numba.cuda` module instead of the code from the `numba` package.
+
+ This is presently in an early state and is published for testing and feedback.
+
+ ## Building / testing
+
+ Install as an editable install:
+
+ ```
+ pip install -e .
+ ```
+
+ Running tests:
+
+ ```
+ python -m numba.runtests numba.cuda.tests
+ ```
+
+ This should discover the `numba.cuda` module from the `numba_cuda` package. You
+ can check where the `numba.cuda` files are being loaded from by running
+
+ ```
+ python -c "from numba import cuda; print(cuda.__file__)"
+ ```
+
+ which will show a path like:
+
+ ```
+ <path to numba-cuda repo>/numba_cuda/numba/cuda/__init__.py
+ ```
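
The metadata above records the package's single runtime dependency (`numba>=0.59.1`) and the Python 3.9+ floor. For a release consumer, installing from PyPI pulls the dependency in automatically; a minimal example (assuming the release is published under the `Name:` shown above):

```
pip install numba-cuda
```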
numba_cuda-0.0.16/README.md
@@ -0,0 +1,36 @@
+ # Numba CUDA Target
+
+ An out-of-tree CUDA target for Numba.
+
+ This contains an entire copy of Numba's CUDA target (the `numba.cuda` module),
+ and a mechanism to ensure the code from this module (`numba_cuda.numba.cuda`) is
+ used as the `numba.cuda` module instead of the code from the `numba` package.
+
+ This is presently in an early state and is published for testing and feedback.
+
+ ## Building / testing
+
+ Install as an editable install:
+
+ ```
+ pip install -e .
+ ```
+
+ Running tests:
+
+ ```
+ python -m numba.runtests numba.cuda.tests
+ ```
+
+ This should discover the `numba.cuda` module from the `numba_cuda` package. You
+ can check where the `numba.cuda` files are being loaded from by running
+
+ ```
+ python -c "from numba import cuda; print(cuda.__file__)"
+ ```
+
+ which will show a path like:
+
+ ```
+ <path to numba-cuda repo>/numba_cuda/numba/cuda/__init__.py
+ ```
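
The "mechanism" the README refers to lives in the `site-packages/_numba_cuda_redirector.pth` and `_numba_cuda_redirector.py` files listed above. As a rough, hedged illustration only (not the package's actual implementation; `_RedirectorFinder` is an invented name), a `.pth`-driven redirect can be built from a meta path finder:

```
# Sketch: a .pth file in site-packages may hold a single import line that
# runs at interpreter startup; that import can install a meta path finder
# which serves numba.cuda from numba_cuda instead of from numba itself.
import importlib.abc
import importlib.util
import sys


class _RedirectorFinder(importlib.abc.MetaPathFinder):
    """Serve numba.cuda (and its submodules) from the numba_cuda package."""

    def find_spec(self, name, path=None, target=None):
        if name != "numba.cuda" and not name.startswith("numba.cuda."):
            return None
        # Locate the replacement module shipped inside numba_cuda...
        replacement = importlib.util.find_spec("numba_cuda." + name)
        if replacement is None:
            return None
        # ...and rebuild its spec under the *requested* name so the import
        # system registers the module in sys.modules as numba.cuda.
        return importlib.util.spec_from_file_location(
            name,
            replacement.origin,
            submodule_search_locations=replacement.submodule_search_locations,
        )


sys.meta_path.insert(0, _RedirectorFinder())
```

The shipped `_numba_cuda_redirector.py` (+76 lines in the file list) is necessarily more involved than this sketch, but the `.pth`-plus-finder shape is the general pattern.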
numba_cuda-0.0.16/numba_cuda/VERSION
@@ -0,0 +1 @@
+ 0.0.16
numba_cuda-0.0.16/numba_cuda/__init__.py
@@ -0,0 +1,5 @@
+ # Copyright (c) 2024, NVIDIA CORPORATION.
+
+ from numba_cuda._version import __version__
+
+ __all__ = ["__version__"]
numba_cuda-0.0.16/numba_cuda/_version.py
@@ -0,0 +1,19 @@
+ # Copyright (c) 2024, NVIDIA CORPORATION.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import importlib.resources
+
+ __version__ = (
+     importlib.resources.files("numba_cuda").joinpath("VERSION").read_text().strip()
+ )
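
Together with `numba_cuda/__init__.py` above, this makes the packaged `VERSION` file the single source of truth for the version string. Note that `importlib.resources.files()` requires Python 3.9+, matching the `Requires-Python` metadata. A minimal check:

```
import numba_cuda

# __version__ is read from numba_cuda/VERSION at import time
print(numba_cuda.__version__)  # "0.0.16" for this release
```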
numba_cuda-0.0.16/numba_cuda/numba/cuda/__init__.py
@@ -0,0 +1,22 @@
+ from numba import runtests
+ from numba.core import config
+
+ if config.ENABLE_CUDASIM:
+     from .simulator_init import *
+ else:
+     from .device_init import *
+     from .device_init import _auto_device
+
+ from numba.cuda.compiler import (compile, compile_for_current_device,
+                                  compile_ptx, compile_ptx_for_current_device)
+
+ # This is the out-of-tree NVIDIA-maintained target. This is reported in Numba
+ # sysinfo (`numba -s`):
+ implementation = "NVIDIA"
+
+
+ def test(*args, **kwargs):
+     if not is_available():
+         raise cuda_error()
+
+     return runtests.main("numba.cuda.tests", *args, **kwargs)
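
Alongside the simulator/hardware split, this `__init__.py` re-exports the compilation entry points (`compile`, `compile_ptx`, and their `-for_current_device` variants). A hedged sketch of typical `compile_ptx` usage (assumes a CUDA toolkit Numba can locate; the `axpy` kernel is illustrative):

```
from numba import cuda, float32


def axpy(r, a, x, y):
    # One thread per output element.
    i = cuda.grid(1)
    if i < r.size:
        r[i] = a * x[i] + y[i]


sig = (float32[:], float32, float32[:], float32[:])
ptx, resty = cuda.compile_ptx(axpy, sig)  # PTX source and inferred return type
print(ptx.splitlines()[0])
```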
numba_cuda-0.0.16/numba_cuda/numba/cuda/api.py
@@ -0,0 +1,526 @@
+ """
+ APIs that are reported to numba.cuda
+ """
+
+
+ import contextlib
+ import os
+
+ import numpy as np
+
+ from .cudadrv import devicearray, devices, driver
+ from numba.core import config
+ from numba.cuda.api_util import prepare_shape_strides_dtype
+
+ # NDarray device helper
+
+ require_context = devices.require_context
+ current_context = devices.get_context
+ gpus = devices.gpus
+
+
+ @require_context
+ def from_cuda_array_interface(desc, owner=None, sync=True):
+     """Create a DeviceNDArray from a cuda-array-interface description.
+     The ``owner`` is the owner of the underlying memory.
+     The resulting DeviceNDArray will acquire a reference from it.
+
+     If ``sync`` is ``True``, then the imported stream (if present) will be
+     synchronized.
+     """
+     version = desc.get('version')
+     # Mask introduced in version 1
+     if 1 <= version:
+         mask = desc.get('mask')
+         # Would ideally be better to detect if the mask is all valid
+         if mask is not None:
+             raise NotImplementedError('Masked arrays are not supported')
+
+     shape = desc['shape']
+     strides = desc.get('strides')
+     dtype = np.dtype(desc['typestr'])
+
+     shape, strides, dtype = prepare_shape_strides_dtype(
+         shape, strides, dtype, order='C')
+     size = driver.memory_size_from_info(shape, strides, dtype.itemsize)
+
+     devptr = driver.get_devptr_for_active_ctx(desc['data'][0])
+     data = driver.MemoryPointer(
+         current_context(), devptr, size=size, owner=owner)
+     stream_ptr = desc.get('stream', None)
+     if stream_ptr is not None:
+         stream = external_stream(stream_ptr)
+         if sync and config.CUDA_ARRAY_INTERFACE_SYNC:
+             stream.synchronize()
+     else:
+         stream = 0 # No "Numba default stream", not the CUDA default stream
+     da = devicearray.DeviceNDArray(shape=shape, strides=strides,
+                                    dtype=dtype, gpu_data=data,
+                                    stream=stream)
+     return da
+
+
+ def as_cuda_array(obj, sync=True):
+     """Create a DeviceNDArray from any object that implements
+     the :ref:`cuda array interface <cuda-array-interface>`.
+
+     A view of the underlying GPU buffer is created. No copying of the data
+     is done. The resulting DeviceNDArray will acquire a reference from `obj`.
+
+     If ``sync`` is ``True``, then the imported stream (if present) will be
+     synchronized.
+     """
+     if not is_cuda_array(obj):
+         raise TypeError("*obj* doesn't implement the cuda array interface.")
+     else:
+         return from_cuda_array_interface(obj.__cuda_array_interface__,
+                                          owner=obj, sync=sync)
+
+
+ def is_cuda_array(obj):
+     """Test if the object has defined the `__cuda_array_interface__` attribute.
+
+     Does not verify the validity of the interface.
+     """
+     return hasattr(obj, '__cuda_array_interface__')
+
+
+ def is_float16_supported():
+     """Whether 16-bit floats are supported.
+
+     float16 is always supported in current versions of Numba - returns True.
+     """
+     return True
+
+
+ @require_context
+ def to_device(obj, stream=0, copy=True, to=None):
+     """to_device(obj, stream=0, copy=True, to=None)
+
+     Allocate and transfer a numpy ndarray or structured scalar to the device.
+
+     To copy host->device a numpy array::
+
+         ary = np.arange(10)
+         d_ary = cuda.to_device(ary)
+
+     To enqueue the transfer to a stream::
+
+         stream = cuda.stream()
+         d_ary = cuda.to_device(ary, stream=stream)
+
+     The resulting ``d_ary`` is a ``DeviceNDArray``.
+
+     To copy device->host::
+
+         hary = d_ary.copy_to_host()
+
+     To copy device->host to an existing array::
+
+         ary = np.empty(shape=d_ary.shape, dtype=d_ary.dtype)
+         d_ary.copy_to_host(ary)
+
+     To enqueue the transfer to a stream::
+
+         hary = d_ary.copy_to_host(stream=stream)
+     """
+     if to is None:
+         to, new = devicearray.auto_device(obj, stream=stream, copy=copy,
+                                           user_explicit=True)
+         return to
+     if copy:
+         to.copy_to_device(obj, stream=stream)
+     return to
+
+
+ @require_context
+ def device_array(shape, dtype=np.float64, strides=None, order='C', stream=0):
+     """device_array(shape, dtype=np.float64, strides=None, order='C', stream=0)
+
+     Allocate an empty device ndarray. Similar to :meth:`numpy.empty`.
+     """
+     shape, strides, dtype = prepare_shape_strides_dtype(shape, strides, dtype,
+                                                         order)
+     return devicearray.DeviceNDArray(shape=shape, strides=strides, dtype=dtype,
+                                      stream=stream)
+
+
+ @require_context
+ def managed_array(shape, dtype=np.float64, strides=None, order='C', stream=0,
+                   attach_global=True):
+     """managed_array(shape, dtype=np.float64, strides=None, order='C', stream=0,
+                      attach_global=True)
+
+     Allocate a np.ndarray with a buffer that is managed.
+     Similar to np.empty().
+
+     Managed memory is supported on Linux / x86 and PowerPC, and is considered
+     experimental on Windows and Linux / AArch64.
+
+     :param attach_global: A flag indicating whether to attach globally. Global
+                           attachment implies that the memory is accessible from
+                           any stream on any device. If ``False``, attachment is
+                           *host*, and memory is only accessible by devices
+                           with Compute Capability 6.0 and later.
+     """
+     shape, strides, dtype = prepare_shape_strides_dtype(shape, strides, dtype,
+                                                         order)
+     bytesize = driver.memory_size_from_info(shape, strides, dtype.itemsize)
+     buffer = current_context().memallocmanaged(bytesize,
+                                                attach_global=attach_global)
+     npary = np.ndarray(shape=shape, strides=strides, dtype=dtype, order=order,
+                        buffer=buffer)
+     managedview = np.ndarray.view(npary, type=devicearray.ManagedNDArray)
+     managedview.device_setup(buffer, stream=stream)
+     return managedview
+
+
+ @require_context
+ def pinned_array(shape, dtype=np.float64, strides=None, order='C'):
+     """pinned_array(shape, dtype=np.float64, strides=None, order='C')
+
+     Allocate an :class:`ndarray <numpy.ndarray>` with a buffer that is pinned
+     (pagelocked). Similar to :func:`np.empty() <numpy.empty>`.
+     """
+     shape, strides, dtype = prepare_shape_strides_dtype(shape, strides, dtype,
+                                                         order)
+     bytesize = driver.memory_size_from_info(shape, strides,
+                                             dtype.itemsize)
+     buffer = current_context().memhostalloc(bytesize)
+     return np.ndarray(shape=shape, strides=strides, dtype=dtype, order=order,
+                       buffer=buffer)
+
+
+ @require_context
+ def mapped_array(shape, dtype=np.float64, strides=None, order='C', stream=0,
+                  portable=False, wc=False):
+     """mapped_array(shape, dtype=np.float64, strides=None, order='C', stream=0,
+                     portable=False, wc=False)
+
+     Allocate a mapped ndarray with a buffer that is pinned and mapped on
+     to the device. Similar to np.empty().
+
+     :param portable: a boolean flag to allow the allocated device memory to be
+                      usable in multiple devices.
+     :param wc: a boolean flag to enable writecombined allocation which is faster
+                to write by the host and to read by the device, but slower to
+                read by the host and slower to write by the device.
+     """
+     shape, strides, dtype = prepare_shape_strides_dtype(shape, strides, dtype,
+                                                         order)
+     bytesize = driver.memory_size_from_info(shape, strides, dtype.itemsize)
+     buffer = current_context().memhostalloc(bytesize, mapped=True)
+     npary = np.ndarray(shape=shape, strides=strides, dtype=dtype, order=order,
+                        buffer=buffer)
+     mappedview = np.ndarray.view(npary, type=devicearray.MappedNDArray)
+     mappedview.device_setup(buffer, stream=stream)
+     return mappedview
+
+
+ @contextlib.contextmanager
+ @require_context
+ def open_ipc_array(handle, shape, dtype, strides=None, offset=0):
+     """
+     A context manager that opens an IPC *handle* (*CUipcMemHandle*) that is
+     represented as a sequence of bytes (e.g. *bytes*, tuple of int)
+     and represents it as an array of the given *shape*, *strides* and *dtype*.
+     The *strides* can be omitted. In that case, it is assumed to be a 1D
+     C contiguous array.
+
+     Yields a device array.
+
+     The IPC handle is closed automatically when the context manager exits.
+     """
+     dtype = np.dtype(dtype)
+     # compute size
+     size = np.prod(shape) * dtype.itemsize
+     # manually recreate the IPC mem handle
+     if driver.USE_NV_BINDING:
+         driver_handle = driver.binding.CUipcMemHandle()
+         driver_handle.reserved = handle
+     else:
+         driver_handle = driver.drvapi.cu_ipc_mem_handle()
+         driver_handle.reserved[:] = handle
+     # use *IpcHandle* to open the IPC memory
+     ipchandle = driver.IpcHandle(None, driver_handle, size, offset=offset)
+     yield ipchandle.open_array(current_context(), shape=shape,
+                                strides=strides, dtype=dtype)
+     ipchandle.close()
+
+
+ def synchronize():
+     "Synchronize the current context."
+     return current_context().synchronize()
+
+
+ def _contiguous_strides_like_array(ary):
+     """
+     Given an array, compute strides for a new contiguous array of the same
+     shape.
+     """
+     # Don't recompute strides if the default strides will be sufficient to
+     # create a contiguous array.
+     if ary.flags['C_CONTIGUOUS'] or ary.flags['F_CONTIGUOUS'] or ary.ndim <= 1:
+         return None
+
+     # Otherwise, we need to compute new strides using an algorithm adapted from
+     # NumPy v1.17.4's PyArray_NewLikeArrayWithShape in
+     # core/src/multiarray/ctors.c. We permute the strides in ascending order
+     # then compute the stride for the dimensions with the same permutation.
+
+     # Stride permutation. E.g. a stride array (4, -2, 12) becomes
+     # [(1, -2), (0, 4), (2, 12)]
+     strideperm = [ x for x in enumerate(ary.strides) ]
+     strideperm.sort(key=lambda x: x[1])
+
+     # Compute new strides using permutation
+     strides = [0] * len(ary.strides)
+     stride = ary.dtype.itemsize
+     for i_perm, _ in strideperm:
+         strides[i_perm] = stride
+         stride *= ary.shape[i_perm]
+     return tuple(strides)
+
+
+ def _order_like_array(ary):
+     if ary.flags['F_CONTIGUOUS'] and not ary.flags['C_CONTIGUOUS']:
+         return 'F'
+     else:
+         return 'C'
+
+
+ def device_array_like(ary, stream=0):
+     """
+     Call :func:`device_array() <numba.cuda.device_array>` with information from
+     the array.
+     """
+     strides = _contiguous_strides_like_array(ary)
+     order = _order_like_array(ary)
+     return device_array(shape=ary.shape, dtype=ary.dtype, strides=strides,
+                         order=order, stream=stream)
+
+
+ def mapped_array_like(ary, stream=0, portable=False, wc=False):
+     """
+     Call :func:`mapped_array() <numba.cuda.mapped_array>` with the information
+     from the array.
+     """
+     strides = _contiguous_strides_like_array(ary)
+     order = _order_like_array(ary)
+     return mapped_array(shape=ary.shape, dtype=ary.dtype, strides=strides,
+                         order=order, stream=stream, portable=portable, wc=wc)
+
+
+ def pinned_array_like(ary):
+     """
+     Call :func:`pinned_array() <numba.cuda.pinned_array>` with the information
+     from the array.
+     """
+     strides = _contiguous_strides_like_array(ary)
+     order = _order_like_array(ary)
+     return pinned_array(shape=ary.shape, dtype=ary.dtype, strides=strides,
+                         order=order)
+
+
+ # Stream helper
+ @require_context
+ def stream():
+     """
+     Create a CUDA stream that represents a command queue for the device.
+     """
+     return current_context().create_stream()
+
+
+ @require_context
+ def default_stream():
+     """
+     Get the default CUDA stream. CUDA semantics in general are that the default
+     stream is either the legacy default stream or the per-thread default stream
+     depending on which CUDA APIs are in use. In Numba, the APIs for the legacy
+     default stream are always the ones in use, but an option to use APIs for
+     the per-thread default stream may be provided in future.
+     """
+     return current_context().get_default_stream()
+
+
+ @require_context
+ def legacy_default_stream():
+     """
+     Get the legacy default CUDA stream.
+     """
+     return current_context().get_legacy_default_stream()
+
+
+ @require_context
+ def per_thread_default_stream():
+     """
+     Get the per-thread default CUDA stream.
+     """
+     return current_context().get_per_thread_default_stream()
+
+
+ @require_context
+ def external_stream(ptr):
+     """Create a Numba stream object for a stream allocated outside Numba.
+
+     :param ptr: Pointer to the external stream to wrap in a Numba Stream
+     :type ptr: int
+     """
+     return current_context().create_external_stream(ptr)
+
+
+ # Page lock
+ @require_context
+ @contextlib.contextmanager
+ def pinned(*arylist):
+     """A context manager for temporarily pinning a sequence of host ndarrays.
+     """
+     pmlist = []
+     for ary in arylist:
+         pm = current_context().mempin(ary, driver.host_pointer(ary),
+                                       driver.host_memory_size(ary),
+                                       mapped=False)
+         pmlist.append(pm)
+     yield
+
+
+ @require_context
+ @contextlib.contextmanager
+ def mapped(*arylist, **kws):
+     """A context manager for temporarily mapping a sequence of host ndarrays.
+     """
+     assert not kws or 'stream' in kws, "Only accept 'stream' as keyword."
+     stream = kws.get('stream', 0)
+     pmlist = []
+     devarylist = []
+     for ary in arylist:
+         pm = current_context().mempin(ary, driver.host_pointer(ary),
+                                       driver.host_memory_size(ary),
+                                       mapped=True)
+         pmlist.append(pm)
+         devary = devicearray.from_array_like(ary, gpu_data=pm, stream=stream)
+         devarylist.append(devary)
+     try:
+         if len(devarylist) == 1:
+             yield devarylist[0]
+         else:
+             yield devarylist
+     finally:
+         # When exiting from `with cuda.mapped(*arrs) as mapped_arrs:`, the name
+         # `mapped_arrs` stays in scope, blocking automatic unmapping based on
+         # reference count. We therefore invoke the finalizer manually.
+         for pm in pmlist:
+             pm.free()
+
+
+ def event(timing=True):
+     """
+     Create a CUDA event. Timing data is only recorded by the event if it is
+     created with ``timing=True``.
+     """
+     evt = current_context().create_event(timing=timing)
+     return evt
+
+
+ event_elapsed_time = driver.event_elapsed_time
+
+
+ # Device selection
+
+ def select_device(device_id):
+     """
+     Make the context associated with device *device_id* the current context.
+
+     Returns a Device instance.
+
+     Raises exception on error.
+     """
+     context = devices.get_context(device_id)
+     return context.device
+
+
+ def get_current_device():
+     "Get the current device associated with the current thread"
+     return current_context().device
+
+
+ def list_devices():
+     "Return a list of all detected devices"
+     return devices.gpus
+
+
+ def close():
+     """
+     Explicitly clears all contexts in the current thread, and destroys all
+     contexts if the current thread is the main thread.
+     """
+     devices.reset()
+
+
+ def _auto_device(ary, stream=0, copy=True):
+     return devicearray.auto_device(ary, stream=stream, copy=copy)
+
+
+ def detect():
+     """
+     Detect supported CUDA hardware and print a summary of the detected hardware.
+
+     Returns a boolean indicating whether any supported devices were detected.
+     """
+     devlist = list_devices()
+     print('Found %d CUDA devices' % len(devlist))
+     supported_count = 0
+     for dev in devlist:
+         attrs = []
+         cc = dev.compute_capability
+         kernel_timeout = dev.KERNEL_EXEC_TIMEOUT
+         tcc = dev.TCC_DRIVER
+         fp32_to_fp64_ratio = dev.SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO
+         attrs += [('Compute Capability', '%d.%d' % cc)]
+         attrs += [('PCI Device ID', dev.PCI_DEVICE_ID)]
+         attrs += [('PCI Bus ID', dev.PCI_BUS_ID)]
+         attrs += [('UUID', dev.uuid)]
+         attrs += [('Watchdog', 'Enabled' if kernel_timeout else 'Disabled')]
+         if os.name == "nt":
+             attrs += [('Compute Mode', 'TCC' if tcc else 'WDDM')]
+         attrs += [('FP32/FP64 Performance Ratio', fp32_to_fp64_ratio)]
+         if cc < (3, 5):
+             support = '[NOT SUPPORTED: CC < 3.5]'
+         elif cc < (5, 0):
+             support = '[SUPPORTED (DEPRECATED)]'
+             supported_count += 1
+         else:
+             support = '[SUPPORTED]'
+             supported_count += 1
+
+         print('id %d %20s %40s' % (dev.id, dev.name, support))
+         for key, val in attrs:
+             print('%40s: %s' % (key, val))
+
+     print('Summary:')
+     print('\t%d/%d devices are supported' % (supported_count, len(devlist)))
+     return supported_count > 0
+
+
+ @contextlib.contextmanager
+ def defer_cleanup():
+     """
+     Temporarily disable memory deallocation.
+     Use this to prevent resource deallocation breaking asynchronous execution.
+
+     For example::
+
+         with defer_cleanup():
+             # all cleanup is deferred in here
+             do_speed_critical_code()
+         # cleanup can occur here
+
+     Note: this context manager can be nested.
+     """
+     with current_context().defer_cleanup():
+         yield
+
+
+ profiling = require_context(driver.profiling)
+ profile_start = require_context(driver.profile_start)
+ profile_stop = require_context(driver.profile_stop)
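
To close out the module, a short hedged example exercising the host-side helpers defined above (`stream`, `pinned`, `to_device`, `event`, `event_elapsed_time`); it assumes a working CUDA driver and at least one supported device:

```
import numpy as np
from numba import cuda

ary = np.arange(16, dtype=np.float64)
s = cuda.stream()                          # command queue for the device

with cuda.pinned(ary):                     # temporarily page-lock the host array
    d_ary = cuda.to_device(ary, stream=s)  # asynchronous host-to-device copy
    hary = d_ary.copy_to_host(stream=s)    # asynchronous device-to-host copy
s.synchronize()

start, end = cuda.event(timing=True), cuda.event(timing=True)
start.record(stream=s)
d_like = cuda.device_array_like(ary, stream=s)  # uninitialized device array
end.record(stream=s)
end.synchronize()
print(cuda.event_elapsed_time(start, end), 'ms')
```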