numba-cuda 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (227) hide show
  1. _numba_cuda_redirector.py +17 -13
  2. numba_cuda/VERSION +1 -1
  3. numba_cuda/_version.py +4 -1
  4. numba_cuda/numba/cuda/__init__.py +6 -2
  5. numba_cuda/numba/cuda/api.py +129 -86
  6. numba_cuda/numba/cuda/api_util.py +3 -3
  7. numba_cuda/numba/cuda/args.py +12 -16
  8. numba_cuda/numba/cuda/cg.py +6 -6
  9. numba_cuda/numba/cuda/codegen.py +74 -43
  10. numba_cuda/numba/cuda/compiler.py +232 -113
  11. numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
  12. numba_cuda/numba/cuda/cuda_fp16.h +661 -661
  13. numba_cuda/numba/cuda/cuda_fp16.hpp +3 -3
  14. numba_cuda/numba/cuda/cuda_paths.py +291 -99
  15. numba_cuda/numba/cuda/cudadecl.py +125 -69
  16. numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
  17. numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
  18. numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
  19. numba_cuda/numba/cuda/cudadrv/driver.py +463 -297
  20. numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
  21. numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
  22. numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
  23. numba_cuda/numba/cuda/cudadrv/error.py +6 -2
  24. numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
  25. numba_cuda/numba/cuda/cudadrv/linkable_code.py +16 -1
  26. numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
  27. numba_cuda/numba/cuda/cudadrv/nvrtc.py +138 -29
  28. numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
  29. numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
  30. numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
  31. numba_cuda/numba/cuda/cudaimpl.py +317 -233
  32. numba_cuda/numba/cuda/cudamath.py +1 -1
  33. numba_cuda/numba/cuda/debuginfo.py +8 -6
  34. numba_cuda/numba/cuda/decorators.py +75 -45
  35. numba_cuda/numba/cuda/descriptor.py +1 -1
  36. numba_cuda/numba/cuda/device_init.py +69 -18
  37. numba_cuda/numba/cuda/deviceufunc.py +143 -98
  38. numba_cuda/numba/cuda/dispatcher.py +300 -213
  39. numba_cuda/numba/cuda/errors.py +13 -10
  40. numba_cuda/numba/cuda/extending.py +1 -1
  41. numba_cuda/numba/cuda/initialize.py +5 -3
  42. numba_cuda/numba/cuda/intrinsic_wrapper.py +3 -3
  43. numba_cuda/numba/cuda/intrinsics.py +31 -27
  44. numba_cuda/numba/cuda/kernels/reduction.py +13 -13
  45. numba_cuda/numba/cuda/kernels/transpose.py +3 -6
  46. numba_cuda/numba/cuda/libdevice.py +317 -317
  47. numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
  48. numba_cuda/numba/cuda/locks.py +16 -0
  49. numba_cuda/numba/cuda/mathimpl.py +62 -57
  50. numba_cuda/numba/cuda/models.py +1 -5
  51. numba_cuda/numba/cuda/nvvmutils.py +103 -88
  52. numba_cuda/numba/cuda/printimpl.py +9 -5
  53. numba_cuda/numba/cuda/random.py +46 -36
  54. numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
  55. numba_cuda/numba/cuda/runtime/__init__.py +1 -1
  56. numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
  57. numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
  58. numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
  59. numba_cuda/numba/cuda/runtime/nrt.py +48 -43
  60. numba_cuda/numba/cuda/simulator/__init__.py +22 -12
  61. numba_cuda/numba/cuda/simulator/api.py +38 -22
  62. numba_cuda/numba/cuda/simulator/compiler.py +2 -2
  63. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
  64. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
  65. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
  66. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
  67. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
  68. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
  69. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
  70. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
  71. numba_cuda/numba/cuda/simulator/kernel.py +43 -34
  72. numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
  73. numba_cuda/numba/cuda/simulator/reduction.py +1 -0
  74. numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
  75. numba_cuda/numba/cuda/simulator_init.py +2 -4
  76. numba_cuda/numba/cuda/stubs.py +139 -102
  77. numba_cuda/numba/cuda/target.py +64 -47
  78. numba_cuda/numba/cuda/testing.py +24 -19
  79. numba_cuda/numba/cuda/tests/__init__.py +14 -12
  80. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
  81. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
  82. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
  83. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
  84. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
  85. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
  86. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
  87. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
  88. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
  89. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
  90. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
  91. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
  92. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
  93. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
  94. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
  95. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
  96. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
  97. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
  98. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
  99. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
  100. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
  101. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
  102. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
  103. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
  104. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
  105. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
  106. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
  107. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
  108. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
  109. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
  110. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
  111. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +7 -6
  112. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
  113. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
  114. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
  115. numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
  116. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
  117. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
  118. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
  119. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +57 -21
  120. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
  121. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
  122. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
  123. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
  124. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
  125. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
  126. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
  127. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
  128. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
  129. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
  130. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
  131. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
  132. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
  133. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
  134. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +31 -28
  135. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
  136. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
  137. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +6 -7
  138. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
  139. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
  140. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +19 -12
  141. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
  142. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
  143. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
  144. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
  145. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
  146. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
  147. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
  148. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
  149. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
  150. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
  151. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
  152. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
  153. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
  154. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
  155. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +6 -6
  156. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
  157. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
  158. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
  159. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
  160. numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
  161. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
  162. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
  163. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
  164. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
  165. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
  166. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
  167. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
  168. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
  169. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
  170. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
  171. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
  172. numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
  173. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
  174. numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
  175. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
  176. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
  177. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
  178. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
  179. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
  180. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
  181. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
  182. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
  183. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
  184. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
  185. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
  186. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
  187. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
  188. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
  189. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
  190. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
  191. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
  192. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
  193. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
  194. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
  195. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +31 -25
  196. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
  197. numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
  198. numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
  199. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
  200. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
  201. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
  202. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
  203. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
  204. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
  205. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
  206. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
  207. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
  208. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
  209. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
  210. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
  211. numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
  212. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
  213. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
  214. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
  215. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
  216. numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
  217. numba_cuda/numba/cuda/types.py +5 -2
  218. numba_cuda/numba/cuda/ufuncs.py +382 -362
  219. numba_cuda/numba/cuda/utils.py +2 -2
  220. numba_cuda/numba/cuda/vector_types.py +2 -2
  221. numba_cuda/numba/cuda/vectorizers.py +37 -32
  222. {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/METADATA +1 -1
  223. numba_cuda-0.9.0.dist-info/RECORD +253 -0
  224. {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/WHEEL +1 -1
  225. numba_cuda-0.8.0.dist-info/RECORD +0 -251
  226. {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/licenses/LICENSE +0 -0
  227. {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/top_level.txt +0 -0
@@ -35,19 +35,21 @@ class CUDATypingContext(typing.BaseContext):
35
35
  def resolve_value_type(self, val):
36
36
  # treat other dispatcher object as another device function
37
37
  from numba.cuda.dispatcher import CUDADispatcher
38
- if (isinstance(val, Dispatcher) and not
39
- isinstance(val, CUDADispatcher)):
38
+
39
+ if isinstance(val, Dispatcher) and not isinstance(val, CUDADispatcher):
40
40
  try:
41
41
  # use cached device function
42
42
  val = val.__dispatcher
43
43
  except AttributeError:
44
44
  if not val._can_compile:
45
- raise ValueError('using cpu function on device '
46
- 'but its compilation is disabled')
45
+ raise ValueError(
46
+ "using cpu function on device "
47
+ "but its compilation is disabled"
48
+ )
47
49
  targetoptions = val.targetoptions.copy()
48
- targetoptions['device'] = True
49
- targetoptions['debug'] = targetoptions.get('debug', False)
50
- targetoptions['opt'] = targetoptions.get('opt', True)
50
+ targetoptions["device"] = True
51
+ targetoptions["debug"] = targetoptions.get("debug", False)
52
+ targetoptions["opt"] = targetoptions.get("opt", True)
51
53
  disp = CUDADispatcher(val.py_func, targetoptions)
52
54
  # cache the device function for future use and to avoid
53
55
  # duplicated copy of the same function.
@@ -57,18 +59,19 @@ class CUDATypingContext(typing.BaseContext):
57
59
  # continue with parent logic
58
60
  return super(CUDATypingContext, self).resolve_value_type(val)
59
61
 
62
+
60
63
  # -----------------------------------------------------------------------------
61
64
  # Implementation
62
65
 
63
66
 
64
- VALID_CHARS = re.compile(r'[^a-z0-9]', re.I)
67
+ VALID_CHARS = re.compile(r"[^a-z0-9]", re.I)
65
68
 
66
69
 
67
70
  class CUDATargetContext(BaseContext):
68
71
  implement_powi_as_math_call = True
69
72
  strict_alignment = True
70
73
 
71
- def __init__(self, typingctx, target='cuda'):
74
+ def __init__(self, typingctx, target="cuda"):
72
75
  super().__init__(typingctx, target)
73
76
  self.data_model_manager = cuda_data_manager.chain(
74
77
  datamodel.default_manager
@@ -76,7 +79,7 @@ class CUDATargetContext(BaseContext):
76
79
 
77
80
  @property
78
81
  def enable_nrt(self):
79
- return getattr(config, 'CUDA_ENABLE_NRT', False)
82
+ return getattr(config, "CUDA_ENABLE_NRT", False)
80
83
 
81
84
  @property
82
85
  def DIBuilder(self):
@@ -98,18 +101,17 @@ class CUDATargetContext(BaseContext):
98
101
  def load_additional_registries(self):
99
102
  # side effect of import needed for numba.cpython.*, the builtins
100
103
  # registry is updated at import time.
101
- from numba.cpython import numbers, tupleobj, slicing # noqa: F401
102
- from numba.cpython import rangeobj, iterators, enumimpl # noqa: F401
103
- from numba.cpython import unicode, charseq # noqa: F401
104
+ from numba.cpython import numbers, tupleobj, slicing # noqa: F401
105
+ from numba.cpython import rangeobj, iterators, enumimpl # noqa: F401
106
+ from numba.cpython import unicode, charseq # noqa: F401
104
107
  from numba.cpython import cmathimpl
105
108
  from numba.misc import cffiimpl
106
- from numba.np import arrayobj # noqa: F401
107
- from numba.np import npdatetime # noqa: F401
108
- from . import (
109
- cudaimpl, printimpl, libdeviceimpl, mathimpl, vector_types
110
- )
109
+ from numba.np import arrayobj # noqa: F401
110
+ from numba.np import npdatetime # noqa: F401
111
+ from . import cudaimpl, printimpl, libdeviceimpl, mathimpl, vector_types
112
+
111
113
  # fix for #8940
112
- from numba.np.unsafe import ndarray # noqa F401
114
+ from numba.np.unsafe import ndarray # noqa F401
113
115
 
114
116
  self.install_registry(cudaimpl.registry)
115
117
  self.install_registry(cffiimpl.registry)
@@ -136,10 +138,18 @@ class CUDATargetContext(BaseContext):
136
138
  These include threadIdx, blockDim, etc.
137
139
  """
138
140
  from numba import cuda
139
- nonconsts = ('threadIdx', 'blockDim', 'blockIdx', 'gridDim', 'laneid',
140
- 'warpsize')
141
- nonconsts_with_mod = tuple([(types.Module(cuda), nc)
142
- for nc in nonconsts])
141
+
142
+ nonconsts = (
143
+ "threadIdx",
144
+ "blockDim",
145
+ "blockIdx",
146
+ "gridDim",
147
+ "laneid",
148
+ "warpsize",
149
+ )
150
+ nonconsts_with_mod = tuple(
151
+ [(types.Module(cuda), nc) for nc in nonconsts]
152
+ )
143
153
  return nonconsts_with_mod
144
154
 
145
155
  @cached_property
@@ -147,8 +157,9 @@ class CUDATargetContext(BaseContext):
147
157
  return CUDACallConv(self)
148
158
 
149
159
  def mangler(self, name, argtypes, *, abi_tags=(), uid=None):
150
- return itanium_mangler.mangle(name, argtypes, abi_tags=abi_tags,
151
- uid=uid)
160
+ return itanium_mangler.mangle(
161
+ name, argtypes, abi_tags=abi_tags, uid=uid
162
+ )
152
163
 
153
164
  def make_constant_array(self, builder, aryty, arr):
154
165
  """
@@ -160,15 +171,16 @@ class CUDATargetContext(BaseContext):
160
171
 
161
172
  constvals = [
162
173
  self.get_constant(types.byte, i)
163
- for i in iter(arr.tobytes(order='A'))
174
+ for i in iter(arr.tobytes(order="A"))
164
175
  ]
165
176
  constaryty = ir.ArrayType(ir.IntType(8), len(constvals))
166
177
  constary = ir.Constant(constaryty, constvals)
167
178
 
168
179
  addrspace = nvvm.ADDRSPACE_CONSTANT
169
- gv = cgutils.add_global_variable(lmod, constary.type, "_cudapy_cmem",
170
- addrspace=addrspace)
171
- gv.linkage = 'internal'
180
+ gv = cgutils.add_global_variable(
181
+ lmod, constary.type, "_cudapy_cmem", addrspace=addrspace
182
+ )
183
+ gv.linkage = "internal"
172
184
  gv.global_constant = True
173
185
  gv.initializer = constary
174
186
 
@@ -179,17 +191,21 @@ class CUDATargetContext(BaseContext):
179
191
 
180
192
  # Convert to generic address-space
181
193
  ptrty = ir.PointerType(ir.IntType(8))
182
- genptr = builder.addrspacecast(gv, ptrty, 'generic')
194
+ genptr = builder.addrspacecast(gv, ptrty, "generic")
183
195
 
184
196
  # Create array object
185
197
  ary = self.make_array(aryty)(self, builder)
186
198
  kshape = [self.get_constant(types.intp, s) for s in arr.shape]
187
199
  kstrides = [self.get_constant(types.intp, s) for s in arr.strides]
188
- self.populate_array(ary, data=builder.bitcast(genptr, ary.data.type),
189
- shape=kshape,
190
- strides=kstrides,
191
- itemsize=ary.itemsize, parent=ary.parent,
192
- meminfo=None)
200
+ self.populate_array(
201
+ ary,
202
+ data=builder.bitcast(genptr, ary.data.type),
203
+ shape=kshape,
204
+ strides=kstrides,
205
+ itemsize=ary.itemsize,
206
+ parent=ary.parent,
207
+ meminfo=None,
208
+ )
193
209
 
194
210
  return ary._getvalue()
195
211
 
@@ -199,15 +215,17 @@ class CUDATargetContext(BaseContext):
199
215
  addrspace.
200
216
  """
201
217
  text = cgutils.make_bytearray(string.encode("utf-8") + b"\x00")
202
- name = '$'.join(["__conststring__",
203
- itanium_mangler.mangle_identifier(string)])
218
+ name = "$".join(
219
+ ["__conststring__", itanium_mangler.mangle_identifier(string)]
220
+ )
204
221
  # Try to reuse existing global
205
222
  gv = mod.globals.get(name)
206
223
  if gv is None:
207
224
  # Not defined yet
208
- gv = cgutils.add_global_variable(mod, text.type, name,
209
- addrspace=nvvm.ADDRSPACE_CONSTANT)
210
- gv.linkage = 'internal'
225
+ gv = cgutils.add_global_variable(
226
+ mod, text.type, name, addrspace=nvvm.ADDRSPACE_CONSTANT
227
+ )
228
+ gv.linkage = "internal"
211
229
  gv.global_constant = True
212
230
  gv.initializer = text
213
231
 
@@ -225,11 +243,10 @@ class CUDATargetContext(BaseContext):
225
243
  lmod = builder.module
226
244
  gv = self.insert_const_string(lmod, string)
227
245
  charptrty = ir.PointerType(ir.IntType(8))
228
- return builder.addrspacecast(gv, charptrty, 'generic')
246
+ return builder.addrspacecast(gv, charptrty, "generic")
229
247
 
230
248
  def optimize_function(self, func):
231
- """Run O1 function passes
232
- """
249
+ """Run O1 function passes"""
233
250
  pass
234
251
  ## XXX skipped for now
235
252
  # fpm = lp.FunctionPassManager.new(func.module)
@@ -266,8 +283,9 @@ class CUDACABICallConv(BaseCallConv):
266
283
  def return_value(self, builder, retval):
267
284
  return builder.ret(retval)
268
285
 
269
- def return_user_exc(self, builder, exc, exc_args=None, loc=None,
270
- func_name=None):
286
+ def return_user_exc(
287
+ self, builder, exc, exc_args=None, loc=None, func_name=None
288
+ ):
271
289
  msg = "Python exceptions are unsupported in the CUDA C/C++ ABI"
272
290
  raise NotImplementedError(msg)
273
291
 
@@ -290,8 +308,7 @@ class CUDACABICallConv(BaseCallConv):
290
308
  """
291
309
  assert not noalias
292
310
  arginfo = self._get_arg_packer(fe_argtypes)
293
- arginfo.assign_names(self.get_arguments(fn),
294
- ['arg.' + a for a in args])
311
+ arginfo.assign_names(self.get_arguments(fn), ["arg." + a for a in args])
295
312
 
296
313
  def get_arguments(self, func):
297
314
  """
@@ -11,7 +11,7 @@ from pathlib import Path
11
11
  import unittest
12
12
 
13
13
  numba_cuda_dir = Path(__file__).parent
14
- test_data_dir = numba_cuda_dir / 'tests' / 'data'
14
+ test_data_dir = numba_cuda_dir / "tests" / "data"
15
15
 
16
16
 
17
17
  class CUDATestCase(SerialMixin, TestCase):
@@ -55,6 +55,7 @@ class ContextResettingTestCase(CUDATestCase):
55
55
  def tearDown(self):
56
56
  super().tearDown()
57
57
  from numba.cuda.cudadrv.devices import reset
58
+
58
59
  reset()
59
60
 
60
61
 
@@ -89,26 +90,26 @@ def skip_unless_conda_cudatoolkit(reason):
89
90
 
90
91
  def skip_if_external_memmgr(reason):
91
92
  """Skip test if an EMM Plugin is in use"""
92
- return unittest.skipIf(config.CUDA_MEMORY_MANAGER != 'default', reason)
93
+ return unittest.skipIf(config.CUDA_MEMORY_MANAGER != "default", reason)
93
94
 
94
95
 
95
96
  def skip_under_cuda_memcheck(reason):
96
- return unittest.skipIf(os.environ.get('CUDA_MEMCHECK') is not None, reason)
97
+ return unittest.skipIf(os.environ.get("CUDA_MEMCHECK") is not None, reason)
97
98
 
98
99
 
99
100
  def skip_without_nvdisasm(reason):
100
- nvdisasm_path = shutil.which('nvdisasm')
101
+ nvdisasm_path = shutil.which("nvdisasm")
101
102
  return unittest.skipIf(nvdisasm_path is None, reason)
102
103
 
103
104
 
104
105
  def skip_with_nvdisasm(reason):
105
- nvdisasm_path = shutil.which('nvdisasm')
106
+ nvdisasm_path = shutil.which("nvdisasm")
106
107
  return unittest.skipIf(nvdisasm_path is not None, reason)
107
108
 
108
109
 
109
110
  def skip_on_arm(reason):
110
111
  cpu = platform.processor()
111
- is_arm = cpu.startswith('arm') or cpu.startswith('aarch')
112
+ is_arm = cpu.startswith("arm") or cpu.startswith("aarch")
112
113
  return unittest.skipIf(is_arm, reason)
113
114
 
114
115
 
@@ -116,25 +117,27 @@ def skip_if_cuda_includes_missing(fn):
116
117
  # Skip when cuda.h is not available - generally this should indicate
117
118
  # whether the CUDA includes are available or not
118
119
  cuda_include_path = libs.get_cuda_include_dir()
119
- cuda_h = os.path.join(cuda_include_path, 'cuda.h')
120
- cuda_h_file = (os.path.exists(cuda_h) and os.path.isfile(cuda_h))
121
- reason = 'CUDA include dir not available on this system'
120
+ cuda_h = os.path.join(cuda_include_path, "cuda.h")
121
+ cuda_h_file = os.path.exists(cuda_h) and os.path.isfile(cuda_h)
122
+ reason = "CUDA include dir not available on this system"
122
123
  return unittest.skipUnless(cuda_h_file, reason)(fn)
123
124
 
124
125
 
125
126
  def skip_if_curand_kernel_missing(fn):
126
127
  cuda_include_path = libs.get_cuda_include_dir()
127
- curand_kernel_h = os.path.join(cuda_include_path, 'curand_kernel.h')
128
- curand_kernel_h_file = (os.path.exists(curand_kernel_h) and
129
- os.path.isfile(curand_kernel_h))
130
- reason = 'curand_kernel.h not available on this system'
128
+ curand_kernel_h = os.path.join(cuda_include_path, "curand_kernel.h")
129
+ curand_kernel_h_file = os.path.exists(curand_kernel_h) and os.path.isfile(
130
+ curand_kernel_h
131
+ )
132
+ reason = "curand_kernel.h not available on this system"
131
133
  return unittest.skipUnless(curand_kernel_h_file, reason)(fn)
132
134
 
133
135
 
134
136
  def skip_if_mvc_enabled(reason):
135
137
  """Skip a test if Minor Version Compatibility is enabled"""
136
- return unittest.skipIf(config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY,
137
- reason)
138
+ return unittest.skipIf(
139
+ config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY, reason
140
+ )
138
141
 
139
142
 
140
143
  def skip_if_mvc_libraries_unavailable(fn):
@@ -142,12 +145,14 @@ def skip_if_mvc_libraries_unavailable(fn):
142
145
  try:
143
146
  import cubinlinker # noqa: F401
144
147
  import ptxcompiler # noqa: F401
148
+
145
149
  libs_available = True
146
150
  except ImportError:
147
151
  pass
148
152
 
149
- return unittest.skipUnless(libs_available,
150
- "Requires cubinlinker and ptxcompiler")(fn)
153
+ return unittest.skipUnless(
154
+ libs_available, "Requires cubinlinker and ptxcompiler"
155
+ )(fn)
151
156
 
152
157
 
153
158
  def cc_X_or_above(major, minor):
@@ -189,7 +194,7 @@ def cudadevrt_missing():
189
194
  if config.ENABLE_CUDASIM:
190
195
  return False
191
196
  try:
192
- path = libs.get_cudalib('cudadevrt', static=True)
197
+ path = libs.get_cudalib("cudadevrt", static=True)
193
198
  libs.check_static_lib(path)
194
199
  except FileNotFoundError:
195
200
  return True
@@ -197,7 +202,7 @@ def cudadevrt_missing():
197
202
 
198
203
 
199
204
  def skip_if_cudadevrt_missing(fn):
200
- return unittest.skipIf(cudadevrt_missing(), 'cudadevrt missing')(fn)
205
+ return unittest.skipIf(cudadevrt_missing(), "cudadevrt missing")(fn)
201
206
 
202
207
 
203
208
  class ForeignArray(object):
@@ -19,18 +19,19 @@ def load_testsuite(loader, dir):
19
19
  files = []
20
20
  for f in os.listdir(dir):
21
21
  path = join(dir, f)
22
- if isfile(path) and fnmatch(f, 'test_*.py'):
22
+ if isfile(path) and fnmatch(f, "test_*.py"):
23
23
  files.append(f)
24
- elif isfile(join(path, '__init__.py')):
25
- suite.addTests(loader.discover(path,
26
- top_level_dir=top_level_dir))
24
+ elif isfile(join(path, "__init__.py")):
25
+ suite.addTests(
26
+ loader.discover(path, top_level_dir=top_level_dir)
27
+ )
27
28
  for f in files:
28
29
  # turn 'f' into a filename relative to the toplevel dir and
29
30
  # translate it to a module name. This differs from the
30
31
  # implementation in Numba, because the toplevel dir is the
31
32
  # numba_cuda module location, not the numba one.
32
33
  f = relpath(join(dir, f), top_level_dir)
33
- f = splitext(normpath(f.replace(os.path.sep, '.')))[0]
34
+ f = splitext(normpath(f.replace(os.path.sep, ".")))[0]
34
35
  suite.addTests(loader.loadTestsFromName(f))
35
36
  return suite
36
37
  except Exception:
@@ -42,16 +43,17 @@ def load_tests(loader, tests, pattern):
42
43
  suite = unittest.TestSuite()
43
44
  this_dir = dirname(__file__)
44
45
  ensure_supported_ccs_initialized()
45
- suite.addTests(load_testsuite(loader, join(this_dir, 'nocuda')))
46
+ suite.addTests(load_testsuite(loader, join(this_dir, "nocuda")))
46
47
  if cuda.is_available():
47
- suite.addTests(load_testsuite(loader, join(this_dir, 'cudasim')))
48
+ suite.addTests(load_testsuite(loader, join(this_dir, "cudasim")))
48
49
  gpus = cuda.list_devices()
49
50
  if gpus and gpus[0].compute_capability >= (2, 0):
50
- suite.addTests(load_testsuite(loader, join(this_dir, 'cudadrv')))
51
- suite.addTests(load_testsuite(loader, join(this_dir, 'cudapy')))
52
- suite.addTests(load_testsuite(loader, join(this_dir, 'nrt')))
53
- suite.addTests(load_testsuite(loader, join(this_dir,
54
- 'doc_examples')))
51
+ suite.addTests(load_testsuite(loader, join(this_dir, "cudadrv")))
52
+ suite.addTests(load_testsuite(loader, join(this_dir, "cudapy")))
53
+ suite.addTests(load_testsuite(loader, join(this_dir, "nrt")))
54
+ suite.addTests(
55
+ load_testsuite(loader, join(this_dir, "doc_examples"))
56
+ )
55
57
  else:
56
58
  print("skipped CUDA tests because GPU CC < 2.0")
57
59
  else:
@@ -4,7 +4,6 @@ from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
4
4
 
5
5
 
6
6
  class TestArrayAttr(CUDATestCase):
7
-
8
7
  def test_contigous_2d(self):
9
8
  ary = np.arange(10)
10
9
  cary = ary.reshape(2, 5)
@@ -44,7 +43,7 @@ class TestArrayAttr(CUDATestCase):
44
43
  def test_ravel_1d(self):
45
44
  ary = np.arange(60)
46
45
  dary = cuda.to_device(ary)
47
- for order in 'CFA':
46
+ for order in "CFA":
48
47
  expect = ary.ravel(order=order)
49
48
  dflat = dary.ravel(order=order)
50
49
  flat = dflat.copy_to_host()
@@ -52,14 +51,14 @@ class TestArrayAttr(CUDATestCase):
52
51
  self.assertEqual(flat.ndim, 1)
53
52
  self.assertPreciseEqual(expect, flat)
54
53
 
55
- @skip_on_cudasim('CUDA Array Interface is not supported in the simulator')
54
+ @skip_on_cudasim("CUDA Array Interface is not supported in the simulator")
56
55
  def test_ravel_stride_1d(self):
57
56
  ary = np.arange(60)
58
57
  dary = cuda.to_device(ary)
59
58
  # No-copy stride device array
60
59
  darystride = dary[::2]
61
- dary_data = dary.__cuda_array_interface__['data'][0]
62
- ddarystride_data = darystride.__cuda_array_interface__['data'][0]
60
+ dary_data = dary.__cuda_array_interface__["data"][0]
61
+ ddarystride_data = darystride.__cuda_array_interface__["data"][0]
63
62
  self.assertEqual(dary_data, ddarystride_data)
64
63
  # Fail on ravel on non-contiguous array
65
64
  with self.assertRaises(NotImplementedError):
@@ -69,7 +68,7 @@ class TestArrayAttr(CUDATestCase):
69
68
  ary = np.arange(60)
70
69
  reshaped = ary.reshape(2, 5, 2, 3)
71
70
 
72
- expect = reshaped.ravel(order='C')
71
+ expect = reshaped.ravel(order="C")
73
72
  dary = cuda.to_device(reshaped)
74
73
  dflat = dary.ravel()
75
74
  flat = dflat.copy_to_host()
@@ -78,7 +77,7 @@ class TestArrayAttr(CUDATestCase):
78
77
  self.assertPreciseEqual(expect, flat)
79
78
 
80
79
  # explicit order kwarg
81
- for order in 'CA':
80
+ for order in "CA":
82
81
  expect = reshaped.ravel(order=order)
83
82
  dary = cuda.to_device(reshaped)
84
83
  dflat = dary.ravel(order=order)
@@ -87,15 +86,15 @@ class TestArrayAttr(CUDATestCase):
87
86
  self.assertEqual(flat.ndim, 1)
88
87
  self.assertPreciseEqual(expect, flat)
89
88
 
90
- @skip_on_cudasim('CUDA Array Interface is not supported in the simulator')
89
+ @skip_on_cudasim("CUDA Array Interface is not supported in the simulator")
91
90
  def test_ravel_stride_c(self):
92
91
  ary = np.arange(60)
93
92
  reshaped = ary.reshape(2, 5, 2, 3)
94
93
 
95
94
  dary = cuda.to_device(reshaped)
96
95
  darystride = dary[::2, ::2, ::2, ::2]
97
- dary_data = dary.__cuda_array_interface__['data'][0]
98
- ddarystride_data = darystride.__cuda_array_interface__['data'][0]
96
+ dary_data = dary.__cuda_array_interface__["data"][0]
97
+ ddarystride_data = darystride.__cuda_array_interface__["data"][0]
99
98
  self.assertEqual(dary_data, ddarystride_data)
100
99
  with self.assertRaises(NotImplementedError):
101
100
  darystride.ravel()
@@ -103,7 +102,7 @@ class TestArrayAttr(CUDATestCase):
103
102
  def test_ravel_f(self):
104
103
  ary = np.arange(60)
105
104
  reshaped = np.asfortranarray(ary.reshape(2, 5, 2, 3))
106
- for order in 'FA':
105
+ for order in "FA":
107
106
  expect = reshaped.ravel(order=order)
108
107
  dary = cuda.to_device(reshaped)
109
108
  dflat = dary.ravel(order=order)
@@ -112,14 +111,14 @@ class TestArrayAttr(CUDATestCase):
112
111
  self.assertEqual(flat.ndim, 1)
113
112
  self.assertPreciseEqual(expect, flat)
114
113
 
115
- @skip_on_cudasim('CUDA Array Interface is not supported in the simulator')
114
+ @skip_on_cudasim("CUDA Array Interface is not supported in the simulator")
116
115
  def test_ravel_stride_f(self):
117
116
  ary = np.arange(60)
118
117
  reshaped = np.asfortranarray(ary.reshape(2, 5, 2, 3))
119
118
  dary = cuda.to_device(reshaped)
120
119
  darystride = dary[::2, ::2, ::2, ::2]
121
- dary_data = dary.__cuda_array_interface__['data'][0]
122
- ddarystride_data = darystride.__cuda_array_interface__['data'][0]
120
+ dary_data = dary.__cuda_array_interface__["data"][0]
121
+ ddarystride_data = darystride.__cuda_array_interface__["data"][0]
123
122
  self.assertEqual(dary_data, ddarystride_data)
124
123
  with self.assertRaises(NotImplementedError):
125
124
  darystride.ravel()
@@ -134,12 +133,12 @@ class TestArrayAttr(CUDATestCase):
134
133
 
135
134
  def test_reshape_f(self):
136
135
  ary = np.arange(10)
137
- expect = ary.reshape(2, 5, order='F')
136
+ expect = ary.reshape(2, 5, order="F")
138
137
  dary = cuda.to_device(ary)
139
- dary_reshaped = dary.reshape(2, 5, order='F')
138
+ dary_reshaped = dary.reshape(2, 5, order="F")
140
139
  got = dary_reshaped.copy_to_host()
141
140
  self.assertPreciseEqual(expect, got)
142
141
 
143
142
 
144
- if __name__ == '__main__':
143
+ if __name__ == "__main__":
145
144
  unittest.main()
@@ -27,7 +27,6 @@ class TestContextStack(CUDATestCase):
27
27
 
28
28
 
29
29
  class TestContextAPI(CUDATestCase):
30
-
31
30
  def tearDown(self):
32
31
  super().tearDown()
33
32
  cuda.close()
@@ -36,7 +35,7 @@ class TestContextAPI(CUDATestCase):
36
35
  try:
37
36
  mem = cuda.current_context().get_memory_info()
38
37
  except NotImplementedError:
39
- self.skipTest('EMM Plugin does not implement get_memory_info()')
38
+ self.skipTest("EMM Plugin does not implement get_memory_info()")
40
39
 
41
40
  self.assertIsInstance(mem.free, numbers.Number)
42
41
  self.assertEqual(mem.free, mem[0])
@@ -47,7 +46,7 @@ class TestContextAPI(CUDATestCase):
47
46
  self.assertLessEqual(mem.free, mem.total)
48
47
 
49
48
  @unittest.skipIf(len(cuda.gpus) < 2, "need more than 1 gpus")
50
- @skip_on_cudasim('CUDA HW required')
49
+ @skip_on_cudasim("CUDA HW required")
51
50
  def test_forbidden_context_switch(self):
52
51
  # Cannot switch context inside a `cuda.require_context`
53
52
  @cuda.require_context
@@ -72,7 +71,7 @@ class TestContextAPI(CUDATestCase):
72
71
  self.assertEqual(int(devid), 1)
73
72
 
74
73
 
75
- @skip_on_cudasim('CUDA HW required')
74
+ @skip_on_cudasim("CUDA HW required")
76
75
  class Test3rdPartyContext(CUDATestCase):
77
76
  def tearDown(self):
78
77
  super().tearDown()
@@ -118,8 +117,9 @@ class Test3rdPartyContext(CUDATestCase):
118
117
  cuda.current_context()
119
118
  except RuntimeError as e:
120
119
  # Expecting an error about non-primary CUDA context
121
- self.assertIn("Numba cannot operate on non-primary CUDA context ",
122
- str(e))
120
+ self.assertIn(
121
+ "Numba cannot operate on non-primary CUDA context ", str(e)
122
+ )
123
123
  else:
124
124
  self.fail("No RuntimeError raised")
125
125
  finally:
@@ -141,5 +141,5 @@ class Test3rdPartyContext(CUDATestCase):
141
141
  self.test_attached_primary(do)
142
142
 
143
143
 
144
- if __name__ == '__main__':
144
+ if __name__ == "__main__":
145
145
  unittest.main()