numba-cuda 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (227)
  1. _numba_cuda_redirector.py +17 -13
  2. numba_cuda/VERSION +1 -1
  3. numba_cuda/_version.py +4 -1
  4. numba_cuda/numba/cuda/__init__.py +6 -2
  5. numba_cuda/numba/cuda/api.py +129 -86
  6. numba_cuda/numba/cuda/api_util.py +3 -3
  7. numba_cuda/numba/cuda/args.py +12 -16
  8. numba_cuda/numba/cuda/cg.py +6 -6
  9. numba_cuda/numba/cuda/codegen.py +74 -43
  10. numba_cuda/numba/cuda/compiler.py +232 -113
  11. numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
  12. numba_cuda/numba/cuda/cuda_fp16.h +661 -661
  13. numba_cuda/numba/cuda/cuda_fp16.hpp +3 -3
  14. numba_cuda/numba/cuda/cuda_paths.py +291 -99
  15. numba_cuda/numba/cuda/cudadecl.py +125 -69
  16. numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
  17. numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
  18. numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
  19. numba_cuda/numba/cuda/cudadrv/driver.py +463 -297
  20. numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
  21. numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
  22. numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
  23. numba_cuda/numba/cuda/cudadrv/error.py +6 -2
  24. numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
  25. numba_cuda/numba/cuda/cudadrv/linkable_code.py +16 -1
  26. numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
  27. numba_cuda/numba/cuda/cudadrv/nvrtc.py +138 -29
  28. numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
  29. numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
  30. numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
  31. numba_cuda/numba/cuda/cudaimpl.py +317 -233
  32. numba_cuda/numba/cuda/cudamath.py +1 -1
  33. numba_cuda/numba/cuda/debuginfo.py +8 -6
  34. numba_cuda/numba/cuda/decorators.py +75 -45
  35. numba_cuda/numba/cuda/descriptor.py +1 -1
  36. numba_cuda/numba/cuda/device_init.py +69 -18
  37. numba_cuda/numba/cuda/deviceufunc.py +143 -98
  38. numba_cuda/numba/cuda/dispatcher.py +300 -213
  39. numba_cuda/numba/cuda/errors.py +13 -10
  40. numba_cuda/numba/cuda/extending.py +1 -1
  41. numba_cuda/numba/cuda/initialize.py +5 -3
  42. numba_cuda/numba/cuda/intrinsic_wrapper.py +3 -3
  43. numba_cuda/numba/cuda/intrinsics.py +31 -27
  44. numba_cuda/numba/cuda/kernels/reduction.py +13 -13
  45. numba_cuda/numba/cuda/kernels/transpose.py +3 -6
  46. numba_cuda/numba/cuda/libdevice.py +317 -317
  47. numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
  48. numba_cuda/numba/cuda/locks.py +16 -0
  49. numba_cuda/numba/cuda/mathimpl.py +62 -57
  50. numba_cuda/numba/cuda/models.py +1 -5
  51. numba_cuda/numba/cuda/nvvmutils.py +103 -88
  52. numba_cuda/numba/cuda/printimpl.py +9 -5
  53. numba_cuda/numba/cuda/random.py +46 -36
  54. numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
  55. numba_cuda/numba/cuda/runtime/__init__.py +1 -1
  56. numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
  57. numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
  58. numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
  59. numba_cuda/numba/cuda/runtime/nrt.py +48 -43
  60. numba_cuda/numba/cuda/simulator/__init__.py +22 -12
  61. numba_cuda/numba/cuda/simulator/api.py +38 -22
  62. numba_cuda/numba/cuda/simulator/compiler.py +2 -2
  63. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
  64. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
  65. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
  66. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
  67. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
  68. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
  69. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
  70. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
  71. numba_cuda/numba/cuda/simulator/kernel.py +43 -34
  72. numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
  73. numba_cuda/numba/cuda/simulator/reduction.py +1 -0
  74. numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
  75. numba_cuda/numba/cuda/simulator_init.py +2 -4
  76. numba_cuda/numba/cuda/stubs.py +139 -102
  77. numba_cuda/numba/cuda/target.py +64 -47
  78. numba_cuda/numba/cuda/testing.py +24 -19
  79. numba_cuda/numba/cuda/tests/__init__.py +14 -12
  80. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
  81. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
  82. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
  83. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
  84. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
  85. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
  86. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
  87. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
  88. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
  89. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
  90. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
  91. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
  92. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
  93. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
  94. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
  95. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
  96. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
  97. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
  98. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
  99. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
  100. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
  101. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
  102. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
  103. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
  104. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
  105. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
  106. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
  107. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
  108. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
  109. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
  110. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
  111. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +7 -6
  112. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
  113. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
  114. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
  115. numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
  116. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
  117. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
  118. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
  119. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +57 -21
  120. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
  121. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
  122. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
  123. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
  124. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
  125. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
  126. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
  127. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
  128. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
  129. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
  130. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
  131. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
  132. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
  133. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
  134. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +31 -28
  135. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
  136. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
  137. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +6 -7
  138. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
  139. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
  140. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +19 -12
  141. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
  142. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
  143. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
  144. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
  145. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
  146. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
  147. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
  148. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
  149. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
  150. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
  151. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
  152. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
  153. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
  154. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
  155. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +6 -6
  156. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
  157. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
  158. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
  159. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
  160. numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
  161. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
  162. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
  163. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
  164. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
  165. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
  166. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
  167. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
  168. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
  169. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
  170. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
  171. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
  172. numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
  173. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
  174. numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
  175. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
  176. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
  177. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
  178. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
  179. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
  180. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
  181. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
  182. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
  183. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
  184. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
  185. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
  186. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
  187. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
  188. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
  189. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
  190. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
  191. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
  192. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
  193. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
  194. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
  195. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +31 -25
  196. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
  197. numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
  198. numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
  199. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
  200. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
  201. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
  202. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
  203. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
  204. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
  205. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
  206. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
  207. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
  208. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
  209. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
  210. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
  211. numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
  212. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
  213. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
  214. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
  215. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
  216. numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
  217. numba_cuda/numba/cuda/types.py +5 -2
  218. numba_cuda/numba/cuda/ufuncs.py +382 -362
  219. numba_cuda/numba/cuda/utils.py +2 -2
  220. numba_cuda/numba/cuda/vector_types.py +2 -2
  221. numba_cuda/numba/cuda/vectorizers.py +37 -32
  222. {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/METADATA +1 -1
  223. numba_cuda-0.9.0.dist-info/RECORD +253 -0
  224. {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/WHEEL +1 -1
  225. numba_cuda-0.8.0.dist-info/RECORD +0 -251
  226. {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/licenses/LICENSE +0 -0
  227. {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/top_level.txt +0 -0
@@ -136,5 +136,5 @@ class Math_isnan(ConcreteTemplate):
136
136
  class Math_modf(ConcreteTemplate):
137
137
  cases = [
138
138
  signature(types.UniTuple(types.float64, 2), types.float64),
139
- signature(types.UniTuple(types.float32, 2), types.float32)
139
+ signature(types.UniTuple(types.float32, 2), types.float32),
140
140
  ]
@@ -7,7 +7,6 @@ _BYTE_SIZE = 8
7
7
 
8
8
 
9
9
  class CUDADIBuilder(DIBuilder):
10
-
11
10
  def _var_type(self, lltype, size, datamodel=None):
12
11
  is_bool = False
13
12
  is_grid_group = False
@@ -34,11 +33,14 @@ class CUDADIBuilder(DIBuilder):
34
33
  elif is_grid_group:
35
34
  ditok = "DW_ATE_unsigned"
36
35
 
37
- return m.add_debug_info('DIBasicType', {
38
- 'name': name,
39
- 'size': bitsize,
40
- 'encoding': ir.DIToken(ditok),
41
- })
36
+ return m.add_debug_info(
37
+ "DIBasicType",
38
+ {
39
+ "name": name,
40
+ "size": bitsize,
41
+ "encoding": ir.DIToken(ditok),
42
+ },
43
+ )
42
44
 
43
45
  # For other cases, use upstream Numba implementation
44
46
  return super()._var_type(lltype, size, datamodel=datamodel)
@@ -6,13 +6,24 @@ from numba.cuda.dispatcher import CUDADispatcher
6
6
  from numba.cuda.simulator.kernel import FakeCUDAKernel
7
7
 
8
8
 
9
- _msg_deprecated_signature_arg = ("Deprecated keyword argument `{0}`. "
10
- "Signatures should be passed as the first "
11
- "positional argument.")
12
-
13
-
14
- def jit(func_or_sig=None, device=False, inline=False, link=[], debug=None,
15
- opt=None, lineinfo=False, cache=False, **kws):
9
+ _msg_deprecated_signature_arg = (
10
+ "Deprecated keyword argument `{0}`. "
11
+ "Signatures should be passed as the first "
12
+ "positional argument."
13
+ )
14
+
15
+
16
+ def jit(
17
+ func_or_sig=None,
18
+ device=False,
19
+ inline=False,
20
+ link=[],
21
+ debug=None,
22
+ opt=None,
23
+ lineinfo=False,
24
+ cache=False,
25
+ **kws,
26
+ ):
16
27
  """
17
28
  JIT compile a Python function for CUDA GPUs.
18
29
 
@@ -55,39 +66,43 @@ def jit(func_or_sig=None, device=False, inline=False, link=[], debug=None,
55
66
  """
56
67
 
57
68
  if link and config.ENABLE_CUDASIM:
58
- raise NotImplementedError('Cannot link PTX in the simulator')
69
+ raise NotImplementedError("Cannot link PTX in the simulator")
59
70
 
60
- if kws.get('boundscheck'):
71
+ if kws.get("boundscheck"):
61
72
  raise NotImplementedError("bounds checking is not supported for CUDA")
62
73
 
63
- if kws.get('argtypes') is not None:
64
- msg = _msg_deprecated_signature_arg.format('argtypes')
74
+ if kws.get("argtypes") is not None:
75
+ msg = _msg_deprecated_signature_arg.format("argtypes")
65
76
  raise DeprecationError(msg)
66
- if kws.get('restype') is not None:
67
- msg = _msg_deprecated_signature_arg.format('restype')
77
+ if kws.get("restype") is not None:
78
+ msg = _msg_deprecated_signature_arg.format("restype")
68
79
  raise DeprecationError(msg)
69
- if kws.get('bind') is not None:
70
- msg = _msg_deprecated_signature_arg.format('bind')
80
+ if kws.get("bind") is not None:
81
+ msg = _msg_deprecated_signature_arg.format("bind")
71
82
  raise DeprecationError(msg)
72
83
 
73
84
  debug = config.CUDA_DEBUGINFO_DEFAULT if debug is None else debug
74
85
  opt = (config.OPT != 0) if opt is None else opt
75
- fastmath = kws.get('fastmath', False)
76
- extensions = kws.get('extensions', [])
86
+ fastmath = kws.get("fastmath", False)
87
+ extensions = kws.get("extensions", [])
77
88
 
78
89
  if debug and opt:
79
- msg = ("debug=True with opt=True "
80
- "is not supported by CUDA. This may result in a crash"
81
- " - set debug=False or opt=False.")
90
+ msg = (
91
+ "debug=True with opt=True "
92
+ "is not supported by CUDA. This may result in a crash"
93
+ " - set debug=False or opt=False."
94
+ )
82
95
  warn(NumbaInvalidConfigWarning(msg))
83
96
 
84
97
  if debug and lineinfo:
85
- msg = ("debug and lineinfo are mutually exclusive. Use debug to get "
86
- "full debug info (this disables some optimizations), or "
87
- "lineinfo for line info only with code generation unaffected.")
98
+ msg = (
99
+ "debug and lineinfo are mutually exclusive. Use debug to get "
100
+ "full debug info (this disables some optimizations), or "
101
+ "lineinfo for line info only with code generation unaffected."
102
+ )
88
103
  warn(NumbaInvalidConfigWarning(msg))
89
104
 
90
- if device and kws.get('link'):
105
+ if device and kws.get("link"):
91
106
  raise ValueError("link keyword invalid for device function")
92
107
 
93
108
  if sigutils.is_signature(func_or_sig):
@@ -101,19 +116,21 @@ def jit(func_or_sig=None, device=False, inline=False, link=[], debug=None,
101
116
 
102
117
  if signatures is not None:
103
118
  if config.ENABLE_CUDASIM:
119
+
104
120
  def jitwrapper(func):
105
121
  return FakeCUDAKernel(func, device=device, fastmath=fastmath)
122
+
106
123
  return jitwrapper
107
124
 
108
125
  def _jit(func):
109
126
  targetoptions = kws.copy()
110
- targetoptions['debug'] = debug
111
- targetoptions['lineinfo'] = lineinfo
112
- targetoptions['link'] = link
113
- targetoptions['opt'] = opt
114
- targetoptions['fastmath'] = fastmath
115
- targetoptions['device'] = device
116
- targetoptions['extensions'] = extensions
127
+ targetoptions["debug"] = debug
128
+ targetoptions["lineinfo"] = lineinfo
129
+ targetoptions["link"] = link
130
+ targetoptions["opt"] = opt
131
+ targetoptions["fastmath"] = fastmath
132
+ targetoptions["device"] = device
133
+ targetoptions["extensions"] = extensions
117
134
 
118
135
  disp = CUDADispatcher(func, targetoptions=targetoptions)
119
136
 
@@ -128,6 +145,7 @@ def jit(func_or_sig=None, device=False, inline=False, link=[], debug=None,
128
145
 
129
146
  if device:
130
147
  from numba.core import typeinfer
148
+
131
149
  with typeinfer.register_dispatcher(disp):
132
150
  disp.compile_device(argtypes, restype)
133
151
  else:
@@ -142,29 +160,41 @@ def jit(func_or_sig=None, device=False, inline=False, link=[], debug=None,
142
160
  else:
143
161
  if func_or_sig is None:
144
162
  if config.ENABLE_CUDASIM:
163
+
145
164
  def autojitwrapper(func):
146
- return FakeCUDAKernel(func, device=device,
147
- fastmath=fastmath)
165
+ return FakeCUDAKernel(
166
+ func, device=device, fastmath=fastmath
167
+ )
148
168
  else:
169
+
149
170
  def autojitwrapper(func):
150
- return jit(func, device=device, debug=debug, opt=opt,
151
- lineinfo=lineinfo, link=link, cache=cache, **kws)
171
+ return jit(
172
+ func,
173
+ device=device,
174
+ debug=debug,
175
+ opt=opt,
176
+ lineinfo=lineinfo,
177
+ link=link,
178
+ cache=cache,
179
+ **kws,
180
+ )
152
181
 
153
182
  return autojitwrapper
154
183
  # func_or_sig is a function
155
184
  else:
156
185
  if config.ENABLE_CUDASIM:
157
- return FakeCUDAKernel(func_or_sig, device=device,
158
- fastmath=fastmath)
186
+ return FakeCUDAKernel(
187
+ func_or_sig, device=device, fastmath=fastmath
188
+ )
159
189
  else:
160
190
  targetoptions = kws.copy()
161
- targetoptions['debug'] = debug
162
- targetoptions['lineinfo'] = lineinfo
163
- targetoptions['opt'] = opt
164
- targetoptions['link'] = link
165
- targetoptions['fastmath'] = fastmath
166
- targetoptions['device'] = device
167
- targetoptions['extensions'] = extensions
191
+ targetoptions["debug"] = debug
192
+ targetoptions["lineinfo"] = lineinfo
193
+ targetoptions["opt"] = opt
194
+ targetoptions["link"] = link
195
+ targetoptions["fastmath"] = fastmath
196
+ targetoptions["device"] = device
197
+ targetoptions["extensions"] = extensions
168
198
  disp = CUDADispatcher(func_or_sig, targetoptions=targetoptions)
169
199
 
170
200
  if cache:
@@ -191,7 +221,7 @@ def declare_device(name, sig, link=None):
191
221
 
192
222
  argtypes, restype = sigutils.normalize_signature(sig)
193
223
  if restype is None:
194
- msg = 'Return type must be provided for device declarations'
224
+ msg = "Return type must be provided for device declarations"
195
225
  raise TypeError(msg)
196
226
 
197
227
  return declare_device_function(name, restype, argtypes, link)
@@ -30,4 +30,4 @@ class CUDATarget(TargetDescriptor):
30
30
  return self._targetctx
31
31
 
32
32
 
33
- cuda_target = CUDATarget('cuda')
33
+ cuda_target = CUDATarget("cuda")
@@ -1,21 +1,58 @@
1
1
  # Re export
2
2
  import sys
3
3
  from numba.cuda import cg
4
- from .stubs import (threadIdx, blockIdx, blockDim, gridDim, laneid, warpsize,
5
- syncwarp, shared, local, const, atomic,
6
- shfl_sync_intrinsic, vote_sync_intrinsic, match_any_sync,
7
- match_all_sync, threadfence_block, threadfence_system,
8
- threadfence, selp, popc, brev, clz, ffs, fma, cbrt,
9
- activemask, lanemask_lt, nanosleep, fp16,
10
- _vector_type_stubs)
11
- from .intrinsics import (grid, gridsize, syncthreads, syncthreads_and,
12
- syncthreads_count, syncthreads_or)
4
+ from .stubs import (
5
+ threadIdx,
6
+ blockIdx,
7
+ blockDim,
8
+ gridDim,
9
+ laneid,
10
+ warpsize,
11
+ syncwarp,
12
+ shared,
13
+ local,
14
+ const,
15
+ atomic,
16
+ shfl_sync_intrinsic,
17
+ vote_sync_intrinsic,
18
+ match_any_sync,
19
+ match_all_sync,
20
+ threadfence_block,
21
+ threadfence_system,
22
+ threadfence,
23
+ selp,
24
+ popc,
25
+ brev,
26
+ clz,
27
+ ffs,
28
+ fma,
29
+ cbrt,
30
+ activemask,
31
+ lanemask_lt,
32
+ nanosleep,
33
+ fp16,
34
+ _vector_type_stubs,
35
+ )
36
+ from .intrinsics import (
37
+ grid,
38
+ gridsize,
39
+ syncthreads,
40
+ syncthreads_and,
41
+ syncthreads_count,
42
+ syncthreads_or,
43
+ )
13
44
  from .cudadrv.error import CudaSupportError
14
- from numba.cuda.cudadrv.driver import (BaseCUDAMemoryManager,
15
- HostOnlyCUDAMemoryManager,
16
- GetIpcHandleMixin, MemoryPointer,
17
- MappedMemory, PinnedMemory, MemoryInfo,
18
- IpcHandle, set_memory_manager)
45
+ from numba.cuda.cudadrv.driver import (
46
+ BaseCUDAMemoryManager,
47
+ HostOnlyCUDAMemoryManager,
48
+ GetIpcHandleMixin,
49
+ MemoryPointer,
50
+ MappedMemory,
51
+ PinnedMemory,
52
+ MemoryInfo,
53
+ IpcHandle,
54
+ set_memory_manager,
55
+ )
19
56
  from numba.cuda.cudadrv.runtime import runtime
20
57
  from .cudadrv import nvvm
21
58
  from numba.cuda import initialize
@@ -26,13 +63,27 @@ from .api import *
26
63
  from .api import _auto_device
27
64
  from .args import In, Out, InOut
28
65
 
29
- from .intrinsic_wrapper import (all_sync, any_sync, eq_sync, ballot_sync,
30
- shfl_sync, shfl_up_sync, shfl_down_sync,
31
- shfl_xor_sync)
66
+ from .intrinsic_wrapper import (
67
+ all_sync,
68
+ any_sync,
69
+ eq_sync,
70
+ ballot_sync,
71
+ shfl_sync,
72
+ shfl_up_sync,
73
+ shfl_down_sync,
74
+ shfl_xor_sync,
75
+ )
32
76
 
33
77
  from .kernels import reduction
34
78
  from numba.cuda.cudadrv.linkable_code import (
35
- Archive, CUSource, Cubin, Fatbin, LinkableCode, LTOIR, Object, PTXSource
79
+ Archive,
80
+ CUSource,
81
+ Cubin,
82
+ Fatbin,
83
+ LinkableCode,
84
+ LTOIR,
85
+ Object,
86
+ PTXSource,
36
87
  )
37
88
 
38
89
  reduce = Reduce = reduction.Reduce