numba-cuda 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (227) hide show
  1. _numba_cuda_redirector.py +17 -13
  2. numba_cuda/VERSION +1 -1
  3. numba_cuda/_version.py +4 -1
  4. numba_cuda/numba/cuda/__init__.py +6 -2
  5. numba_cuda/numba/cuda/api.py +129 -86
  6. numba_cuda/numba/cuda/api_util.py +3 -3
  7. numba_cuda/numba/cuda/args.py +12 -16
  8. numba_cuda/numba/cuda/cg.py +6 -6
  9. numba_cuda/numba/cuda/codegen.py +74 -43
  10. numba_cuda/numba/cuda/compiler.py +232 -113
  11. numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
  12. numba_cuda/numba/cuda/cuda_fp16.h +661 -661
  13. numba_cuda/numba/cuda/cuda_fp16.hpp +3 -3
  14. numba_cuda/numba/cuda/cuda_paths.py +291 -99
  15. numba_cuda/numba/cuda/cudadecl.py +125 -69
  16. numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
  17. numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
  18. numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
  19. numba_cuda/numba/cuda/cudadrv/driver.py +463 -297
  20. numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
  21. numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
  22. numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
  23. numba_cuda/numba/cuda/cudadrv/error.py +6 -2
  24. numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
  25. numba_cuda/numba/cuda/cudadrv/linkable_code.py +16 -1
  26. numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
  27. numba_cuda/numba/cuda/cudadrv/nvrtc.py +138 -29
  28. numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
  29. numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
  30. numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
  31. numba_cuda/numba/cuda/cudaimpl.py +317 -233
  32. numba_cuda/numba/cuda/cudamath.py +1 -1
  33. numba_cuda/numba/cuda/debuginfo.py +8 -6
  34. numba_cuda/numba/cuda/decorators.py +75 -45
  35. numba_cuda/numba/cuda/descriptor.py +1 -1
  36. numba_cuda/numba/cuda/device_init.py +69 -18
  37. numba_cuda/numba/cuda/deviceufunc.py +143 -98
  38. numba_cuda/numba/cuda/dispatcher.py +300 -213
  39. numba_cuda/numba/cuda/errors.py +13 -10
  40. numba_cuda/numba/cuda/extending.py +1 -1
  41. numba_cuda/numba/cuda/initialize.py +5 -3
  42. numba_cuda/numba/cuda/intrinsic_wrapper.py +3 -3
  43. numba_cuda/numba/cuda/intrinsics.py +31 -27
  44. numba_cuda/numba/cuda/kernels/reduction.py +13 -13
  45. numba_cuda/numba/cuda/kernels/transpose.py +3 -6
  46. numba_cuda/numba/cuda/libdevice.py +317 -317
  47. numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
  48. numba_cuda/numba/cuda/locks.py +16 -0
  49. numba_cuda/numba/cuda/mathimpl.py +62 -57
  50. numba_cuda/numba/cuda/models.py +1 -5
  51. numba_cuda/numba/cuda/nvvmutils.py +103 -88
  52. numba_cuda/numba/cuda/printimpl.py +9 -5
  53. numba_cuda/numba/cuda/random.py +46 -36
  54. numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
  55. numba_cuda/numba/cuda/runtime/__init__.py +1 -1
  56. numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
  57. numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
  58. numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
  59. numba_cuda/numba/cuda/runtime/nrt.py +48 -43
  60. numba_cuda/numba/cuda/simulator/__init__.py +22 -12
  61. numba_cuda/numba/cuda/simulator/api.py +38 -22
  62. numba_cuda/numba/cuda/simulator/compiler.py +2 -2
  63. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
  64. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
  65. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
  66. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
  67. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
  68. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
  69. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
  70. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
  71. numba_cuda/numba/cuda/simulator/kernel.py +43 -34
  72. numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
  73. numba_cuda/numba/cuda/simulator/reduction.py +1 -0
  74. numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
  75. numba_cuda/numba/cuda/simulator_init.py +2 -4
  76. numba_cuda/numba/cuda/stubs.py +139 -102
  77. numba_cuda/numba/cuda/target.py +64 -47
  78. numba_cuda/numba/cuda/testing.py +24 -19
  79. numba_cuda/numba/cuda/tests/__init__.py +14 -12
  80. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
  81. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
  82. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
  83. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
  84. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
  85. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
  86. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
  87. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
  88. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
  89. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
  90. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
  91. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
  92. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
  93. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
  94. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
  95. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
  96. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
  97. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
  98. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
  99. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
  100. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
  101. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
  102. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
  103. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
  104. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
  105. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
  106. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
  107. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
  108. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
  109. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
  110. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
  111. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +7 -6
  112. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
  113. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
  114. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
  115. numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
  116. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
  117. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
  118. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
  119. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +57 -21
  120. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
  121. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
  122. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
  123. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
  124. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
  125. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
  126. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
  127. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
  128. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
  129. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
  130. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
  131. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
  132. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
  133. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
  134. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +31 -28
  135. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
  136. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
  137. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +6 -7
  138. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
  139. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
  140. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +19 -12
  141. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
  142. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
  143. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
  144. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
  145. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
  146. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
  147. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
  148. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
  149. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
  150. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
  151. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
  152. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
  153. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
  154. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
  155. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +6 -6
  156. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
  157. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
  158. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
  159. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
  160. numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
  161. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
  162. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
  163. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
  164. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
  165. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
  166. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
  167. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
  168. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
  169. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
  170. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
  171. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
  172. numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
  173. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
  174. numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
  175. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
  176. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
  177. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
  178. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
  179. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
  180. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
  181. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
  182. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
  183. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
  184. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
  185. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
  186. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
  187. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
  188. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
  189. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
  190. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
  191. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
  192. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
  193. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
  194. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
  195. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +31 -25
  196. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
  197. numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
  198. numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
  199. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
  200. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
  201. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
  202. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
  203. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
  204. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
  205. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
  206. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
  207. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
  208. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
  209. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
  210. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
  211. numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
  212. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
  213. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
  214. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
  215. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
  216. numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
  217. numba_cuda/numba/cuda/types.py +5 -2
  218. numba_cuda/numba/cuda/ufuncs.py +382 -362
  219. numba_cuda/numba/cuda/utils.py +2 -2
  220. numba_cuda/numba/cuda/vector_types.py +2 -2
  221. numba_cuda/numba/cuda/vectorizers.py +37 -32
  222. {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/METADATA +1 -1
  223. numba_cuda-0.9.0.dist-info/RECORD +253 -0
  224. {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/WHEEL +1 -1
  225. numba_cuda-0.8.0.dist-info/RECORD +0 -251
  226. {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/licenses/LICENSE +0 -0
  227. {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/top_level.txt +0 -0
@@ -20,7 +20,7 @@ attempt_nocopy_reshape = ctypes.CFUNCTYPE(
20
20
  np.ctypeslib.ndpointer(np.ctypeslib.c_intp, ndim=1), # newstrides
21
21
  ctypes.c_long, # itemsize
22
22
  ctypes.c_int, # is_f_order
23
- )(_helperlib.c_helpers['attempt_nocopy_reshape'])
23
+ )(_helperlib.c_helpers["attempt_nocopy_reshape"])
24
24
 
25
25
 
26
26
  class Dim(object):
@@ -37,7 +37,8 @@ class Dim(object):
37
37
  stride:
38
38
  item stride
39
39
  """
40
- __slots__ = 'start', 'stop', 'size', 'stride', 'single'
40
+
41
+ __slots__ = "start", "stop", "size", "stride", "single"
41
42
 
42
43
  def __init__(self, start, stop, size, stride, single):
43
44
  self.start = start
@@ -58,15 +59,11 @@ class Dim(object):
58
59
  else:
59
60
  size = _compute_size(start, stop, stride)
60
61
  ret = Dim(
61
- start=start,
62
- stop=stop,
63
- size=size,
64
- stride=stride,
65
- single=False
62
+ start=start, stop=stop, size=size, stride=stride, single=False
66
63
  )
67
64
  return ret
68
65
  else:
69
- sliced = self[item:item + 1] if item != -1 else self[-1:]
66
+ sliced = self[item : item + 1] if item != -1 else self[-1:]
70
67
  if sliced.size != 1:
71
68
  raise IndexError
72
69
  return Dim(
@@ -85,8 +82,13 @@ class Dim(object):
85
82
  return strfmt % (self.start, self.stop, self.size, self.stride)
86
83
 
87
84
  def normalize(self, base):
88
- return Dim(start=self.start - base, stop=self.stop - base,
89
- size=self.size, stride=self.stride, single=self.single)
85
+ return Dim(
86
+ start=self.start - base,
87
+ stop=self.stop - base,
88
+ size=self.size,
89
+ stride=self.stride,
90
+ single=self.single,
91
+ )
90
92
 
91
93
  def copy(self, start=None, stop=None, size=None, stride=None, single=None):
92
94
  if start is None:
@@ -143,14 +145,16 @@ class Array(object):
143
145
  extent: (start, end)
144
146
  start and end offset containing the memory region
145
147
  """
148
+
146
149
  is_array = True
147
150
 
148
151
  @classmethod
149
152
  def from_desc(cls, offset, shape, strides, itemsize):
150
153
  dims = []
151
154
  for ashape, astride in zip(shape, strides):
152
- dim = Dim(offset, offset + ashape * astride, ashape, astride,
153
- single=False)
155
+ dim = Dim(
156
+ offset, offset + ashape * astride, ashape, astride, single=False
157
+ )
154
158
  dims.append(dim)
155
159
  offset = 0 # offset only applies to first dimension
156
160
  return cls(dims, itemsize)
@@ -173,23 +177,23 @@ class Array(object):
173
177
 
174
178
  # Records have no dims, and we can treat them as contiguous
175
179
  if not self.dims:
176
- return {'C_CONTIGUOUS': True, 'F_CONTIGUOUS': True}
180
+ return {"C_CONTIGUOUS": True, "F_CONTIGUOUS": True}
177
181
 
178
182
  # If this is a broadcast array then it is not contiguous
179
183
  if any([dim.stride == 0 for dim in self.dims]):
180
- return {'C_CONTIGUOUS': False, 'F_CONTIGUOUS': False}
184
+ return {"C_CONTIGUOUS": False, "F_CONTIGUOUS": False}
181
185
 
182
- flags = {'C_CONTIGUOUS': True, 'F_CONTIGUOUS': True}
186
+ flags = {"C_CONTIGUOUS": True, "F_CONTIGUOUS": True}
183
187
 
184
188
  # Check C contiguity
185
189
  sd = self.itemsize
186
190
  for dim in reversed(self.dims):
187
191
  if dim.size == 0:
188
192
  # Contiguous by definition
189
- return {'C_CONTIGUOUS': True, 'F_CONTIGUOUS': True}
193
+ return {"C_CONTIGUOUS": True, "F_CONTIGUOUS": True}
190
194
  if dim.size != 1:
191
195
  if dim.stride != sd:
192
- flags['C_CONTIGUOUS'] = False
196
+ flags["C_CONTIGUOUS"] = False
193
197
  sd *= dim.size
194
198
 
195
199
  # Check F contiguity
@@ -197,7 +201,7 @@ class Array(object):
197
201
  for dim in self.dims:
198
202
  if dim.size != 1:
199
203
  if dim.stride != sd:
200
- flags['F_CONTIGUOUS'] = False
204
+ flags["F_CONTIGUOUS"] = False
201
205
  return flags
202
206
  sd *= dim.size
203
207
 
@@ -208,11 +212,11 @@ class Array(object):
208
212
  lastidx = [s - 1 for s in self.shape]
209
213
  start = compute_index(firstidx, self.dims)
210
214
  stop = compute_index(lastidx, self.dims) + self.itemsize
211
- stop = max(stop, start) # ensure positive extent
215
+ stop = max(stop, start) # ensure positive extent
212
216
  return Extent(start, stop)
213
217
 
214
218
  def __repr__(self):
215
- return '<Array dims=%s itemsize=%s>' % (self.dims, self.itemsize)
219
+ return "<Array dims=%s itemsize=%s>" % (self.dims, self.itemsize)
216
220
 
217
221
  def __getitem__(self, item):
218
222
  if not isinstance(item, tuple):
@@ -240,15 +244,14 @@ class Array(object):
240
244
 
241
245
  @property
242
246
  def is_c_contig(self):
243
- return self.flags['C_CONTIGUOUS']
247
+ return self.flags["C_CONTIGUOUS"]
244
248
 
245
249
  @property
246
250
  def is_f_contig(self):
247
- return self.flags['F_CONTIGUOUS']
251
+ return self.flags["F_CONTIGUOUS"]
248
252
 
249
253
  def iter_contiguous_extent(self):
250
- """ Generates extents
251
- """
254
+ """Generates extents"""
252
255
  if self.is_c_contig or self.is_f_contig:
253
256
  yield self.extent
254
257
  else:
@@ -279,11 +282,11 @@ class Array(object):
279
282
  if newdims == self.shape:
280
283
  return self, None
281
284
 
282
- order = kws.pop('order', 'C')
285
+ order = kws.pop("order", "C")
283
286
  if kws:
284
- raise TypeError('unknown keyword arguments %s' % kws.keys())
285
- if order not in 'CFA':
286
- raise ValueError('order not C|F|A')
287
+ raise TypeError("unknown keyword arguments %s" % kws.keys())
288
+ if order not in "CFA":
289
+ raise ValueError("order not C|F|A")
287
290
 
288
291
  # check for exactly one instance of -1 in newdims
289
292
  # https://github.com/numpy/numpy/blob/623bc1fae1d47df24e7f1e29321d0c0ba2771ce0/numpy/core/src/multiarray/shape.c#L470-L515 # noqa: E501
@@ -301,25 +304,28 @@ class Array(object):
301
304
  # compute the missing dimension
302
305
  if unknownidx >= 0:
303
306
  if knownsize == 0 or self.size % knownsize != 0:
304
- raise ValueError("cannot infer valid shape "
305
- "for unknown dimension")
307
+ raise ValueError(
308
+ "cannot infer valid shape for unknown dimension"
309
+ )
306
310
  else:
307
- newdims = newdims[0:unknownidx] \
308
- + (self.size // knownsize,) \
309
- + newdims[unknownidx + 1:]
311
+ newdims = (
312
+ newdims[0:unknownidx]
313
+ + (self.size // knownsize,)
314
+ + newdims[unknownidx + 1 :]
315
+ )
310
316
 
311
317
  newsize = functools.reduce(operator.mul, newdims, 1)
312
318
 
313
- if order == 'A':
314
- order = 'F' if self.is_f_contig else 'C'
319
+ if order == "A":
320
+ order = "F" if self.is_f_contig else "C"
315
321
 
316
322
  if newsize != self.size:
317
323
  raise ValueError("reshape changes the size of the array")
318
324
 
319
325
  if self.is_c_contig or self.is_f_contig:
320
- if order == 'C':
326
+ if order == "C":
321
327
  newstrides = list(iter_strides_c_contig(self, newdims))
322
- elif order == 'F':
328
+ elif order == "F":
323
329
  newstrides = list(iter_strides_f_contig(self, newdims))
324
330
  else:
325
331
  raise AssertionError("unreachable")
@@ -340,12 +346,16 @@ class Array(object):
340
346
  newdims,
341
347
  newstrides,
342
348
  self.itemsize,
343
- order == 'F',
349
+ order == "F",
344
350
  ):
345
- raise NotImplementedError('reshape would require copy')
351
+ raise NotImplementedError("reshape would require copy")
346
352
 
347
- ret = self.from_desc(self.extent.begin, shape=newdims,
348
- strides=newstrides, itemsize=self.itemsize)
353
+ ret = self.from_desc(
354
+ self.extent.begin,
355
+ shape=newdims,
356
+ strides=newstrides,
357
+ itemsize=self.itemsize,
358
+ )
349
359
 
350
360
  return ret, list(self.iter_contiguous_extent())
351
361
 
@@ -377,16 +387,21 @@ class Array(object):
377
387
  )
378
388
  return newarr, list(self.iter_contiguous_extent())
379
389
 
380
- def ravel(self, order='C'):
381
- if order not in 'CFA':
382
- raise ValueError('order not C|F|A')
390
+ def ravel(self, order="C"):
391
+ if order not in "CFA":
392
+ raise ValueError("order not C|F|A")
383
393
 
384
- if (order in 'CA' and self.is_c_contig
385
- or order in 'FA' and self.is_f_contig):
394
+ if (
395
+ order in "CA"
396
+ and self.is_c_contig
397
+ or order in "FA"
398
+ and self.is_f_contig
399
+ ):
386
400
  newshape = (self.size,)
387
401
  newstrides = (self.itemsize,)
388
- arr = self.from_desc(self.extent.begin, newshape, newstrides,
389
- self.itemsize)
402
+ arr = self.from_desc(
403
+ self.extent.begin, newshape, newstrides, self.itemsize
404
+ )
390
405
  return arr, list(self.iter_contiguous_extent())
391
406
 
392
407
  else:
@@ -394,8 +409,7 @@ class Array(object):
394
409
 
395
410
 
396
411
  def iter_strides_f_contig(arr, shape=None):
397
- """yields the f-contiguous strides
398
- """
412
+ """yields the f-contiguous strides"""
399
413
  shape = arr.shape if shape is None else shape
400
414
  itemsize = arr.itemsize
401
415
  yield itemsize
@@ -406,8 +420,7 @@ def iter_strides_f_contig(arr, shape=None):
406
420
 
407
421
 
408
422
  def iter_strides_c_contig(arr, shape=None):
409
- """yields the c-contiguous strides
410
- """
423
+ """yields the c-contiguous strides"""
411
424
  shape = arr.shape if shape is None else shape
412
425
  itemsize = arr.itemsize
413
426
 
@@ -438,8 +451,7 @@ def is_element_indexing(item, ndim):
438
451
 
439
452
 
440
453
  def _compute_size(start, stop, step):
441
- """Algorithm adapted from cpython rangeobject.c
442
- """
454
+ """Algorithm adapted from cpython rangeobject.c"""
443
455
  if step > 0:
444
456
  lo = start
445
457
  hi = stop
@@ -140,7 +140,7 @@ CU_CTX_USER_COREDUMP_ENABLE = 0x40
140
140
  # Force synchronous blocking on cudaMemcpy/cudaMemset
141
141
  CU_CTX_SYNC_MEMOPS = 0x80
142
142
 
143
- CU_CTX_FLAGS_MASK = 0xff
143
+ CU_CTX_FLAGS_MASK = 0xFF
144
144
 
145
145
 
146
146
  # DEFINES
@@ -12,7 +12,7 @@ class CudaSupportError(ImportError):
12
12
 
13
13
  class NvvmError(Exception):
14
14
  def __str__(self):
15
- return '\n'.join(map(str, self.args))
15
+ return "\n".join(map(str, self.args))
16
16
 
17
17
 
18
18
  class NvvmSupportError(ImportError):
@@ -25,12 +25,16 @@ class NvvmWarning(Warning):
25
25
 
26
26
  class NvrtcError(Exception):
27
27
  def __str__(self):
28
- return '\n'.join(map(str, self.args))
28
+ return "\n".join(map(str, self.args))
29
29
 
30
30
 
31
31
  class NvrtcCompilationError(NvrtcError):
32
32
  pass
33
33
 
34
34
 
35
+ class NvrtcBuiltinOperationFailure(NvrtcError):
36
+ pass
37
+
38
+
35
39
  class NvrtcSupportError(ImportError):
36
40
  pass
@@ -21,25 +21,25 @@ from numba.cuda.cudadrv.error import CudaSupportError
21
21
  from numba.core import config
22
22
 
23
23
 
24
- if sys.platform == 'win32':
25
- _dllnamepattern = '%s.dll'
26
- _staticnamepattern = '%s.lib'
27
- elif sys.platform == 'darwin':
28
- _dllnamepattern = 'lib%s.dylib'
29
- _staticnamepattern = 'lib%s.a'
24
+ if sys.platform == "win32":
25
+ _dllnamepattern = "%s.dll"
26
+ _staticnamepattern = "%s.lib"
27
+ elif sys.platform == "darwin":
28
+ _dllnamepattern = "lib%s.dylib"
29
+ _staticnamepattern = "lib%s.a"
30
30
  else:
31
- _dllnamepattern = 'lib%s.so'
32
- _staticnamepattern = 'lib%s.a'
31
+ _dllnamepattern = "lib%s.so"
32
+ _staticnamepattern = "lib%s.a"
33
33
 
34
34
 
35
35
  def get_libdevice():
36
36
  d = get_cuda_paths()
37
- paths = d['libdevice'].info
37
+ paths = d["libdevice"].info
38
38
  return paths
39
39
 
40
40
 
41
41
  def open_libdevice():
42
- with open(get_libdevice(), 'rb') as bcfile:
42
+ with open(get_libdevice(), "rb") as bcfile:
43
43
  return bcfile.read()
44
44
 
45
45
 
@@ -50,10 +50,10 @@ def get_cudalib(lib, static=False):
50
50
  'libnvvm.so' for 'nvvm') so that we may attempt to load it using the system
51
51
  loader's search mechanism.
52
52
  """
53
- if lib == 'nvvm':
54
- return get_cuda_paths()['nvvm'].info or _dllnamepattern % 'nvvm'
53
+ if lib in {"nvrtc", "nvvm"}:
54
+ return get_cuda_paths()[lib].info or _dllnamepattern % lib
55
55
  else:
56
- dir_type = 'static_cudalib_dir' if static else 'cudalib_dir'
56
+ dir_type = "static_cudalib_dir" if static else "cudalib_dir"
57
57
  libdir = get_cuda_paths()[dir_type].info
58
58
 
59
59
  candidates = find_lib(lib, libdir, static=static)
@@ -68,7 +68,7 @@ def get_cuda_include_dir():
68
68
  configuration.
69
69
  """
70
70
 
71
- return get_cuda_paths()['include_dir'].info
71
+ return get_cuda_paths()["include_dir"].info
72
72
 
73
73
 
74
74
  def check_cuda_include_dir(path):
@@ -86,39 +86,40 @@ def open_cudalib(lib):
86
86
 
87
87
  def check_static_lib(path):
88
88
  if not os.path.isfile(path):
89
- raise FileNotFoundError(f'{path} not found')
89
+ raise FileNotFoundError(f"{path} not found")
90
90
 
91
91
 
92
92
  def _get_source_variable(lib, static=False):
93
- if lib == 'nvvm':
94
- return get_cuda_paths()['nvvm'].by
95
- elif lib == 'libdevice':
96
- return get_cuda_paths()['libdevice'].by
97
- elif lib == 'include_dir':
98
- return get_cuda_paths()['include_dir'].by
93
+ if lib == "nvvm":
94
+ return get_cuda_paths()["nvvm"].by
95
+ elif lib == "nvrtc":
96
+ return get_cuda_paths()["nvrtc"].by
97
+ elif lib == "libdevice":
98
+ return get_cuda_paths()["libdevice"].by
99
+ elif lib == "include_dir":
100
+ return get_cuda_paths()["include_dir"].by
99
101
  else:
100
- dir_type = 'static_cudalib_dir' if static else 'cudalib_dir'
102
+ dir_type = "static_cudalib_dir" if static else "cudalib_dir"
101
103
  return get_cuda_paths()[dir_type].by
102
104
 
103
105
 
104
106
  def test():
105
- """Test library lookup. Path info is printed to stdout.
106
- """
107
+ """Test library lookup. Path info is printed to stdout."""
107
108
  failed = False
108
109
 
109
110
  # Check for the driver
110
111
  try:
111
112
  dlloader, candidates = locate_driver_and_loader()
112
- print('Finding driver from candidates:')
113
+ print("Finding driver from candidates:")
113
114
  for location in candidates:
114
- print(f'\t{location}')
115
- print(f'Using loader {dlloader}')
116
- print('\tTrying to load driver', end='...')
115
+ print(f"\t{location}")
116
+ print(f"Using loader {dlloader}")
117
+ print("\tTrying to load driver", end="...")
117
118
  dll, path = load_driver(dlloader, candidates)
118
- print('\tok')
119
- print(f'\t\tLoaded from {path}')
119
+ print("\tok")
120
+ print(f"\t\tLoaded from {path}")
120
121
  except CudaSupportError as e:
121
- print(f'\tERROR: failed to open driver: {e}')
122
+ print(f"\tERROR: failed to open driver: {e}")
122
123
  failed = True
123
124
 
124
125
  # Find the absolute location of the driver on Linux. Various driver-related
@@ -127,9 +128,9 @@ def test():
127
128
  # Providing the absolute location of the driver indicates its version
128
129
  # number in the soname (e.g. "libcuda.so.530.30.02"), which can be used to
129
130
  # look up whether the driver was intended for "native" Linux.
130
- if sys.platform == 'linux' and not failed:
131
+ if sys.platform == "linux" and not failed:
131
132
  pid = os.getpid()
132
- mapsfile = os.path.join(os.path.sep, 'proc', f'{pid}', 'maps')
133
+ mapsfile = os.path.join(os.path.sep, "proc", f"{pid}", "maps")
133
134
  try:
134
135
  with open(mapsfile) as f:
135
136
  maps = f.read()
@@ -140,58 +141,61 @@ def test():
140
141
  # It's helpful to report that this went wrong to the user, but we
141
142
  # don't set failed to True because this doesn't have any connection
142
143
  # to actual CUDA functionality.
143
- print(f'\tERROR: Could not open {mapsfile} to determine absolute '
144
- 'path to libcuda.so')
144
+ print(
145
+ f"\tERROR: Could not open {mapsfile} to determine absolute "
146
+ "path to libcuda.so"
147
+ )
145
148
  else:
146
149
  # In this case we could read the maps, so we can report the
147
150
  # relevant ones to the user
148
- locations = set(s for s in maps.split() if 'libcuda.so' in s)
149
- print('\tMapped libcuda.so paths:')
151
+ locations = set(s for s in maps.split() if "libcuda.so" in s)
152
+ print("\tMapped libcuda.so paths:")
150
153
  for location in locations:
151
- print(f'\t\t{location}')
154
+ print(f"\t\t{location}")
152
155
 
153
156
  # Checks for dynamic libraries
154
- libs = 'nvvm nvrtc cudart'.split()
157
+ libs = "nvvm nvrtc cudart".split()
155
158
  for lib in libs:
156
159
  path = get_cudalib(lib)
157
- print('Finding {} from {}'.format(lib, _get_source_variable(lib)))
158
- print('\tLocated at', path)
160
+ print("Finding {} from {}".format(lib, _get_source_variable(lib)))
161
+ print("\tLocated at", path)
159
162
 
160
163
  try:
161
- print('\tTrying to open library', end='...')
164
+ print("\tTrying to open library", end="...")
162
165
  open_cudalib(lib)
163
- print('\tok')
166
+ print("\tok")
164
167
  except OSError as e:
165
- print('\tERROR: failed to open %s:\n%s' % (lib, e))
168
+ print("\tERROR: failed to open %s:\n%s" % (lib, e))
166
169
  failed = True
167
170
 
168
171
  # Check for cudadevrt (the only static library)
169
- lib = 'cudadevrt'
172
+ lib = "cudadevrt"
170
173
  path = get_cudalib(lib, static=True)
171
- print('Finding {} from {}'.format(lib, _get_source_variable(lib,
172
- static=True)))
173
- print('\tLocated at', path)
174
+ print(
175
+ "Finding {} from {}".format(lib, _get_source_variable(lib, static=True))
176
+ )
177
+ print("\tLocated at", path)
174
178
 
175
179
  try:
176
- print('\tChecking library', end='...')
180
+ print("\tChecking library", end="...")
177
181
  check_static_lib(path)
178
- print('\tok')
182
+ print("\tok")
179
183
  except FileNotFoundError as e:
180
- print('\tERROR: failed to find %s:\n%s' % (lib, e))
184
+ print("\tERROR: failed to find %s:\n%s" % (lib, e))
181
185
  failed = True
182
186
 
183
187
  # Check for libdevice
184
- where = _get_source_variable('libdevice')
185
- print(f'Finding libdevice from {where}')
188
+ where = _get_source_variable("libdevice")
189
+ print(f"Finding libdevice from {where}")
186
190
  path = get_libdevice()
187
- print('\tLocated at', path)
191
+ print("\tLocated at", path)
188
192
 
189
193
  try:
190
- print('\tChecking library', end='...')
194
+ print("\tChecking library", end="...")
191
195
  check_static_lib(path)
192
- print('\tok')
196
+ print("\tok")
193
197
  except FileNotFoundError as e:
194
- print('\tERROR: failed to find %s:\n%s' % (lib, e))
198
+ print("\tERROR: failed to find %s:\n%s" % (lib, e))
195
199
  failed = True
196
200
 
197
201
  # Check cuda include paths
@@ -199,16 +203,16 @@ def test():
199
203
  print("Include directory configuration variable:")
200
204
  print(f"\tCUDA_INCLUDE_PATH={config.CUDA_INCLUDE_PATH}")
201
205
 
202
- where = _get_source_variable('include_dir')
203
- print(f'Finding include directory from {where}')
206
+ where = _get_source_variable("include_dir")
207
+ print(f"Finding include directory from {where}")
204
208
  include = get_cuda_include_dir()
205
- print('\tLocated at', include)
209
+ print("\tLocated at", include)
206
210
  try:
207
- print('\tChecking include directory', end='...')
211
+ print("\tChecking include directory", end="...")
208
212
  check_cuda_include_dir(include)
209
- print('\tok')
213
+ print("\tok")
210
214
  except FileNotFoundError as e:
211
- print('\tERROR: failed to find cuda include directory:\n%s' % e)
215
+ print("\tERROR: failed to find cuda include directory:\n%s" % e)
212
216
  failed = True
213
217
 
214
218
  return not failed
@@ -7,11 +7,26 @@ class LinkableCode:
7
7
  :param data: A buffer containing the data to link.
8
8
  :param name: The name of the file to be referenced in any compilation or
9
9
  linking errors that may be produced.
10
+ :param setup_callback: A function called prior to the launch of a kernel
11
+ contained within a module that has this code object
12
+ linked into it.
13
+ :param teardown_callback: A function called just prior to the unloading of
14
+ a module that has this code object linked into
15
+ it.
10
16
  """
11
17
 
12
- def __init__(self, data, name=None):
18
+ def __init__(
19
+ self, data, name=None, setup_callback=None, teardown_callback=None
20
+ ):
21
+ if setup_callback and not callable(setup_callback):
22
+ raise TypeError("setup_callback must be callable")
23
+ if teardown_callback and not callable(teardown_callback):
24
+ raise TypeError("teardown_callback must be callable")
25
+
13
26
  self.data = data
14
27
  self._name = name
28
+ self.setup_callback = setup_callback
29
+ self.teardown_callback = teardown_callback
15
30
 
16
31
  @property
17
32
  def name(self):
@@ -1,24 +1,26 @@
1
1
  from numba import config
2
2
  from . import enums
3
+
3
4
  if config.CUDA_USE_NVIDIA_BINDING:
4
5
  from cuda import cuda
6
+
5
7
  jitty = cuda.CUjitInputType
6
8
  FILE_EXTENSION_MAP = {
7
- 'o': jitty.CU_JIT_INPUT_OBJECT,
8
- 'ptx': jitty.CU_JIT_INPUT_PTX,
9
- 'a': jitty.CU_JIT_INPUT_LIBRARY,
10
- 'lib': jitty.CU_JIT_INPUT_LIBRARY,
11
- 'cubin': jitty.CU_JIT_INPUT_CUBIN,
12
- 'fatbin': jitty.CU_JIT_INPUT_FATBINARY,
13
- 'ltoir': jitty.CU_JIT_INPUT_NVVM,
9
+ "o": jitty.CU_JIT_INPUT_OBJECT,
10
+ "ptx": jitty.CU_JIT_INPUT_PTX,
11
+ "a": jitty.CU_JIT_INPUT_LIBRARY,
12
+ "lib": jitty.CU_JIT_INPUT_LIBRARY,
13
+ "cubin": jitty.CU_JIT_INPUT_CUBIN,
14
+ "fatbin": jitty.CU_JIT_INPUT_FATBINARY,
15
+ "ltoir": jitty.CU_JIT_INPUT_NVVM,
14
16
  }
15
17
  else:
16
18
  FILE_EXTENSION_MAP = {
17
- 'o': enums.CU_JIT_INPUT_OBJECT,
18
- 'ptx': enums.CU_JIT_INPUT_PTX,
19
- 'a': enums.CU_JIT_INPUT_LIBRARY,
20
- 'lib': enums.CU_JIT_INPUT_LIBRARY,
21
- 'cubin': enums.CU_JIT_INPUT_CUBIN,
22
- 'fatbin': enums.CU_JIT_INPUT_FATBINARY,
23
- 'ltoir': enums.CU_JIT_INPUT_NVVM,
19
+ "o": enums.CU_JIT_INPUT_OBJECT,
20
+ "ptx": enums.CU_JIT_INPUT_PTX,
21
+ "a": enums.CU_JIT_INPUT_LIBRARY,
22
+ "lib": enums.CU_JIT_INPUT_LIBRARY,
23
+ "cubin": enums.CU_JIT_INPUT_CUBIN,
24
+ "fatbin": enums.CU_JIT_INPUT_FATBINARY,
25
+ "ltoir": enums.CU_JIT_INPUT_NVVM,
24
26
  }