numba-cuda 0.8.1__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (227)
  1. _numba_cuda_redirector.py +17 -13
  2. numba_cuda/VERSION +1 -1
  3. numba_cuda/_version.py +4 -1
  4. numba_cuda/numba/cuda/__init__.py +6 -2
  5. numba_cuda/numba/cuda/api.py +129 -86
  6. numba_cuda/numba/cuda/api_util.py +3 -3
  7. numba_cuda/numba/cuda/args.py +12 -16
  8. numba_cuda/numba/cuda/cg.py +6 -6
  9. numba_cuda/numba/cuda/codegen.py +74 -43
  10. numba_cuda/numba/cuda/compiler.py +232 -113
  11. numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
  12. numba_cuda/numba/cuda/cuda_fp16.h +661 -661
  13. numba_cuda/numba/cuda/cuda_fp16.hpp +3 -3
  14. numba_cuda/numba/cuda/cuda_paths.py +291 -99
  15. numba_cuda/numba/cuda/cudadecl.py +125 -69
  16. numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
  17. numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
  18. numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
  19. numba_cuda/numba/cuda/cudadrv/driver.py +460 -297
  20. numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
  21. numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
  22. numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
  23. numba_cuda/numba/cuda/cudadrv/error.py +6 -2
  24. numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
  25. numba_cuda/numba/cuda/cudadrv/linkable_code.py +16 -1
  26. numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
  27. numba_cuda/numba/cuda/cudadrv/nvrtc.py +138 -29
  28. numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
  29. numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
  30. numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
  31. numba_cuda/numba/cuda/cudaimpl.py +317 -233
  32. numba_cuda/numba/cuda/cudamath.py +1 -1
  33. numba_cuda/numba/cuda/debuginfo.py +8 -6
  34. numba_cuda/numba/cuda/decorators.py +75 -45
  35. numba_cuda/numba/cuda/descriptor.py +1 -1
  36. numba_cuda/numba/cuda/device_init.py +69 -18
  37. numba_cuda/numba/cuda/deviceufunc.py +143 -98
  38. numba_cuda/numba/cuda/dispatcher.py +300 -213
  39. numba_cuda/numba/cuda/errors.py +13 -10
  40. numba_cuda/numba/cuda/extending.py +1 -1
  41. numba_cuda/numba/cuda/initialize.py +5 -3
  42. numba_cuda/numba/cuda/intrinsic_wrapper.py +3 -3
  43. numba_cuda/numba/cuda/intrinsics.py +31 -27
  44. numba_cuda/numba/cuda/kernels/reduction.py +13 -13
  45. numba_cuda/numba/cuda/kernels/transpose.py +3 -6
  46. numba_cuda/numba/cuda/libdevice.py +317 -317
  47. numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
  48. numba_cuda/numba/cuda/locks.py +16 -0
  49. numba_cuda/numba/cuda/mathimpl.py +62 -57
  50. numba_cuda/numba/cuda/models.py +1 -5
  51. numba_cuda/numba/cuda/nvvmutils.py +103 -88
  52. numba_cuda/numba/cuda/printimpl.py +9 -5
  53. numba_cuda/numba/cuda/random.py +46 -36
  54. numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
  55. numba_cuda/numba/cuda/runtime/__init__.py +1 -1
  56. numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
  57. numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
  58. numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
  59. numba_cuda/numba/cuda/runtime/nrt.py +48 -43
  60. numba_cuda/numba/cuda/simulator/__init__.py +22 -12
  61. numba_cuda/numba/cuda/simulator/api.py +38 -22
  62. numba_cuda/numba/cuda/simulator/compiler.py +2 -2
  63. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
  64. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
  65. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
  66. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
  67. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
  68. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
  69. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
  70. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
  71. numba_cuda/numba/cuda/simulator/kernel.py +43 -34
  72. numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
  73. numba_cuda/numba/cuda/simulator/reduction.py +1 -0
  74. numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
  75. numba_cuda/numba/cuda/simulator_init.py +2 -4
  76. numba_cuda/numba/cuda/stubs.py +139 -102
  77. numba_cuda/numba/cuda/target.py +64 -47
  78. numba_cuda/numba/cuda/testing.py +24 -19
  79. numba_cuda/numba/cuda/tests/__init__.py +14 -12
  80. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
  81. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
  82. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
  83. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
  84. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
  85. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
  86. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
  87. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
  88. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
  89. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
  90. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
  91. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
  92. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
  93. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
  94. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
  95. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
  96. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
  97. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
  98. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
  99. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
  100. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
  101. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
  102. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
  103. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
  104. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
  105. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
  106. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
  107. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
  108. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
  109. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
  110. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
  111. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +7 -6
  112. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
  113. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
  114. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
  115. numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
  116. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
  117. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
  118. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
  119. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +57 -21
  120. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
  121. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
  122. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
  123. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
  124. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
  125. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
  126. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
  127. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
  128. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
  129. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
  130. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
  131. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
  132. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
  133. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
  134. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +31 -28
  135. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
  136. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
  137. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +6 -7
  138. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
  139. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
  140. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +19 -12
  141. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
  142. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
  143. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
  144. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
  145. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
  146. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
  147. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
  148. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
  149. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
  150. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
  151. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
  152. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
  153. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
  154. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
  155. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +6 -6
  156. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
  157. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
  158. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
  159. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
  160. numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
  161. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
  162. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
  163. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
  164. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
  165. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
  166. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
  167. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
  168. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
  169. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
  170. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
  171. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
  172. numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
  173. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
  174. numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
  175. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
  176. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
  177. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
  178. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
  179. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
  180. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
  181. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
  182. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
  183. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
  184. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
  185. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
  186. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
  187. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
  188. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
  189. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
  190. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
  191. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
  192. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
  193. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
  194. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
  195. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +31 -25
  196. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
  197. numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
  198. numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
  199. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
  200. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
  201. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
  202. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
  203. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
  204. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
  205. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
  206. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
  207. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
  208. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
  209. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
  210. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
  211. numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
  212. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
  213. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
  214. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
  215. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
  216. numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
  217. numba_cuda/numba/cuda/types.py +5 -2
  218. numba_cuda/numba/cuda/ufuncs.py +382 -362
  219. numba_cuda/numba/cuda/utils.py +2 -2
  220. numba_cuda/numba/cuda/vector_types.py +2 -2
  221. numba_cuda/numba/cuda/vectorizers.py +37 -32
  222. {numba_cuda-0.8.1.dist-info → numba_cuda-0.9.0.dist-info}/METADATA +1 -1
  223. numba_cuda-0.9.0.dist-info/RECORD +253 -0
  224. {numba_cuda-0.8.1.dist-info → numba_cuda-0.9.0.dist-info}/WHEEL +1 -1
  225. numba_cuda-0.8.1.dist-info/RECORD +0 -251
  226. {numba_cuda-0.8.1.dist-info → numba_cuda-0.9.0.dist-info}/licenses/LICENSE +0 -0
  227. {numba_cuda-0.8.1.dist-info → numba_cuda-0.9.0.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,7 @@
1
1
  """
2
2
  This script specifies all PTX special objects.
3
3
  """
4
+
4
5
  import numpy as np
5
6
  from collections import defaultdict
6
7
  import functools
@@ -9,12 +10,13 @@ from inspect import Signature, Parameter
9
10
 
10
11
 
11
12
  class Stub(object):
12
- '''
13
+ """
13
14
  A stub object to represent special objects that are meaningless
14
15
  outside the context of a CUDA kernel
15
- '''
16
- _description_ = '<ptx special value>'
17
- __slots__ = () # don't allocate __dict__
16
+ """
17
+
18
+ _description_ = "<ptx special value>"
19
+ __slots__ = () # don't allocate __dict__
18
20
 
19
21
  def __new__(cls):
20
22
  raise NotImplementedError("%s is not instantiable" % cls)
@@ -24,23 +26,26 @@ class Stub(object):
24
26
 
25
27
 
26
28
  def stub_function(fn):
27
- '''
29
+ """
28
30
  A stub function to represent special functions that are meaningless
29
31
  outside the context of a CUDA kernel
30
- '''
32
+ """
33
+
31
34
  @functools.wraps(fn)
32
35
  def wrapped(*args, **kwargs):
33
36
  raise NotImplementedError("%s cannot be called from host code" % fn)
37
+
34
38
  return wrapped
35
39
 
36
40
 
37
- #-------------------------------------------------------------------------------
41
+ # -------------------------------------------------------------------------------
38
42
  # Thread and grid indices and dimensions
39
43
 
40
44
 
41
45
  class Dim3(Stub):
42
- '''A triple, (x, y, z)'''
43
- _description_ = '<Dim3>'
46
+ """A triple, (x, y, z)"""
47
+
48
+ _description_ = "<Dim3>"
44
49
 
45
50
  @property
46
51
  def x(self):
@@ -56,68 +61,76 @@ class Dim3(Stub):
56
61
 
57
62
 
58
63
  class threadIdx(Dim3):
59
- '''
64
+ """
60
65
  The thread indices in the current thread block. Each index is an integer
61
66
  spanning the range from 0 inclusive to the corresponding value of the
62
67
  attribute in :attr:`numba.cuda.blockDim` exclusive.
63
- '''
64
- _description_ = '<threadIdx.{x,y,z}>'
68
+ """
69
+
70
+ _description_ = "<threadIdx.{x,y,z}>"
65
71
 
66
72
 
67
73
  class blockIdx(Dim3):
68
- '''
74
+ """
69
75
  The block indices in the grid of thread blocks. Each index is an integer
70
76
  spanning the range from 0 inclusive to the corresponding value of the
71
77
  attribute in :attr:`numba.cuda.gridDim` exclusive.
72
- '''
73
- _description_ = '<blockIdx.{x,y,z}>'
78
+ """
79
+
80
+ _description_ = "<blockIdx.{x,y,z}>"
74
81
 
75
82
 
76
83
  class blockDim(Dim3):
77
- '''
84
+ """
78
85
  The shape of a block of threads, as declared when instantiating the kernel.
79
86
  This value is the same for all threads in a given kernel launch, even if
80
87
  they belong to different blocks (i.e. each block is "full").
81
- '''
82
- _description_ = '<blockDim.{x,y,z}>'
88
+ """
89
+
90
+ _description_ = "<blockDim.{x,y,z}>"
83
91
 
84
92
 
85
93
  class gridDim(Dim3):
86
- '''
94
+ """
87
95
  The shape of the grid of blocks. This value is the same for all threads in
88
96
  a given kernel launch.
89
- '''
90
- _description_ = '<gridDim.{x,y,z}>'
97
+ """
98
+
99
+ _description_ = "<gridDim.{x,y,z}>"
91
100
 
92
101
 
93
102
  class warpsize(Stub):
94
- '''
103
+ """
95
104
  The size of a warp. All architectures implemented to date have a warp size
96
105
  of 32.
97
- '''
98
- _description_ = '<warpsize>'
106
+ """
107
+
108
+ _description_ = "<warpsize>"
99
109
 
100
110
 
101
111
  class laneid(Stub):
102
- '''
112
+ """
103
113
  This thread's lane within a warp. Ranges from 0 to
104
114
  :attr:`numba.cuda.warpsize` - 1.
105
- '''
106
- _description_ = '<laneid>'
115
+ """
116
+
117
+ _description_ = "<laneid>"
107
118
 
108
119
 
109
- #-------------------------------------------------------------------------------
120
+ # -------------------------------------------------------------------------------
110
121
  # Array creation
111
122
 
123
+
112
124
  class shared(Stub):
113
- '''
125
+ """
114
126
  Shared memory namespace
115
- '''
116
- _description_ = '<shared>'
127
+ """
128
+
129
+ _description_ = "<shared>"
117
130
 
118
131
  @stub_function
119
132
  def array(shape, dtype):
120
- '''
133
+ """
121
134
  Allocate a shared array of the given *shape* and *type*. *shape* is
122
135
  either an integer or a tuple of integers representing the array's
123
136
  dimensions. *type* is a :ref:`Numba type <numba-types>` of the
@@ -125,83 +138,89 @@ class shared(Stub):
125
138
 
126
139
  The returned array-like object can be read and written to like any
127
140
  normal device array (e.g. through indexing).
128
- '''
141
+ """
129
142
 
130
143
 
131
144
  class local(Stub):
132
- '''
145
+ """
133
146
  Local memory namespace
134
- '''
135
- _description_ = '<local>'
147
+ """
148
+
149
+ _description_ = "<local>"
136
150
 
137
151
  @stub_function
138
152
  def array(shape, dtype):
139
- '''
153
+ """
140
154
  Allocate a local array of the given *shape* and *type*. The array is
141
155
  private to the current thread, and resides in global memory. An
142
156
  array-like object is returned which can be read and written to like any
143
157
  standard array (e.g. through indexing).
144
- '''
158
+ """
145
159
 
146
160
 
147
161
  class const(Stub):
148
- '''
162
+ """
149
163
  Constant memory namespace
150
- '''
164
+ """
151
165
 
152
166
  @stub_function
153
167
  def array_like(ndarray):
154
- '''
168
+ """
155
169
  Create a const array from *ndarray*. The resulting const array will have
156
170
  the same shape, type, and values as *ndarray*.
157
- '''
171
+ """
158
172
 
159
173
 
160
174
  # -------------------------------------------------------------------------------
161
175
  # warp level operations
162
176
 
177
+
163
178
  class syncwarp(Stub):
164
- '''
179
+ """
165
180
  syncwarp(mask=0xFFFFFFFF)
166
181
 
167
182
  Synchronizes a masked subset of threads in a warp.
168
- '''
169
- _description_ = '<warp_sync()>'
183
+ """
184
+
185
+ _description_ = "<warp_sync()>"
170
186
 
171
187
 
172
188
  class shfl_sync_intrinsic(Stub):
173
- '''
189
+ """
174
190
  shfl_sync_intrinsic(mask, mode, value, mode_offset, clamp)
175
191
 
176
192
  Nvvm intrinsic for shuffling data across a warp
177
193
  docs.nvidia.com/cuda/nvvm-ir-spec/index.html#nvvm-intrin-warp-level-datamove
178
- '''
179
- _description_ = '<shfl_sync()>'
194
+ """
195
+
196
+ _description_ = "<shfl_sync()>"
180
197
 
181
198
 
182
199
  class vote_sync_intrinsic(Stub):
183
- '''
200
+ """
184
201
  vote_sync_intrinsic(mask, mode, predicate)
185
202
 
186
203
  Nvvm intrinsic for performing a reduce and broadcast across a warp
187
204
  docs.nvidia.com/cuda/nvvm-ir-spec/index.html#nvvm-intrin-warp-level-vote
188
- '''
189
- _description_ = '<vote_sync()>'
205
+ """
206
+
207
+ _description_ = "<vote_sync()>"
190
208
 
191
209
 
192
210
  class match_any_sync(Stub):
193
- '''
211
+ """
194
212
  match_any_sync(mask, value)
195
213
 
196
214
  Nvvm intrinsic for performing a compare and broadcast across a warp.
197
215
  Returns a mask of threads that have the same value as the given value from
198
216
  within the masked warp.
199
- '''
200
- _description_ = '<match_any_sync()>'
217
+ """
218
+
219
+ _description_ = "<match_any_sync()>"
201
220
 
202
221
 
203
222
  class match_all_sync(Stub):
204
- '''
223
+ """
205
224
  match_all_sync(mask, value)
206
225
 
207
226
  Nvvm intrinsic for performing a compare and broadcast across a warp.
@@ -209,12 +228,13 @@ class match_all_sync(Stub):
209
228
  same value as the given value from within the masked warp, if they
210
229
  all have the same value, otherwise it is 0. Pred is a boolean of whether
211
230
  or not all threads in the masked warp have the same value.
212
- '''
213
- _description_ = '<match_all_sync()>'
231
+ """
232
+
233
+ _description_ = "<match_all_sync()>"
214
234
 
215
235
 
216
236
  class activemask(Stub):
217
- '''
237
+ """
218
238
  activemask()
219
239
 
220
240
  Returns a 32-bit integer mask of all currently active threads in the
@@ -222,47 +242,54 @@ class activemask(Stub):
222
242
  activemask() is called. Inactive threads are represented by 0 bits in the
223
243
  returned mask. Threads which have exited the kernel are always marked as
224
244
  inactive.
225
- '''
226
- _description_ = '<activemask()>'
245
+ """
246
+
247
+ _description_ = "<activemask()>"
227
248
 
228
249
 
229
250
  class lanemask_lt(Stub):
230
- '''
251
+ """
231
252
  lanemask_lt()
232
253
 
233
254
  Returns a 32-bit integer mask of all lanes (including inactive ones) with
234
255
  ID less than the current lane.
235
- '''
236
- _description_ = '<lanemask_lt()>'
256
+ """
257
+
258
+ _description_ = "<lanemask_lt()>"
237
259
 
238
260
 
239
261
  # -------------------------------------------------------------------------------
240
262
  # memory fences
241
263
 
264
+
242
265
  class threadfence_block(Stub):
243
- '''
266
+ """
244
267
  A memory fence at thread block level
245
- '''
246
- _description_ = '<threadfence_block()>'
268
+ """
269
+
270
+ _description_ = "<threadfence_block()>"
247
271
 
248
272
 
249
273
  class threadfence_system(Stub):
250
- '''
274
+ """
251
275
  A memory fence at system level: across devices
252
- '''
253
- _description_ = '<threadfence_system()>'
276
+ """
277
+
278
+ _description_ = "<threadfence_system()>"
254
279
 
255
280
 
256
281
  class threadfence(Stub):
257
- '''
282
+ """
258
283
  A memory fence at device level
259
- '''
260
- _description_ = '<threadfence()>'
284
+ """
285
+
286
+ _description_ = "<threadfence()>"
261
287
 
262
288
 
263
- #-------------------------------------------------------------------------------
289
+ # -------------------------------------------------------------------------------
264
290
  # bit manipulation
265
291
 
292
+
266
293
  class popc(Stub):
267
294
  """
268
295
  popc(x)
@@ -297,9 +324,10 @@ class ffs(Stub):
297
324
  """
298
325
 
299
326
 
300
- #-------------------------------------------------------------------------------
327
+ # -------------------------------------------------------------------------------
301
328
  # comparison and selection instructions
302
329
 
330
+
303
331
  class selp(Stub):
304
332
  """
305
333
  selp(a, b, c)
@@ -309,9 +337,10 @@ class selp(Stub):
309
337
  """
310
338
 
311
339
 
312
- #-------------------------------------------------------------------------------
340
+ # -------------------------------------------------------------------------------
313
341
  # single / double precision arithmetic
314
342
 
343
+
315
344
  class fma(Stub):
316
345
  """
317
346
  fma(a, b, c)
@@ -321,20 +350,21 @@ class fma(Stub):
321
350
 
322
351
 
323
352
  class cbrt(Stub):
324
- """"
353
+ """ "
325
354
  cbrt(a)
326
355
 
327
356
  Perform the cube root operation.
328
357
  """
329
358
 
330
359
 
331
- #-------------------------------------------------------------------------------
360
+ # -------------------------------------------------------------------------------
332
361
  # atomic
333
362
 
363
+
334
364
  class atomic(Stub):
335
- """Namespace for atomic operations
336
- """
337
- _description_ = '<atomic>'
365
+ """Namespace for atomic operations"""
366
+
367
+ _description_ = "<atomic>"
338
368
 
339
369
  class add(Stub):
340
370
  """add(ary, idx, val)
@@ -401,8 +431,7 @@ class atomic(Stub):
401
431
 
402
432
  Performs::
403
433
 
404
- ary[idx] = (val if (ary[idx] == 0) or
405
- (ary[idx] > val) else ary[idx] - 1)
434
+ ary[idx] = val if (ary[idx] == 0) or (ary[idx] > val) else ary[idx] - 1
406
435
 
407
436
  Supported on uint32, and uint64 operands only.
408
437
 
@@ -497,26 +526,29 @@ class atomic(Stub):
497
526
  """
498
527
 
499
528
 
500
- #-------------------------------------------------------------------------------
529
+ # -------------------------------------------------------------------------------
501
530
  # timers
502
531
 
532
+
503
533
  class nanosleep(Stub):
504
- '''
534
+ """
505
535
  nanosleep(ns)
506
536
 
507
537
  Suspends the thread for a sleep duration approximately close to the delay
508
538
  `ns`, specified in nanoseconds.
509
- '''
510
- _description_ = '<nansleep()>'
539
+ """
540
+
541
+ _description_ = "<nansleep()>"
511
542
 
512
- #-------------------------------------------------------------------------------
543
+
544
+ # -------------------------------------------------------------------------------
513
545
  # Floating point 16
514
546
 
515
547
 
516
548
  class fp16(Stub):
517
- """Namespace for fp16 operations
518
- """
519
- _description_ = '<fp16>'
549
+ """Namespace for fp16 operations"""
550
+
551
+ _description_ = "<fp16>"
520
552
 
521
553
  class hadd(Stub):
522
554
  """hadd(a, b)
@@ -817,9 +849,10 @@ class fp16(Stub):
817
849
  """
818
850
 
819
851
 
820
- #-------------------------------------------------------------------------------
852
+ # -------------------------------------------------------------------------------
821
853
  # vector types
822
854
 
855
+
823
856
  def make_vector_type_stubs():
824
857
  """Make user facing objects for vector types"""
825
858
  vector_type_stubs = []
@@ -833,7 +866,7 @@ def make_vector_type_stubs():
833
866
  "uint32",
834
867
  "uint64",
835
868
  "float32",
836
- "float64"
869
+ "float64",
837
870
  )
838
871
  vector_type_element_counts = (1, 2, 3, 4)
839
872
  vector_type_attribute_names = ("x", "y", "z", "w")
@@ -845,21 +878,25 @@ def make_vector_type_stubs():
845
878
  attr_names = vector_type_attribute_names[:nelem]
846
879
 
847
880
  vector_type_stub = type(
848
- type_name, (Stub,),
881
+ type_name,
882
+ (Stub,),
849
883
  {
850
884
  **{attr: lambda self: None for attr in attr_names},
851
885
  **{
852
886
  "_description_": f"<{type_name}>",
853
- "__signature__": Signature(parameters=[
854
- Parameter(
855
- name=attr_name, kind=Parameter.POSITIONAL_ONLY
856
- ) for attr_name in attr_names[:nelem]
857
- ]),
887
+ "__signature__": Signature(
888
+ parameters=[
889
+ Parameter(
890
+ name=attr_name, kind=Parameter.POSITIONAL_ONLY
891
+ )
892
+ for attr_name in attr_names[:nelem]
893
+ ]
894
+ ),
858
895
  "__doc__": f"A stub for {type_name} to be used in "
859
- "CUDA kernels."
896
+ "CUDA kernels.",
860
897
  },
861
- **{"aliases": []}
862
- }
898
+ **{"aliases": []},
899
+ },
863
900
  )
864
901
  vector_type_stubs.append(vector_type_stub)
865
902
  return vector_type_stubs
@@ -884,7 +921,7 @@ def map_vector_type_stubs_to_alias(vector_type_stubs):
884
921
  "ulong": f"uint{np.dtype(np.uint).itemsize * 8}",
885
922
  "ulonglong": f"uint{np.dtype(np.ulonglong).itemsize * 8}",
886
923
  "float": f"float{np.dtype(np.single).itemsize * 8}",
887
- "double": f"float{np.dtype(np.double).itemsize * 8}"
924
+ "double": f"float{np.dtype(np.double).itemsize * 8}",
888
925
  }
889
926
 
890
927
  base_type_to_vector_type = defaultdict(list)