numba-cuda 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (227) hide show
  1. _numba_cuda_redirector.py +17 -13
  2. numba_cuda/VERSION +1 -1
  3. numba_cuda/_version.py +4 -1
  4. numba_cuda/numba/cuda/__init__.py +6 -2
  5. numba_cuda/numba/cuda/api.py +129 -86
  6. numba_cuda/numba/cuda/api_util.py +3 -3
  7. numba_cuda/numba/cuda/args.py +12 -16
  8. numba_cuda/numba/cuda/cg.py +6 -6
  9. numba_cuda/numba/cuda/codegen.py +74 -43
  10. numba_cuda/numba/cuda/compiler.py +232 -113
  11. numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
  12. numba_cuda/numba/cuda/cuda_fp16.h +661 -661
  13. numba_cuda/numba/cuda/cuda_fp16.hpp +3 -3
  14. numba_cuda/numba/cuda/cuda_paths.py +291 -99
  15. numba_cuda/numba/cuda/cudadecl.py +125 -69
  16. numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
  17. numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
  18. numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
  19. numba_cuda/numba/cuda/cudadrv/driver.py +463 -297
  20. numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
  21. numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
  22. numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
  23. numba_cuda/numba/cuda/cudadrv/error.py +6 -2
  24. numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
  25. numba_cuda/numba/cuda/cudadrv/linkable_code.py +16 -1
  26. numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
  27. numba_cuda/numba/cuda/cudadrv/nvrtc.py +138 -29
  28. numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
  29. numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
  30. numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
  31. numba_cuda/numba/cuda/cudaimpl.py +317 -233
  32. numba_cuda/numba/cuda/cudamath.py +1 -1
  33. numba_cuda/numba/cuda/debuginfo.py +8 -6
  34. numba_cuda/numba/cuda/decorators.py +75 -45
  35. numba_cuda/numba/cuda/descriptor.py +1 -1
  36. numba_cuda/numba/cuda/device_init.py +69 -18
  37. numba_cuda/numba/cuda/deviceufunc.py +143 -98
  38. numba_cuda/numba/cuda/dispatcher.py +300 -213
  39. numba_cuda/numba/cuda/errors.py +13 -10
  40. numba_cuda/numba/cuda/extending.py +1 -1
  41. numba_cuda/numba/cuda/initialize.py +5 -3
  42. numba_cuda/numba/cuda/intrinsic_wrapper.py +3 -3
  43. numba_cuda/numba/cuda/intrinsics.py +31 -27
  44. numba_cuda/numba/cuda/kernels/reduction.py +13 -13
  45. numba_cuda/numba/cuda/kernels/transpose.py +3 -6
  46. numba_cuda/numba/cuda/libdevice.py +317 -317
  47. numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
  48. numba_cuda/numba/cuda/locks.py +16 -0
  49. numba_cuda/numba/cuda/mathimpl.py +62 -57
  50. numba_cuda/numba/cuda/models.py +1 -5
  51. numba_cuda/numba/cuda/nvvmutils.py +103 -88
  52. numba_cuda/numba/cuda/printimpl.py +9 -5
  53. numba_cuda/numba/cuda/random.py +46 -36
  54. numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
  55. numba_cuda/numba/cuda/runtime/__init__.py +1 -1
  56. numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
  57. numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
  58. numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
  59. numba_cuda/numba/cuda/runtime/nrt.py +48 -43
  60. numba_cuda/numba/cuda/simulator/__init__.py +22 -12
  61. numba_cuda/numba/cuda/simulator/api.py +38 -22
  62. numba_cuda/numba/cuda/simulator/compiler.py +2 -2
  63. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
  64. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
  65. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
  66. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
  67. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
  68. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
  69. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
  70. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
  71. numba_cuda/numba/cuda/simulator/kernel.py +43 -34
  72. numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
  73. numba_cuda/numba/cuda/simulator/reduction.py +1 -0
  74. numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
  75. numba_cuda/numba/cuda/simulator_init.py +2 -4
  76. numba_cuda/numba/cuda/stubs.py +139 -102
  77. numba_cuda/numba/cuda/target.py +64 -47
  78. numba_cuda/numba/cuda/testing.py +24 -19
  79. numba_cuda/numba/cuda/tests/__init__.py +14 -12
  80. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
  81. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
  82. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
  83. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
  84. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
  85. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
  86. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
  87. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
  88. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
  89. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
  90. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
  91. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
  92. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
  93. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
  94. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
  95. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
  96. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
  97. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
  98. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
  99. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
  100. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
  101. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
  102. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
  103. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
  104. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
  105. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
  106. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
  107. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
  108. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
  109. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
  110. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
  111. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +7 -6
  112. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
  113. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
  114. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
  115. numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
  116. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
  117. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
  118. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
  119. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +57 -21
  120. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
  121. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
  122. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
  123. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
  124. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
  125. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
  126. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
  127. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
  128. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
  129. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
  130. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
  131. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
  132. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
  133. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
  134. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +31 -28
  135. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
  136. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
  137. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +6 -7
  138. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
  139. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
  140. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +19 -12
  141. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
  142. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
  143. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
  144. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
  145. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
  146. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
  147. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
  148. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
  149. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
  150. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
  151. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
  152. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
  153. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
  154. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
  155. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +6 -6
  156. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
  157. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
  158. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
  159. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
  160. numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
  161. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
  162. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
  163. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
  164. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
  165. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
  166. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
  167. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
  168. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
  169. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
  170. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
  171. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
  172. numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
  173. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
  174. numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
  175. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
  176. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
  177. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
  178. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
  179. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
  180. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
  181. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
  182. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
  183. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
  184. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
  185. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
  186. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
  187. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
  188. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
  189. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
  190. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
  191. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
  192. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
  193. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
  194. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
  195. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +31 -25
  196. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
  197. numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
  198. numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
  199. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
  200. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
  201. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
  202. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
  203. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
  204. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
  205. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
  206. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
  207. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
  208. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
  209. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
  210. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
  211. numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
  212. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
  213. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
  214. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
  215. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
  216. numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
  217. numba_cuda/numba/cuda/types.py +5 -2
  218. numba_cuda/numba/cuda/ufuncs.py +382 -362
  219. numba_cuda/numba/cuda/utils.py +2 -2
  220. numba_cuda/numba/cuda/vector_types.py +2 -2
  221. numba_cuda/numba/cuda/vectorizers.py +37 -32
  222. {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/METADATA +1 -1
  223. numba_cuda-0.9.0.dist-info/RECORD +253 -0
  224. {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/WHEEL +1 -1
  225. numba_cuda-0.8.0.dist-info/RECORD +0 -251
  226. {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/licenses/LICENSE +0 -0
  227. {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
- '''
1
+ """
2
2
  Contains CUDA API functions
3
- '''
3
+ """
4
4
 
5
5
  # Imports here bring together parts of the API from other modules, so some of
6
6
  # them appear unused.
@@ -15,7 +15,7 @@ from ..args import In, Out, InOut # noqa: F401
15
15
 
16
16
 
17
17
  def select_device(dev=0):
18
- assert dev == 0, 'Only a single device supported by the simulator'
18
+ assert dev == 0, "Only a single device supported by the simulator"
19
19
 
20
20
 
21
21
  def is_float16_supported():
@@ -23,10 +23,11 @@ def is_float16_supported():
23
23
 
24
24
 
25
25
  class stream(object):
26
- '''
26
+ """
27
27
  The stream API is supported in the simulator - however, all execution
28
28
  occurs synchronously, so synchronization requires no operation.
29
- '''
29
+ """
30
+
30
31
  @contextmanager
31
32
  def auto_synchronize(self):
32
33
  yield
@@ -62,9 +63,9 @@ def declare_device(*args, **kwargs):
62
63
 
63
64
 
64
65
  def detect():
65
- print('Found 1 CUDA devices')
66
- print('id %d %20s %40s' % (0, 'SIMULATOR', '[SUPPORTED]'))
67
- print('%40s: 5.0' % 'compute capability')
66
+ print("Found 1 CUDA devices")
67
+ print("id %d %20s %40s" % (0, "SIMULATOR", "[SUPPORTED]"))
68
+ print("%40s: 5.0" % "compute capability")
68
69
 
69
70
 
70
71
  def list_devices():
@@ -73,11 +74,13 @@ def list_devices():
73
74
 
74
75
  # Events
75
76
 
77
+
76
78
  class Event(object):
77
- '''
79
+ """
78
80
  The simulator supports the event API, but they do not record timing info,
79
81
  and all simulation is synchronous. Execution time is not recorded.
80
- '''
82
+ """
83
+
81
84
  def record(self, stream=0):
82
85
  pass
83
86
 
@@ -88,35 +91,48 @@ class Event(object):
88
91
  pass
89
92
 
90
93
  def elapsed_time(self, event):
91
- warn('Simulator timings are bogus')
94
+ warn("Simulator timings are bogus")
92
95
  return 0.0
93
96
 
94
97
 
95
98
  event = Event
96
99
 
97
100
 
98
- def jit(func_or_sig=None, device=False, debug=None, argtypes=None,
99
- inline=False, restype=None, fastmath=False, link=None,
100
- boundscheck=None, opt=None, cache=None
101
- ):
101
+ def jit(
102
+ func_or_sig=None,
103
+ device=False,
104
+ debug=None,
105
+ argtypes=None,
106
+ inline=False,
107
+ restype=None,
108
+ fastmath=False,
109
+ link=None,
110
+ boundscheck=None,
111
+ opt=None,
112
+ cache=None,
113
+ ):
102
114
  # Here for API compatibility
103
115
  if boundscheck:
104
116
  raise NotImplementedError("bounds checking is not supported for CUDA")
105
117
 
106
118
  if link is not None:
107
- raise NotImplementedError('Cannot link PTX in the simulator')
119
+ raise NotImplementedError("Cannot link PTX in the simulator")
108
120
 
109
121
  debug = config.CUDA_DEBUGINFO_DEFAULT if debug is None else debug
110
122
 
111
123
  # Check for first argument specifying types - in that case the
112
124
  # decorator is not being passed a function
113
- if (func_or_sig is None or is_signature(func_or_sig)
114
- or isinstance(func_or_sig, list)):
125
+ if (
126
+ func_or_sig is None
127
+ or is_signature(func_or_sig)
128
+ or isinstance(func_or_sig, list)
129
+ ):
130
+
115
131
  def jitwrapper(fn):
116
- return FakeCUDAKernel(fn,
117
- device=device,
118
- fastmath=fastmath,
119
- debug=debug)
132
+ return FakeCUDAKernel(
133
+ fn, device=device, fastmath=fastmath, debug=debug
134
+ )
135
+
120
136
  return jitwrapper
121
137
  return FakeCUDAKernel(func_or_sig, device=device, debug=debug)
122
138
 
@@ -1,7 +1,7 @@
1
- '''
1
+ """
2
2
  The compiler is not implemented in the simulator. This module provides a stub
3
3
  to allow tests to import successfully.
4
- '''
4
+ """
5
5
 
6
6
  compile = None
7
7
  compile_for_current_device = None
@@ -1,2 +1,8 @@
1
- from numba.cuda.simulator.cudadrv import (devicearray, devices, driver, drvapi,
2
- error, nvvm)
1
+ from numba.cuda.simulator.cudadrv import (
2
+ devicearray,
3
+ devices,
4
+ driver,
5
+ drvapi,
6
+ error,
7
+ nvvm,
8
+ )
@@ -1,7 +1,8 @@
1
- '''
1
+ """
2
2
  The Device Array API is not implemented in the simulator. This module provides
3
3
  stubs to allow tests to import correctly.
4
- '''
4
+ """
5
+
5
6
  from contextlib import contextmanager
6
7
  from numba.np.numpy_support import numpy_version
7
8
 
@@ -12,37 +13,39 @@ DeviceRecord = None
12
13
  from_record_like = None
13
14
 
14
15
 
15
- errmsg_contiguous_buffer = ("Array contains non-contiguous buffer and cannot "
16
- "be transferred as a single memory region. Please "
17
- "ensure contiguous buffer with numpy "
18
- ".ascontiguousarray()")
16
+ errmsg_contiguous_buffer = (
17
+ "Array contains non-contiguous buffer and cannot "
18
+ "be transferred as a single memory region. Please "
19
+ "ensure contiguous buffer with numpy "
20
+ ".ascontiguousarray()"
21
+ )
19
22
 
20
23
 
21
24
  class FakeShape(tuple):
22
- '''
25
+ """
23
26
  The FakeShape class is used to provide a shape which does not allow negative
24
27
  indexing, similar to the shape in CUDA Python. (Numpy shape arrays allow
25
28
  negative indexing)
26
- '''
29
+ """
27
30
 
28
31
  def __getitem__(self, k):
29
32
  if isinstance(k, int) and k < 0:
30
- raise IndexError('tuple index out of range')
33
+ raise IndexError("tuple index out of range")
31
34
  return super(FakeShape, self).__getitem__(k)
32
35
 
33
36
 
34
37
  class FakeWithinKernelCUDAArray(object):
35
- '''
38
+ """
36
39
  Created to emulate the behavior of arrays within kernels, where either
37
40
  array.item or array['item'] is valid (that is, give all structured
38
41
  arrays `numpy.recarray`-like semantics). This behaviour does not follow
39
42
  the semantics of Python and NumPy with non-jitted code, and will be
40
43
  deprecated and removed.
41
- '''
44
+ """
42
45
 
43
46
  def __init__(self, item):
44
47
  assert isinstance(item, FakeCUDAArray)
45
- self.__dict__['_item'] = item
48
+ self.__dict__["_item"] = item
46
49
 
47
50
  def __wrap_if_fake(self, item):
48
51
  if isinstance(item, FakeCUDAArray):
@@ -84,18 +87,18 @@ class FakeWithinKernelCUDAArray(object):
84
87
 
85
88
  return obj
86
89
 
87
- out = kwargs.get('out')
90
+ out = kwargs.get("out")
88
91
  if out:
89
- kwargs['out'] = tuple(convert_fakes(o) for o in out)
92
+ kwargs["out"] = tuple(convert_fakes(o) for o in out)
90
93
  args = tuple(convert_fakes(a) for a in args)
91
94
  return call(*args, **kwargs)
92
95
 
93
96
 
94
97
  class FakeCUDAArray(object):
95
- '''
98
+ """
96
99
  Implements the interface of a DeviceArray/DeviceRecord, but mostly just
97
100
  wraps a NumPy array.
98
- '''
101
+ """
99
102
 
100
103
  __cuda_ndarray__ = True # There must be gpu_data attribute
101
104
 
@@ -149,13 +152,13 @@ class FakeCUDAArray(object):
149
152
  return ary
150
153
 
151
154
  def copy_to_device(self, ary, stream=0):
152
- '''
155
+ """
153
156
  Copy from the provided array into this array.
154
157
 
155
158
  This may be less forgiving than the CUDA Python implementation, which
156
159
  will copy data up to the length of the smallest of the two arrays,
157
160
  whereas this expects the size of the arrays to be equal.
158
- '''
161
+ """
159
162
  sentry_contiguous(self)
160
163
  self_core, ary_core = array_core(self), array_core(ary)
161
164
  if isinstance(ary, FakeCUDAArray):
@@ -164,9 +167,10 @@ class FakeCUDAArray(object):
164
167
  else:
165
168
  ary_core = np.array(
166
169
  ary_core,
167
- order='C' if self_core.flags['C_CONTIGUOUS'] else 'F',
170
+ order="C" if self_core.flags["C_CONTIGUOUS"] else "F",
168
171
  subok=True,
169
- copy=False if numpy_version < (2, 0) else None)
172
+ copy=False if numpy_version < (2, 0) else None,
173
+ )
170
174
  check_array_compatibility(self_core, ary_core)
171
175
  np.copyto(self_core._ary, ary_core)
172
176
 
@@ -237,7 +241,7 @@ class FakeCUDAArray(object):
237
241
  return FakeCUDAArray(self._ary % other)
238
242
 
239
243
  def __pow__(self, other):
240
- return FakeCUDAArray(self._ary ** other)
244
+ return FakeCUDAArray(self._ary**other)
241
245
 
242
246
  def split(self, section, stream=0):
243
247
  return [
@@ -282,30 +286,33 @@ def is_contiguous(ary):
282
286
 
283
287
  def sentry_contiguous(ary):
284
288
  core = array_core(ary)
285
- if not core.flags['C_CONTIGUOUS'] and not core.flags['F_CONTIGUOUS']:
289
+ if not core.flags["C_CONTIGUOUS"] and not core.flags["F_CONTIGUOUS"]:
286
290
  raise ValueError(errmsg_contiguous_buffer)
287
291
 
288
292
 
289
293
  def check_array_compatibility(ary1, ary2):
290
294
  ary1sq, ary2sq = ary1.squeeze(), ary2.squeeze()
291
295
  if ary1.dtype != ary2.dtype:
292
- raise TypeError('incompatible dtype: %s vs. %s' %
293
- (ary1.dtype, ary2.dtype))
296
+ raise TypeError(
297
+ "incompatible dtype: %s vs. %s" % (ary1.dtype, ary2.dtype)
298
+ )
294
299
  if ary1sq.shape != ary2sq.shape:
295
- raise ValueError('incompatible shape: %s vs. %s' %
296
- (ary1.shape, ary2.shape))
300
+ raise ValueError(
301
+ "incompatible shape: %s vs. %s" % (ary1.shape, ary2.shape)
302
+ )
297
303
  if ary1sq.strides != ary2sq.strides:
298
- raise ValueError('incompatible strides: %s vs. %s' %
299
- (ary1.strides, ary2.strides))
304
+ raise ValueError(
305
+ "incompatible strides: %s vs. %s" % (ary1.strides, ary2.strides)
306
+ )
300
307
 
301
308
 
302
309
  def to_device(ary, stream=0, copy=True, to=None):
303
- ary = np.array(ary,
304
- copy=False if numpy_version < (2, 0) else None,
305
- subok=True)
310
+ ary = np.array(
311
+ ary, copy=False if numpy_version < (2, 0) else None, subok=True
312
+ )
306
313
  sentry_contiguous(ary)
307
314
  if to is None:
308
- buffer_dtype = np.int64 if ary.dtype.char in 'Mm' else ary.dtype
315
+ buffer_dtype = np.int64 if ary.dtype.char in "Mm" else ary.dtype
309
316
  return FakeCUDAArray(
310
317
  np.ndarray(
311
318
  buffer=np.copy(array_core(ary)).view(buffer_dtype),
@@ -324,22 +331,22 @@ def pinned(arg):
324
331
 
325
332
 
326
333
  def mapped_array(*args, **kwargs):
327
- for unused_arg in ('portable', 'wc'):
334
+ for unused_arg in ("portable", "wc"):
328
335
  if unused_arg in kwargs:
329
336
  kwargs.pop(unused_arg)
330
337
  return device_array(*args, **kwargs)
331
338
 
332
339
 
333
- def pinned_array(shape, dtype=np.float64, strides=None, order='C'):
340
+ def pinned_array(shape, dtype=np.float64, strides=None, order="C"):
334
341
  return np.ndarray(shape=shape, strides=strides, dtype=dtype, order=order)
335
342
 
336
343
 
337
- def managed_array(shape, dtype=np.float64, strides=None, order='C'):
344
+ def managed_array(shape, dtype=np.float64, strides=None, order="C"):
338
345
  return np.ndarray(shape=shape, strides=strides, dtype=dtype, order=order)
339
346
 
340
347
 
341
348
  def device_array(*args, **kwargs):
342
- stream = kwargs.pop('stream') if 'stream' in kwargs else 0
349
+ stream = kwargs.pop("stream") if "stream" in kwargs else 0
343
350
  return FakeCUDAArray(np.ndarray(*args, **kwargs), stream=stream)
344
351
 
345
352
 
@@ -350,7 +357,7 @@ def _contiguous_strides_like_array(ary):
350
357
  """
351
358
  # Don't recompute strides if the default strides will be sufficient to
352
359
  # create a contiguous array.
353
- if ary.flags['C_CONTIGUOUS'] or ary.flags['F_CONTIGUOUS'] or ary.ndim <= 1:
360
+ if ary.flags["C_CONTIGUOUS"] or ary.flags["F_CONTIGUOUS"] or ary.ndim <= 1:
354
361
  return None
355
362
 
356
363
  # Otherwise, we need to compute new strides using an algorithm adapted from
@@ -360,7 +367,7 @@ def _contiguous_strides_like_array(ary):
360
367
 
361
368
  # Stride permutation. E.g. a stride array (4, -2, 12) becomes
362
369
  # [(1, -2), (0, 4), (2, 12)]
363
- strideperm = [ x for x in enumerate(ary.strides) ]
370
+ strideperm = [x for x in enumerate(ary.strides)]
364
371
  strideperm.sort(key=lambda x: x[1])
365
372
 
366
373
  # Compute new strides using permutation
@@ -373,24 +380,26 @@ def _contiguous_strides_like_array(ary):
373
380
 
374
381
 
375
382
  def _order_like_array(ary):
376
- if ary.flags['F_CONTIGUOUS'] and not ary.flags['C_CONTIGUOUS']:
377
- return 'F'
383
+ if ary.flags["F_CONTIGUOUS"] and not ary.flags["C_CONTIGUOUS"]:
384
+ return "F"
378
385
  else:
379
- return 'C'
386
+ return "C"
380
387
 
381
388
 
382
389
  def device_array_like(ary, stream=0):
383
390
  strides = _contiguous_strides_like_array(ary)
384
391
  order = _order_like_array(ary)
385
- return device_array(shape=ary.shape, dtype=ary.dtype, strides=strides,
386
- order=order)
392
+ return device_array(
393
+ shape=ary.shape, dtype=ary.dtype, strides=strides, order=order
394
+ )
387
395
 
388
396
 
389
397
  def pinned_array_like(ary):
390
398
  strides = _contiguous_strides_like_array(ary)
391
399
  order = _order_like_array(ary)
392
- return pinned_array(shape=ary.shape, dtype=ary.dtype, strides=strides,
393
- order=order)
400
+ return pinned_array(
401
+ shape=ary.shape, dtype=ary.dtype, strides=strides, order=order
402
+ )
394
403
 
395
404
 
396
405
  def auto_device(ary, stream=0, copy=True):
@@ -399,15 +408,14 @@ def auto_device(ary, stream=0, copy=True):
399
408
 
400
409
  if not isinstance(ary, np.void):
401
410
  ary = np.array(
402
- ary,
403
- copy=False if numpy_version < (2, 0) else None,
404
- subok=True)
411
+ ary, copy=False if numpy_version < (2, 0) else None, subok=True
412
+ )
405
413
  return to_device(ary, stream, copy), True
406
414
 
407
415
 
408
416
  def is_cuda_ndarray(obj):
409
417
  "Check if an object is a CUDA ndarray"
410
- return getattr(obj, '__cuda_ndarray__', False)
418
+ return getattr(obj, "__cuda_ndarray__", False)
411
419
 
412
420
 
413
421
  def verify_cuda_ndarray_interface(obj):
@@ -418,15 +426,15 @@ def verify_cuda_ndarray_interface(obj):
418
426
  if not hasattr(obj, attr):
419
427
  raise AttributeError(attr)
420
428
  if not isinstance(getattr(obj, attr), typ):
421
- raise AttributeError('%s must be of type %s' % (attr, typ))
429
+ raise AttributeError("%s must be of type %s" % (attr, typ))
422
430
 
423
- requires_attr('shape', tuple)
424
- requires_attr('strides', tuple)
425
- requires_attr('dtype', np.dtype)
426
- requires_attr('size', int)
431
+ requires_attr("shape", tuple)
432
+ requires_attr("strides", tuple)
433
+ requires_attr("dtype", np.dtype)
434
+ requires_attr("size", int)
427
435
 
428
436
 
429
437
  def require_cuda_ndarray(obj):
430
438
  "Raises ValueError is is_cuda_ndarray(obj) evaluates False"
431
439
  if not is_cuda_ndarray(obj):
432
- raise ValueError('require an cuda ndarray object')
440
+ raise ValueError("require an cuda ndarray object")
@@ -8,7 +8,7 @@ _SIMULATOR_CC = (5, 2)
8
8
 
9
9
  class FakeCUDADevice:
10
10
  def __init__(self):
11
- self.uuid = 'GPU-00000000-0000-0000-0000-000000000000'
11
+ self.uuid = "GPU-00000000-0000-0000-0000-000000000000"
12
12
 
13
13
  @property
14
14
  def compute_capability(self):
@@ -16,10 +16,11 @@ class FakeCUDADevice:
16
16
 
17
17
 
18
18
  class FakeCUDAContext:
19
- '''
19
+ """
20
20
  This stub implements functionality only for simulating a single GPU
21
21
  at the moment.
22
- '''
22
+ """
23
+
23
24
  def __init__(self, device_id):
24
25
  self._device_id = device_id
25
26
  self._device = FakeCUDADevice()
@@ -54,7 +55,7 @@ class FakeCUDAContext:
54
55
  dependencies, e.g. `psutil` - so return infinite memory to maintain API
55
56
  type compatibility
56
57
  """
57
- return _MemoryInfo(float('inf'), float('inf'))
58
+ return _MemoryInfo(float("inf"), float("inf"))
58
59
 
59
60
  def memalloc(self, sz):
60
61
  """
@@ -62,19 +63,20 @@ class FakeCUDAContext:
62
63
  At present, there is no division between simulated
63
64
  host memory and simulated device memory.
64
65
  """
65
- return np.ndarray(sz, dtype='u1')
66
+ return np.ndarray(sz, dtype="u1")
66
67
 
67
68
  def memhostalloc(self, sz, mapped=False, portable=False, wc=False):
68
- '''Allocates memory on the host'''
69
+ """Allocates memory on the host"""
69
70
  return self.memalloc(sz)
70
71
 
71
72
 
72
73
  class FakeDeviceList:
73
- '''
74
+ """
74
75
  This stub implements a device list containing a single GPU. It also
75
76
  keeps track of the GPU status, i.e. whether the context is closed or not,
76
77
  which may have been set by the user calling reset()
77
- '''
78
+ """
79
+
78
80
  def __init__(self):
79
81
  self.lst = (FakeCUDAContext(0),)
80
82
  self.closed = False
@@ -84,7 +86,7 @@ class FakeDeviceList:
84
86
  return self.lst[devnum]
85
87
 
86
88
  def __str__(self):
87
- return ', '.join([str(d) for d in self.lst])
89
+ return ", ".join([str(d) for d in self.lst])
88
90
 
89
91
  def __iter__(self):
90
92
  return iter(self.lst)
@@ -111,7 +113,7 @@ def get_context(devnum=0):
111
113
 
112
114
 
113
115
  def require_context(func):
114
- '''
116
+ """
115
117
  In the simulator, a context is always "available", so this is a no-op.
116
- '''
118
+ """
117
119
  return func
@@ -1,15 +1,15 @@
1
- '''
1
+ """
2
2
  Most of the driver API is unsupported in the simulator, but some stubs are
3
3
  provided to allow tests to import correctly.
4
- '''
4
+ """
5
5
 
6
6
 
7
7
  def device_memset(dst, val, size, stream=0):
8
- dst.view('u1')[:size].fill(bytes([val])[0])
8
+ dst.view("u1")[:size].fill(bytes([val])[0])
9
9
 
10
10
 
11
11
  def host_to_device(dst, src, size, stream=0):
12
- dst.view('u1')[:size] = src.view('u1')[:size]
12
+ dst.view("u1")[:size] = src.view("u1")[:size]
13
13
 
14
14
 
15
15
  def device_to_host(dst, src, size, stream=0):
@@ -55,7 +55,7 @@ class CudaAPIError(RuntimeError):
55
55
 
56
56
 
57
57
  def launch_kernel(*args, **kwargs):
58
- msg = 'Launching kernels directly is not supported in the simulator'
58
+ msg = "Launching kernels directly is not supported in the simulator"
59
59
  raise RuntimeError(msg)
60
60
 
61
61
 
@@ -1,4 +1,4 @@
1
- '''
1
+ """
2
2
  drvapi is not implemented in the simulator, but this module exists to allow
3
3
  tests to import correctly.
4
- '''
4
+ """
@@ -1,2 +1,2 @@
1
1
  def check_static_lib(lib):
2
- raise FileNotFoundError('Linking libraries not supported by cudasim')
2
+ raise FileNotFoundError("Linking libraries not supported by cudasim")
@@ -1,7 +1,7 @@
1
- '''
1
+ """
2
2
  NVVM is not supported in the simulator, but stubs are provided to allow tests
3
3
  to import correctly.
4
- '''
4
+ """
5
5
 
6
6
 
7
7
  class NvvmSupportError(ImportError):
@@ -10,7 +10,7 @@ class NvvmSupportError(ImportError):
10
10
 
11
11
  class NVVM(object):
12
12
  def __init__(self):
13
- raise NvvmSupportError('NVVM not supported in the simulator')
13
+ raise NvvmSupportError("NVVM not supported in the simulator")
14
14
 
15
15
 
16
16
  CompilationUnit = None
@@ -1,7 +1,7 @@
1
- '''
1
+ """
2
2
  The runtime API is unsupported in the simulator, but some stubs are
3
3
  provided to allow tests to import correctly.
4
- '''
4
+ """
5
5
 
6
6
 
7
7
  class FakeRuntime(object):
@@ -13,7 +13,7 @@ class FakeRuntime(object):
13
13
 
14
14
  @property
15
15
  def supported_versions(self):
16
- return (-1, -1),
16
+ return ((-1, -1),)
17
17
 
18
18
 
19
19
  runtime = FakeRuntime()