numba-cuda 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (227)
  1. _numba_cuda_redirector.py +17 -13
  2. numba_cuda/VERSION +1 -1
  3. numba_cuda/_version.py +4 -1
  4. numba_cuda/numba/cuda/__init__.py +6 -2
  5. numba_cuda/numba/cuda/api.py +129 -86
  6. numba_cuda/numba/cuda/api_util.py +3 -3
  7. numba_cuda/numba/cuda/args.py +12 -16
  8. numba_cuda/numba/cuda/cg.py +6 -6
  9. numba_cuda/numba/cuda/codegen.py +74 -43
  10. numba_cuda/numba/cuda/compiler.py +232 -113
  11. numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
  12. numba_cuda/numba/cuda/cuda_fp16.h +661 -661
  13. numba_cuda/numba/cuda/cuda_fp16.hpp +3 -3
  14. numba_cuda/numba/cuda/cuda_paths.py +291 -99
  15. numba_cuda/numba/cuda/cudadecl.py +125 -69
  16. numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
  17. numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
  18. numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
  19. numba_cuda/numba/cuda/cudadrv/driver.py +463 -297
  20. numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
  21. numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
  22. numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
  23. numba_cuda/numba/cuda/cudadrv/error.py +6 -2
  24. numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
  25. numba_cuda/numba/cuda/cudadrv/linkable_code.py +16 -1
  26. numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
  27. numba_cuda/numba/cuda/cudadrv/nvrtc.py +138 -29
  28. numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
  29. numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
  30. numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
  31. numba_cuda/numba/cuda/cudaimpl.py +317 -233
  32. numba_cuda/numba/cuda/cudamath.py +1 -1
  33. numba_cuda/numba/cuda/debuginfo.py +8 -6
  34. numba_cuda/numba/cuda/decorators.py +75 -45
  35. numba_cuda/numba/cuda/descriptor.py +1 -1
  36. numba_cuda/numba/cuda/device_init.py +69 -18
  37. numba_cuda/numba/cuda/deviceufunc.py +143 -98
  38. numba_cuda/numba/cuda/dispatcher.py +300 -213
  39. numba_cuda/numba/cuda/errors.py +13 -10
  40. numba_cuda/numba/cuda/extending.py +1 -1
  41. numba_cuda/numba/cuda/initialize.py +5 -3
  42. numba_cuda/numba/cuda/intrinsic_wrapper.py +3 -3
  43. numba_cuda/numba/cuda/intrinsics.py +31 -27
  44. numba_cuda/numba/cuda/kernels/reduction.py +13 -13
  45. numba_cuda/numba/cuda/kernels/transpose.py +3 -6
  46. numba_cuda/numba/cuda/libdevice.py +317 -317
  47. numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
  48. numba_cuda/numba/cuda/locks.py +16 -0
  49. numba_cuda/numba/cuda/mathimpl.py +62 -57
  50. numba_cuda/numba/cuda/models.py +1 -5
  51. numba_cuda/numba/cuda/nvvmutils.py +103 -88
  52. numba_cuda/numba/cuda/printimpl.py +9 -5
  53. numba_cuda/numba/cuda/random.py +46 -36
  54. numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
  55. numba_cuda/numba/cuda/runtime/__init__.py +1 -1
  56. numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
  57. numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
  58. numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
  59. numba_cuda/numba/cuda/runtime/nrt.py +48 -43
  60. numba_cuda/numba/cuda/simulator/__init__.py +22 -12
  61. numba_cuda/numba/cuda/simulator/api.py +38 -22
  62. numba_cuda/numba/cuda/simulator/compiler.py +2 -2
  63. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
  64. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
  65. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
  66. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
  67. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
  68. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
  69. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
  70. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
  71. numba_cuda/numba/cuda/simulator/kernel.py +43 -34
  72. numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
  73. numba_cuda/numba/cuda/simulator/reduction.py +1 -0
  74. numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
  75. numba_cuda/numba/cuda/simulator_init.py +2 -4
  76. numba_cuda/numba/cuda/stubs.py +139 -102
  77. numba_cuda/numba/cuda/target.py +64 -47
  78. numba_cuda/numba/cuda/testing.py +24 -19
  79. numba_cuda/numba/cuda/tests/__init__.py +14 -12
  80. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
  81. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
  82. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
  83. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
  84. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
  85. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
  86. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
  87. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
  88. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
  89. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
  90. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
  91. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
  92. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
  93. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
  94. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
  95. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
  96. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
  97. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
  98. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
  99. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
  100. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
  101. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
  102. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
  103. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
  104. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
  105. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
  106. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
  107. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
  108. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
  109. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
  110. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
  111. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +7 -6
  112. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
  113. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
  114. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
  115. numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
  116. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
  117. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
  118. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
  119. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +57 -21
  120. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
  121. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
  122. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
  123. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
  124. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
  125. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
  126. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
  127. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
  128. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
  129. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
  130. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
  131. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
  132. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
  133. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
  134. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +31 -28
  135. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
  136. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
  137. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +6 -7
  138. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
  139. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
  140. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +19 -12
  141. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
  142. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
  143. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
  144. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
  145. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
  146. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
  147. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
  148. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
  149. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
  150. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
  151. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
  152. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
  153. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
  154. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
  155. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +6 -6
  156. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
  157. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
  158. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
  159. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
  160. numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
  161. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
  162. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
  163. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
  164. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
  165. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
  166. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
  167. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
  168. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
  169. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
  170. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
  171. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
  172. numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
  173. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
  174. numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
  175. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
  176. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
  177. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
  178. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
  179. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
  180. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
  181. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
  182. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
  183. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
  184. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
  185. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
  186. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
  187. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
  188. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
  189. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
  190. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
  191. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
  192. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
  193. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
  194. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
  195. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +31 -25
  196. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
  197. numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
  198. numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
  199. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
  200. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
  201. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
  202. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
  203. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
  204. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
  205. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
  206. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
  207. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
  208. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
  209. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
  210. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
  211. numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
  212. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
  213. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
  214. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
  215. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
  216. numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
  217. numba_cuda/numba/cuda/types.py +5 -2
  218. numba_cuda/numba/cuda/ufuncs.py +382 -362
  219. numba_cuda/numba/cuda/utils.py +2 -2
  220. numba_cuda/numba/cuda/vector_types.py +2 -2
  221. numba_cuda/numba/cuda/vectorizers.py +37 -32
  222. {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/METADATA +1 -1
  223. numba_cuda-0.9.0.dist-info/RECORD +253 -0
  224. {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/WHEEL +1 -1
  225. numba_cuda-0.8.0.dist-info/RECORD +0 -251
  226. {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/licenses/LICENSE +0 -0
  227. {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/top_level.txt +0 -0
@@ -49,8 +49,9 @@ def libdevice_implement_multiple_returns(func, retty, prototype_args):
49
49
  for arg in prototype_args:
50
50
  if arg.is_ptr:
51
51
  # Allocate space for return value and add to args
52
- tmp_arg = cgutils.alloca_once(builder,
53
- context.get_value_type(arg.ty))
52
+ tmp_arg = cgutils.alloca_once(
53
+ builder, context.get_value_type(arg.ty)
54
+ )
54
55
  actual_args.append(tmp_arg)
55
56
  virtual_args.append(tmp_arg)
56
57
  else:
@@ -0,0 +1,16 @@
1
+ from threading import Lock
2
+ from functools import wraps
3
+
4
+ # Thread safety guard for module initialization.
5
+ _module_init_lock = Lock()
6
+
7
+
8
+ def module_init_lock(func):
9
+ """Decorator to make sure initialization is invoked once for all threads."""
10
+
11
+ @wraps(func)
12
+ def wrapper(*args, **kwargs):
13
+ with _module_init_lock:
14
+ return func(*args, **kwargs)
15
+
16
+ return wrapper
@@ -12,57 +12,57 @@ lower = registry.lower
12
12
 
13
13
 
14
14
  booleans = []
15
- booleans += [('isnand', 'isnanf', math.isnan)]
16
- booleans += [('isinfd', 'isinff', math.isinf)]
17
- booleans += [('isfinited', 'finitef', math.isfinite)]
15
+ booleans += [("isnand", "isnanf", math.isnan)]
16
+ booleans += [("isinfd", "isinff", math.isinf)]
17
+ booleans += [("isfinited", "finitef", math.isfinite)]
18
18
 
19
19
  unarys = []
20
- unarys += [('ceil', 'ceilf', math.ceil)]
21
- unarys += [('floor', 'floorf', math.floor)]
22
- unarys += [('fabs', 'fabsf', math.fabs)]
23
- unarys += [('exp', 'expf', math.exp)]
24
- unarys += [('expm1', 'expm1f', math.expm1)]
25
- unarys += [('erf', 'erff', math.erf)]
26
- unarys += [('erfc', 'erfcf', math.erfc)]
27
- unarys += [('tgamma', 'tgammaf', math.gamma)]
28
- unarys += [('lgamma', 'lgammaf', math.lgamma)]
29
- unarys += [('sqrt', 'sqrtf', math.sqrt)]
30
- unarys += [('log', 'logf', math.log)]
31
- unarys += [('log2', 'log2f', math.log2)]
32
- unarys += [('log10', 'log10f', math.log10)]
33
- unarys += [('log1p', 'log1pf', math.log1p)]
34
- unarys += [('acosh', 'acoshf', math.acosh)]
35
- unarys += [('acos', 'acosf', math.acos)]
36
- unarys += [('cos', 'cosf', math.cos)]
37
- unarys += [('cosh', 'coshf', math.cosh)]
38
- unarys += [('asinh', 'asinhf', math.asinh)]
39
- unarys += [('asin', 'asinf', math.asin)]
40
- unarys += [('sin', 'sinf', math.sin)]
41
- unarys += [('sinh', 'sinhf', math.sinh)]
42
- unarys += [('atan', 'atanf', math.atan)]
43
- unarys += [('atanh', 'atanhf', math.atanh)]
44
- unarys += [('tan', 'tanf', math.tan)]
45
- unarys += [('trunc', 'truncf', math.trunc)]
20
+ unarys += [("ceil", "ceilf", math.ceil)]
21
+ unarys += [("floor", "floorf", math.floor)]
22
+ unarys += [("fabs", "fabsf", math.fabs)]
23
+ unarys += [("exp", "expf", math.exp)]
24
+ unarys += [("expm1", "expm1f", math.expm1)]
25
+ unarys += [("erf", "erff", math.erf)]
26
+ unarys += [("erfc", "erfcf", math.erfc)]
27
+ unarys += [("tgamma", "tgammaf", math.gamma)]
28
+ unarys += [("lgamma", "lgammaf", math.lgamma)]
29
+ unarys += [("sqrt", "sqrtf", math.sqrt)]
30
+ unarys += [("log", "logf", math.log)]
31
+ unarys += [("log2", "log2f", math.log2)]
32
+ unarys += [("log10", "log10f", math.log10)]
33
+ unarys += [("log1p", "log1pf", math.log1p)]
34
+ unarys += [("acosh", "acoshf", math.acosh)]
35
+ unarys += [("acos", "acosf", math.acos)]
36
+ unarys += [("cos", "cosf", math.cos)]
37
+ unarys += [("cosh", "coshf", math.cosh)]
38
+ unarys += [("asinh", "asinhf", math.asinh)]
39
+ unarys += [("asin", "asinf", math.asin)]
40
+ unarys += [("sin", "sinf", math.sin)]
41
+ unarys += [("sinh", "sinhf", math.sinh)]
42
+ unarys += [("atan", "atanf", math.atan)]
43
+ unarys += [("atanh", "atanhf", math.atanh)]
44
+ unarys += [("tan", "tanf", math.tan)]
45
+ unarys += [("trunc", "truncf", math.trunc)]
46
46
 
47
47
  unarys_fastmath = {}
48
- unarys_fastmath['cosf'] = 'fast_cosf'
49
- unarys_fastmath['sinf'] = 'fast_sinf'
50
- unarys_fastmath['tanf'] = 'fast_tanf'
51
- unarys_fastmath['expf'] = 'fast_expf'
52
- unarys_fastmath['log2f'] = 'fast_log2f'
53
- unarys_fastmath['log10f'] = 'fast_log10f'
54
- unarys_fastmath['logf'] = 'fast_logf'
48
+ unarys_fastmath["cosf"] = "fast_cosf"
49
+ unarys_fastmath["sinf"] = "fast_sinf"
50
+ unarys_fastmath["tanf"] = "fast_tanf"
51
+ unarys_fastmath["expf"] = "fast_expf"
52
+ unarys_fastmath["log2f"] = "fast_log2f"
53
+ unarys_fastmath["log10f"] = "fast_log10f"
54
+ unarys_fastmath["logf"] = "fast_logf"
55
55
 
56
56
  binarys = []
57
- binarys += [('copysign', 'copysignf', math.copysign)]
58
- binarys += [('atan2', 'atan2f', math.atan2)]
59
- binarys += [('pow', 'powf', math.pow)]
60
- binarys += [('fmod', 'fmodf', math.fmod)]
61
- binarys += [('hypot', 'hypotf', math.hypot)]
62
- binarys += [('remainder', 'remainderf', math.remainder)]
57
+ binarys += [("copysign", "copysignf", math.copysign)]
58
+ binarys += [("atan2", "atan2f", math.atan2)]
59
+ binarys += [("pow", "powf", math.pow)]
60
+ binarys += [("fmod", "fmodf", math.fmod)]
61
+ binarys += [("hypot", "hypotf", math.hypot)]
62
+ binarys += [("remainder", "remainderf", math.remainder)]
63
63
 
64
64
  binarys_fastmath = {}
65
- binarys_fastmath['powf'] = 'fast_powf'
65
+ binarys_fastmath["powf"] = "fast_powf"
66
66
 
67
67
 
68
68
  @lower(math.isinf, types.Integer)
@@ -179,8 +179,9 @@ def fp16_trunc_impl(context, builder, sig, args):
179
179
 
180
180
  def impl_boolean(key, ty, libfunc):
181
181
  def lower_boolean_impl(context, builder, sig, args):
182
- libfunc_impl = context.get_function(libfunc,
183
- typing.signature(types.int32, ty))
182
+ libfunc_impl = context.get_function(
183
+ libfunc, typing.signature(types.int32, ty)
184
+ )
184
185
  result = libfunc_impl(builder, args)
185
186
  return context.cast(builder, result, types.int32, types.boolean)
186
187
 
@@ -197,9 +198,11 @@ def get_lower_unary_impl(key, ty, libfunc):
197
198
  if fast_replacement is not None:
198
199
  actual_libfunc = getattr(libdevice, fast_replacement)
199
200
 
200
- libfunc_impl = context.get_function(actual_libfunc,
201
- typing.signature(ty, ty))
201
+ libfunc_impl = context.get_function(
202
+ actual_libfunc, typing.signature(ty, ty)
203
+ )
202
204
  return libfunc_impl(builder, args)
205
+
203
206
  return lower_unary_impl
204
207
 
205
208
 
@@ -208,7 +211,7 @@ def get_unary_impl_for_fn_and_ty(fn, ty):
208
211
  # unary implementations, it does not appear in the unarys list. However,
209
212
  # its implementation can be looked up by key like the other
210
213
  # implementations, so we add it to the list we search here.
211
- tanh_impls = ('tanh', 'tanhf', math.tanh)
214
+ tanh_impls = ("tanh", "tanhf", math.tanh)
212
215
  for fname64, fname32, key in unarys + [tanh_impls]:
213
216
  if fn == key:
214
217
  if ty == float32:
@@ -233,7 +236,7 @@ def impl_unary_int(key, ty, libfunc):
233
236
  elif sig.args[0] == uint64:
234
237
  convert = builder.uitofp
235
238
  else:
236
- m = 'Only 64-bit integers are supported for generic unary int ops'
239
+ m = "Only 64-bit integers are supported for generic unary int ops"
237
240
  raise TypeError(m)
238
241
 
239
242
  arg = convert(args[0], ir.DoubleType())
@@ -254,9 +257,11 @@ def get_lower_binary_impl(key, ty, libfunc):
254
257
  if fast_replacement is not None:
255
258
  actual_libfunc = getattr(libdevice, fast_replacement)
256
259
 
257
- libfunc_impl = context.get_function(actual_libfunc,
258
- typing.signature(ty, ty, ty))
260
+ libfunc_impl = context.get_function(
261
+ actual_libfunc, typing.signature(ty, ty, ty)
262
+ )
259
263
  return libfunc_impl(builder, args)
264
+
260
265
  return lower_binary_impl
261
266
 
262
267
 
@@ -285,7 +290,7 @@ def impl_binary_int(key, ty, libfunc):
285
290
  elif sig.args[0] == uint64:
286
291
  convert = builder.uitofp
287
292
  else:
288
- m = 'Only 64-bit integers are supported for generic binary int ops'
293
+ m = "Only 64-bit integers are supported for generic binary int ops"
289
294
  raise TypeError(m)
290
295
 
291
296
  args = [convert(arg, ir.DoubleType()) for arg in args]
@@ -390,12 +395,12 @@ def impl_tanh(ty, libfunc):
390
395
 
391
396
  def tanhf_impl_fastmath():
392
397
  fnty = ir.FunctionType(ir.FloatType(), [ir.FloatType()])
393
- asm = ir.InlineAsm(fnty, 'tanh.approx.f32 $0, $1;', '=f,f')
398
+ asm = ir.InlineAsm(fnty, "tanh.approx.f32 $0, $1;", "=f,f")
394
399
  return builder.call(asm, args)
395
400
 
396
401
  if ty == float32 and context.fastmath:
397
402
  cc = get_compute_capability()
398
- if cc >= (7,5):
403
+ if cc >= (7, 5):
399
404
  return tanhf_impl_fastmath()
400
405
 
401
406
  return tanh_impl_libdevice()
@@ -420,7 +425,6 @@ impl_unary_int(math.tanh, uint64, libdevice.tanh)
420
425
  def cpow_implement(fty, cty):
421
426
  def core(context, builder, sig, args):
422
427
  def cpow_internal(a, b):
423
-
424
428
  if b.real == fty(0.0) and b.imag == fty(0.0):
425
429
  return cty(1.0) + cty(0.0j)
426
430
  elif a.real == fty(0.0) and b.real == fty(0.0):
@@ -434,8 +438,9 @@ def cpow_implement(fty, cty):
434
438
  len /= math.exp(at * b.imag)
435
439
  phase += b.imag * math.log(vabs)
436
440
 
437
- return len * (cty(math.cos(phase)) +
438
- cty(math.sin(phase) * cty(1.0j)))
441
+ return len * (
442
+ cty(math.cos(phase)) + cty(math.sin(phase) * cty(1.0j))
443
+ )
439
444
 
440
445
  return context.compile_internal(builder, cpow_internal, sig, args)
441
446
 
@@ -16,11 +16,7 @@ register_model = functools.partial(register, cuda_data_manager)
16
16
  @register_model(Dim3)
17
17
  class Dim3Model(models.StructModel):
18
18
  def __init__(self, dmm, fe_type):
19
- members = [
20
- ('x', types.int32),
21
- ('y', types.int32),
22
- ('z', types.int32)
23
- ]
19
+ members = [("x", types.int32), ("y", types.int32), ("z", types.int32)]
24
20
  super().__init__(dmm, fe_type, members)
25
21
 
26
22
 
@@ -5,159 +5,178 @@ from .cudadrv import nvvm
5
5
 
6
6
 
7
7
  def declare_atomic_cas_int(lmod, isize):
8
- fname = '___numba_atomic_i' + str(isize) + '_cas_hack'
9
- fnty = ir.FunctionType(ir.IntType(isize),
10
- (ir.PointerType(ir.IntType(isize)),
11
- ir.IntType(isize),
12
- ir.IntType(isize)))
8
+ fname = "___numba_atomic_i" + str(isize) + "_cas_hack"
9
+ fnty = ir.FunctionType(
10
+ ir.IntType(isize),
11
+ (
12
+ ir.PointerType(ir.IntType(isize)),
13
+ ir.IntType(isize),
14
+ ir.IntType(isize),
15
+ ),
16
+ )
13
17
  return cgutils.get_or_insert_function(lmod, fnty, fname)
14
18
 
15
19
 
16
20
  def atomic_cmpxchg(builder, lmod, isize, ptr, cmp, val):
17
- out = builder.cmpxchg(ptr, cmp, val, 'monotonic', 'monotonic')
21
+ out = builder.cmpxchg(ptr, cmp, val, "monotonic", "monotonic")
18
22
  return builder.extract_value(out, 0)
19
23
 
20
24
 
21
25
  def declare_atomic_add_float32(lmod):
22
- fname = 'llvm.nvvm.atomic.load.add.f32.p0f32'
23
- fnty = ir.FunctionType(ir.FloatType(),
24
- (ir.PointerType(ir.FloatType(), 0), ir.FloatType()))
26
+ fname = "llvm.nvvm.atomic.load.add.f32.p0f32"
27
+ fnty = ir.FunctionType(
28
+ ir.FloatType(), (ir.PointerType(ir.FloatType(), 0), ir.FloatType())
29
+ )
25
30
  return cgutils.get_or_insert_function(lmod, fnty, fname)
26
31
 
27
32
 
28
33
  def declare_atomic_add_float64(lmod):
29
34
  flags = targetconfig.ConfigStack().top()
30
35
  if flags.compute_capability >= (6, 0):
31
- fname = 'llvm.nvvm.atomic.load.add.f64.p0f64'
36
+ fname = "llvm.nvvm.atomic.load.add.f64.p0f64"
32
37
  else:
33
- fname = '___numba_atomic_double_add'
34
- fnty = ir.FunctionType(ir.DoubleType(),
35
- (ir.PointerType(ir.DoubleType()), ir.DoubleType()))
38
+ fname = "___numba_atomic_double_add"
39
+ fnty = ir.FunctionType(
40
+ ir.DoubleType(), (ir.PointerType(ir.DoubleType()), ir.DoubleType())
41
+ )
36
42
  return cgutils.get_or_insert_function(lmod, fnty, fname)
37
43
 
38
44
 
39
45
  def declare_atomic_sub_float32(lmod):
40
- fname = '___numba_atomic_float_sub'
41
- fnty = ir.FunctionType(ir.FloatType(),
42
- (ir.PointerType(ir.FloatType()), ir.FloatType()))
46
+ fname = "___numba_atomic_float_sub"
47
+ fnty = ir.FunctionType(
48
+ ir.FloatType(), (ir.PointerType(ir.FloatType()), ir.FloatType())
49
+ )
43
50
  return cgutils.get_or_insert_function(lmod, fnty, fname)
44
51
 
45
52
 
46
53
  def declare_atomic_sub_float64(lmod):
47
- fname = '___numba_atomic_double_sub'
48
- fnty = ir.FunctionType(ir.DoubleType(),
49
- (ir.PointerType(ir.DoubleType()), ir.DoubleType()))
54
+ fname = "___numba_atomic_double_sub"
55
+ fnty = ir.FunctionType(
56
+ ir.DoubleType(), (ir.PointerType(ir.DoubleType()), ir.DoubleType())
57
+ )
50
58
  return cgutils.get_or_insert_function(lmod, fnty, fname)
51
59
 
52
60
 
53
61
  def declare_atomic_inc_int32(lmod):
54
- fname = 'llvm.nvvm.atomic.load.inc.32.p0i32'
55
- fnty = ir.FunctionType(ir.IntType(32),
56
- (ir.PointerType(ir.IntType(32)), ir.IntType(32)))
62
+ fname = "llvm.nvvm.atomic.load.inc.32.p0i32"
63
+ fnty = ir.FunctionType(
64
+ ir.IntType(32), (ir.PointerType(ir.IntType(32)), ir.IntType(32))
65
+ )
57
66
  return cgutils.get_or_insert_function(lmod, fnty, fname)
58
67
 
59
68
 
60
69
  def declare_atomic_inc_int64(lmod):
61
- fname = '___numba_atomic_u64_inc'
62
- fnty = ir.FunctionType(ir.IntType(64),
63
- (ir.PointerType(ir.IntType(64)), ir.IntType(64)))
70
+ fname = "___numba_atomic_u64_inc"
71
+ fnty = ir.FunctionType(
72
+ ir.IntType(64), (ir.PointerType(ir.IntType(64)), ir.IntType(64))
73
+ )
64
74
  return cgutils.get_or_insert_function(lmod, fnty, fname)
65
75
 
66
76
 
67
77
  def declare_atomic_dec_int32(lmod):
68
- fname = 'llvm.nvvm.atomic.load.dec.32.p0i32'
69
- fnty = ir.FunctionType(ir.IntType(32),
70
- (ir.PointerType(ir.IntType(32)), ir.IntType(32)))
78
+ fname = "llvm.nvvm.atomic.load.dec.32.p0i32"
79
+ fnty = ir.FunctionType(
80
+ ir.IntType(32), (ir.PointerType(ir.IntType(32)), ir.IntType(32))
81
+ )
71
82
  return cgutils.get_or_insert_function(lmod, fnty, fname)
72
83
 
73
84
 
74
85
  def declare_atomic_dec_int64(lmod):
75
- fname = '___numba_atomic_u64_dec'
76
- fnty = ir.FunctionType(ir.IntType(64),
77
- (ir.PointerType(ir.IntType(64)), ir.IntType(64)))
86
+ fname = "___numba_atomic_u64_dec"
87
+ fnty = ir.FunctionType(
88
+ ir.IntType(64), (ir.PointerType(ir.IntType(64)), ir.IntType(64))
89
+ )
78
90
  return cgutils.get_or_insert_function(lmod, fnty, fname)
79
91
 
80
92
 
81
93
  def declare_atomic_max_float32(lmod):
82
- fname = '___numba_atomic_float_max'
83
- fnty = ir.FunctionType(ir.FloatType(),
84
- (ir.PointerType(ir.FloatType()), ir.FloatType()))
94
+ fname = "___numba_atomic_float_max"
95
+ fnty = ir.FunctionType(
96
+ ir.FloatType(), (ir.PointerType(ir.FloatType()), ir.FloatType())
97
+ )
85
98
  return cgutils.get_or_insert_function(lmod, fnty, fname)
86
99
 
87
100
 
88
101
  def declare_atomic_max_float64(lmod):
89
- fname = '___numba_atomic_double_max'
90
- fnty = ir.FunctionType(ir.DoubleType(),
91
- (ir.PointerType(ir.DoubleType()), ir.DoubleType()))
102
+ fname = "___numba_atomic_double_max"
103
+ fnty = ir.FunctionType(
104
+ ir.DoubleType(), (ir.PointerType(ir.DoubleType()), ir.DoubleType())
105
+ )
92
106
  return cgutils.get_or_insert_function(lmod, fnty, fname)
93
107
 
94
108
 
95
109
  def declare_atomic_min_float32(lmod):
96
- fname = '___numba_atomic_float_min'
97
- fnty = ir.FunctionType(ir.FloatType(),
98
- (ir.PointerType(ir.FloatType()), ir.FloatType()))
110
+ fname = "___numba_atomic_float_min"
111
+ fnty = ir.FunctionType(
112
+ ir.FloatType(), (ir.PointerType(ir.FloatType()), ir.FloatType())
113
+ )
99
114
  return cgutils.get_or_insert_function(lmod, fnty, fname)
100
115
 
101
116
 
102
117
  def declare_atomic_min_float64(lmod):
103
- fname = '___numba_atomic_double_min'
104
- fnty = ir.FunctionType(ir.DoubleType(),
105
- (ir.PointerType(ir.DoubleType()), ir.DoubleType()))
118
+ fname = "___numba_atomic_double_min"
119
+ fnty = ir.FunctionType(
120
+ ir.DoubleType(), (ir.PointerType(ir.DoubleType()), ir.DoubleType())
121
+ )
106
122
  return cgutils.get_or_insert_function(lmod, fnty, fname)
107
123
 
108
124
 
109
125
  def declare_atomic_nanmax_float32(lmod):
110
- fname = '___numba_atomic_float_nanmax'
111
- fnty = ir.FunctionType(ir.FloatType(),
112
- (ir.PointerType(ir.FloatType()), ir.FloatType()))
126
+ fname = "___numba_atomic_float_nanmax"
127
+ fnty = ir.FunctionType(
128
+ ir.FloatType(), (ir.PointerType(ir.FloatType()), ir.FloatType())
129
+ )
113
130
  return cgutils.get_or_insert_function(lmod, fnty, fname)
114
131
 
115
132
 
116
133
  def declare_atomic_nanmax_float64(lmod):
117
- fname = '___numba_atomic_double_nanmax'
118
- fnty = ir.FunctionType(ir.DoubleType(),
119
- (ir.PointerType(ir.DoubleType()), ir.DoubleType()))
134
+ fname = "___numba_atomic_double_nanmax"
135
+ fnty = ir.FunctionType(
136
+ ir.DoubleType(), (ir.PointerType(ir.DoubleType()), ir.DoubleType())
137
+ )
120
138
  return cgutils.get_or_insert_function(lmod, fnty, fname)
121
139
 
122
140
 
123
141
  def declare_atomic_nanmin_float32(lmod):
124
- fname = '___numba_atomic_float_nanmin'
125
- fnty = ir.FunctionType(ir.FloatType(),
126
- (ir.PointerType(ir.FloatType()), ir.FloatType()))
142
+ fname = "___numba_atomic_float_nanmin"
143
+ fnty = ir.FunctionType(
144
+ ir.FloatType(), (ir.PointerType(ir.FloatType()), ir.FloatType())
145
+ )
127
146
  return cgutils.get_or_insert_function(lmod, fnty, fname)
128
147
 
129
148
 
130
149
  def declare_atomic_nanmin_float64(lmod):
131
- fname = '___numba_atomic_double_nanmin'
132
- fnty = ir.FunctionType(ir.DoubleType(),
133
- (ir.PointerType(ir.DoubleType()), ir.DoubleType()))
150
+ fname = "___numba_atomic_double_nanmin"
151
+ fnty = ir.FunctionType(
152
+ ir.DoubleType(), (ir.PointerType(ir.DoubleType()), ir.DoubleType())
153
+ )
134
154
  return cgutils.get_or_insert_function(lmod, fnty, fname)
135
155
 
136
156
 
137
157
  def declare_cudaCGGetIntrinsicHandle(lmod):
138
- fname = 'cudaCGGetIntrinsicHandle'
139
- fnty = ir.FunctionType(ir.IntType(64),
140
- (ir.IntType(32),))
158
+ fname = "cudaCGGetIntrinsicHandle"
159
+ fnty = ir.FunctionType(ir.IntType(64), (ir.IntType(32),))
141
160
  return cgutils.get_or_insert_function(lmod, fnty, fname)
142
161
 
143
162
 
144
163
  def declare_cudaCGSynchronize(lmod):
145
- fname = 'cudaCGSynchronize'
146
- fnty = ir.FunctionType(ir.IntType(32),
147
- (ir.IntType(64), ir.IntType(32)))
164
+ fname = "cudaCGSynchronize"
165
+ fnty = ir.FunctionType(ir.IntType(32), (ir.IntType(64), ir.IntType(32)))
148
166
  return cgutils.get_or_insert_function(lmod, fnty, fname)
149
167
 
150
168
 
151
169
  def declare_string(builder, value):
152
170
  lmod = builder.basic_block.function.module
153
171
  cval = cgutils.make_bytearray(value.encode("utf-8") + b"\x00")
154
- gl = cgutils.add_global_variable(lmod, cval.type, name="_str",
155
- addrspace=nvvm.ADDRSPACE_CONSTANT)
156
- gl.linkage = 'internal'
172
+ gl = cgutils.add_global_variable(
173
+ lmod, cval.type, name="_str", addrspace=nvvm.ADDRSPACE_CONSTANT
174
+ )
175
+ gl.linkage = "internal"
157
176
  gl.global_constant = True
158
177
  gl.initializer = cval
159
178
 
160
- return builder.addrspacecast(gl, ir.PointerType(ir.IntType(8)), 'generic')
179
+ return builder.addrspacecast(gl, ir.PointerType(ir.IntType(8)), "generic")
161
180
 
162
181
 
163
182
  def declare_vprint(lmod):
@@ -172,24 +191,20 @@ def declare_vprint(lmod):
172
191
  # -----------------------------------------------------------------------------
173
192
 
174
193
  SREG_MAPPING = {
175
- 'tid.x': 'llvm.nvvm.read.ptx.sreg.tid.x',
176
- 'tid.y': 'llvm.nvvm.read.ptx.sreg.tid.y',
177
- 'tid.z': 'llvm.nvvm.read.ptx.sreg.tid.z',
178
-
179
- 'ntid.x': 'llvm.nvvm.read.ptx.sreg.ntid.x',
180
- 'ntid.y': 'llvm.nvvm.read.ptx.sreg.ntid.y',
181
- 'ntid.z': 'llvm.nvvm.read.ptx.sreg.ntid.z',
182
-
183
- 'ctaid.x': 'llvm.nvvm.read.ptx.sreg.ctaid.x',
184
- 'ctaid.y': 'llvm.nvvm.read.ptx.sreg.ctaid.y',
185
- 'ctaid.z': 'llvm.nvvm.read.ptx.sreg.ctaid.z',
186
-
187
- 'nctaid.x': 'llvm.nvvm.read.ptx.sreg.nctaid.x',
188
- 'nctaid.y': 'llvm.nvvm.read.ptx.sreg.nctaid.y',
189
- 'nctaid.z': 'llvm.nvvm.read.ptx.sreg.nctaid.z',
190
-
191
- 'warpsize': 'llvm.nvvm.read.ptx.sreg.warpsize',
192
- 'laneid': 'llvm.nvvm.read.ptx.sreg.laneid',
194
+ "tid.x": "llvm.nvvm.read.ptx.sreg.tid.x",
195
+ "tid.y": "llvm.nvvm.read.ptx.sreg.tid.y",
196
+ "tid.z": "llvm.nvvm.read.ptx.sreg.tid.z",
197
+ "ntid.x": "llvm.nvvm.read.ptx.sreg.ntid.x",
198
+ "ntid.y": "llvm.nvvm.read.ptx.sreg.ntid.y",
199
+ "ntid.z": "llvm.nvvm.read.ptx.sreg.ntid.z",
200
+ "ctaid.x": "llvm.nvvm.read.ptx.sreg.ctaid.x",
201
+ "ctaid.y": "llvm.nvvm.read.ptx.sreg.ctaid.y",
202
+ "ctaid.z": "llvm.nvvm.read.ptx.sreg.ctaid.z",
203
+ "nctaid.x": "llvm.nvvm.read.ptx.sreg.nctaid.x",
204
+ "nctaid.y": "llvm.nvvm.read.ptx.sreg.nctaid.y",
205
+ "nctaid.z": "llvm.nvvm.read.ptx.sreg.nctaid.z",
206
+ "warpsize": "llvm.nvvm.read.ptx.sreg.warpsize",
207
+ "laneid": "llvm.nvvm.read.ptx.sreg.laneid",
193
208
  }
194
209
 
195
210
 
@@ -205,16 +220,16 @@ class SRegBuilder(object):
205
220
  self.builder = builder
206
221
 
207
222
  def tid(self, xyz):
208
- return call_sreg(self.builder, 'tid.%s' % xyz)
223
+ return call_sreg(self.builder, "tid.%s" % xyz)
209
224
 
210
225
  def ctaid(self, xyz):
211
- return call_sreg(self.builder, 'ctaid.%s' % xyz)
226
+ return call_sreg(self.builder, "ctaid.%s" % xyz)
212
227
 
213
228
  def ntid(self, xyz):
214
- return call_sreg(self.builder, 'ntid.%s' % xyz)
229
+ return call_sreg(self.builder, "ntid.%s" % xyz)
215
230
 
216
231
  def nctaid(self, xyz):
217
- return call_sreg(self.builder, 'nctaid.%s' % xyz)
232
+ return call_sreg(self.builder, "nctaid.%s" % xyz)
218
233
 
219
234
  def getdim(self, xyz):
220
235
  i64 = ir.IntType(64)
@@ -227,7 +242,7 @@ class SRegBuilder(object):
227
242
 
228
243
  def get_global_id(builder, dim):
229
244
  sreg = SRegBuilder(builder)
230
- it = (sreg.getdim(xyz) for xyz in 'xyz')
245
+ it = (sreg.getdim(xyz) for xyz in "xyz")
231
246
  seq = list(itertools.islice(it, None, dim))
232
247
  if dim == 1:
233
248
  return seq[0]
@@ -15,6 +15,7 @@ voidptr = ir.PointerType(ir.IntType(8))
15
15
 
16
16
  # NOTE: we don't use @lower here since print_item() doesn't return a LLVM value
17
17
 
18
+
18
19
  @singledispatch
19
20
  def print_item(ty, context, builder, val):
20
21
  """
@@ -22,8 +23,9 @@ def print_item(ty, context, builder, val):
22
23
  A (format string, [list of arguments]) is returned that will allow
23
24
  forming the final printf()-like call.
24
25
  """
25
- raise NotImplementedError("printing unimplemented for values of type %s"
26
- % (ty,))
26
+ raise NotImplementedError(
27
+ "printing unimplemented for values of type %s" % (ty,)
28
+ )
27
29
 
28
30
 
29
31
  @print_item.register(types.Integer)
@@ -92,11 +94,13 @@ def print_varargs(context, builder, sig, args):
92
94
 
93
95
  rawfmt = " ".join(formats) + "\n"
94
96
  if len(args) > 32:
95
- msg = ('CUDA print() cannot print more than 32 items. '
96
- 'The raw format string will be emitted by the kernel instead.')
97
+ msg = (
98
+ "CUDA print() cannot print more than 32 items. "
99
+ "The raw format string will be emitted by the kernel instead."
100
+ )
97
101
  warn(msg, NumbaWarning)
98
102
 
99
- rawfmt = rawfmt.replace('%', '%%')
103
+ rawfmt = rawfmt.replace("%", "%%")
100
104
  fmt = context.insert_string_const_addrspace(builder, rawfmt)
101
105
  array = cgutils.make_anonymous_struct(builder, values)
102
106
  arrayptr = cgutils.alloca_once_value(builder, array)