numba-cuda 0.8.1__py3-none-any.whl → 0.10.0__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (237)
  1. _numba_cuda_redirector.py +17 -13
  2. numba_cuda/VERSION +1 -1
  3. numba_cuda/_version.py +4 -1
  4. numba_cuda/numba/cuda/__init__.py +6 -2
  5. numba_cuda/numba/cuda/api.py +129 -86
  6. numba_cuda/numba/cuda/api_util.py +3 -3
  7. numba_cuda/numba/cuda/args.py +12 -16
  8. numba_cuda/numba/cuda/cg.py +6 -6
  9. numba_cuda/numba/cuda/codegen.py +74 -43
  10. numba_cuda/numba/cuda/compiler.py +246 -114
  11. numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
  12. numba_cuda/numba/cuda/cuda_bf16.py +5155 -0
  13. numba_cuda/numba/cuda/cuda_paths.py +293 -99
  14. numba_cuda/numba/cuda/cudadecl.py +93 -79
  15. numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
  16. numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
  17. numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
  18. numba_cuda/numba/cuda/cudadrv/driver.py +460 -297
  19. numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
  20. numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
  21. numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
  22. numba_cuda/numba/cuda/cudadrv/error.py +6 -2
  23. numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
  24. numba_cuda/numba/cuda/cudadrv/linkable_code.py +27 -3
  25. numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
  26. numba_cuda/numba/cuda/cudadrv/nvrtc.py +146 -30
  27. numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
  28. numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
  29. numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
  30. numba_cuda/numba/cuda/cudaimpl.py +296 -275
  31. numba_cuda/numba/cuda/cudamath.py +1 -1
  32. numba_cuda/numba/cuda/debuginfo.py +99 -7
  33. numba_cuda/numba/cuda/decorators.py +87 -45
  34. numba_cuda/numba/cuda/descriptor.py +1 -1
  35. numba_cuda/numba/cuda/device_init.py +68 -18
  36. numba_cuda/numba/cuda/deviceufunc.py +143 -98
  37. numba_cuda/numba/cuda/dispatcher.py +300 -213
  38. numba_cuda/numba/cuda/errors.py +13 -10
  39. numba_cuda/numba/cuda/extending.py +55 -1
  40. numba_cuda/numba/cuda/include/11/cuda_bf16.h +3749 -0
  41. numba_cuda/numba/cuda/include/11/cuda_bf16.hpp +2683 -0
  42. numba_cuda/numba/cuda/{cuda_fp16.h → include/11/cuda_fp16.h} +1090 -927
  43. numba_cuda/numba/cuda/{cuda_fp16.hpp → include/11/cuda_fp16.hpp} +468 -319
  44. numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
  45. numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
  46. numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
  47. numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
  48. numba_cuda/numba/cuda/initialize.py +5 -3
  49. numba_cuda/numba/cuda/intrinsic_wrapper.py +0 -39
  50. numba_cuda/numba/cuda/intrinsics.py +203 -28
  51. numba_cuda/numba/cuda/kernels/reduction.py +13 -13
  52. numba_cuda/numba/cuda/kernels/transpose.py +3 -6
  53. numba_cuda/numba/cuda/libdevice.py +317 -317
  54. numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
  55. numba_cuda/numba/cuda/locks.py +16 -0
  56. numba_cuda/numba/cuda/lowering.py +43 -0
  57. numba_cuda/numba/cuda/mathimpl.py +62 -57
  58. numba_cuda/numba/cuda/models.py +1 -5
  59. numba_cuda/numba/cuda/nvvmutils.py +103 -88
  60. numba_cuda/numba/cuda/printimpl.py +9 -5
  61. numba_cuda/numba/cuda/random.py +46 -36
  62. numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
  63. numba_cuda/numba/cuda/runtime/__init__.py +1 -1
  64. numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
  65. numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
  66. numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
  67. numba_cuda/numba/cuda/runtime/nrt.py +48 -43
  68. numba_cuda/numba/cuda/simulator/__init__.py +22 -12
  69. numba_cuda/numba/cuda/simulator/api.py +38 -22
  70. numba_cuda/numba/cuda/simulator/compiler.py +2 -2
  71. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
  72. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
  73. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
  74. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
  75. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
  76. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
  77. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
  78. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
  79. numba_cuda/numba/cuda/simulator/kernel.py +43 -34
  80. numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
  81. numba_cuda/numba/cuda/simulator/reduction.py +1 -0
  82. numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
  83. numba_cuda/numba/cuda/simulator_init.py +2 -4
  84. numba_cuda/numba/cuda/stubs.py +134 -108
  85. numba_cuda/numba/cuda/target.py +92 -47
  86. numba_cuda/numba/cuda/testing.py +24 -19
  87. numba_cuda/numba/cuda/tests/__init__.py +14 -12
  88. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
  89. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
  90. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
  91. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
  92. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
  93. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
  94. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
  95. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
  96. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
  97. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
  98. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
  99. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
  100. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
  101. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
  102. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
  103. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
  104. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
  105. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
  106. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
  107. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
  108. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
  109. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
  110. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
  111. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
  112. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
  113. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
  114. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
  115. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
  116. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
  117. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
  118. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
  119. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +10 -7
  120. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
  121. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
  122. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
  123. numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
  124. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
  125. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
  126. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
  127. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +257 -0
  128. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +59 -23
  129. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
  130. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
  131. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
  132. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
  133. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
  134. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
  135. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
  136. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
  137. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
  138. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
  139. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
  140. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
  141. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
  142. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
  143. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +77 -28
  144. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
  145. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
  146. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +24 -7
  147. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
  148. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
  149. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +21 -12
  150. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
  151. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
  152. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
  153. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
  154. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
  155. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
  156. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
  157. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
  158. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
  159. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +59 -0
  160. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
  161. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
  162. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
  163. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
  164. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
  165. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +7 -7
  166. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
  167. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
  168. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
  169. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
  170. numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
  171. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
  172. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
  173. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
  174. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
  175. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
  176. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
  177. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
  178. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
  179. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
  180. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
  181. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
  182. numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
  183. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
  184. numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
  185. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
  186. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
  187. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
  188. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
  189. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
  190. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
  191. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
  192. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
  193. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
  194. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
  195. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
  196. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
  197. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
  198. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
  199. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
  200. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
  201. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
  202. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
  203. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
  204. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
  205. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +81 -30
  206. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
  207. numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
  208. numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
  209. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
  210. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
  211. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
  212. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
  213. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
  214. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
  215. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
  216. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
  217. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
  218. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
  219. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
  220. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
  221. numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
  222. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
  223. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
  224. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
  225. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
  226. numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
  227. numba_cuda/numba/cuda/types.py +5 -2
  228. numba_cuda/numba/cuda/ufuncs.py +382 -362
  229. numba_cuda/numba/cuda/utils.py +2 -2
  230. numba_cuda/numba/cuda/vector_types.py +5 -3
  231. numba_cuda/numba/cuda/vectorizers.py +38 -33
  232. {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/METADATA +1 -1
  233. numba_cuda-0.10.0.dist-info/RECORD +263 -0
  234. {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/WHEEL +1 -1
  235. numba_cuda-0.8.1.dist-info/RECORD +0 -251
  236. {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/licenses/LICENSE +0 -0
  237. {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/top_level.txt +0 -0
@@ -1,14 +1,18 @@
1
1
  import unittest
2
2
 
3
- from numba.cuda.testing import (CUDATestCase, skip_if_cudadevrt_missing,
4
- skip_on_cudasim, skip_unless_cc_60,
5
- skip_if_mvc_enabled)
3
+ from numba.cuda.testing import (
4
+ CUDATestCase,
5
+ skip_if_cudadevrt_missing,
6
+ skip_on_cudasim,
7
+ skip_unless_cc_60,
8
+ skip_if_mvc_enabled,
9
+ )
6
10
  from numba.tests.support import captured_stdout
7
11
 
8
12
 
9
13
  @skip_if_cudadevrt_missing
10
14
  @skip_unless_cc_60
11
- @skip_if_mvc_enabled('CG not supported with MVC')
15
+ @skip_if_mvc_enabled("CG not supported with MVC")
12
16
  @skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
13
17
  class TestSessionization(CUDATestCase):
14
18
  """
@@ -40,26 +44,71 @@ class TestSessionization(CUDATestCase):
40
44
  ids = cuda.to_device(
41
45
  np.array(
42
46
  [
43
- 1, 1, 1, 1, 1, 1,
44
- 2, 2, 2,
45
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
46
- 4, 4, 4, 4, 4, 4, 4, 4, 4,
47
+ 1,
48
+ 1,
49
+ 1,
50
+ 1,
51
+ 1,
52
+ 1,
53
+ 2,
54
+ 2,
55
+ 2,
56
+ 3,
57
+ 3,
58
+ 3,
59
+ 3,
60
+ 3,
61
+ 3,
62
+ 3,
63
+ 3,
64
+ 3,
65
+ 3,
66
+ 4,
67
+ 4,
68
+ 4,
69
+ 4,
70
+ 4,
71
+ 4,
72
+ 4,
73
+ 4,
74
+ 4,
47
75
  ]
48
76
  )
49
77
  )
50
78
  sec = cuda.to_device(
51
79
  np.array(
52
80
  [
53
- 1, 2, 3, 5000, 5001, 5002, 1,
54
- 2, 3, 1, 2, 5000, 5001, 10000,
55
- 10001, 10002, 10003, 15000, 150001,
56
- 1, 5000, 50001, 15000, 20000,
57
- 25000, 25001, 25002, 25003,
81
+ 1,
82
+ 2,
83
+ 3,
84
+ 5000,
85
+ 5001,
86
+ 5002,
87
+ 1,
88
+ 2,
89
+ 3,
90
+ 1,
91
+ 2,
92
+ 5000,
93
+ 5001,
94
+ 10000,
95
+ 10001,
96
+ 10002,
97
+ 10003,
98
+ 15000,
99
+ 150001,
100
+ 1,
101
+ 5000,
102
+ 50001,
103
+ 15000,
104
+ 20000,
105
+ 25000,
106
+ 25001,
107
+ 25002,
108
+ 25003,
58
109
  ],
59
110
  dtype="datetime64[ns]",
60
- ).astype(
61
- "int64"
62
- ) # Cast to int64 for compatibility
111
+ ).astype("int64") # Cast to int64 for compatibility
63
112
  )
64
113
  # Create a vector to hold the results
65
114
  results = cuda.to_device(np.zeros(len(ids)))
@@ -105,6 +154,7 @@ class TestSessionization(CUDATestCase):
105
154
  if gid + look_ahead == size - 1:
106
155
  results[gid + look_ahead] = gid
107
156
  break
157
+
108
158
  # ex_sessionize.kernel.end
109
159
 
110
160
  # ex_sessionize.launch.begin
@@ -119,9 +169,34 @@ class TestSessionization(CUDATestCase):
119
169
  # ex_sessionize.launch.end
120
170
 
121
171
  expect = [
122
- 0, 0, 0, 3, 3, 3, 6, 6, 6, 9, 9,
123
- 11, 11, 13, 13, 13, 13, 17, 18, 19, 20, 21,
124
- 21, 23, 24, 24, 24, 24
172
+ 0,
173
+ 0,
174
+ 0,
175
+ 3,
176
+ 3,
177
+ 3,
178
+ 6,
179
+ 6,
180
+ 6,
181
+ 9,
182
+ 9,
183
+ 11,
184
+ 11,
185
+ 13,
186
+ 13,
187
+ 13,
188
+ 13,
189
+ 17,
190
+ 18,
191
+ 19,
192
+ 20,
193
+ 21,
194
+ 21,
195
+ 23,
196
+ 24,
197
+ 24,
198
+ 24,
199
+ 24,
125
200
  ]
126
201
  np.testing.assert_equal(expect, results.copy_to_host())
127
202
 
@@ -37,6 +37,7 @@ class TestVecAdd(CUDATestCase):
37
37
 
38
38
  if tid < size:
39
39
  c[tid] = a[tid] + b[tid]
40
+
40
41
  # ex_vecadd.kernel.end
41
42
 
42
43
  # Seed RNG for test repeatability
@@ -64,8 +65,7 @@ class TestVecAdd(CUDATestCase):
64
65
  # ex_vecadd.launch.end
65
66
 
66
67
  np.testing.assert_equal(
67
- c.copy_to_host(),
68
- a.copy_to_host() + b.copy_to_host()
68
+ c.copy_to_host(), a.copy_to_host() + b.copy_to_host()
69
69
  )
70
70
 
71
71
 
@@ -7,9 +7,8 @@ from numba.cuda.testing import skip_on_cudasim
7
7
 
8
8
  @skip_on_cudasim("Tests internals of the CUDA driver device array")
9
9
  class TestSlicing(unittest.TestCase):
10
-
11
10
  def assertSameContig(self, arr, nparr):
12
- attrs = 'C_CONTIGUOUS', 'F_CONTIGUOUS'
11
+ attrs = "C_CONTIGUOUS", "F_CONTIGUOUS"
13
12
  for attr in attrs:
14
13
  if arr.flags[attr] != nparr.flags[attr]:
15
14
  if arr.size == 0 and nparr.size == 0:
@@ -17,15 +16,18 @@ class TestSlicing(unittest.TestCase):
17
16
  # some are not
18
17
  pass
19
18
  else:
20
- self.fail("contiguous flag mismatch:\ngot=%s\nexpect=%s" %
21
- (arr.flags, nparr.flags))
19
+ self.fail(
20
+ "contiguous flag mismatch:\ngot=%s\nexpect=%s"
21
+ % (arr.flags, nparr.flags)
22
+ )
22
23
 
23
24
  #### 1D
24
25
 
25
26
  def test_slice0_1d(self):
26
27
  nparr = np.empty(4)
27
- arr = Array.from_desc(0, nparr.shape, nparr.strides,
28
- nparr.dtype.itemsize)
28
+ arr = Array.from_desc(
29
+ 0, nparr.shape, nparr.strides, nparr.dtype.itemsize
30
+ )
29
31
  self.assertSameContig(arr, nparr)
30
32
  xx = -2, -1, 0, 1, 2
31
33
  for x in xx:
@@ -37,8 +39,9 @@ class TestSlicing(unittest.TestCase):
37
39
 
38
40
  def test_slice1_1d(self):
39
41
  nparr = np.empty(4)
40
- arr = Array.from_desc(0, nparr.shape, nparr.strides,
41
- nparr.dtype.itemsize)
42
+ arr = Array.from_desc(
43
+ 0, nparr.shape, nparr.strides, nparr.dtype.itemsize
44
+ )
42
45
  xx = -2, -1, 0, 1, 2
43
46
  for x in xx:
44
47
  expect = nparr[:x]
@@ -49,8 +52,9 @@ class TestSlicing(unittest.TestCase):
49
52
 
50
53
  def test_slice2_1d(self):
51
54
  nparr = np.empty(4)
52
- arr = Array.from_desc(0, nparr.shape, nparr.strides,
53
- nparr.dtype.itemsize)
55
+ arr = Array.from_desc(
56
+ 0, nparr.shape, nparr.strides, nparr.dtype.itemsize
57
+ )
54
58
  xx = -2, -1, 0, 1, 2
55
59
  for x, y in itertools.product(xx, xx):
56
60
  expect = nparr[x:y]
@@ -63,8 +67,9 @@ class TestSlicing(unittest.TestCase):
63
67
 
64
68
  def test_slice0_2d(self):
65
69
  nparr = np.empty((4, 5))
66
- arr = Array.from_desc(0, nparr.shape, nparr.strides,
67
- nparr.dtype.itemsize)
70
+ arr = Array.from_desc(
71
+ 0, nparr.shape, nparr.strides, nparr.dtype.itemsize
72
+ )
68
73
  xx = -2, 0, 1, 2
69
74
  for x in xx:
70
75
  expect = nparr[x:]
@@ -82,8 +87,9 @@ class TestSlicing(unittest.TestCase):
82
87
 
83
88
  def test_slice1_2d(self):
84
89
  nparr = np.empty((4, 5))
85
- arr = Array.from_desc(0, nparr.shape, nparr.strides,
86
- nparr.dtype.itemsize)
90
+ arr = Array.from_desc(
91
+ 0, nparr.shape, nparr.strides, nparr.dtype.itemsize
92
+ )
87
93
  xx = -2, 0, 2
88
94
  for x in xx:
89
95
  expect = nparr[:x]
@@ -101,8 +107,9 @@ class TestSlicing(unittest.TestCase):
101
107
 
102
108
  def test_slice2_2d(self):
103
109
  nparr = np.empty((4, 5))
104
- arr = Array.from_desc(0, nparr.shape, nparr.strides,
105
- nparr.dtype.itemsize)
110
+ arr = Array.from_desc(
111
+ 0, nparr.shape, nparr.strides, nparr.dtype.itemsize
112
+ )
106
113
  xx = -2, 0, 2
107
114
  for s, t, u, v in itertools.product(xx, xx, xx, xx):
108
115
  expect = nparr[s:t, u:v]
@@ -122,8 +129,9 @@ class TestSlicing(unittest.TestCase):
122
129
 
123
130
  def test_strided_1d(self):
124
131
  nparr = np.empty(4)
125
- arr = Array.from_desc(0, nparr.shape, nparr.strides,
126
- nparr.dtype.itemsize)
132
+ arr = Array.from_desc(
133
+ 0, nparr.shape, nparr.strides, nparr.dtype.itemsize
134
+ )
127
135
  xx = -2, -1, 1, 2
128
136
  for x in xx:
129
137
  expect = nparr[::x]
@@ -134,8 +142,9 @@ class TestSlicing(unittest.TestCase):
134
142
 
135
143
  def test_strided_2d(self):
136
144
  nparr = np.empty((4, 5))
137
- arr = Array.from_desc(0, nparr.shape, nparr.strides,
138
- nparr.dtype.itemsize)
145
+ arr = Array.from_desc(
146
+ 0, nparr.shape, nparr.strides, nparr.dtype.itemsize
147
+ )
139
148
  xx = -2, -1, 1, 2
140
149
  for a, b in itertools.product(xx, xx):
141
150
  expect = nparr[::a, ::b]
@@ -146,8 +155,9 @@ class TestSlicing(unittest.TestCase):
146
155
 
147
156
  def test_strided_3d(self):
148
157
  nparr = np.empty((4, 5, 6))
149
- arr = Array.from_desc(0, nparr.shape, nparr.strides,
150
- nparr.dtype.itemsize)
158
+ arr = Array.from_desc(
159
+ 0, nparr.shape, nparr.strides, nparr.dtype.itemsize
160
+ )
151
161
  xx = -2, -1, 1, 2
152
162
  for a, b, c in itertools.product(xx, xx, xx):
153
163
  expect = nparr[::a, ::b, ::c]
@@ -160,16 +170,17 @@ class TestSlicing(unittest.TestCase):
160
170
  z = np.empty((1, 2, 3))
161
171
  z = np.transpose(z, axes=(2, 0, 1))
162
172
  arr = Array.from_desc(0, z.shape, z.strides, z.itemsize)
163
- self.assertEqual(z.flags['C_CONTIGUOUS'], arr.flags['C_CONTIGUOUS'])
164
- self.assertEqual(z.flags['F_CONTIGUOUS'], arr.flags['F_CONTIGUOUS'])
173
+ self.assertEqual(z.flags["C_CONTIGUOUS"], arr.flags["C_CONTIGUOUS"])
174
+ self.assertEqual(z.flags["F_CONTIGUOUS"], arr.flags["F_CONTIGUOUS"])
165
175
 
166
176
 
167
177
  @skip_on_cudasim("Tests internals of the CUDA driver device array")
168
178
  class TestReshape(unittest.TestCase):
169
179
  def test_reshape_2d2d(self):
170
180
  nparr = np.empty((4, 5))
171
- arr = Array.from_desc(0, nparr.shape, nparr.strides,
172
- nparr.dtype.itemsize)
181
+ arr = Array.from_desc(
182
+ 0, nparr.shape, nparr.strides, nparr.dtype.itemsize
183
+ )
173
184
  expect = nparr.reshape(5, 4)
174
185
  got = arr.reshape(5, 4)[0]
175
186
  self.assertEqual(got.shape, expect.shape)
@@ -177,8 +188,9 @@ class TestReshape(unittest.TestCase):
177
188
 
178
189
  def test_reshape_2d1d(self):
179
190
  nparr = np.empty((4, 5))
180
- arr = Array.from_desc(0, nparr.shape, nparr.strides,
181
- nparr.dtype.itemsize)
191
+ arr = Array.from_desc(
192
+ 0, nparr.shape, nparr.strides, nparr.dtype.itemsize
193
+ )
182
194
  expect = nparr.reshape(5 * 4)
183
195
  got = arr.reshape(5 * 4)[0]
184
196
  self.assertEqual(got.shape, expect.shape)
@@ -186,8 +198,9 @@ class TestReshape(unittest.TestCase):
186
198
 
187
199
  def test_reshape_3d3d(self):
188
200
  nparr = np.empty((3, 4, 5))
189
- arr = Array.from_desc(0, nparr.shape, nparr.strides,
190
- nparr.dtype.itemsize)
201
+ arr = Array.from_desc(
202
+ 0, nparr.shape, nparr.strides, nparr.dtype.itemsize
203
+ )
191
204
  expect = nparr.reshape(5, 3, 4)
192
205
  got = arr.reshape(5, 3, 4)[0]
193
206
  self.assertEqual(got.shape, expect.shape)
@@ -195,8 +208,9 @@ class TestReshape(unittest.TestCase):
195
208
 
196
209
  def test_reshape_3d2d(self):
197
210
  nparr = np.empty((3, 4, 5))
198
- arr = Array.from_desc(0, nparr.shape, nparr.strides,
199
- nparr.dtype.itemsize)
211
+ arr = Array.from_desc(
212
+ 0, nparr.shape, nparr.strides, nparr.dtype.itemsize
213
+ )
200
214
  expect = nparr.reshape(3 * 4, 5)
201
215
  got = arr.reshape(3 * 4, 5)[0]
202
216
  self.assertEqual(got.shape, expect.shape)
@@ -204,8 +218,9 @@ class TestReshape(unittest.TestCase):
204
218
 
205
219
  def test_reshape_3d1d(self):
206
220
  nparr = np.empty((3, 4, 5))
207
- arr = Array.from_desc(0, nparr.shape, nparr.strides,
208
- nparr.dtype.itemsize)
221
+ arr = Array.from_desc(
222
+ 0, nparr.shape, nparr.strides, nparr.dtype.itemsize
223
+ )
209
224
  expect = nparr.reshape(3 * 4 * 5)
210
225
  got = arr.reshape(3 * 4 * 5)[0]
211
226
  self.assertEqual(got.shape, expect.shape)
@@ -213,8 +228,9 @@ class TestReshape(unittest.TestCase):
213
228
 
214
229
  def test_reshape_infer2d2d(self):
215
230
  nparr = np.empty((4, 5))
216
- arr = Array.from_desc(0, nparr.shape, nparr.strides,
217
- nparr.dtype.itemsize)
231
+ arr = Array.from_desc(
232
+ 0, nparr.shape, nparr.strides, nparr.dtype.itemsize
233
+ )
218
234
  expect = nparr.reshape(-1, 4)
219
235
  got = arr.reshape(-1, 4)[0]
220
236
  self.assertEqual(got.shape, expect.shape)
@@ -222,8 +238,9 @@ class TestReshape(unittest.TestCase):
222
238
 
223
239
  def test_reshape_infer2d1d(self):
224
240
  nparr = np.empty((4, 5))
225
- arr = Array.from_desc(0, nparr.shape, nparr.strides,
226
- nparr.dtype.itemsize)
241
+ arr = Array.from_desc(
242
+ 0, nparr.shape, nparr.strides, nparr.dtype.itemsize
243
+ )
227
244
  expect = nparr.reshape(-1)
228
245
  got = arr.reshape(-1)[0]
229
246
  self.assertEqual(got.shape, expect.shape)
@@ -231,8 +248,9 @@ class TestReshape(unittest.TestCase):
231
248
 
232
249
  def test_reshape_infer3d3d(self):
233
250
  nparr = np.empty((3, 4, 5))
234
- arr = Array.from_desc(0, nparr.shape, nparr.strides,
235
- nparr.dtype.itemsize)
251
+ arr = Array.from_desc(
252
+ 0, nparr.shape, nparr.strides, nparr.dtype.itemsize
253
+ )
236
254
  expect = nparr.reshape(5, -1, 4)
237
255
  got = arr.reshape(5, -1, 4)[0]
238
256
  self.assertEqual(got.shape, expect.shape)
@@ -240,8 +258,9 @@ class TestReshape(unittest.TestCase):
240
258
 
241
259
  def test_reshape_infer3d2d(self):
242
260
  nparr = np.empty((3, 4, 5))
243
- arr = Array.from_desc(0, nparr.shape, nparr.strides,
244
- nparr.dtype.itemsize)
261
+ arr = Array.from_desc(
262
+ 0, nparr.shape, nparr.strides, nparr.dtype.itemsize
263
+ )
245
264
  expect = nparr.reshape(3, -1)
246
265
  got = arr.reshape(3, -1)[0]
247
266
  self.assertEqual(got.shape, expect.shape)
@@ -249,8 +268,9 @@ class TestReshape(unittest.TestCase):
249
268
 
250
269
  def test_reshape_infer3d1d(self):
251
270
  nparr = np.empty((3, 4, 5))
252
- arr = Array.from_desc(0, nparr.shape, nparr.strides,
253
- nparr.dtype.itemsize)
271
+ arr = Array.from_desc(
272
+ 0, nparr.shape, nparr.strides, nparr.dtype.itemsize
273
+ )
254
274
  expect = nparr.reshape(-1)
255
275
  got = arr.reshape(-1)[0]
256
276
  self.assertEqual(got.shape, expect.shape)
@@ -258,23 +278,26 @@ class TestReshape(unittest.TestCase):
258
278
 
259
279
  def test_reshape_infer_two_unknowns(self):
260
280
  nparr = np.empty((3, 4, 5))
261
- arr = Array.from_desc(0, nparr.shape, nparr.strides,
262
- nparr.dtype.itemsize)
281
+ arr = Array.from_desc(
282
+ 0, nparr.shape, nparr.strides, nparr.dtype.itemsize
283
+ )
263
284
 
264
285
  with self.assertRaises(ValueError) as raises:
265
286
  arr.reshape(-1, -1, 3)
266
- self.assertIn('can only specify one unknown dimension',
267
- str(raises.exception))
287
+ self.assertIn(
288
+ "can only specify one unknown dimension", str(raises.exception)
289
+ )
268
290
 
269
291
  def test_reshape_infer_invalid_shape(self):
270
292
  nparr = np.empty((3, 4, 5))
271
- arr = Array.from_desc(0, nparr.shape, nparr.strides,
272
- nparr.dtype.itemsize)
293
+ arr = Array.from_desc(
294
+ 0, nparr.shape, nparr.strides, nparr.dtype.itemsize
295
+ )
273
296
 
274
297
  with self.assertRaises(ValueError) as raises:
275
298
  arr.reshape(-1, 7)
276
299
 
277
- expected_message = 'cannot infer valid shape for unknown dimension'
300
+ expected_message = "cannot infer valid shape for unknown dimension"
278
301
  self.assertIn(expected_message, str(raises.exception))
279
302
 
280
303
 
@@ -289,6 +312,7 @@ class TestSqueeze(unittest.TestCase):
289
312
  def _assert_equal_shape_strides(arr1, arr2):
290
313
  self.assertEqual(arr1.shape, arr2.shape)
291
314
  self.assertEqual(arr1.strides, arr2.strides)
315
+
292
316
  _assert_equal_shape_strides(arr, nparr)
293
317
  _assert_equal_shape_strides(arr.squeeze()[0], nparr.squeeze())
294
318
  for axis in (0, 2, 4, (0, 2), (0, 4), (2, 4), (0, 2, 4)):
@@ -311,29 +335,33 @@ class TestSqueeze(unittest.TestCase):
311
335
  class TestExtent(unittest.TestCase):
312
336
  def test_extent_1d(self):
313
337
  nparr = np.empty(4)
314
- arr = Array.from_desc(0, nparr.shape, nparr.strides,
315
- nparr.dtype.itemsize)
338
+ arr = Array.from_desc(
339
+ 0, nparr.shape, nparr.strides, nparr.dtype.itemsize
340
+ )
316
341
  s, e = arr.extent
317
342
  self.assertEqual(e - s, nparr.size * nparr.dtype.itemsize)
318
343
 
319
344
  def test_extent_2d(self):
320
345
  nparr = np.empty((4, 5))
321
- arr = Array.from_desc(0, nparr.shape, nparr.strides,
322
- nparr.dtype.itemsize)
346
+ arr = Array.from_desc(
347
+ 0, nparr.shape, nparr.strides, nparr.dtype.itemsize
348
+ )
323
349
  s, e = arr.extent
324
350
  self.assertEqual(e - s, nparr.size * nparr.dtype.itemsize)
325
351
 
326
352
  def test_extent_iter_1d(self):
327
353
  nparr = np.empty(4)
328
- arr = Array.from_desc(0, nparr.shape, nparr.strides,
329
- nparr.dtype.itemsize)
354
+ arr = Array.from_desc(
355
+ 0, nparr.shape, nparr.strides, nparr.dtype.itemsize
356
+ )
330
357
  [ext] = list(arr.iter_contiguous_extent())
331
358
  self.assertEqual(ext, arr.extent)
332
359
 
333
360
  def test_extent_iter_2d(self):
334
361
  nparr = np.empty((4, 5))
335
- arr = Array.from_desc(0, nparr.shape, nparr.strides,
336
- nparr.dtype.itemsize)
362
+ arr = Array.from_desc(
363
+ 0, nparr.shape, nparr.strides, nparr.dtype.itemsize
364
+ )
337
365
  [ext] = list(arr.iter_contiguous_extent())
338
366
  self.assertEqual(ext, arr.extent)
339
367
 
@@ -346,8 +374,9 @@ class TestIterate(unittest.TestCase):
346
374
  # for #4201
347
375
  N = 5
348
376
  nparr = np.empty(N)
349
- arr = Array.from_desc(0, nparr.shape, nparr.strides,
350
- nparr.dtype.itemsize)
377
+ arr = Array.from_desc(
378
+ 0, nparr.shape, nparr.strides, nparr.dtype.itemsize
379
+ )
351
380
 
352
381
  x = 0 # just a placeholder
353
382
  # this loop should not raise AssertionError
@@ -355,5 +384,5 @@ class TestIterate(unittest.TestCase):
355
384
  x = val # noqa: F841
356
385
 
357
386
 
358
- if __name__ == '__main__':
387
+ if __name__ == "__main__":
359
388
  unittest.main()
@@ -9,19 +9,28 @@ from numba.cuda.cudadrv import nvvm
9
9
  class TestFunctionResolution(unittest.TestCase):
10
10
  def test_fp16_binary_operators(self):
11
11
  from numba.cuda.descriptor import cuda_target
12
- ops = (operator.add, operator.iadd, operator.sub, operator.isub,
13
- operator.mul, operator.imul)
12
+
13
+ ops = (
14
+ operator.add,
15
+ operator.iadd,
16
+ operator.sub,
17
+ operator.isub,
18
+ operator.mul,
19
+ operator.imul,
20
+ )
14
21
  for op in ops:
15
22
  fp16 = types.float16
16
23
  typingctx = cuda_target.typing_context
17
24
  typingctx.refresh()
18
25
  fnty = typingctx.resolve_value_type(op)
19
26
  out = typingctx.resolve_function_type(fnty, (fp16, fp16), {})
20
- self.assertEqual(out, typing.signature(fp16, fp16, fp16),
21
- msg=str(out))
27
+ self.assertEqual(
28
+ out, typing.signature(fp16, fp16, fp16), msg=str(out)
29
+ )
22
30
 
23
31
  def test_fp16_unary_operators(self):
24
32
  from numba.cuda.descriptor import cuda_target
33
+
25
34
  ops = (operator.neg, abs)
26
35
  for op in ops:
27
36
  fp16 = types.float16
@@ -32,5 +41,5 @@ class TestFunctionResolution(unittest.TestCase):
32
41
  self.assertEqual(out, typing.signature(fp16, fp16), msg=str(out))
33
42
 
34
43
 
35
- if __name__ == '__main__':
44
+ if __name__ == "__main__":
36
45
  unittest.main()
@@ -11,30 +11,30 @@ class TestImport(unittest.TestCase):
11
11
  """
12
12
 
13
13
  banlist = (
14
- 'numba.cpython.slicing',
15
- 'numba.cpython.tupleobj',
16
- 'numba.cpython.enumimpl',
17
- 'numba.cpython.hashing',
18
- 'numba.cpython.heapq',
19
- 'numba.cpython.iterators',
20
- 'numba.cpython.numbers',
21
- 'numba.cpython.rangeobj',
22
- 'numba.cpython.cmathimpl',
23
- 'numba.cpython.mathimpl',
24
- 'numba.cpython.printimpl',
25
- 'numba.cpython.randomimpl',
26
- 'numba.core.optional',
27
- 'numba.misc.gdb_hook',
28
- 'numba.misc.literal',
29
- 'numba.misc.cffiimpl',
30
- 'numba.np.linalg',
31
- 'numba.np.polynomial',
32
- 'numba.np.arraymath',
33
- 'numba.np.npdatetime',
34
- 'numba.np.npyimpl',
35
- 'numba.typed.typeddict',
36
- 'numba.typed.typedlist',
37
- 'numba.experimental.jitclass.base',
14
+ "numba.cpython.slicing",
15
+ "numba.cpython.tupleobj",
16
+ "numba.cpython.enumimpl",
17
+ "numba.cpython.hashing",
18
+ "numba.cpython.heapq",
19
+ "numba.cpython.iterators",
20
+ "numba.cpython.numbers",
21
+ "numba.cpython.rangeobj",
22
+ "numba.cpython.cmathimpl",
23
+ "numba.cpython.mathimpl",
24
+ "numba.cpython.printimpl",
25
+ "numba.cpython.randomimpl",
26
+ "numba.core.optional",
27
+ "numba.misc.gdb_hook",
28
+ "numba.misc.literal",
29
+ "numba.misc.cffiimpl",
30
+ "numba.np.linalg",
31
+ "numba.np.polynomial",
32
+ "numba.np.arraymath",
33
+ "numba.np.npdatetime",
34
+ "numba.np.npyimpl",
35
+ "numba.typed.typeddict",
36
+ "numba.typed.typedlist",
37
+ "numba.experimental.jitclass.base",
38
38
  )
39
39
 
40
40
  code = "import sys; from numba import cuda; print(list(sys.modules))"
@@ -45,5 +45,5 @@ class TestImport(unittest.TestCase):
45
45
  self.assertFalse(unexpected, "some modules unexpectedly imported")
46
46
 
47
47
 
48
- if __name__ == '__main__':
48
+ if __name__ == "__main__":
49
49
  unittest.main()