numba-cuda 0.8.1__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (227)
  1. _numba_cuda_redirector.py +17 -13
  2. numba_cuda/VERSION +1 -1
  3. numba_cuda/_version.py +4 -1
  4. numba_cuda/numba/cuda/__init__.py +6 -2
  5. numba_cuda/numba/cuda/api.py +129 -86
  6. numba_cuda/numba/cuda/api_util.py +3 -3
  7. numba_cuda/numba/cuda/args.py +12 -16
  8. numba_cuda/numba/cuda/cg.py +6 -6
  9. numba_cuda/numba/cuda/codegen.py +74 -43
  10. numba_cuda/numba/cuda/compiler.py +232 -113
  11. numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
  12. numba_cuda/numba/cuda/cuda_fp16.h +661 -661
  13. numba_cuda/numba/cuda/cuda_fp16.hpp +3 -3
  14. numba_cuda/numba/cuda/cuda_paths.py +291 -99
  15. numba_cuda/numba/cuda/cudadecl.py +125 -69
  16. numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
  17. numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
  18. numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
  19. numba_cuda/numba/cuda/cudadrv/driver.py +460 -297
  20. numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
  21. numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
  22. numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
  23. numba_cuda/numba/cuda/cudadrv/error.py +6 -2
  24. numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
  25. numba_cuda/numba/cuda/cudadrv/linkable_code.py +16 -1
  26. numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
  27. numba_cuda/numba/cuda/cudadrv/nvrtc.py +138 -29
  28. numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
  29. numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
  30. numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
  31. numba_cuda/numba/cuda/cudaimpl.py +317 -233
  32. numba_cuda/numba/cuda/cudamath.py +1 -1
  33. numba_cuda/numba/cuda/debuginfo.py +8 -6
  34. numba_cuda/numba/cuda/decorators.py +75 -45
  35. numba_cuda/numba/cuda/descriptor.py +1 -1
  36. numba_cuda/numba/cuda/device_init.py +69 -18
  37. numba_cuda/numba/cuda/deviceufunc.py +143 -98
  38. numba_cuda/numba/cuda/dispatcher.py +300 -213
  39. numba_cuda/numba/cuda/errors.py +13 -10
  40. numba_cuda/numba/cuda/extending.py +1 -1
  41. numba_cuda/numba/cuda/initialize.py +5 -3
  42. numba_cuda/numba/cuda/intrinsic_wrapper.py +3 -3
  43. numba_cuda/numba/cuda/intrinsics.py +31 -27
  44. numba_cuda/numba/cuda/kernels/reduction.py +13 -13
  45. numba_cuda/numba/cuda/kernels/transpose.py +3 -6
  46. numba_cuda/numba/cuda/libdevice.py +317 -317
  47. numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
  48. numba_cuda/numba/cuda/locks.py +16 -0
  49. numba_cuda/numba/cuda/mathimpl.py +62 -57
  50. numba_cuda/numba/cuda/models.py +1 -5
  51. numba_cuda/numba/cuda/nvvmutils.py +103 -88
  52. numba_cuda/numba/cuda/printimpl.py +9 -5
  53. numba_cuda/numba/cuda/random.py +46 -36
  54. numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
  55. numba_cuda/numba/cuda/runtime/__init__.py +1 -1
  56. numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
  57. numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
  58. numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
  59. numba_cuda/numba/cuda/runtime/nrt.py +48 -43
  60. numba_cuda/numba/cuda/simulator/__init__.py +22 -12
  61. numba_cuda/numba/cuda/simulator/api.py +38 -22
  62. numba_cuda/numba/cuda/simulator/compiler.py +2 -2
  63. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
  64. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
  65. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
  66. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
  67. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
  68. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
  69. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
  70. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
  71. numba_cuda/numba/cuda/simulator/kernel.py +43 -34
  72. numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
  73. numba_cuda/numba/cuda/simulator/reduction.py +1 -0
  74. numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
  75. numba_cuda/numba/cuda/simulator_init.py +2 -4
  76. numba_cuda/numba/cuda/stubs.py +139 -102
  77. numba_cuda/numba/cuda/target.py +64 -47
  78. numba_cuda/numba/cuda/testing.py +24 -19
  79. numba_cuda/numba/cuda/tests/__init__.py +14 -12
  80. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
  81. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
  82. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
  83. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
  84. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
  85. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
  86. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
  87. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
  88. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
  89. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
  90. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
  91. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
  92. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
  93. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
  94. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
  95. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
  96. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
  97. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
  98. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
  99. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
  100. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
  101. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
  102. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
  103. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
  104. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
  105. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
  106. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
  107. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
  108. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
  109. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
  110. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
  111. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +7 -6
  112. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
  113. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
  114. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
  115. numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
  116. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
  117. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
  118. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
  119. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +57 -21
  120. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
  121. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
  122. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
  123. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
  124. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
  125. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
  126. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
  127. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
  128. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
  129. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
  130. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
  131. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
  132. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
  133. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
  134. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +31 -28
  135. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
  136. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
  137. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +6 -7
  138. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
  139. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
  140. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +19 -12
  141. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
  142. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
  143. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
  144. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
  145. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
  146. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
  147. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
  148. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
  149. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
  150. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
  151. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
  152. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
  153. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
  154. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
  155. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +6 -6
  156. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
  157. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
  158. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
  159. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
  160. numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
  161. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
  162. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
  163. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
  164. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
  165. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
  166. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
  167. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
  168. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
  169. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
  170. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
  171. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
  172. numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
  173. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
  174. numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
  175. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
  176. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
  177. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
  178. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
  179. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
  180. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
  181. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
  182. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
  183. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
  184. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
  185. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
  186. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
  187. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
  188. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
  189. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
  190. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
  191. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
  192. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
  193. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
  194. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
  195. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +31 -25
  196. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
  197. numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
  198. numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
  199. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
  200. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
  201. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
  202. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
  203. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
  204. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
  205. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
  206. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
  207. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
  208. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
  209. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
  210. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
  211. numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
  212. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
  213. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
  214. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
  215. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
  216. numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
  217. numba_cuda/numba/cuda/types.py +5 -2
  218. numba_cuda/numba/cuda/ufuncs.py +382 -362
  219. numba_cuda/numba/cuda/utils.py +2 -2
  220. numba_cuda/numba/cuda/vector_types.py +2 -2
  221. numba_cuda/numba/cuda/vectorizers.py +37 -32
  222. {numba_cuda-0.8.1.dist-info → numba_cuda-0.9.0.dist-info}/METADATA +1 -1
  223. numba_cuda-0.9.0.dist-info/RECORD +253 -0
  224. {numba_cuda-0.8.1.dist-info → numba_cuda-0.9.0.dist-info}/WHEEL +1 -1
  225. numba_cuda-0.8.1.dist-info/RECORD +0 -251
  226. {numba_cuda-0.8.1.dist-info → numba_cuda-0.9.0.dist-info}/licenses/LICENSE +0 -0
  227. {numba_cuda-0.8.1.dist-info → numba_cuda-0.9.0.dist-info}/top_level.txt +0 -0
@@ -13,7 +13,6 @@ regex_pattern = (
13
13
 
14
14
 
15
15
  class TestUserExc(CUDATestCase):
16
-
17
16
  def setUp(self):
18
17
  super().setUp()
19
18
  # LTO optimizes away the exception status due to an oversight
@@ -29,7 +28,7 @@ class TestUserExc(CUDATestCase):
29
28
  elif x == 2:
30
29
  raise MyError("foo")
31
30
 
32
- test_exc[1, 1](0) # no raise
31
+ test_exc[1, 1](0) # no raise
33
32
  with self.assertRaises(MyError) as cm:
34
33
  test_exc[1, 1](1)
35
34
  if not config.ENABLE_CUDASIM:
@@ -43,5 +42,5 @@ class TestUserExc(CUDATestCase):
43
42
  self.assertIn("tid=[0, 0, 0] ctaid=[0, 0, 0]: foo", str(cm.exception))
44
43
 
45
44
 
46
- if __name__ == '__main__':
45
+ if __name__ == "__main__":
47
46
  unittest.main()
@@ -44,12 +44,7 @@ def make_kernel(vtype):
44
44
  res[2] = v.z
45
45
 
46
46
  def kernel_4elem(res):
47
- v = vobj(
48
- base_type(0),
49
- base_type(1),
50
- base_type(2),
51
- base_type(3)
52
- )
47
+ v = vobj(base_type(0), base_type(1), base_type(2), base_type(3))
53
48
  res[0] = v.x
54
49
  res[1] = v.y
55
50
  res[2] = v.z
@@ -59,7 +54,7 @@ def make_kernel(vtype):
59
54
  1: kernel_1elem,
60
55
  2: kernel_2elem,
61
56
  3: kernel_3elem,
62
- 4: kernel_4elem
57
+ 4: kernel_4elem,
63
58
  }[vtype.num_elements]
64
59
  return cuda.jit(host_function)
65
60
 
@@ -83,13 +78,13 @@ def make_fancy_creation_kernel(vtype):
83
78
  three = base_type(3.0)
84
79
  four = base_type(4.0)
85
80
 
86
- j = 0 # index of the result array
81
+ j = 0 # index of the result array
87
82
 
88
83
  # Construct a 1-component vector type, possible combination includes:
89
84
  # 2C1 = 2 combinations.
90
85
 
91
86
  f1_1 = v1(one) # 1
92
- f1_2 = v1(f1_1) # 1
87
+ f1_2 = v1(f1_1) # 1
93
88
 
94
89
  res[0] = f1_1.x
95
90
  res[1] = f1_2.x
@@ -98,11 +93,11 @@ def make_fancy_creation_kernel(vtype):
98
93
  # Construct a 2-component vector type, possible combination includes:
99
94
  # 1 + 2C1 * 2 = 5 combinations
100
95
 
101
- f2_1 = v2(two, three) # 2 3
102
- f2_2 = v2(f1_1, three) # 1 3
103
- f2_3 = v2(two, f1_1) # 2 1
104
- f2_4 = v2(f1_1, f1_1) # 1 1
105
- f2_5 = v2(f2_1) # 2 3
96
+ f2_1 = v2(two, three) # 2 3
97
+ f2_2 = v2(f1_1, three) # 1 3
98
+ f2_3 = v2(two, f1_1) # 2 1
99
+ f2_4 = v2(f1_1, f1_1) # 1 1
100
+ f2_5 = v2(f2_1) # 2 3
106
101
 
107
102
  for v in (f2_1, f2_2, f2_3, f2_4, f2_5):
108
103
  res[j] = v.x
@@ -112,24 +107,37 @@ def make_fancy_creation_kernel(vtype):
112
107
  # Construct a 3-component vector type, possible combination includes:
113
108
  # 1 + 2C1 * 2 + 2^3 = 13 combinations
114
109
 
115
- f3_1 = v3(f2_1, one) # 2 3 1
116
- f3_2 = v3(f2_1, f1_1) # 2 3 1
117
- f3_3 = v3(one, f2_1) # 1 2 3
118
- f3_4 = v3(f1_1, f2_1) # 1 2 3
119
-
120
- f3_5 = v3(one, two, three) # 1 2 3
121
- f3_6 = v3(f1_1, two, three) # 1 2 3
122
- f3_7 = v3(one, f1_1, three) # 1 1 3
123
- f3_8 = v3(one, two, f1_1) # 1 2 1
124
- f3_9 = v3(f1_1, f1_1, three) # 1 1 3
125
- f3_10 = v3(one, f1_1, f1_1) # 1 1 1
126
- f3_11 = v3(f1_1, two, f1_1) # 1 2 1
127
- f3_12 = v3(f1_1, f1_1, f1_1) # 1 1 1
128
-
129
- f3_13 = v3(f3_1) # 2 3 1
130
-
131
- for v in (f3_1, f3_2, f3_3, f3_4, f3_5, f3_6, f3_7, f3_8, f3_9,
132
- f3_10, f3_11, f3_12, f3_13):
110
+ f3_1 = v3(f2_1, one) # 2 3 1
111
+ f3_2 = v3(f2_1, f1_1) # 2 3 1
112
+ f3_3 = v3(one, f2_1) # 1 2 3
113
+ f3_4 = v3(f1_1, f2_1) # 1 2 3
114
+
115
+ f3_5 = v3(one, two, three) # 1 2 3
116
+ f3_6 = v3(f1_1, two, three) # 1 2 3
117
+ f3_7 = v3(one, f1_1, three) # 1 1 3
118
+ f3_8 = v3(one, two, f1_1) # 1 2 1
119
+ f3_9 = v3(f1_1, f1_1, three) # 1 1 3
120
+ f3_10 = v3(one, f1_1, f1_1) # 1 1 1
121
+ f3_11 = v3(f1_1, two, f1_1) # 1 2 1
122
+ f3_12 = v3(f1_1, f1_1, f1_1) # 1 1 1
123
+
124
+ f3_13 = v3(f3_1) # 2 3 1
125
+
126
+ for v in (
127
+ f3_1,
128
+ f3_2,
129
+ f3_3,
130
+ f3_4,
131
+ f3_5,
132
+ f3_6,
133
+ f3_7,
134
+ f3_8,
135
+ f3_9,
136
+ f3_10,
137
+ f3_11,
138
+ f3_12,
139
+ f3_13,
140
+ ):
133
141
  res[j] = v.x
134
142
  res[j + 1] = v.y
135
143
  res[j + 2] = v.z
@@ -138,48 +146,80 @@ def make_fancy_creation_kernel(vtype):
138
146
  # Construct a 4-component vector type, possible combination includes:
139
147
  # 1 + (2C1 * 2 + 1) + 3C1 * 2^2 + 2^4 = 34 combinations
140
148
 
141
- f4_1 = v4(one, two, three, four) # 1 2 3 4
142
- f4_2 = v4(f1_1, two, three, four) # 1 2 3 4
143
- f4_3 = v4(one, f1_1, three, four) # 1 1 3 4
144
- f4_4 = v4(one, two, f1_1, four) # 1 2 1 4
145
- f4_5 = v4(one, two, three, f1_1) # 1 2 3 1
149
+ f4_1 = v4(one, two, three, four) # 1 2 3 4
150
+ f4_2 = v4(f1_1, two, three, four) # 1 2 3 4
151
+ f4_3 = v4(one, f1_1, three, four) # 1 1 3 4
152
+ f4_4 = v4(one, two, f1_1, four) # 1 2 1 4
153
+ f4_5 = v4(one, two, three, f1_1) # 1 2 3 1
146
154
  f4_6 = v4(f1_1, f1_1, three, four) # 1 1 3 4
147
- f4_7 = v4(f1_1, two, f1_1, four) # 1 2 1 4
148
- f4_8 = v4(f1_1, two, three, f1_1) # 1 2 3 1
149
- f4_9 = v4(one, f1_1, f1_1, four) # 1 1 1 4
155
+ f4_7 = v4(f1_1, two, f1_1, four) # 1 2 1 4
156
+ f4_8 = v4(f1_1, two, three, f1_1) # 1 2 3 1
157
+ f4_9 = v4(one, f1_1, f1_1, four) # 1 1 1 4
150
158
  f4_10 = v4(one, f1_1, three, f1_1) # 1 1 3 1
151
- f4_11 = v4(one, two, f1_1, f1_1) # 1 2 1 1
159
+ f4_11 = v4(one, two, f1_1, f1_1) # 1 2 1 1
152
160
  f4_12 = v4(f1_1, f1_1, f1_1, four) # 1 1 1 4
153
- f4_13 = v4(f1_1, f1_1, three, f1_1) # 1 1 3 1
154
- f4_14 = v4(f1_1, two, f1_1, f1_1) # 1 2 1 1
155
- f4_15 = v4(one, f1_1, f1_1, f1_1) # 1 1 1 1
161
+ f4_13 = v4(f1_1, f1_1, three, f1_1) # 1 1 3 1
162
+ f4_14 = v4(f1_1, two, f1_1, f1_1) # 1 2 1 1
163
+ f4_15 = v4(one, f1_1, f1_1, f1_1) # 1 1 1 1
156
164
  f4_16 = v4(f1_1, f1_1, f1_1, f1_1) # 1 1 1 1
157
165
 
158
- f4_17 = v4(f2_1, two, three) # 2 3 2 3
159
- f4_18 = v4(f2_1, f1_1, three) # 2 3 1 3
160
- f4_19 = v4(f2_1, two, f1_1) # 2 3 2 1
161
- f4_20 = v4(f2_1, f1_1, f1_1) # 2 3 1 1
162
- f4_21 = v4(one, f2_1, three) # 1 2 3 3
163
- f4_22 = v4(f1_1, f2_1, three) # 1 2 3 3
164
- f4_23 = v4(one, f2_1, f1_1) # 1 2 3 1
165
- f4_24 = v4(f1_1, f2_1, f1_1) # 1 2 3 1
166
- f4_25 = v4(one, four, f2_1) # 1 4 2 3
167
- f4_26 = v4(f1_1, four, f2_1) # 1 4 2 3
168
- f4_27 = v4(one, f1_1, f2_1) # 1 1 2 3
169
- f4_28 = v4(f1_1, f1_1, f2_1) # 1 1 2 3
170
-
171
- f4_29 = v4(f2_1, f2_1) # 2 3 2 3
172
- f4_30 = v4(f3_1, four) # 2 3 1 4
173
- f4_31 = v4(f3_1, f1_1) # 2 3 1 1
174
- f4_32 = v4(four, f3_1) # 4 2 3 1
175
- f4_33 = v4(f1_1, f3_1) # 1 2 3 1
176
-
177
- f4_34 = v4(f4_1) # 1 2 3 4
178
-
179
- for v in (f4_1, f4_2, f4_3, f4_4, f4_5, f4_6, f4_7, f4_8, f4_9, f4_10,
180
- f4_11, f4_12, f4_13, f4_14, f4_15, f4_16, f4_17, f4_18, f4_19,
181
- f4_20, f4_21, f4_22, f4_23, f4_24, f4_25, f4_26, f4_27, f4_28,
182
- f4_29, f4_30, f4_31, f4_32, f4_33, f4_34):
166
+ f4_17 = v4(f2_1, two, three) # 2 3 2 3
167
+ f4_18 = v4(f2_1, f1_1, three) # 2 3 1 3
168
+ f4_19 = v4(f2_1, two, f1_1) # 2 3 2 1
169
+ f4_20 = v4(f2_1, f1_1, f1_1) # 2 3 1 1
170
+ f4_21 = v4(one, f2_1, three) # 1 2 3 3
171
+ f4_22 = v4(f1_1, f2_1, three) # 1 2 3 3
172
+ f4_23 = v4(one, f2_1, f1_1) # 1 2 3 1
173
+ f4_24 = v4(f1_1, f2_1, f1_1) # 1 2 3 1
174
+ f4_25 = v4(one, four, f2_1) # 1 4 2 3
175
+ f4_26 = v4(f1_1, four, f2_1) # 1 4 2 3
176
+ f4_27 = v4(one, f1_1, f2_1) # 1 1 2 3
177
+ f4_28 = v4(f1_1, f1_1, f2_1) # 1 1 2 3
178
+
179
+ f4_29 = v4(f2_1, f2_1) # 2 3 2 3
180
+ f4_30 = v4(f3_1, four) # 2 3 1 4
181
+ f4_31 = v4(f3_1, f1_1) # 2 3 1 1
182
+ f4_32 = v4(four, f3_1) # 4 2 3 1
183
+ f4_33 = v4(f1_1, f3_1) # 1 2 3 1
184
+
185
+ f4_34 = v4(f4_1) # 1 2 3 4
186
+
187
+ for v in (
188
+ f4_1,
189
+ f4_2,
190
+ f4_3,
191
+ f4_4,
192
+ f4_5,
193
+ f4_6,
194
+ f4_7,
195
+ f4_8,
196
+ f4_9,
197
+ f4_10,
198
+ f4_11,
199
+ f4_12,
200
+ f4_13,
201
+ f4_14,
202
+ f4_15,
203
+ f4_16,
204
+ f4_17,
205
+ f4_18,
206
+ f4_19,
207
+ f4_20,
208
+ f4_21,
209
+ f4_22,
210
+ f4_23,
211
+ f4_24,
212
+ f4_25,
213
+ f4_26,
214
+ f4_27,
215
+ f4_28,
216
+ f4_29,
217
+ f4_30,
218
+ f4_31,
219
+ f4_32,
220
+ f4_33,
221
+ f4_34,
222
+ ):
183
223
  res[j] = v.x
184
224
  res[j + 1] = v.y
185
225
  res[j + 2] = v.z
@@ -190,13 +230,13 @@ def make_fancy_creation_kernel(vtype):
190
230
 
191
231
 
192
232
  class TestCudaVectorType(CUDATestCase):
193
-
194
233
  def test_basic(self):
195
234
  """Basic test that makes sure that vector type and aliases
196
235
  are available within the cuda module from both device and
197
236
  simulator mode. This is an important sanity check, since other
198
237
  tests below tests the vector type objects programmatically.
199
238
  """
239
+
200
240
  @cuda.jit("void(float64[:])")
201
241
  def kernel(arr):
202
242
  v1 = cuda.float64x4(1.0, 3.0, 5.0, 7.0)
@@ -227,66 +267,201 @@ class TestCudaVectorType(CUDATestCase):
227
267
  with self.subTest(vty=vty):
228
268
  kernel = make_fancy_creation_kernel(vty)
229
269
 
230
- expected = np.array([
231
- # 1-component vectors
232
- 1,
233
- 1,
234
- # 2-component vectors
235
- 2, 3,
236
- 1, 3,
237
- 2, 1,
238
- 1, 1,
239
- 2, 3,
240
- # 3-component vectors
241
- 2, 3, 1,
242
- 2, 3, 1,
243
- 1, 2, 3,
244
- 1, 2, 3,
245
- 1, 2, 3,
246
- 1, 2, 3,
247
- 1, 1, 3,
248
- 1, 2, 1,
249
- 1, 1, 3,
250
- 1, 1, 1,
251
- 1, 2, 1,
252
- 1, 1, 1,
253
- 2, 3, 1,
254
- # 4-component vectors
255
- 1, 2, 3, 4,
256
- 1, 2, 3, 4,
257
- 1, 1, 3, 4,
258
- 1, 2, 1, 4,
259
- 1, 2, 3, 1,
260
- 1, 1, 3, 4,
261
- 1, 2, 1, 4,
262
- 1, 2, 3, 1,
263
- 1, 1, 1, 4,
264
- 1, 1, 3, 1,
265
- 1, 2, 1, 1,
266
- 1, 1, 1, 4,
267
- 1, 1, 3, 1,
268
- 1, 2, 1, 1,
269
- 1, 1, 1, 1,
270
- 1, 1, 1, 1,
271
- 2, 3, 2, 3,
272
- 2, 3, 1, 3,
273
- 2, 3, 2, 1,
274
- 2, 3, 1, 1,
275
- 1, 2, 3, 3,
276
- 1, 2, 3, 3,
277
- 1, 2, 3, 1,
278
- 1, 2, 3, 1,
279
- 1, 4, 2, 3,
280
- 1, 4, 2, 3,
281
- 1, 1, 2, 3,
282
- 1, 1, 2, 3,
283
- 2, 3, 2, 3,
284
- 2, 3, 1, 4,
285
- 2, 3, 1, 1,
286
- 4, 2, 3, 1,
287
- 1, 2, 3, 1,
288
- 1, 2, 3, 4
289
- ])
270
+ expected = np.array(
271
+ [
272
+ # 1-component vectors
273
+ 1,
274
+ 1,
275
+ # 2-component vectors
276
+ 2,
277
+ 3,
278
+ 1,
279
+ 3,
280
+ 2,
281
+ 1,
282
+ 1,
283
+ 1,
284
+ 2,
285
+ 3,
286
+ # 3-component vectors
287
+ 2,
288
+ 3,
289
+ 1,
290
+ 2,
291
+ 3,
292
+ 1,
293
+ 1,
294
+ 2,
295
+ 3,
296
+ 1,
297
+ 2,
298
+ 3,
299
+ 1,
300
+ 2,
301
+ 3,
302
+ 1,
303
+ 2,
304
+ 3,
305
+ 1,
306
+ 1,
307
+ 3,
308
+ 1,
309
+ 2,
310
+ 1,
311
+ 1,
312
+ 1,
313
+ 3,
314
+ 1,
315
+ 1,
316
+ 1,
317
+ 1,
318
+ 2,
319
+ 1,
320
+ 1,
321
+ 1,
322
+ 1,
323
+ 2,
324
+ 3,
325
+ 1,
326
+ # 4-component vectors
327
+ 1,
328
+ 2,
329
+ 3,
330
+ 4,
331
+ 1,
332
+ 2,
333
+ 3,
334
+ 4,
335
+ 1,
336
+ 1,
337
+ 3,
338
+ 4,
339
+ 1,
340
+ 2,
341
+ 1,
342
+ 4,
343
+ 1,
344
+ 2,
345
+ 3,
346
+ 1,
347
+ 1,
348
+ 1,
349
+ 3,
350
+ 4,
351
+ 1,
352
+ 2,
353
+ 1,
354
+ 4,
355
+ 1,
356
+ 2,
357
+ 3,
358
+ 1,
359
+ 1,
360
+ 1,
361
+ 1,
362
+ 4,
363
+ 1,
364
+ 1,
365
+ 3,
366
+ 1,
367
+ 1,
368
+ 2,
369
+ 1,
370
+ 1,
371
+ 1,
372
+ 1,
373
+ 1,
374
+ 4,
375
+ 1,
376
+ 1,
377
+ 3,
378
+ 1,
379
+ 1,
380
+ 2,
381
+ 1,
382
+ 1,
383
+ 1,
384
+ 1,
385
+ 1,
386
+ 1,
387
+ 1,
388
+ 1,
389
+ 1,
390
+ 1,
391
+ 2,
392
+ 3,
393
+ 2,
394
+ 3,
395
+ 2,
396
+ 3,
397
+ 1,
398
+ 3,
399
+ 2,
400
+ 3,
401
+ 2,
402
+ 1,
403
+ 2,
404
+ 3,
405
+ 1,
406
+ 1,
407
+ 1,
408
+ 2,
409
+ 3,
410
+ 3,
411
+ 1,
412
+ 2,
413
+ 3,
414
+ 3,
415
+ 1,
416
+ 2,
417
+ 3,
418
+ 1,
419
+ 1,
420
+ 2,
421
+ 3,
422
+ 1,
423
+ 1,
424
+ 4,
425
+ 2,
426
+ 3,
427
+ 1,
428
+ 4,
429
+ 2,
430
+ 3,
431
+ 1,
432
+ 1,
433
+ 2,
434
+ 3,
435
+ 1,
436
+ 1,
437
+ 2,
438
+ 3,
439
+ 2,
440
+ 3,
441
+ 2,
442
+ 3,
443
+ 2,
444
+ 3,
445
+ 1,
446
+ 4,
447
+ 2,
448
+ 3,
449
+ 1,
450
+ 1,
451
+ 4,
452
+ 2,
453
+ 3,
454
+ 1,
455
+ 1,
456
+ 2,
457
+ 3,
458
+ 1,
459
+ 1,
460
+ 2,
461
+ 3,
462
+ 4,
463
+ ]
464
+ )
290
465
  arr = np.zeros(expected.shape)
291
466
  kernel[1, 1](arr)
292
467
  np.testing.assert_almost_equal(arr, expected)