numba-cuda 0.10.0__tar.gz → 0.10.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (271) hide show
  1. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/PKG-INFO +1 -1
  2. numba_cuda-0.10.1/numba_cuda/VERSION +1 -0
  3. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/compiler.py +21 -2
  4. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/decorators.py +15 -1
  5. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/dispatcher.py +4 -3
  6. numba_cuda-0.10.1/numba_cuda/numba/cuda/tests/cudapy/test_inline.py +156 -0
  7. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda.egg-info/PKG-INFO +1 -1
  8. numba_cuda-0.10.0/numba_cuda/VERSION +0 -1
  9. numba_cuda-0.10.0/numba_cuda/numba/cuda/tests/cudapy/test_inline.py +0 -59
  10. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/LICENSE +0 -0
  11. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/README.md +0 -0
  12. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/__init__.py +0 -0
  13. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/_version.py +0 -0
  14. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/__init__.py +0 -0
  15. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/api.py +0 -0
  16. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/api_util.py +0 -0
  17. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/args.py +0 -0
  18. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/cg.py +0 -0
  19. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/codegen.py +0 -0
  20. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/cpp_function_wrappers.cu +0 -0
  21. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/cuda_bf16.py +0 -0
  22. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/cuda_paths.py +0 -0
  23. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/cudadecl.py +0 -0
  24. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/cudadrv/__init__.py +0 -0
  25. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/cudadrv/devicearray.py +0 -0
  26. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/cudadrv/devices.py +0 -0
  27. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/cudadrv/driver.py +0 -0
  28. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/cudadrv/drvapi.py +0 -0
  29. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/cudadrv/dummyarray.py +0 -0
  30. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/cudadrv/enums.py +0 -0
  31. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/cudadrv/error.py +0 -0
  32. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/cudadrv/libs.py +0 -0
  33. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/cudadrv/linkable_code.py +0 -0
  34. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/cudadrv/mappings.py +0 -0
  35. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/cudadrv/ndarray.py +0 -0
  36. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/cudadrv/nvrtc.py +0 -0
  37. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/cudadrv/nvvm.py +0 -0
  38. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/cudadrv/rtapi.py +0 -0
  39. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/cudadrv/runtime.py +0 -0
  40. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/cudaimpl.py +0 -0
  41. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/cudamath.py +0 -0
  42. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/debuginfo.py +0 -0
  43. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/descriptor.py +0 -0
  44. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/device_init.py +0 -0
  45. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/deviceufunc.py +0 -0
  46. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/errors.py +0 -0
  47. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/extending.py +0 -0
  48. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/include/11/cuda_bf16.h +0 -0
  49. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/include/11/cuda_bf16.hpp +0 -0
  50. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/include/11/cuda_fp16.h +0 -0
  51. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/include/11/cuda_fp16.hpp +0 -0
  52. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/include/12/cuda_bf16.h +0 -0
  53. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +0 -0
  54. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/include/12/cuda_fp16.h +0 -0
  55. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +0 -0
  56. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/initialize.py +0 -0
  57. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/intrinsic_wrapper.py +0 -0
  58. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/intrinsics.py +0 -0
  59. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/kernels/__init__.py +0 -0
  60. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/kernels/reduction.py +0 -0
  61. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/kernels/transpose.py +0 -0
  62. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/libdevice.py +0 -0
  63. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/libdevicedecl.py +0 -0
  64. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/libdevicefuncs.py +0 -0
  65. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/libdeviceimpl.py +0 -0
  66. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/locks.py +0 -0
  67. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/lowering.py +0 -0
  68. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/mathimpl.py +0 -0
  69. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/models.py +0 -0
  70. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/nvvmutils.py +0 -0
  71. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/printimpl.py +0 -0
  72. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/random.py +0 -0
  73. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/reshape_funcs.cu +0 -0
  74. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/runtime/__init__.py +0 -0
  75. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/runtime/memsys.cu +0 -0
  76. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/runtime/memsys.cuh +0 -0
  77. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/runtime/nrt.cu +0 -0
  78. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/runtime/nrt.py +0 -0
  79. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/simulator/__init__.py +0 -0
  80. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/simulator/api.py +0 -0
  81. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/simulator/compiler.py +0 -0
  82. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +0 -0
  83. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +0 -0
  84. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/simulator/cudadrv/devices.py +0 -0
  85. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/simulator/cudadrv/driver.py +0 -0
  86. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +0 -0
  87. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +0 -0
  88. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/simulator/cudadrv/error.py +0 -0
  89. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/simulator/cudadrv/libs.py +0 -0
  90. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +0 -0
  91. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +0 -0
  92. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/simulator/kernel.py +0 -0
  93. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/simulator/kernelapi.py +0 -0
  94. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/simulator/reduction.py +0 -0
  95. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/simulator/vector_types.py +0 -0
  96. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/simulator_init.py +0 -0
  97. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/stubs.py +0 -0
  98. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/target.py +0 -0
  99. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/testing.py +0 -0
  100. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/__init__.py +0 -0
  101. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/__init__.py +0 -0
  102. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +0 -0
  103. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +0 -0
  104. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +0 -0
  105. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +0 -0
  106. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +0 -0
  107. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +0 -0
  108. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +0 -0
  109. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +0 -0
  110. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +0 -0
  111. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +0 -0
  112. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +0 -0
  113. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +0 -0
  114. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_events.py +0 -0
  115. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +0 -0
  116. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_init.py +0 -0
  117. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +0 -0
  118. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +0 -0
  119. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +0 -0
  120. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +0 -0
  121. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +0 -0
  122. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +0 -0
  123. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +0 -0
  124. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +0 -0
  125. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +0 -0
  126. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +0 -0
  127. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +0 -0
  128. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +0 -0
  129. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +0 -0
  130. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +0 -0
  131. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +0 -0
  132. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/__init__.py +0 -0
  133. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +0 -0
  134. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +0 -0
  135. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +0 -0
  136. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -0
  137. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +0 -0
  138. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +0 -0
  139. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_array.py +0 -0
  140. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +0 -0
  141. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +0 -0
  142. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +0 -0
  143. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +0 -0
  144. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +0 -0
  145. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +0 -0
  146. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_caching.py +0 -0
  147. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_casting.py +0 -0
  148. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +0 -0
  149. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +0 -0
  150. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_complex.py +0 -0
  151. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +0 -0
  152. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +0 -0
  153. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +0 -0
  154. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +0 -0
  155. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +0 -0
  156. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +0 -0
  157. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +0 -0
  158. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_debug.py +0 -0
  159. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +0 -0
  160. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +0 -0
  161. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +0 -0
  162. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_enums.py +0 -0
  163. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_errors.py +0 -0
  164. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_exception.py +0 -0
  165. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_extending.py +0 -0
  166. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +0 -0
  167. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_forall.py +0 -0
  168. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +0 -0
  169. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +0 -0
  170. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_globals.py +0 -0
  171. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +0 -0
  172. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +0 -0
  173. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +0 -0
  174. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +0 -0
  175. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +0 -0
  176. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +0 -0
  177. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +0 -0
  178. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +0 -0
  179. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_lang.py +0 -0
  180. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +0 -0
  181. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +0 -0
  182. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +0 -0
  183. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +0 -0
  184. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +0 -0
  185. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_math.py +0 -0
  186. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +0 -0
  187. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +0 -0
  188. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +0 -0
  189. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +0 -0
  190. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +0 -0
  191. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +0 -0
  192. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +0 -0
  193. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_operator.py +0 -0
  194. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +0 -0
  195. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_overload.py +0 -0
  196. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_powi.py +0 -0
  197. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_print.py +0 -0
  198. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +0 -0
  199. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_random.py +0 -0
  200. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +0 -0
  201. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +0 -0
  202. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +0 -0
  203. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +0 -0
  204. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +0 -0
  205. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +0 -0
  206. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_sm.py +0 -0
  207. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +0 -0
  208. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +0 -0
  209. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_sync.py +0 -0
  210. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +0 -0
  211. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +0 -0
  212. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +0 -0
  213. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +0 -0
  214. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +0 -0
  215. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +0 -0
  216. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +0 -0
  217. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +0 -0
  218. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +0 -0
  219. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_warning.py +0 -0
  220. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +0 -0
  221. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudasim/__init__.py +0 -0
  222. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudasim/support.py +0 -0
  223. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +0 -0
  224. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/data/__init__.py +0 -0
  225. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/data/cuda_include.cu +0 -0
  226. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/data/error.cu +0 -0
  227. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/data/jitlink.cu +0 -0
  228. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -0
  229. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/data/warn.cu +0 -0
  230. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/doc_examples/__init__.py +0 -0
  231. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +0 -0
  232. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +0 -0
  233. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +0 -0
  234. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +0 -0
  235. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +0 -0
  236. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +0 -0
  237. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +0 -0
  238. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +0 -0
  239. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/doc_examples/test_random.py +0 -0
  240. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +0 -0
  241. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +0 -0
  242. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +0 -0
  243. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +0 -0
  244. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/nocuda/__init__.py +0 -0
  245. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +0 -0
  246. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +0 -0
  247. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/nocuda/test_import.py +0 -0
  248. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +0 -0
  249. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +0 -0
  250. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/nrt/__init__.py +0 -0
  251. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/nrt/test_nrt.py +0 -0
  252. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +0 -0
  253. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/support.py +0 -0
  254. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/test_binary_generation/Makefile +0 -0
  255. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +0 -0
  256. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu +0 -0
  257. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu +0 -0
  258. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/types.py +0 -0
  259. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/ufuncs.py +0 -0
  260. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/utils.py +0 -0
  261. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/vector_types.py +0 -0
  262. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda/numba/cuda/vectorizers.py +0 -0
  263. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda.egg-info/SOURCES.txt +0 -0
  264. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda.egg-info/dependency_links.txt +0 -0
  265. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda.egg-info/requires.txt +0 -0
  266. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/numba_cuda.egg-info/top_level.txt +0 -0
  267. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/pyproject.toml +0 -0
  268. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/setup.cfg +0 -0
  269. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/setup.py +0 -0
  270. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/site-packages/_numba_cuda_redirector.pth +0 -0
  271. {numba_cuda-0.10.0 → numba_cuda-0.10.1}/site-packages/_numba_cuda_redirector.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: numba-cuda
3
- Version: 0.10.0
3
+ Version: 0.10.1
4
4
  Summary: CUDA target for Numba
5
5
  Author: Anaconda Inc., NVIDIA Corporation
6
6
  License: BSD 2-clause
@@ -0,0 +1 @@
1
+ 0.10.1
@@ -278,7 +278,7 @@ def compile_cuda(
278
278
  args,
279
279
  debug=False,
280
280
  lineinfo=False,
281
- inline=False,
281
+ forceinline=False,
282
282
  fastmath=False,
283
283
  nvvm_options=None,
284
284
  cc=None,
@@ -316,7 +316,7 @@ def compile_cuda(
316
316
  else:
317
317
  flags.error_model = "numpy"
318
318
 
319
- if inline:
319
+ if forceinline:
320
320
  flags.forceinline = True
321
321
  if fastmath:
322
322
  flags.fastmath = True
@@ -574,6 +574,7 @@ def compile(
574
574
  abi="c",
575
575
  abi_info=None,
576
576
  output="ptx",
577
+ forceinline=False,
577
578
  ):
578
579
  """Compile a Python function to PTX or LTO-IR for a given set of argument
579
580
  types.
@@ -614,6 +615,11 @@ def compile(
614
615
  :type abi_info: dict
615
616
  :param output: Type of output to generate, either ``"ptx"`` or ``"ltoir"``.
616
617
  :type output: str
618
+ :param forceinline: Enables inlining at the NVVM IR level when set to
619
+ ``True``. This is accomplished by adding the
620
+ ``alwaysinline`` function attribute to the function
621
+ definition. This is only valid when the output is
622
+ ``"ltoir"``.
617
623
  :return: (code, resty): The compiled code and inferred return type
618
624
  :rtype: tuple
619
625
  """
@@ -626,6 +632,12 @@ def compile(
626
632
  if output not in ("ptx", "ltoir"):
627
633
  raise NotImplementedError(f"Unsupported output type: {output}")
628
634
 
635
+ if forceinline and not device:
636
+ raise ValueError("Cannot force-inline kernels")
637
+
638
+ if forceinline and output != "ltoir":
639
+ raise ValueError("Can only designate forced inlining in LTO-IR")
640
+
629
641
  debug = config.CUDA_DEBUGINFO_DEFAULT if debug is None else debug
630
642
  opt = (config.OPT != 0) if opt is None else opt
631
643
 
@@ -660,6 +672,7 @@ def compile(
660
672
  fastmath=fastmath,
661
673
  nvvm_options=nvvm_options,
662
674
  cc=cc,
675
+ forceinline=forceinline,
663
676
  )
664
677
  resty = cres.signature.return_type
665
678
 
@@ -699,6 +712,7 @@ def compile_for_current_device(
699
712
  abi="c",
700
713
  abi_info=None,
701
714
  output="ptx",
715
+ forceinline=False,
702
716
  ):
703
717
  """Compile a Python function to PTX or LTO-IR for a given signature for the
704
718
  current device's compute capability. This calls :func:`compile` with an
@@ -716,6 +730,7 @@ def compile_for_current_device(
716
730
  abi=abi,
717
731
  abi_info=abi_info,
718
732
  output=output,
733
+ forceinline=forceinline,
719
734
  )
720
735
 
721
736
 
@@ -730,6 +745,7 @@ def compile_ptx(
730
745
  opt=None,
731
746
  abi="numba",
732
747
  abi_info=None,
748
+ forceinline=False,
733
749
  ):
734
750
  """Compile a Python function to PTX for a given signature. See
735
751
  :func:`compile`. The defaults for this function are to compile a kernel
@@ -747,6 +763,7 @@ def compile_ptx(
747
763
  abi=abi,
748
764
  abi_info=abi_info,
749
765
  output="ptx",
766
+ forceinline=forceinline,
750
767
  )
751
768
 
752
769
 
@@ -760,6 +777,7 @@ def compile_ptx_for_current_device(
760
777
  opt=None,
761
778
  abi="numba",
762
779
  abi_info=None,
780
+ forceinline=False,
763
781
  ):
764
782
  """Compile a Python function to PTX for a given signature for the current
765
783
  device's compute capability. See :func:`compile_ptx`."""
@@ -775,6 +793,7 @@ def compile_ptx_for_current_device(
775
793
  opt=opt,
776
794
  abi=abi,
777
795
  abi_info=abi_info,
796
+ forceinline=forceinline,
778
797
  )
779
798
 
780
799
 
@@ -17,6 +17,7 @@ def jit(
17
17
  func_or_sig=None,
18
18
  device=False,
19
19
  inline="never",
20
+ forceinline=False,
20
21
  link=[],
21
22
  debug=None,
22
23
  opt=None,
@@ -39,6 +40,14 @@ def jit(
39
40
  .. note:: A kernel cannot have any return value.
40
41
  :param device: Indicates whether this is a device function.
41
42
  :type device: bool
43
+ :param inline: Enables inlining at the Numba IR level when set to
44
+ ``"always"``. See `Notes on Inlining
45
+ <https://numba.readthedocs.io/en/stable/developer/inlining.html>`_.
46
+ :type inline: str
47
+ :param forceinline: Enables inlining at the NVVM IR level when set to
48
+ ``True``. This is accomplished by adding the ``alwaysinline`` function
49
+ attribute to the function definition.
50
+ :type forceinline: bool
42
51
  :param link: A list of files containing PTX or CUDA C/C++ source to link
43
52
  with the function
44
53
  :type link: list
@@ -85,7 +94,9 @@ def jit(
85
94
  DeprecationWarning(
86
95
  "Passing bool to inline argument is deprecated, please refer to "
87
96
  "Numba's documentation on inlining: "
88
- "https://numba.readthedocs.io/en/stable/developer/inlining.html"
97
+ "https://numba.readthedocs.io/en/stable/developer/inlining.html. "
98
+ "You may have wanted the forceinline argument instead, to force "
99
+ "inlining at the NVVM IR level."
89
100
  )
90
101
 
91
102
  inline = "always" if inline else "never"
@@ -140,6 +151,7 @@ def jit(
140
151
  targetoptions["fastmath"] = fastmath
141
152
  targetoptions["device"] = device
142
153
  targetoptions["inline"] = inline
154
+ targetoptions["forceinline"] = forceinline
143
155
  targetoptions["extensions"] = extensions
144
156
 
145
157
  disp = CUDADispatcher(func, targetoptions=targetoptions)
@@ -182,6 +194,7 @@ def jit(
182
194
  func,
183
195
  device=device,
184
196
  inline=inline,
197
+ forceinline=forceinline,
185
198
  debug=debug,
186
199
  opt=opt,
187
200
  lineinfo=lineinfo,
@@ -206,6 +219,7 @@ def jit(
206
219
  targetoptions["fastmath"] = fastmath
207
220
  targetoptions["device"] = device
208
221
  targetoptions["inline"] = inline
222
+ targetoptions["forceinline"] = forceinline
209
223
  targetoptions["extensions"] = extensions
210
224
  disp = CUDADispatcher(func_or_sig, targetoptions=targetoptions)
211
225
 
@@ -137,6 +137,7 @@ class _Kernel(serialize.ReduceMixin):
137
137
  debug=False,
138
138
  lineinfo=False,
139
139
  inline=False,
140
+ forceinline=False,
140
141
  fastmath=False,
141
142
  extensions=None,
142
143
  max_registers=None,
@@ -182,7 +183,7 @@ class _Kernel(serialize.ReduceMixin):
182
183
  self.argtypes,
183
184
  debug=self.debug,
184
185
  lineinfo=lineinfo,
185
- inline=inline,
186
+ forceinline=forceinline,
186
187
  fastmath=fastmath,
187
188
  nvvm_options=nvvm_options,
188
189
  cc=cc,
@@ -1073,7 +1074,7 @@ class CUDADispatcher(Dispatcher, serialize.ReduceMixin):
1073
1074
  with self._compiling_counter:
1074
1075
  debug = self.targetoptions.get("debug")
1075
1076
  lineinfo = self.targetoptions.get("lineinfo")
1076
- inline = self.targetoptions.get("inline")
1077
+ forceinline = self.targetoptions.get("forceinline")
1077
1078
  fastmath = self.targetoptions.get("fastmath")
1078
1079
 
1079
1080
  nvvm_options = {
@@ -1091,7 +1092,7 @@ class CUDADispatcher(Dispatcher, serialize.ReduceMixin):
1091
1092
  args,
1092
1093
  debug=debug,
1093
1094
  lineinfo=lineinfo,
1094
- inline=inline,
1095
+ forceinline=forceinline,
1095
1096
  fastmath=fastmath,
1096
1097
  nvvm_options=nvvm_options,
1097
1098
  cc=cc,
@@ -0,0 +1,156 @@
1
+ import re
2
+ import numpy as np
3
+ from numba import cuda, types
4
+ from numba.cuda.testing import (
5
+ unittest,
6
+ CUDATestCase,
7
+ skip_on_cudasim,
8
+ )
9
+
10
+
11
+ @skip_on_cudasim("Cudasim does not support inline and forceinline")
12
+ class TestCudaInline(CUDATestCase):
13
+ def _test_call_inline(self, inline):
14
+ """Test @cuda.jit(inline=...)"""
15
+ a = np.ones(2, dtype=np.int32)
16
+
17
+ sig = (types.int32[::1],)
18
+
19
+ @cuda.jit(inline=inline)
20
+ def set_zero(a):
21
+ a[0] = 0
22
+
23
+ @cuda.jit(sig)
24
+ def call_set_zero(a):
25
+ set_zero(a)
26
+
27
+ call_set_zero[1, 2](a)
28
+
29
+ expected = np.arange(2, dtype=np.int32)
30
+ self.assertTrue(np.all(a == expected))
31
+
32
+ llvm_ir = call_set_zero.inspect_llvm(sig)
33
+ pat = r"call [a-zA-Z0-9]* @"
34
+ match = re.compile(pat).search(llvm_ir)
35
+
36
+ if inline == "always" or inline is True:
37
+ # check that call was inlined
38
+ self.assertIsNone(match, msg=llvm_ir)
39
+ else:
40
+ assert inline == "never" or inline is False
41
+
42
+ # check that call was not inlined
43
+ self.assertIsNotNone(match, msg=llvm_ir)
44
+
45
+ # alwaysinline should not be in the IR when the inline kwarg is used
46
+ self.assertNotIn("alwaysinline", llvm_ir)
47
+
48
+ def test_call_inline_always(self):
49
+ self._test_call_inline("always")
50
+
51
+ def test_call_inline_never(self):
52
+ self._test_call_inline("never")
53
+
54
+ def test_call_inline_true(self):
55
+ self._test_call_inline(True)
56
+
57
+ def test_call_inline_false(self):
58
+ self._test_call_inline(False)
59
+
60
+ def _test_call_forceinline(self, forceinline):
61
+ """Test @cuda.jit(forceinline=...)"""
62
+ a = np.ones(2, dtype=np.int32)
63
+
64
+ sig = (types.int32[::1],)
65
+
66
+ @cuda.jit(forceinline=forceinline)
67
+ def set_zero(a):
68
+ a[0] = 0
69
+
70
+ @cuda.jit(sig)
71
+ def call_set_zero(a):
72
+ set_zero(a)
73
+
74
+ call_set_zero[1, 2](a)
75
+
76
+ expected = np.arange(2, dtype=np.int32)
77
+ self.assertTrue(np.all(a == expected))
78
+
79
+ llvm_ir = call_set_zero.inspect_llvm(sig)
80
+ pat = r"call [a-zA-Z0-9]* @"
81
+ match = re.compile(pat).search(llvm_ir)
82
+
83
+ # Check that call was not inlined at the Numba IR level - the call
84
+ # should still be present in the IR
85
+ self.assertIsNotNone(match)
86
+
87
+ # Check the definition of set_zero - it is a definition where the
88
+ # name does not include an underscore just before "set_zero", because
89
+ # that would match the "call_set_zero" definition
90
+ pat = r"define.*[^_]set_zero.*"
91
+ match = re.compile(pat).search(llvm_ir)
92
+ self.assertIsNotNone(match)
93
+ if forceinline:
94
+ self.assertIn("alwaysinline", match.group())
95
+ else:
96
+ self.assertNotIn("alwaysinline", match.group())
97
+
98
+ # The kernel, "call_set_zero", should never have "alwaysinline" set
99
+ pat = r"define.*call_set_zero.*"
100
+ match = re.compile(pat).search(llvm_ir)
101
+ self.assertIsNotNone(match)
102
+ self.assertNotIn("alwaysinline", match.group())
103
+
104
+ def test_call_forceinline_true(self):
105
+ self._test_call_forceinline(True)
106
+
107
+ def test_call_forceinline_false(self):
108
+ self._test_call_forceinline(False)
109
+
110
+ def test_compile_forceinline_ltoir_only(self):
111
+ def set_zero(a):
112
+ a[0] = 0
113
+
114
+ args = (types.float32[::1],)
115
+ msg = r"Can only designate forced inlining in LTO-IR"
116
+ with self.assertRaisesRegex(ValueError, msg):
117
+ cuda.compile(
118
+ set_zero,
119
+ args,
120
+ device=True,
121
+ forceinline=True,
122
+ )
123
+
124
+ def _compile_set_zero(self, forceinline):
125
+ def set_zero(a):
126
+ a[0] = 0
127
+
128
+ args = (types.float32[::1],)
129
+ ltoir, resty = cuda.compile(
130
+ set_zero,
131
+ args,
132
+ device=True,
133
+ output="ltoir",
134
+ forceinline=forceinline,
135
+ )
136
+
137
+ # Sanity check
138
+ self.assertEqual(resty, types.none)
139
+
140
+ return ltoir
141
+
142
+ def test_compile_forceinline(self):
143
+ ltoir_noinline = self._compile_set_zero(False)
144
+ ltoir_forceinline = self._compile_set_zero(True)
145
+
146
+ # As LTO-IR is opaque, the best we can do is check that changing the
147
+ # flag resulted in a change in the generated LTO-IR in some way.
148
+ self.assertNotEqual(
149
+ ltoir_noinline,
150
+ ltoir_forceinline,
151
+ "forceinline flag appeared to have no effect on LTO-IR",
152
+ )
153
+
154
+
155
+ if __name__ == "__main__":
156
+ unittest.main()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: numba-cuda
3
- Version: 0.10.0
3
+ Version: 0.10.1
4
4
  Summary: CUDA target for Numba
5
5
  Author: Anaconda Inc., NVIDIA Corporation
6
6
  License: BSD 2-clause
@@ -1 +0,0 @@
1
- 0.10.0
@@ -1,59 +0,0 @@
1
- import re
2
- import numpy as np
3
- from numba import cuda, types
4
- from numba.cuda.testing import (
5
- unittest,
6
- CUDATestCase,
7
- skip_on_cudasim,
8
- )
9
-
10
-
11
- class TestCudaInline(CUDATestCase):
12
- @skip_on_cudasim("Cudasim does not support inline")
13
- def _test_call_inline(self, inline):
14
- """Test @cuda.jit(inline=...)"""
15
- a = np.ones(2, dtype=np.int32)
16
-
17
- sig = (types.int32[::1],)
18
-
19
- @cuda.jit(inline=inline)
20
- def set_zero(a):
21
- a[0] = 0
22
-
23
- @cuda.jit(sig)
24
- def call_set_zero(a):
25
- set_zero(a)
26
-
27
- call_set_zero[1, 2](a)
28
-
29
- expected = np.arange(2, dtype=np.int32)
30
- self.assertTrue(np.all(a == expected))
31
-
32
- llvm_ir = call_set_zero.inspect_llvm(sig)
33
- pat = r"call [a-zA-Z0-9]* @"
34
- match = re.compile(pat).search(llvm_ir)
35
-
36
- if inline == "always" or inline is True:
37
- # check that call was inlined
38
- self.assertIsNone(match, msg=llvm_ir)
39
- else:
40
- assert inline == "never" or inline is False
41
-
42
- # check that call was not inlined
43
- self.assertIsNotNone(match, msg=llvm_ir)
44
-
45
- def test_call_inline_always(self):
46
- self._test_call_inline("always")
47
-
48
- def test_call_inline_never(self):
49
- self._test_call_inline("never")
50
-
51
- def test_call_inline_true(self):
52
- self._test_call_inline(True)
53
-
54
- def test_call_inline_false(self):
55
- self._test_call_inline(False)
56
-
57
-
58
- if __name__ == "__main__":
59
- unittest.main()
File without changes
File without changes