numba-cuda 0.5.0__tar.gz → 0.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (259)
  1. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/PKG-INFO +1 -1
  2. numba_cuda-0.6.0/numba_cuda/VERSION +1 -0
  3. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/compiler.py +78 -2
  4. numba_cuda-0.6.0/numba_cuda/numba/cuda/debuginfo.py +44 -0
  5. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/dispatcher.py +58 -11
  6. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/target.py +4 -134
  7. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +81 -0
  8. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +125 -0
  9. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/nrt/test_nrt.py +4 -1
  10. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +4 -1
  11. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda.egg-info/PKG-INFO +1 -1
  12. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda.egg-info/SOURCES.txt +1 -0
  13. numba_cuda-0.5.0/numba_cuda/VERSION +0 -1
  14. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/LICENSE +0 -0
  15. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/README.md +0 -0
  16. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/__init__.py +0 -0
  17. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/_version.py +0 -0
  18. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/__init__.py +0 -0
  19. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/api.py +0 -0
  20. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/api_util.py +0 -0
  21. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/args.py +0 -0
  22. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cg.py +0 -0
  23. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/codegen.py +0 -0
  24. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cpp_function_wrappers.cu +0 -0
  25. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cuda_fp16.h +0 -0
  26. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cuda_fp16.hpp +0 -0
  27. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cuda_paths.py +0 -0
  28. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadecl.py +0 -0
  29. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/__init__.py +0 -0
  30. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/devicearray.py +0 -0
  31. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/devices.py +0 -0
  32. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/driver.py +0 -0
  33. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/drvapi.py +0 -0
  34. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/dummyarray.py +0 -0
  35. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/enums.py +0 -0
  36. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/error.py +0 -0
  37. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/libs.py +0 -0
  38. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/linkable_code.py +0 -0
  39. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/mappings.py +0 -0
  40. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/ndarray.py +0 -0
  41. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/nvrtc.py +0 -0
  42. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/nvvm.py +0 -0
  43. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/rtapi.py +0 -0
  44. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/runtime.py +0 -0
  45. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudaimpl.py +0 -0
  46. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudamath.py +0 -0
  47. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/decorators.py +0 -0
  48. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/descriptor.py +0 -0
  49. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/device_init.py +0 -0
  50. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/deviceufunc.py +0 -0
  51. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/errors.py +0 -0
  52. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/extending.py +0 -0
  53. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/initialize.py +0 -0
  54. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/intrinsic_wrapper.py +0 -0
  55. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/intrinsics.py +0 -0
  56. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/kernels/__init__.py +0 -0
  57. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/kernels/reduction.py +0 -0
  58. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/kernels/transpose.py +0 -0
  59. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/libdevice.py +0 -0
  60. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/libdevicedecl.py +0 -0
  61. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/libdevicefuncs.py +0 -0
  62. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/libdeviceimpl.py +0 -0
  63. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/mathimpl.py +0 -0
  64. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/models.py +0 -0
  65. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/nvvmutils.py +0 -0
  66. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/printimpl.py +0 -0
  67. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/random.py +0 -0
  68. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/reshape_funcs.cu +0 -0
  69. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/runtime/__init__.py +0 -0
  70. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/runtime/memsys.cu +0 -0
  71. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/runtime/memsys.cuh +0 -0
  72. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/runtime/nrt.cu +0 -0
  73. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/runtime/nrt.py +0 -0
  74. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/__init__.py +0 -0
  75. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/api.py +0 -0
  76. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/compiler.py +0 -0
  77. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +0 -0
  78. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +0 -0
  79. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/cudadrv/devices.py +0 -0
  80. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/cudadrv/driver.py +0 -0
  81. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +0 -0
  82. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +0 -0
  83. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/cudadrv/error.py +0 -0
  84. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/cudadrv/libs.py +0 -0
  85. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +0 -0
  86. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +0 -0
  87. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/kernel.py +0 -0
  88. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/kernelapi.py +0 -0
  89. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/reduction.py +0 -0
  90. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/vector_types.py +0 -0
  91. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator_init.py +0 -0
  92. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/stubs.py +0 -0
  93. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/testing.py +0 -0
  94. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/__init__.py +0 -0
  95. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/__init__.py +0 -0
  96. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +0 -0
  97. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +0 -0
  98. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +0 -0
  99. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +0 -0
  100. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +0 -0
  101. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +0 -0
  102. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +0 -0
  103. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +0 -0
  104. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +0 -0
  105. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +0 -0
  106. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +0 -0
  107. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +0 -0
  108. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_events.py +0 -0
  109. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +0 -0
  110. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_init.py +0 -0
  111. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +0 -0
  112. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +0 -0
  113. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +0 -0
  114. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +0 -0
  115. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +0 -0
  116. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +0 -0
  117. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +0 -0
  118. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +0 -0
  119. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +0 -0
  120. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +0 -0
  121. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +0 -0
  122. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +0 -0
  123. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +0 -0
  124. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +0 -0
  125. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/__init__.py +0 -0
  126. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +0 -0
  127. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +0 -0
  128. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +0 -0
  129. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -0
  130. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +0 -0
  131. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +0 -0
  132. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_array.py +0 -0
  133. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +0 -0
  134. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +0 -0
  135. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +0 -0
  136. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +0 -0
  137. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +0 -0
  138. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_caching.py +0 -0
  139. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_casting.py +0 -0
  140. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +0 -0
  141. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +0 -0
  142. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_complex.py +0 -0
  143. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +0 -0
  144. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +0 -0
  145. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +0 -0
  146. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +0 -0
  147. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +0 -0
  148. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +0 -0
  149. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +0 -0
  150. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_debug.py +0 -0
  151. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +0 -0
  152. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_enums.py +0 -0
  153. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_errors.py +0 -0
  154. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_exception.py +0 -0
  155. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_extending.py +0 -0
  156. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +0 -0
  157. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_forall.py +0 -0
  158. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +0 -0
  159. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +0 -0
  160. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_globals.py +0 -0
  161. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +0 -0
  162. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +0 -0
  163. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +0 -0
  164. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +0 -0
  165. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +0 -0
  166. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +0 -0
  167. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +0 -0
  168. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +0 -0
  169. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_lang.py +0 -0
  170. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +0 -0
  171. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +0 -0
  172. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +0 -0
  173. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +0 -0
  174. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +0 -0
  175. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_math.py +0 -0
  176. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +0 -0
  177. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +0 -0
  178. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +0 -0
  179. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +0 -0
  180. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +0 -0
  181. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +0 -0
  182. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +0 -0
  183. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_operator.py +0 -0
  184. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +0 -0
  185. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_overload.py +0 -0
  186. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_powi.py +0 -0
  187. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_print.py +0 -0
  188. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +0 -0
  189. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_random.py +0 -0
  190. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +0 -0
  191. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +0 -0
  192. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +0 -0
  193. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +0 -0
  194. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +0 -0
  195. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +0 -0
  196. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_sm.py +0 -0
  197. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +0 -0
  198. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +0 -0
  199. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_sync.py +0 -0
  200. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +0 -0
  201. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +0 -0
  202. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +0 -0
  203. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +0 -0
  204. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +0 -0
  205. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +0 -0
  206. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +0 -0
  207. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +0 -0
  208. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +0 -0
  209. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_warning.py +0 -0
  210. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +0 -0
  211. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudasim/__init__.py +0 -0
  212. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudasim/support.py +0 -0
  213. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +0 -0
  214. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/data/__init__.py +0 -0
  215. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/data/cuda_include.cu +0 -0
  216. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/data/error.cu +0 -0
  217. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/data/jitlink.cu +0 -0
  218. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -0
  219. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/data/warn.cu +0 -0
  220. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/__init__.py +0 -0
  221. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +0 -0
  222. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +0 -0
  223. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +0 -0
  224. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +0 -0
  225. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +0 -0
  226. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +0 -0
  227. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +0 -0
  228. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +0 -0
  229. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/test_random.py +0 -0
  230. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +0 -0
  231. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +0 -0
  232. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +0 -0
  233. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +0 -0
  234. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/nocuda/__init__.py +0 -0
  235. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +0 -0
  236. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +0 -0
  237. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/nocuda/test_import.py +0 -0
  238. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +0 -0
  239. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +0 -0
  240. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/nrt/__init__.py +0 -0
  241. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/nrt/mock_numpy.py +0 -0
  242. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/support.py +0 -0
  243. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/test_binary_generation/Makefile +0 -0
  244. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +0 -0
  245. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu +0 -0
  246. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu +0 -0
  247. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/types.py +0 -0
  248. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/ufuncs.py +0 -0
  249. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/utils.py +0 -0
  250. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/vector_types.py +0 -0
  251. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/vectorizers.py +0 -0
  252. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda.egg-info/dependency_links.txt +0 -0
  253. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda.egg-info/requires.txt +0 -0
  254. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/numba_cuda.egg-info/top_level.txt +0 -0
  255. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/pyproject.toml +0 -0
  256. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/setup.cfg +0 -0
  257. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/setup.py +0 -0
  258. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/site-packages/_numba_cuda_redirector.pth +0 -0
  259. {numba_cuda-0.5.0 → numba_cuda-0.6.0}/site-packages/_numba_cuda_redirector.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: numba-cuda
3
- Version: 0.5.0
3
+ Version: 0.6.0
4
4
  Summary: CUDA target for Numba
5
5
  Author: Anaconda Inc., NVIDIA Corporation
6
6
  License: BSD 2-clause
@@ -0,0 +1 @@
1
+ 0.6.0
@@ -1,14 +1,17 @@
1
1
  from llvmlite import ir
2
2
  from numba.core.typing.templates import ConcreteTemplate
3
+ from numba.core import ir as numba_ir
3
4
  from numba.core import (cgutils, types, typing, funcdesc, config, compiler,
4
5
  sigutils, utils)
5
6
  from numba.core.compiler import (sanitize_compile_result_entries, CompilerBase,
6
7
  DefaultPassBuilder, Flags, Option,
7
8
  CompileResult)
8
9
  from numba.core.compiler_lock import global_compiler_lock
9
- from numba.core.compiler_machinery import (LoweringPass,
10
+ from numba.core.compiler_machinery import (FunctionPass, LoweringPass,
10
11
  PassManager, register_pass)
12
+ from numba.core.interpreter import Interpreter
11
13
  from numba.core.errors import NumbaInvalidConfigWarning
14
+ from numba.core.untyped_passes import TranslateByteCode
12
15
  from numba.core.typed_passes import (IRLegalization, NativeLowering,
13
16
  AnnotateTypes)
14
17
  from warnings import warn
@@ -143,13 +146,74 @@ class CreateLibrary(LoweringPass):
143
146
  return True
144
147
 
145
148
 
149
class CUDABytecodeInterpreter(Interpreter):
    """Bytecode interpreter with a CUDA-specific conditional-jump handler."""

    # This override follows the superclass implementation, except that the
    # variable bound to the ``bool`` global is named "$bool<N>" rather than
    # "bool<N>" - see Numba PR #9888:
    # https://github.com/numba/numba/pull/9888
    #
    # The override can be dropped once that PR is available in an upstream
    # Numba release.
    def _op_JUMP_IF(self, inst, pred, iftrue):
        """Append a branch on ``bool(pred)`` to the current block.

        When ``iftrue`` is True the true branch is the instruction's jump
        target and the false branch is ``inst.next``; when it is False the
        two successors are swapped.
        """
        successors = {
            True: inst.get_jump_target(),
            False: inst.next,
        }
        truebr, falsebr = successors[iftrue], successors[not iftrue]

        # Bind the ``bool`` builtin to a name derived from the instruction
        # offset so that it can be called on the predicate below.
        bool_name = "$bool%s" % inst.offset
        self.store(
            value=numba_ir.Global("bool", bool, loc=self.loc),
            name=bool_name,
        )

        # Emit ``bool(pred)`` and keep the stored result as the condition.
        bool_call = numba_ir.Expr.call(
            self.get(bool_name),
            (self.get(pred),),
            (),
            loc=self.loc,
        )
        condition = self.store(value=bool_call, name="$%spred" % inst.offset)

        self.current_block.append(
            numba_ir.Branch(
                cond=condition,
                truebr=truebr,
                falsebr=falsebr,
                loc=self.loc,
            )
        )
176
+
177
+
178
@register_pass(mutates_CFG=True, analysis_only=False)
class CUDATranslateBytecode(FunctionPass):
    """Pass that builds the function IR with ``CUDABytecodeInterpreter``."""

    _name = "cuda_translate_bytecode"

    def __init__(self):
        FunctionPass.__init__(self)

    def run_pass(self, state):
        """Interpret ``state['bc']`` and store the IR in ``state['func_ir']``.

        The interpreter is constructed from ``state['func_id']``. Always
        returns True.
        """
        func_id, bytecode = state['func_id'], state['bc']
        state['func_ir'] = CUDABytecodeInterpreter(func_id).interpret(bytecode)
        return True
192
+
193
+
146
194
  class CUDACompiler(CompilerBase):
147
195
  def define_pipelines(self):
148
196
  dpb = DefaultPassBuilder
149
197
  pm = PassManager('cuda')
150
198
 
151
199
  untyped_passes = dpb.define_untyped_pipeline(self.state)
152
- pm.passes.extend(untyped_passes.passes)
200
+
201
+ # Rather than replicating the whole untyped passes definition in
202
+ # numba-cuda, it seems cleaner to take the pass list and replace the
203
+ # TranslateBytecode pass with our own.
204
+
205
+ def replace_translate_pass(implementation, description):
206
+ if implementation is TranslateByteCode:
207
+ return (CUDATranslateBytecode, description)
208
+ else:
209
+ return (implementation, description)
210
+
211
+ cuda_untyped_passes = [
212
+ replace_translate_pass(implementation, description)
213
+ for implementation, description in untyped_passes.passes
214
+ ]
215
+
216
+ pm.passes.extend(cuda_untyped_passes)
153
217
 
154
218
  typed_passes = dpb.define_typed_pipeline(self.state)
155
219
  pm.passes.extend(typed_passes.passes)
@@ -352,6 +416,18 @@ def kernel_fixup(kernel, debug):
352
416
  kernel.return_value = ir.ReturnValue(kernel, ir.VoidType())
353
417
  kernel.args = kernel.args[1:]
354
418
 
419
+ # If debug metadata is present, remove the return value from it
420
+
421
+ if kernel_metadata := getattr(kernel, 'metadata', None):
422
+ if dbg_metadata := kernel_metadata.get('dbg', None):
423
+ for name, value in dbg_metadata.operands:
424
+ if name == "type":
425
+ type_metadata = value
426
+ for tm_name, tm_value in type_metadata.operands:
427
+ if tm_name == 'types':
428
+ types = tm_value
429
+ types.operands = types.operands[1:]
430
+
355
431
  # Mark as a kernel for NVVM
356
432
 
357
433
  nvvm.set_cuda_kernel(kernel)
@@ -0,0 +1,44 @@
1
+ from llvmlite import ir
2
+ from numba.core import types
3
+ from numba.core.debuginfo import DIBuilder
4
+ from numba.cuda.types import GridGroup
5
+
6
+ _BYTE_SIZE = 8
7
+
8
+
9
class CUDADIBuilder(DIBuilder):
    """Debug-info builder that special-cases some integer-typed variables."""

    def _var_type(self, lltype, size, datamodel=None):
        """Return debug-info type metadata for a variable.

        Integer LLVM types are emitted as a ``DIBasicType`` in two cases:

        * Booleans, encoded as ``DW_ATE_boolean``: either there is no data
          model and ``size`` is 1, or the data model's front-end type is
          ``types.Boolean``. This is a workaround until upstream Numba is
          fixed.
        * ``GridGroup`` front-end types, encoded as ``DW_ATE_unsigned``.

        Everything else is delegated to the upstream Numba implementation.
        """
        # DWARF encoding token for the special cases; None means "not one of
        # ours, defer to the superclass".
        encoding = None

        if isinstance(lltype, ir.IntType):
            if datamodel is None:
                if size == 1:
                    name = str(lltype)
                    encoding = "DW_ATE_boolean"
            else:
                fe_type = datamodel.fe_type
                name = str(fe_type)
                if isinstance(fe_type, types.Boolean):
                    encoding = "DW_ATE_boolean"
                elif isinstance(fe_type, GridGroup):
                    encoding = "DW_ATE_unsigned"

        if encoding is None:
            return super()._var_type(lltype, size, datamodel=datamodel)

        # ``size`` is scaled by _BYTE_SIZE to give the size recorded in the
        # metadata.
        return self.module.add_debug_info('DIBasicType', {
            'name': name,
            'size': _BYTE_SIZE * size,
            'encoding': ir.DIToken(encoding),
        })
@@ -4,8 +4,9 @@ import re
4
4
  import sys
5
5
  import ctypes
6
6
  import functools
7
+ from collections import defaultdict
7
8
 
8
- from numba.core import config, serialize, sigutils, types, typing, utils
9
+ from numba.core import config, ir, serialize, sigutils, types, typing, utils
9
10
  from numba.core.caching import Cache, CacheImpl
10
11
  from numba.core.compiler_lock import global_compiler_lock
11
12
  from numba.core.dispatcher import Dispatcher
@@ -42,6 +43,55 @@ cuda_fp16_math_funcs = ['hsin', 'hcos',
42
43
  reshape_funcs = ['nocopy_empty_reshape', 'numba_attempt_nocopy_reshape']
43
44
 
44
45
 
46
+ def get_cres_link_objects(cres):
47
+ """Given a compile result, return a set of all linkable code objects that
48
+ are required for it to be fully linked."""
49
+
50
+ link_objects = set()
51
+
52
+ # List of calls into declared device functions
53
+ device_func_calls = [
54
+ (name, v) for name, v in cres.fndesc.typemap.items() if (
55
+ isinstance(v, cuda_types.CUDADispatcher)
56
+ )
57
+ ]
58
+
59
+ # List of tuples with SSA name of calls and corresponding signature
60
+ call_signatures = [
61
+ (call.func.name, sig)
62
+ for call, sig in cres.fndesc.calltypes.items() if (
63
+ isinstance(call, ir.Expr) and call.op == 'call'
64
+ )
65
+ ]
66
+
67
+ # Map SSA names to all invoked signatures
68
+ call_signature_d = defaultdict(list)
69
+ for name, sig in call_signatures:
70
+ call_signature_d[name].append(sig)
71
+
72
+ # Add the link objects from the current function's callees
73
+ for name, v in device_func_calls:
74
+ for sig in call_signature_d.get(name, []):
75
+ called_cres = v.dispatcher.overloads[sig.args]
76
+ called_link_objects = get_cres_link_objects(called_cres)
77
+ link_objects.update(called_link_objects)
78
+
79
+ # From this point onwards, we are only interested in ExternFunction
80
+ # declarations - these are the calls made directly in this function to
81
+ # them.
82
+ for name, v in cres.fndesc.typemap.items():
83
+ if not isinstance(v, Function):
84
+ continue
85
+
86
+ if not isinstance(v.typing_key, ExternFunction):
87
+ continue
88
+
89
+ for obj in v.typing_key.link:
90
+ link_objects.add(obj)
91
+
92
+ return link_objects
93
+
94
+
45
95
  class _Kernel(serialize.ReduceMixin):
46
96
  '''
47
97
  CUDA Kernel specialized for a given set of argument types. When called, this
@@ -159,15 +209,8 @@ class _Kernel(serialize.ReduceMixin):
159
209
 
160
210
  self.maybe_link_nrt(link, tgt_ctx, asm)
161
211
 
162
- for k, v in cres.fndesc.typemap.items():
163
- if not isinstance(v, Function):
164
- continue
165
-
166
- if not isinstance(v.typing_key, ExternFunction):
167
- continue
168
-
169
- for obj in v.typing_key.link:
170
- lib.add_linking_file(obj)
212
+ for obj in get_cres_link_objects(cres):
213
+ lib.add_linking_file(obj)
171
214
 
172
215
  for filepath in link:
173
216
  lib.add_linking_file(filepath)
@@ -267,7 +310,11 @@ class _Kernel(serialize.ReduceMixin):
267
310
  """
268
311
  cufunc = self._codelibrary.get_cufunc()
269
312
 
270
- if hasattr(self, "target_context") and self.target_context.enable_nrt:
313
+ if (
314
+ hasattr(self, "target_context")
315
+ and self.target_context.enable_nrt
316
+ and config.CUDA_NRT_STATS
317
+ ):
271
318
  rtsys.ensure_initialized()
272
319
  rtsys.set_memsys_to_module(cufunc.module)
273
320
  # We don't know which stream the kernel will be launched on, so
@@ -3,8 +3,7 @@ from functools import cached_property
3
3
  import llvmlite.binding as ll
4
4
  from llvmlite import ir
5
5
 
6
- from numba.core import (cgutils, config, debuginfo, itanium_mangler, types,
7
- typing, utils)
6
+ from numba.core import cgutils, config, itanium_mangler, types, typing
8
7
  from numba.core.dispatcher import Dispatcher
9
8
  from numba.core.base import BaseContext
10
9
  from numba.core.callconv import BaseCallConv, MinimalCallConv
@@ -12,7 +11,8 @@ from numba.core.typing import cmathdecl
12
11
  from numba.core import datamodel
13
12
 
14
13
  from .cudadrv import nvvm
15
- from numba.cuda import codegen, nvvmutils, ufuncs
14
+ from numba.cuda import codegen, ufuncs
15
+ from numba.cuda.debuginfo import CUDADIBuilder
16
16
  from numba.cuda.models import cuda_data_manager
17
17
 
18
18
  # -----------------------------------------------------------------------------
@@ -80,7 +80,7 @@ class CUDATargetContext(BaseContext):
80
80
 
81
81
  @property
82
82
  def DIBuilder(self):
83
- return debuginfo.DIBuilder
83
+ return CUDADIBuilder
84
84
 
85
85
  @property
86
86
  def enable_boundscheck(self):
@@ -150,136 +150,6 @@ class CUDATargetContext(BaseContext):
150
150
  return itanium_mangler.mangle(name, argtypes, abi_tags=abi_tags,
151
151
  uid=uid)
152
152
 
153
- def prepare_cuda_kernel(self, codelib, fndesc, debug, lineinfo,
154
- nvvm_options, filename, linenum,
155
- max_registers=None, lto=False):
156
- """
157
- Adapt a code library ``codelib`` with the numba compiled CUDA kernel
158
- with name ``fname`` and arguments ``argtypes`` for NVVM.
159
- A new library is created with a wrapper function that can be used as
160
- the kernel entry point for the given kernel.
161
-
162
- Returns the new code library and the wrapper function.
163
-
164
- Parameters:
165
-
166
- codelib: The CodeLibrary containing the device function to wrap
167
- in a kernel call.
168
- fndesc: The FunctionDescriptor of the source function.
169
- debug: Whether to compile with debug.
170
- lineinfo: Whether to emit line info.
171
- nvvm_options: Dict of NVVM options used when compiling the new library.
172
- filename: The source filename that the function is contained in.
173
- linenum: The source line that the function is on.
174
- max_registers: The max_registers argument for the code library.
175
- """
176
- kernel_name = itanium_mangler.prepend_namespace(
177
- fndesc.llvm_func_name, ns='cudapy',
178
- )
179
- library = self.codegen().create_library(f'{codelib.name}_kernel_',
180
- entry_name=kernel_name,
181
- nvvm_options=nvvm_options,
182
- max_registers=max_registers,
183
- lto=lto
184
- )
185
- library.add_linking_library(codelib)
186
- wrapper = self.generate_kernel_wrapper(library, fndesc, kernel_name,
187
- debug, lineinfo, filename,
188
- linenum)
189
- return library, wrapper
190
-
191
- def generate_kernel_wrapper(self, library, fndesc, kernel_name, debug,
192
- lineinfo, filename, linenum):
193
- """
194
- Generate the kernel wrapper in the given ``library``.
195
- The function being wrapped is described by ``fndesc``.
196
- The wrapper function is returned.
197
- """
198
-
199
- argtypes = fndesc.argtypes
200
- arginfo = self.get_arg_packer(argtypes)
201
- argtys = list(arginfo.argument_types)
202
- wrapfnty = ir.FunctionType(ir.VoidType(), argtys)
203
- wrapper_module = self.create_module("cuda.kernel.wrapper")
204
- fnty = ir.FunctionType(ir.IntType(32),
205
- [self.call_conv.get_return_type(types.pyobject)]
206
- + argtys)
207
- func = ir.Function(wrapper_module, fnty, fndesc.llvm_func_name)
208
-
209
- prefixed = itanium_mangler.prepend_namespace(func.name, ns='cudapy')
210
- wrapfn = ir.Function(wrapper_module, wrapfnty, prefixed)
211
- builder = ir.IRBuilder(wrapfn.append_basic_block(''))
212
-
213
- if debug or lineinfo:
214
- directives_only = lineinfo and not debug
215
- debuginfo = self.DIBuilder(module=wrapper_module,
216
- filepath=filename,
217
- cgctx=self,
218
- directives_only=directives_only)
219
- debuginfo.mark_subprogram(
220
- wrapfn, kernel_name, fndesc.args, argtypes, linenum,
221
- )
222
- debuginfo.mark_location(builder, linenum)
223
-
224
- # Define error handling variable
225
- def define_error_gv(postfix):
226
- name = wrapfn.name + postfix
227
- gv = cgutils.add_global_variable(wrapper_module, ir.IntType(32),
228
- name)
229
- gv.initializer = ir.Constant(gv.type.pointee, None)
230
- return gv
231
-
232
- gv_exc = define_error_gv("__errcode__")
233
- gv_tid = []
234
- gv_ctaid = []
235
- for i in 'xyz':
236
- gv_tid.append(define_error_gv("__tid%s__" % i))
237
- gv_ctaid.append(define_error_gv("__ctaid%s__" % i))
238
-
239
- callargs = arginfo.from_arguments(builder, wrapfn.args)
240
- status, _ = self.call_conv.call_function(
241
- builder, func, types.void, argtypes, callargs)
242
-
243
- if debug:
244
- # Check error status
245
- with cgutils.if_likely(builder, status.is_ok):
246
- builder.ret_void()
247
-
248
- with builder.if_then(builder.not_(status.is_python_exc)):
249
- # User exception raised
250
- old = ir.Constant(gv_exc.type.pointee, None)
251
-
252
- # Use atomic cmpxchg to prevent rewriting the error status
253
- # Only the first error is recorded
254
-
255
- xchg = builder.cmpxchg(gv_exc, old, status.code,
256
- 'monotonic', 'monotonic')
257
- changed = builder.extract_value(xchg, 1)
258
-
259
- # If the xchange is successful, save the thread ID.
260
- sreg = nvvmutils.SRegBuilder(builder)
261
- with builder.if_then(changed):
262
- for dim, ptr, in zip("xyz", gv_tid):
263
- val = sreg.tid(dim)
264
- builder.store(val, ptr)
265
-
266
- for dim, ptr, in zip("xyz", gv_ctaid):
267
- val = sreg.ctaid(dim)
268
- builder.store(val, ptr)
269
-
270
- builder.ret_void()
271
-
272
- nvvm.set_cuda_kernel(wrapfn)
273
- library.add_ir_module(wrapper_module)
274
- if debug or lineinfo:
275
- debuginfo.finalize()
276
- library.finalize()
277
-
278
- if config.DUMP_LLVM:
279
- utils.dump_llvm(fndesc, wrapper_module)
280
-
281
- return library.get_function(wrapfn.name)
282
-
283
153
  def make_constant_array(self, builder, aryty, arr):
284
154
  """
285
155
  Unlike the parent version, this returns a pointer in the constant
@@ -72,6 +72,57 @@ class TestCudaDebugInfo(CUDATestCase):
72
72
  def f(x):
73
73
  x[0] = 0
74
74
 
75
+ def test_issue_9888(self):
76
+ # Compiler created symbol should not be emitted in DILocalVariable
77
+ # See Numba Issue #9888 https://github.com/numba/numba/pull/9888
78
+ sig = (types.boolean,)
79
+
80
+ @cuda.jit(sig, debug=True, opt=False)
81
+ def f(cond):
82
+ if cond:
83
+ x = 1 # noqa: F841
84
+ else:
85
+ x = 0 # noqa: F841
86
+
87
+ llvm_ir = f.inspect_llvm(sig)
88
+ # A variable name starting with "bool" in the debug metadata
89
+ pat = r'!DILocalVariable\(.*name:\s+\"bool'
90
+ match = re.compile(pat).search(llvm_ir)
91
+ self.assertIsNone(match, msg=llvm_ir)
92
+
93
+ def test_bool_type(self):
94
+ sig = (types.int32, types.int32)
95
+
96
+ @cuda.jit("void(int32, int32)", debug=True, opt=False)
97
+ def f(x, y):
98
+ z = x == y # noqa: F841
99
+
100
+ llvm_ir = f.inspect_llvm(sig)
101
+
102
+ # extract the metadata node id from `type` field of DILocalVariable
103
+ pat = r'!DILocalVariable\(.*name:\s+"z".*type:\s+!(\d+)'
104
+ match = re.compile(pat).search(llvm_ir)
105
+ self.assertIsNotNone(match, msg=llvm_ir)
106
+ mdnode_id = match.group(1)
107
+
108
+ # verify the DIBasicType has correct encoding attribute DW_ATE_boolean
109
+ pat = rf'!{mdnode_id}\s+=\s+!DIBasicType\(.*DW_ATE_boolean'
110
+ match = re.compile(pat).search(llvm_ir)
111
+ self.assertIsNotNone(match, msg=llvm_ir)
112
+
113
+ def test_grid_group_type(self):
114
+ sig = (types.int32,)
115
+
116
+ @cuda.jit(sig, debug=True, opt=False)
117
+ def f(x):
118
+ grid = cuda.cg.this_grid() # noqa: F841
119
+
120
+ llvm_ir = f.inspect_llvm(sig)
121
+
122
+ pat = r'!DIBasicType\(.*DW_ATE_unsigned, name: "GridGroup", size: 64'
123
+ match = re.compile(pat).search(llvm_ir)
124
+ self.assertIsNotNone(match, msg=llvm_ir)
125
+
75
126
  @unittest.skip("Wrappers no longer exist")
76
127
  def test_wrapper_has_debuginfo(self):
77
128
  sig = (types.int32[::1],)
@@ -217,6 +268,36 @@ class TestCudaDebugInfo(CUDATestCase):
217
268
  three_device_fns(kernel_debug=False, leaf_debug=True)
218
269
  three_device_fns(kernel_debug=False, leaf_debug=False)
219
270
 
271
+ def test_kernel_args_types(self):
272
+ sig = (types.int32, types.int32)
273
+
274
+ @cuda.jit("void(int32, int32)", debug=True, opt=False)
275
+ def f(x, y):
276
+ z = x + y # noqa: F841
277
+
278
+ llvm_ir = f.inspect_llvm(sig)
279
+
280
+ # extract the metadata node id from `types` field of DISubroutineType
281
+ pat = r'!DISubroutineType\(types:\s+!(\d+)\)'
282
+ match = re.compile(pat).search(llvm_ir)
283
+ self.assertIsNotNone(match, msg=llvm_ir)
284
+ mdnode_id = match.group(1)
285
+
286
+ # extract the metadata node ids from the flexible node of types
287
+ pat = rf'!{mdnode_id}\s+=\s+!{{\s+!(\d+),\s+!(\d+)\s+}}'
288
+ match = re.compile(pat).search(llvm_ir)
289
+ self.assertIsNotNone(match, msg=llvm_ir)
290
+ mdnode_id1 = match.group(1)
291
+ mdnode_id2 = match.group(2)
292
+
293
+ # verify each of the two metadata nodes match expected type
294
+ pat = rf'!{mdnode_id1}\s+=\s+!DIBasicType\(.*DW_ATE_signed,\s+name:\s+"int32"' # noqa: E501
295
+ match = re.compile(pat).search(llvm_ir)
296
+ self.assertIsNotNone(match, msg=llvm_ir)
297
+ pat = rf'!{mdnode_id2}\s+=\s+!DIBasicType\(.*DW_ATE_signed,\s+name:\s+"int32"' # noqa: E501
298
+ match = re.compile(pat).search(llvm_ir)
299
+ self.assertIsNotNone(match, msg=llvm_ir)
300
+
220
301
 
221
302
  if __name__ == '__main__':
222
303
  unittest.main()
@@ -205,6 +205,14 @@ int times2(int *out, int a)
205
205
  }
206
206
  """)
207
207
 
208
+ times3_cu = cuda.CUSource("""
209
+ extern "C" __device__
210
+ int times3(int *out, int a)
211
+ {
212
+ *out = a * 3;
213
+ return 0;
214
+ }
215
+ """)
208
216
 
209
217
  times4_cu = cuda.CUSource("""
210
218
  extern "C" __device__
@@ -351,6 +359,123 @@ class TestDeclareDevice(CUDATestCase):
351
359
  kernel[1, 1](x, 1)
352
360
  np.testing.assert_equal(x[0], 323845807)
353
361
 
362
+ def test_declared_in_called_function(self):
363
+ times2 = cuda.declare_device('times2', 'int32(int32)', link=times2_cu)
364
+
365
+ @cuda.jit
366
+ def device_func(x):
367
+ return times2(x)
368
+
369
+ @cuda.jit
370
+ def kernel(r, x):
371
+ i = cuda.grid(1)
372
+ if i < len(r):
373
+ r[i] = device_func(x[i])
374
+
375
+ x = np.arange(10, dtype=np.int32)
376
+ r = np.empty_like(x)
377
+
378
+ kernel[1, 32](r, x)
379
+
380
+ np.testing.assert_equal(r, x * 2)
381
+
382
+ def test_declared_in_called_function_twice(self):
383
+ times2 = cuda.declare_device('times2', 'int32(int32)', link=times2_cu)
384
+
385
+ @cuda.jit
386
+ def device_func_1(x):
387
+ return times2(x)
388
+
389
+ @cuda.jit
390
+ def device_func_2(x):
391
+ return device_func_1(x)
392
+
393
+ @cuda.jit
394
+ def kernel(r, x):
395
+ i = cuda.grid(1)
396
+ if i < len(r):
397
+ r[i] = device_func_2(x[i])
398
+
399
+ x = np.arange(10, dtype=np.int32)
400
+ r = np.empty_like(x)
401
+
402
+ kernel[1, 32](r, x)
403
+
404
+ np.testing.assert_equal(r, x * 2)
405
+
406
+ def test_declared_in_called_function_two_calls(self):
407
+ times2 = cuda.declare_device('times2', 'int32(int32)', link=times2_cu)
408
+
409
+ @cuda.jit
410
+ def device_func(x):
411
+ return times2(x)
412
+
413
+ @cuda.jit
414
+ def kernel(r, x):
415
+ i = cuda.grid(1)
416
+ if i < len(r):
417
+ r[i] = device_func(x[i]) + device_func(x[i] + i)
418
+
419
+ x = np.arange(10, dtype=np.int32)
420
+ r = np.empty_like(x)
421
+
422
+ kernel[1, 32](r, x)
423
+
424
+ np.testing.assert_equal(r, x * 6)
425
+
426
+ def test_call_declared_function_twice(self):
427
+ times2 = cuda.declare_device('times2', 'int32(int32)', link=times2_cu)
428
+
429
+ @cuda.jit
430
+ def kernel(r, x):
431
+ i = cuda.grid(1)
432
+ if i < len(r):
433
+ r[i] = times2(x[i]) + times2(x[i] + i)
434
+
435
+ x = np.arange(10, dtype=np.int32)
436
+ r = np.empty_like(x)
437
+
438
+ kernel[1, 32](r, x)
439
+
440
+ np.testing.assert_equal(r, x * 6)
441
+
442
+ def test_declared_in_called_function_and_parent(self):
443
+ times2 = cuda.declare_device('times2', 'int32(int32)', link=times2_cu)
444
+
445
+ @cuda.jit
446
+ def device_func(x):
447
+ return times2(x)
448
+
449
+ @cuda.jit
450
+ def kernel(r, x):
451
+ i = cuda.grid(1)
452
+ if i < len(r):
453
+ r[i] = device_func(x[i]) + times2(x[i])
454
+
455
+ x = np.arange(10, dtype=np.int32)
456
+ r = np.empty_like(x)
457
+
458
+ kernel[1, 32](r, x)
459
+
460
+ np.testing.assert_equal(r, x * 4)
461
+
462
+ def test_call_two_different_declared_functions(self):
463
+ times2 = cuda.declare_device('times2', 'int32(int32)', link=times2_cu)
464
+ times3 = cuda.declare_device('times3', 'int32(int32)', link=times3_cu)
465
+
466
+ @cuda.jit
467
+ def kernel(r, x):
468
+ i = cuda.grid(1)
469
+ if i < len(r):
470
+ r[i] = times2(x[i]) + times3(x[i])
471
+
472
+ x = np.arange(10, dtype=np.int32)
473
+ r = np.empty_like(x)
474
+
475
+ kernel[1, 32](r, x)
476
+
477
+ np.testing.assert_equal(r, x * 5)
478
+
354
479
 
355
480
  if __name__ == '__main__':
356
481
  unittest.main()
@@ -171,7 +171,10 @@ class TestNrtStatistics(CUDATestCase):
171
171
  arr = cuda_arange(5 * tmp[0]) # noqa: F841
172
172
  return None
173
173
 
174
- with override_config('CUDA_ENABLE_NRT', True):
174
+ with (
175
+ override_config('CUDA_ENABLE_NRT', True),
176
+ override_config('CUDA_NRT_STATS', True)
177
+ ):
175
178
  # Switch on stats
176
179
  rtsys.memsys_enable_stats()
177
180
  # check the stats are on
@@ -18,7 +18,10 @@ class TestNrtRefCt(EnableNRTStatsMixin, CUDATestCase):
18
18
  super(TestNrtRefCt, self).tearDown()
19
19
 
20
20
  def run(self, result=None):
21
- with override_config("CUDA_ENABLE_NRT", True):
21
+ with (
22
+ override_config("CUDA_ENABLE_NRT", True),
23
+ override_config('CUDA_NRT_STATS', True)
24
+ ):
22
25
  super(TestNrtRefCt, self).run(result)
23
26
 
24
27
  def test_no_return(self):