numba-cuda 0.5.0__tar.gz → 0.7.0__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (259)
  1. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/PKG-INFO +1 -1
  2. numba_cuda-0.7.0/numba_cuda/VERSION +1 -0
  3. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/compiler.py +80 -2
  4. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/cudadrv/nvvm.py +72 -53
  5. numba_cuda-0.7.0/numba_cuda/numba/cuda/debuginfo.py +44 -0
  6. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/dispatcher.py +58 -11
  7. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/target.py +4 -134
  8. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +2 -1
  9. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +91 -1
  10. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +125 -0
  11. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/nrt/test_nrt.py +4 -1
  12. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +4 -1
  13. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda.egg-info/PKG-INFO +1 -1
  14. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda.egg-info/SOURCES.txt +1 -0
  15. numba_cuda-0.5.0/numba_cuda/VERSION +0 -1
  16. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/LICENSE +0 -0
  17. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/README.md +0 -0
  18. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/__init__.py +0 -0
  19. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/_version.py +0 -0
  20. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/__init__.py +0 -0
  21. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/api.py +0 -0
  22. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/api_util.py +0 -0
  23. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/args.py +0 -0
  24. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/cg.py +0 -0
  25. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/codegen.py +0 -0
  26. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/cpp_function_wrappers.cu +0 -0
  27. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/cuda_fp16.h +0 -0
  28. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/cuda_fp16.hpp +0 -0
  29. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/cuda_paths.py +0 -0
  30. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/cudadecl.py +0 -0
  31. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/cudadrv/__init__.py +0 -0
  32. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/cudadrv/devicearray.py +0 -0
  33. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/cudadrv/devices.py +0 -0
  34. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/cudadrv/driver.py +0 -0
  35. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/cudadrv/drvapi.py +0 -0
  36. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/cudadrv/dummyarray.py +0 -0
  37. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/cudadrv/enums.py +0 -0
  38. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/cudadrv/error.py +0 -0
  39. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/cudadrv/libs.py +0 -0
  40. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/cudadrv/linkable_code.py +0 -0
  41. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/cudadrv/mappings.py +0 -0
  42. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/cudadrv/ndarray.py +0 -0
  43. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/cudadrv/nvrtc.py +0 -0
  44. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/cudadrv/rtapi.py +0 -0
  45. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/cudadrv/runtime.py +0 -0
  46. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/cudaimpl.py +0 -0
  47. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/cudamath.py +0 -0
  48. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/decorators.py +0 -0
  49. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/descriptor.py +0 -0
  50. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/device_init.py +0 -0
  51. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/deviceufunc.py +0 -0
  52. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/errors.py +0 -0
  53. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/extending.py +0 -0
  54. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/initialize.py +0 -0
  55. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/intrinsic_wrapper.py +0 -0
  56. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/intrinsics.py +0 -0
  57. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/kernels/__init__.py +0 -0
  58. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/kernels/reduction.py +0 -0
  59. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/kernels/transpose.py +0 -0
  60. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/libdevice.py +0 -0
  61. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/libdevicedecl.py +0 -0
  62. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/libdevicefuncs.py +0 -0
  63. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/libdeviceimpl.py +0 -0
  64. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/mathimpl.py +0 -0
  65. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/models.py +0 -0
  66. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/nvvmutils.py +0 -0
  67. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/printimpl.py +0 -0
  68. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/random.py +0 -0
  69. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/reshape_funcs.cu +0 -0
  70. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/runtime/__init__.py +0 -0
  71. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/runtime/memsys.cu +0 -0
  72. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/runtime/memsys.cuh +0 -0
  73. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/runtime/nrt.cu +0 -0
  74. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/runtime/nrt.py +0 -0
  75. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/simulator/__init__.py +0 -0
  76. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/simulator/api.py +0 -0
  77. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/simulator/compiler.py +0 -0
  78. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +0 -0
  79. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +0 -0
  80. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/simulator/cudadrv/devices.py +0 -0
  81. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/simulator/cudadrv/driver.py +0 -0
  82. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +0 -0
  83. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +0 -0
  84. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/simulator/cudadrv/error.py +0 -0
  85. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/simulator/cudadrv/libs.py +0 -0
  86. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +0 -0
  87. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +0 -0
  88. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/simulator/kernel.py +0 -0
  89. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/simulator/kernelapi.py +0 -0
  90. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/simulator/reduction.py +0 -0
  91. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/simulator/vector_types.py +0 -0
  92. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/simulator_init.py +0 -0
  93. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/stubs.py +0 -0
  94. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/testing.py +0 -0
  95. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/__init__.py +0 -0
  96. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudadrv/__init__.py +0 -0
  97. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +0 -0
  98. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +0 -0
  99. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +0 -0
  100. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +0 -0
  101. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +0 -0
  102. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +0 -0
  103. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +0 -0
  104. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +0 -0
  105. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +0 -0
  106. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +0 -0
  107. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +0 -0
  108. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +0 -0
  109. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudadrv/test_events.py +0 -0
  110. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +0 -0
  111. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudadrv/test_init.py +0 -0
  112. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +0 -0
  113. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +0 -0
  114. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +0 -0
  115. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +0 -0
  116. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +0 -0
  117. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +0 -0
  118. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +0 -0
  119. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +0 -0
  120. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +0 -0
  121. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +0 -0
  122. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +0 -0
  123. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +0 -0
  124. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +0 -0
  125. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/__init__.py +0 -0
  126. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +0 -0
  127. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +0 -0
  128. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +0 -0
  129. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -0
  130. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +0 -0
  131. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +0 -0
  132. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_array.py +0 -0
  133. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +0 -0
  134. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +0 -0
  135. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +0 -0
  136. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +0 -0
  137. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +0 -0
  138. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_caching.py +0 -0
  139. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_casting.py +0 -0
  140. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +0 -0
  141. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +0 -0
  142. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_complex.py +0 -0
  143. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +0 -0
  144. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +0 -0
  145. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +0 -0
  146. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +0 -0
  147. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +0 -0
  148. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +0 -0
  149. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +0 -0
  150. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_debug.py +0 -0
  151. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +0 -0
  152. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_enums.py +0 -0
  153. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_errors.py +0 -0
  154. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_exception.py +0 -0
  155. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_extending.py +0 -0
  156. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +0 -0
  157. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_forall.py +0 -0
  158. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +0 -0
  159. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +0 -0
  160. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_globals.py +0 -0
  161. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +0 -0
  162. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +0 -0
  163. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +0 -0
  164. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +0 -0
  165. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +0 -0
  166. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +0 -0
  167. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +0 -0
  168. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +0 -0
  169. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_lang.py +0 -0
  170. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +0 -0
  171. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +0 -0
  172. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +0 -0
  173. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +0 -0
  174. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +0 -0
  175. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_math.py +0 -0
  176. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +0 -0
  177. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +0 -0
  178. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +0 -0
  179. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +0 -0
  180. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +0 -0
  181. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +0 -0
  182. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +0 -0
  183. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_operator.py +0 -0
  184. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +0 -0
  185. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_overload.py +0 -0
  186. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_powi.py +0 -0
  187. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_print.py +0 -0
  188. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +0 -0
  189. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_random.py +0 -0
  190. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +0 -0
  191. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +0 -0
  192. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +0 -0
  193. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +0 -0
  194. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +0 -0
  195. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +0 -0
  196. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_sm.py +0 -0
  197. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +0 -0
  198. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +0 -0
  199. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_sync.py +0 -0
  200. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +0 -0
  201. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +0 -0
  202. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +0 -0
  203. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +0 -0
  204. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +0 -0
  205. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +0 -0
  206. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +0 -0
  207. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +0 -0
  208. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +0 -0
  209. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_warning.py +0 -0
  210. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +0 -0
  211. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudasim/__init__.py +0 -0
  212. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudasim/support.py +0 -0
  213. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +0 -0
  214. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/data/__init__.py +0 -0
  215. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/data/cuda_include.cu +0 -0
  216. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/data/error.cu +0 -0
  217. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/data/jitlink.cu +0 -0
  218. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -0
  219. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/data/warn.cu +0 -0
  220. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/doc_examples/__init__.py +0 -0
  221. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +0 -0
  222. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +0 -0
  223. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +0 -0
  224. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +0 -0
  225. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +0 -0
  226. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +0 -0
  227. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +0 -0
  228. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +0 -0
  229. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/doc_examples/test_random.py +0 -0
  230. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +0 -0
  231. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +0 -0
  232. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +0 -0
  233. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +0 -0
  234. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/nocuda/__init__.py +0 -0
  235. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +0 -0
  236. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +0 -0
  237. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/nocuda/test_import.py +0 -0
  238. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +0 -0
  239. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +0 -0
  240. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/nrt/__init__.py +0 -0
  241. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/nrt/mock_numpy.py +0 -0
  242. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/support.py +0 -0
  243. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/test_binary_generation/Makefile +0 -0
  244. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +0 -0
  245. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu +0 -0
  246. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu +0 -0
  247. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/types.py +0 -0
  248. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/ufuncs.py +0 -0
  249. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/utils.py +0 -0
  250. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/vector_types.py +0 -0
  251. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda/numba/cuda/vectorizers.py +0 -0
  252. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda.egg-info/dependency_links.txt +0 -0
  253. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda.egg-info/requires.txt +0 -0
  254. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/numba_cuda.egg-info/top_level.txt +0 -0
  255. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/pyproject.toml +0 -0
  256. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/setup.cfg +0 -0
  257. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/setup.py +0 -0
  258. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/site-packages/_numba_cuda_redirector.pth +0 -0
  259. {numba_cuda-0.5.0 → numba_cuda-0.7.0}/site-packages/_numba_cuda_redirector.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: numba-cuda
3
- Version: 0.5.0
3
+ Version: 0.7.0
4
4
  Summary: CUDA target for Numba
5
5
  Author: Anaconda Inc., NVIDIA Corporation
6
6
  License: BSD 2-clause
@@ -0,0 +1 @@
1
+ 0.7.0
@@ -1,14 +1,17 @@
1
1
  from llvmlite import ir
2
2
  from numba.core.typing.templates import ConcreteTemplate
3
+ from numba.core import ir as numba_ir
3
4
  from numba.core import (cgutils, types, typing, funcdesc, config, compiler,
4
5
  sigutils, utils)
5
6
  from numba.core.compiler import (sanitize_compile_result_entries, CompilerBase,
6
7
  DefaultPassBuilder, Flags, Option,
7
8
  CompileResult)
8
9
  from numba.core.compiler_lock import global_compiler_lock
9
- from numba.core.compiler_machinery import (LoweringPass,
10
+ from numba.core.compiler_machinery import (FunctionPass, LoweringPass,
10
11
  PassManager, register_pass)
12
+ from numba.core.interpreter import Interpreter
11
13
  from numba.core.errors import NumbaInvalidConfigWarning
14
+ from numba.core.untyped_passes import TranslateByteCode
12
15
  from numba.core.typed_passes import (IRLegalization, NativeLowering,
13
16
  AnnotateTypes)
14
17
  from warnings import warn
@@ -143,13 +146,74 @@ class CreateLibrary(LoweringPass):
143
146
  return True
144
147
 
145
148
 
149
+ class CUDABytecodeInterpreter(Interpreter):
150
+ # Based on the superclass implementation, but names the resulting variable
151
+ # "$bool<N>" instead of "bool<N>" - see Numba PR #9888:
152
+ # https://github.com/numba/numba/pull/9888
153
+ #
154
+ # This can be removed once that PR is available in an upstream Numba
155
+ # release.
156
+ def _op_JUMP_IF(self, inst, pred, iftrue):
157
+ brs = {
158
+ True: inst.get_jump_target(),
159
+ False: inst.next,
160
+ }
161
+ truebr = brs[iftrue]
162
+ falsebr = brs[not iftrue]
163
+
164
+ name = "$bool%s" % (inst.offset)
165
+ gv_fn = numba_ir.Global("bool", bool, loc=self.loc)
166
+ self.store(value=gv_fn, name=name)
167
+
168
+ callres = numba_ir.Expr.call(self.get(name), (self.get(pred),), (),
169
+ loc=self.loc)
170
+
171
+ pname = "$%spred" % (inst.offset)
172
+ predicate = self.store(value=callres, name=pname)
173
+ bra = numba_ir.Branch(cond=predicate, truebr=truebr, falsebr=falsebr,
174
+ loc=self.loc)
175
+ self.current_block.append(bra)
176
+
177
+
178
+ @register_pass(mutates_CFG=True, analysis_only=False)
179
+ class CUDATranslateBytecode(FunctionPass):
180
+ _name = "cuda_translate_bytecode"
181
+
182
+ def __init__(self):
183
+ FunctionPass.__init__(self)
184
+
185
+ def run_pass(self, state):
186
+ func_id = state['func_id']
187
+ bc = state['bc']
188
+ interp = CUDABytecodeInterpreter(func_id)
189
+ func_ir = interp.interpret(bc)
190
+ state['func_ir'] = func_ir
191
+ return True
192
+
193
+
146
194
  class CUDACompiler(CompilerBase):
147
195
  def define_pipelines(self):
148
196
  dpb = DefaultPassBuilder
149
197
  pm = PassManager('cuda')
150
198
 
151
199
  untyped_passes = dpb.define_untyped_pipeline(self.state)
152
- pm.passes.extend(untyped_passes.passes)
200
+
201
+ # Rather than replicating the whole untyped passes definition in
202
+ # numba-cuda, it seems cleaner to take the pass list and replace the
203
+ # TranslateBytecode pass with our own.
204
+
205
+ def replace_translate_pass(implementation, description):
206
+ if implementation is TranslateByteCode:
207
+ return (CUDATranslateBytecode, description)
208
+ else:
209
+ return (implementation, description)
210
+
211
+ cuda_untyped_passes = [
212
+ replace_translate_pass(implementation, description)
213
+ for implementation, description in untyped_passes.passes
214
+ ]
215
+
216
+ pm.passes.extend(cuda_untyped_passes)
153
217
 
154
218
  typed_passes = dpb.define_typed_pipeline(self.state)
155
219
  pm.passes.extend(typed_passes.passes)
@@ -352,6 +416,20 @@ def kernel_fixup(kernel, debug):
352
416
  kernel.return_value = ir.ReturnValue(kernel, ir.VoidType())
353
417
  kernel.args = kernel.args[1:]
354
418
 
419
+ # If debug metadata is present, remove the return value from it
420
+
421
+ if kernel_metadata := getattr(kernel, 'metadata', None):
422
+ if dbg_metadata := kernel_metadata.get('dbg', None):
423
+ for name, value in dbg_metadata.operands:
424
+ if name == "type":
425
+ type_metadata = value
426
+ for tm_name, tm_value in type_metadata.operands:
427
+ if tm_name == 'types':
428
+ types = tm_value
429
+ types.operands = types.operands[1:]
430
+ if config.DUMP_LLVM:
431
+ types._clear_string_cache()
432
+
355
433
  # Mark as a kernel for NVVM
356
434
 
357
435
  nvvm.set_cuda_kernel(kernel)
@@ -199,12 +199,52 @@ class NVVM(object):
199
199
 
200
200
 
201
201
  class CompilationUnit(object):
202
- def __init__(self):
202
+ """
203
+ A CompilationUnit is a set of LLVM modules that are compiled to PTX or
204
+ LTO-IR with NVVM.
205
+
206
+ Compilation options are accepted as a dict mapping option names to values,
207
+ with the following considerations:
208
+
209
+ - Underscores (`_`) in option names are converted to dashes (`-`), to match
210
+ NVVM's option name format.
211
+ - Options that take a value will be emitted in the form "-<name>=<value>".
212
+ - Booleans passed as option values will be converted to integers.
213
+ - Options which take no value (such as `-gen-lto`) should have a value of
214
+ `None` and will be emitted in the form "-<name>".
215
+
216
+ For documentation on NVVM compilation options, see the CUDA Toolkit
217
+ Documentation:
218
+
219
+ https://docs.nvidia.com/cuda/libnvvm-api/index.html#_CPPv418nvvmCompileProgram11nvvmProgramiPPKc
220
+ """
221
+
222
+ def __init__(self, options):
203
223
  self.driver = NVVM()
204
224
  self._handle = nvvm_program()
205
225
  err = self.driver.nvvmCreateProgram(byref(self._handle))
206
226
  self.driver.check_error(err, 'Failed to create CU')
207
227
 
228
+ def stringify_option(k, v):
229
+ k = k.replace('_', '-')
230
+
231
+ if v is None:
232
+ return f'-{k}'.encode('utf-8')
233
+
234
+ if isinstance(v, bool):
235
+ v = int(v)
236
+
237
+ return f'-{k}={v}'.encode('utf-8')
238
+
239
+ options = [stringify_option(k, v) for k, v in options.items()]
240
+ option_ptrs = (c_char_p * len(options))(*[c_char_p(x) for x in options])
241
+
242
+ # We keep both the options and the pointers to them so that options are
243
+ # not destroyed before we've used their values
244
+ self.options = options
245
+ self.option_ptrs = option_ptrs
246
+ self.n_options = len(options)
247
+
208
248
  def __del__(self):
209
249
  driver = NVVM()
210
250
  err = driver.nvvmDestroyProgram(byref(self._handle))
@@ -230,60 +270,35 @@ class CompilationUnit(object):
230
270
  len(buffer), None)
231
271
  self.driver.check_error(err, 'Failed to add module')
232
272
 
233
- def compile(self, **options):
234
- """Perform Compilation.
235
-
236
- Compilation options are accepted as keyword arguments, with the
237
- following considerations:
238
-
239
- - Underscores (`_`) in option names are converted to dashes (`-`), to
240
- match NVVM's option name format.
241
- - Options that take a value will be emitted in the form
242
- "-<name>=<value>".
243
- - Booleans passed as option values will be converted to integers.
244
- - Options which take no value (such as `-gen-lto`) should have a value
245
- of `None` passed in and will be emitted in the form "-<name>".
246
-
247
- For documentation on NVVM compilation options, see the CUDA Toolkit
248
- Documentation:
249
-
250
- https://docs.nvidia.com/cuda/libnvvm-api/index.html#_CPPv418nvvmCompileProgram11nvvmProgramiPPKc
273
+ def verify(self):
251
274
  """
252
-
253
- def stringify_option(k, v):
254
- k = k.replace('_', '-')
255
-
256
- if v is None:
257
- return f'-{k}'
258
-
259
- if isinstance(v, bool):
260
- v = int(v)
261
-
262
- return f'-{k}={v}'
263
-
264
- options = [stringify_option(k, v) for k, v in options.items()]
265
-
266
- c_opts = (c_char_p * len(options))(*[c_char_p(x.encode('utf8'))
267
- for x in options])
268
- # verify
269
- err = self.driver.nvvmVerifyProgram(self._handle, len(options), c_opts)
275
+ Run the NVVM verifier on all code added to the compilation unit.
276
+ """
277
+ err = self.driver.nvvmVerifyProgram(self._handle, self.n_options,
278
+ self.option_ptrs)
270
279
  self._try_error(err, 'Failed to verify\n')
271
280
 
272
- # compile
273
- err = self.driver.nvvmCompileProgram(self._handle, len(options), c_opts)
281
+ def compile(self):
282
+ """
283
+ Compile all modules added to the compilation unit and return the
284
+ resulting PTX or LTO-IR (depending on the options).
285
+ """
286
+ err = self.driver.nvvmCompileProgram(self._handle, self.n_options,
287
+ self.option_ptrs)
274
288
  self._try_error(err, 'Failed to compile\n')
275
289
 
276
- # get result
277
- reslen = c_size_t()
278
- err = self.driver.nvvmGetCompiledResultSize(self._handle, byref(reslen))
290
+ # Get result
291
+ result_size = c_size_t()
292
+ err = self.driver.nvvmGetCompiledResultSize(self._handle,
293
+ byref(result_size))
279
294
 
280
295
  self._try_error(err, 'Failed to get size of compiled result.')
281
296
 
282
- output_buffer = (c_char * reslen.value)()
297
+ output_buffer = (c_char * result_size.value)()
283
298
  err = self.driver.nvvmGetCompiledResult(self._handle, output_buffer)
284
299
  self._try_error(err, 'Failed to get compiled result.')
285
300
 
286
- # get log
301
+ # Get log
287
302
  self.log = self.get_log()
288
303
  if self.log:
289
304
  warnings.warn(self.log, category=NvvmWarning)
@@ -615,40 +630,44 @@ def llvm_replace(llvmir):
615
630
  for decl, fn in replacements:
616
631
  llvmir = llvmir.replace(decl, fn)
617
632
 
618
- llvmir = llvm140_to_70_ir(llvmir)
633
+ llvmir = llvm150_to_70_ir(llvmir)
619
634
 
620
635
  return llvmir
621
636
 
622
637
 
623
- def compile_ir(llvmir, **opts):
638
+ def compile_ir(llvmir, **options):
624
639
  if isinstance(llvmir, str):
625
640
  llvmir = [llvmir]
626
641
 
627
- if opts.pop('fastmath', False):
628
- opts.update({
642
+ if options.pop('fastmath', False):
643
+ options.update({
629
644
  'ftz': True,
630
645
  'fma': True,
631
646
  'prec_div': False,
632
647
  'prec_sqrt': False,
633
648
  })
634
649
 
635
- cu = CompilationUnit()
636
- libdevice = LibDevice()
650
+ cu = CompilationUnit(options)
637
651
 
638
652
  for mod in llvmir:
639
653
  mod = llvm_replace(mod)
640
654
  cu.add_module(mod.encode('utf8'))
655
+ cu.verify()
656
+
657
+ # We add libdevice following verification so that it is not subject to the
658
+ # verifier's requirements
659
+ libdevice = LibDevice()
641
660
  cu.lazy_add_module(libdevice.get())
642
661
 
643
- return cu.compile(**opts)
662
+ return cu.compile()
644
663
 
645
664
 
646
665
  re_attributes_def = re.compile(r"^attributes #\d+ = \{ ([\w\s]+)\ }")
647
666
 
648
667
 
649
- def llvm140_to_70_ir(ir):
668
+ def llvm150_to_70_ir(ir):
650
669
  """
651
- Convert LLVM 14.0 IR for LLVM 7.0.
670
+ Convert LLVM 15.0 IR for LLVM 7.0.
652
671
  """
653
672
  buf = []
654
673
  for line in ir.splitlines():
@@ -0,0 +1,44 @@
1
+ from llvmlite import ir
2
+ from numba.core import types
3
+ from numba.core.debuginfo import DIBuilder
4
+ from numba.cuda.types import GridGroup
5
+
6
+ _BYTE_SIZE = 8
7
+
8
+
9
+ class CUDADIBuilder(DIBuilder):
10
+
11
+ def _var_type(self, lltype, size, datamodel=None):
12
+ is_bool = False
13
+ is_grid_group = False
14
+
15
+ if isinstance(lltype, ir.IntType):
16
+ if datamodel is None:
17
+ if size == 1:
18
+ name = str(lltype)
19
+ is_bool = True
20
+ else:
21
+ name = str(datamodel.fe_type)
22
+ if isinstance(datamodel.fe_type, types.Boolean):
23
+ is_bool = True
24
+ elif isinstance(datamodel.fe_type, GridGroup):
25
+ is_grid_group = True
26
+
27
+ if is_bool or is_grid_group:
28
+ m = self.module
29
+ bitsize = _BYTE_SIZE * size
30
+ # Boolean type workaround until upstream Numba is fixed
31
+ if is_bool:
32
+ ditok = "DW_ATE_boolean"
33
+ # GridGroup type should use numba.cuda implementation
34
+ elif is_grid_group:
35
+ ditok = "DW_ATE_unsigned"
36
+
37
+ return m.add_debug_info('DIBasicType', {
38
+ 'name': name,
39
+ 'size': bitsize,
40
+ 'encoding': ir.DIToken(ditok),
41
+ })
42
+
43
+ # For other cases, use upstream Numba implementation
44
+ return super()._var_type(lltype, size, datamodel=datamodel)
@@ -4,8 +4,9 @@ import re
4
4
  import sys
5
5
  import ctypes
6
6
  import functools
7
+ from collections import defaultdict
7
8
 
8
- from numba.core import config, serialize, sigutils, types, typing, utils
9
+ from numba.core import config, ir, serialize, sigutils, types, typing, utils
9
10
  from numba.core.caching import Cache, CacheImpl
10
11
  from numba.core.compiler_lock import global_compiler_lock
11
12
  from numba.core.dispatcher import Dispatcher
@@ -42,6 +43,55 @@ cuda_fp16_math_funcs = ['hsin', 'hcos',
42
43
  reshape_funcs = ['nocopy_empty_reshape', 'numba_attempt_nocopy_reshape']
43
44
 
44
45
 
46
+ def get_cres_link_objects(cres):
47
+ """Given a compile result, return a set of all linkable code objects that
48
+ are required for it to be fully linked."""
49
+
50
+ link_objects = set()
51
+
52
+ # List of calls into declared device functions
53
+ device_func_calls = [
54
+ (name, v) for name, v in cres.fndesc.typemap.items() if (
55
+ isinstance(v, cuda_types.CUDADispatcher)
56
+ )
57
+ ]
58
+
59
+ # List of tuples with SSA name of calls and corresponding signature
60
+ call_signatures = [
61
+ (call.func.name, sig)
62
+ for call, sig in cres.fndesc.calltypes.items() if (
63
+ isinstance(call, ir.Expr) and call.op == 'call'
64
+ )
65
+ ]
66
+
67
+ # Map SSA names to all invoked signatures
68
+ call_signature_d = defaultdict(list)
69
+ for name, sig in call_signatures:
70
+ call_signature_d[name].append(sig)
71
+
72
+ # Add the link objects from the current function's callees
73
+ for name, v in device_func_calls:
74
+ for sig in call_signature_d.get(name, []):
75
+ called_cres = v.dispatcher.overloads[sig.args]
76
+ called_link_objects = get_cres_link_objects(called_cres)
77
+ link_objects.update(called_link_objects)
78
+
79
+ # From this point onwards, we are only interested in ExternFunction
80
+ # declarations - these are the calls made directly in this function to
81
+ # them.
82
+ for name, v in cres.fndesc.typemap.items():
83
+ if not isinstance(v, Function):
84
+ continue
85
+
86
+ if not isinstance(v.typing_key, ExternFunction):
87
+ continue
88
+
89
+ for obj in v.typing_key.link:
90
+ link_objects.add(obj)
91
+
92
+ return link_objects
93
+
94
+
45
95
  class _Kernel(serialize.ReduceMixin):
46
96
  '''
47
97
  CUDA Kernel specialized for a given set of argument types. When called, this
@@ -159,15 +209,8 @@ class _Kernel(serialize.ReduceMixin):
159
209
 
160
210
  self.maybe_link_nrt(link, tgt_ctx, asm)
161
211
 
162
- for k, v in cres.fndesc.typemap.items():
163
- if not isinstance(v, Function):
164
- continue
165
-
166
- if not isinstance(v.typing_key, ExternFunction):
167
- continue
168
-
169
- for obj in v.typing_key.link:
170
- lib.add_linking_file(obj)
212
+ for obj in get_cres_link_objects(cres):
213
+ lib.add_linking_file(obj)
171
214
 
172
215
  for filepath in link:
173
216
  lib.add_linking_file(filepath)
@@ -267,7 +310,11 @@ class _Kernel(serialize.ReduceMixin):
267
310
  """
268
311
  cufunc = self._codelibrary.get_cufunc()
269
312
 
270
- if hasattr(self, "target_context") and self.target_context.enable_nrt:
313
+ if (
314
+ hasattr(self, "target_context")
315
+ and self.target_context.enable_nrt
316
+ and config.CUDA_NRT_STATS
317
+ ):
271
318
  rtsys.ensure_initialized()
272
319
  rtsys.set_memsys_to_module(cufunc.module)
273
320
  # We don't know which stream the kernel will be launched on, so
@@ -3,8 +3,7 @@ from functools import cached_property
3
3
  import llvmlite.binding as ll
4
4
  from llvmlite import ir
5
5
 
6
- from numba.core import (cgutils, config, debuginfo, itanium_mangler, types,
7
- typing, utils)
6
+ from numba.core import cgutils, config, itanium_mangler, types, typing
8
7
  from numba.core.dispatcher import Dispatcher
9
8
  from numba.core.base import BaseContext
10
9
  from numba.core.callconv import BaseCallConv, MinimalCallConv
@@ -12,7 +11,8 @@ from numba.core.typing import cmathdecl
12
11
  from numba.core import datamodel
13
12
 
14
13
  from .cudadrv import nvvm
15
- from numba.cuda import codegen, nvvmutils, ufuncs
14
+ from numba.cuda import codegen, ufuncs
15
+ from numba.cuda.debuginfo import CUDADIBuilder
16
16
  from numba.cuda.models import cuda_data_manager
17
17
 
18
18
  # -----------------------------------------------------------------------------
@@ -80,7 +80,7 @@ class CUDATargetContext(BaseContext):
80
80
 
81
81
  @property
82
82
  def DIBuilder(self):
83
- return debuginfo.DIBuilder
83
+ return CUDADIBuilder
84
84
 
85
85
  @property
86
86
  def enable_boundscheck(self):
@@ -150,136 +150,6 @@ class CUDATargetContext(BaseContext):
150
150
  return itanium_mangler.mangle(name, argtypes, abi_tags=abi_tags,
151
151
  uid=uid)
152
152
 
153
- def prepare_cuda_kernel(self, codelib, fndesc, debug, lineinfo,
154
- nvvm_options, filename, linenum,
155
- max_registers=None, lto=False):
156
- """
157
- Adapt a code library ``codelib`` with the numba compiled CUDA kernel
158
- with name ``fname`` and arguments ``argtypes`` for NVVM.
159
- A new library is created with a wrapper function that can be used as
160
- the kernel entry point for the given kernel.
161
-
162
- Returns the new code library and the wrapper function.
163
-
164
- Parameters:
165
-
166
- codelib: The CodeLibrary containing the device function to wrap
167
- in a kernel call.
168
- fndesc: The FunctionDescriptor of the source function.
169
- debug: Whether to compile with debug.
170
- lineinfo: Whether to emit line info.
171
- nvvm_options: Dict of NVVM options used when compiling the new library.
172
- filename: The source filename that the function is contained in.
173
- linenum: The source line that the function is on.
174
- max_registers: The max_registers argument for the code library.
175
- """
176
- kernel_name = itanium_mangler.prepend_namespace(
177
- fndesc.llvm_func_name, ns='cudapy',
178
- )
179
- library = self.codegen().create_library(f'{codelib.name}_kernel_',
180
- entry_name=kernel_name,
181
- nvvm_options=nvvm_options,
182
- max_registers=max_registers,
183
- lto=lto
184
- )
185
- library.add_linking_library(codelib)
186
- wrapper = self.generate_kernel_wrapper(library, fndesc, kernel_name,
187
- debug, lineinfo, filename,
188
- linenum)
189
- return library, wrapper
190
-
191
- def generate_kernel_wrapper(self, library, fndesc, kernel_name, debug,
192
- lineinfo, filename, linenum):
193
- """
194
- Generate the kernel wrapper in the given ``library``.
195
- The function being wrapped is described by ``fndesc``.
196
- The wrapper function is returned.
197
- """
198
-
199
- argtypes = fndesc.argtypes
200
- arginfo = self.get_arg_packer(argtypes)
201
- argtys = list(arginfo.argument_types)
202
- wrapfnty = ir.FunctionType(ir.VoidType(), argtys)
203
- wrapper_module = self.create_module("cuda.kernel.wrapper")
204
- fnty = ir.FunctionType(ir.IntType(32),
205
- [self.call_conv.get_return_type(types.pyobject)]
206
- + argtys)
207
- func = ir.Function(wrapper_module, fnty, fndesc.llvm_func_name)
208
-
209
- prefixed = itanium_mangler.prepend_namespace(func.name, ns='cudapy')
210
- wrapfn = ir.Function(wrapper_module, wrapfnty, prefixed)
211
- builder = ir.IRBuilder(wrapfn.append_basic_block(''))
212
-
213
- if debug or lineinfo:
214
- directives_only = lineinfo and not debug
215
- debuginfo = self.DIBuilder(module=wrapper_module,
216
- filepath=filename,
217
- cgctx=self,
218
- directives_only=directives_only)
219
- debuginfo.mark_subprogram(
220
- wrapfn, kernel_name, fndesc.args, argtypes, linenum,
221
- )
222
- debuginfo.mark_location(builder, linenum)
223
-
224
- # Define error handling variable
225
- def define_error_gv(postfix):
226
- name = wrapfn.name + postfix
227
- gv = cgutils.add_global_variable(wrapper_module, ir.IntType(32),
228
- name)
229
- gv.initializer = ir.Constant(gv.type.pointee, None)
230
- return gv
231
-
232
- gv_exc = define_error_gv("__errcode__")
233
- gv_tid = []
234
- gv_ctaid = []
235
- for i in 'xyz':
236
- gv_tid.append(define_error_gv("__tid%s__" % i))
237
- gv_ctaid.append(define_error_gv("__ctaid%s__" % i))
238
-
239
- callargs = arginfo.from_arguments(builder, wrapfn.args)
240
- status, _ = self.call_conv.call_function(
241
- builder, func, types.void, argtypes, callargs)
242
-
243
- if debug:
244
- # Check error status
245
- with cgutils.if_likely(builder, status.is_ok):
246
- builder.ret_void()
247
-
248
- with builder.if_then(builder.not_(status.is_python_exc)):
249
- # User exception raised
250
- old = ir.Constant(gv_exc.type.pointee, None)
251
-
252
- # Use atomic cmpxchg to prevent rewriting the error status
253
- # Only the first error is recorded
254
-
255
- xchg = builder.cmpxchg(gv_exc, old, status.code,
256
- 'monotonic', 'monotonic')
257
- changed = builder.extract_value(xchg, 1)
258
-
259
- # If the xchange is successful, save the thread ID.
260
- sreg = nvvmutils.SRegBuilder(builder)
261
- with builder.if_then(changed):
262
- for dim, ptr, in zip("xyz", gv_tid):
263
- val = sreg.tid(dim)
264
- builder.store(val, ptr)
265
-
266
- for dim, ptr, in zip("xyz", gv_ctaid):
267
- val = sreg.ctaid(dim)
268
- builder.store(val, ptr)
269
-
270
- builder.ret_void()
271
-
272
- nvvm.set_cuda_kernel(wrapfn)
273
- library.add_ir_module(wrapper_module)
274
- if debug or lineinfo:
275
- debuginfo.finalize()
276
- library.finalize()
277
-
278
- if config.DUMP_LLVM:
279
- utils.dump_llvm(fndesc, wrapper_module)
280
-
281
- return library.get_function(wrapfn.name)
282
-
283
153
  def make_constant_array(self, builder, aryty, arr):
284
154
  """
285
155
  Unlike the parent version, this returns a pointer in the constant
@@ -261,7 +261,8 @@ class TestLinker(CUDATestCase):
261
261
 
262
262
 
263
263
  @unittest.skipIf(
264
- not PYNVJITLINK_INSTALLED, reason="Pynvjitlink is not installed"
264
+ not PYNVJITLINK_INSTALLED or not TEST_BIN_DIR,
265
+ reason="pynvjitlink not enabled"
265
266
  )
266
267
  class TestLinkerUsage(CUDATestCase):
267
268
  """Test whether pynvjitlink can be enabled by both environment variable