numba-cuda 0.4.0__tar.gz → 0.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (260) hide show
  1. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/PKG-INFO +20 -2
  2. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/README.md +19 -1
  3. numba_cuda-0.6.0/numba_cuda/VERSION +1 -0
  4. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/compiler.py +85 -8
  5. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadecl.py +6 -2
  6. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/linkable_code.py +13 -9
  7. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/nvvm.py +6 -1
  8. numba_cuda-0.6.0/numba_cuda/numba/cuda/debuginfo.py +44 -0
  9. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/decorators.py +9 -2
  10. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/dispatcher.py +62 -4
  11. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/target.py +4 -134
  12. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/testing.py +11 -1
  13. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +81 -0
  14. numba_cuda-0.6.0/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +481 -0
  15. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +10 -7
  16. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/nrt/test_nrt.py +4 -1
  17. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +4 -1
  18. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda.egg-info/PKG-INFO +20 -2
  19. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda.egg-info/SOURCES.txt +1 -0
  20. numba_cuda-0.4.0/numba_cuda/VERSION +0 -1
  21. numba_cuda-0.4.0/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +0 -222
  22. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/LICENSE +0 -0
  23. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/__init__.py +0 -0
  24. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/_version.py +0 -0
  25. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/__init__.py +0 -0
  26. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/api.py +0 -0
  27. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/api_util.py +0 -0
  28. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/args.py +0 -0
  29. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cg.py +0 -0
  30. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/codegen.py +0 -0
  31. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cpp_function_wrappers.cu +0 -0
  32. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cuda_fp16.h +0 -0
  33. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cuda_fp16.hpp +0 -0
  34. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cuda_paths.py +0 -0
  35. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/__init__.py +0 -0
  36. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/devicearray.py +0 -0
  37. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/devices.py +0 -0
  38. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/driver.py +0 -0
  39. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/drvapi.py +0 -0
  40. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/dummyarray.py +0 -0
  41. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/enums.py +0 -0
  42. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/error.py +0 -0
  43. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/libs.py +0 -0
  44. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/mappings.py +0 -0
  45. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/ndarray.py +0 -0
  46. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/nvrtc.py +0 -0
  47. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/rtapi.py +0 -0
  48. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudadrv/runtime.py +0 -0
  49. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudaimpl.py +0 -0
  50. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/cudamath.py +0 -0
  51. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/descriptor.py +0 -0
  52. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/device_init.py +0 -0
  53. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/deviceufunc.py +0 -0
  54. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/errors.py +0 -0
  55. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/extending.py +0 -0
  56. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/initialize.py +0 -0
  57. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/intrinsic_wrapper.py +0 -0
  58. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/intrinsics.py +0 -0
  59. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/kernels/__init__.py +0 -0
  60. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/kernels/reduction.py +0 -0
  61. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/kernels/transpose.py +0 -0
  62. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/libdevice.py +0 -0
  63. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/libdevicedecl.py +0 -0
  64. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/libdevicefuncs.py +0 -0
  65. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/libdeviceimpl.py +0 -0
  66. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/mathimpl.py +0 -0
  67. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/models.py +0 -0
  68. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/nvvmutils.py +0 -0
  69. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/printimpl.py +0 -0
  70. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/random.py +0 -0
  71. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/reshape_funcs.cu +0 -0
  72. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/runtime/__init__.py +0 -0
  73. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/runtime/memsys.cu +0 -0
  74. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/runtime/memsys.cuh +0 -0
  75. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/runtime/nrt.cu +0 -0
  76. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/runtime/nrt.py +0 -0
  77. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/__init__.py +0 -0
  78. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/api.py +0 -0
  79. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/compiler.py +0 -0
  80. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +0 -0
  81. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +0 -0
  82. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/cudadrv/devices.py +0 -0
  83. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/cudadrv/driver.py +0 -0
  84. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +0 -0
  85. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +0 -0
  86. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/cudadrv/error.py +0 -0
  87. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/cudadrv/libs.py +0 -0
  88. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +0 -0
  89. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +0 -0
  90. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/kernel.py +0 -0
  91. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/kernelapi.py +0 -0
  92. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/reduction.py +0 -0
  93. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator/vector_types.py +0 -0
  94. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/simulator_init.py +0 -0
  95. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/stubs.py +0 -0
  96. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/__init__.py +0 -0
  97. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/__init__.py +0 -0
  98. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +0 -0
  99. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +0 -0
  100. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +0 -0
  101. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +0 -0
  102. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +0 -0
  103. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +0 -0
  104. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +0 -0
  105. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +0 -0
  106. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +0 -0
  107. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +0 -0
  108. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +0 -0
  109. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +0 -0
  110. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_events.py +0 -0
  111. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +0 -0
  112. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_init.py +0 -0
  113. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +0 -0
  114. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +0 -0
  115. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +0 -0
  116. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +0 -0
  117. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +0 -0
  118. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +0 -0
  119. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +0 -0
  120. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +0 -0
  121. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +0 -0
  122. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +0 -0
  123. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +0 -0
  124. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +0 -0
  125. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +0 -0
  126. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +0 -0
  127. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/__init__.py +0 -0
  128. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +0 -0
  129. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +0 -0
  130. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +0 -0
  131. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -0
  132. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +0 -0
  133. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +0 -0
  134. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_array.py +0 -0
  135. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +0 -0
  136. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +0 -0
  137. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +0 -0
  138. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +0 -0
  139. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +0 -0
  140. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_caching.py +0 -0
  141. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_casting.py +0 -0
  142. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +0 -0
  143. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +0 -0
  144. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_complex.py +0 -0
  145. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +0 -0
  146. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +0 -0
  147. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +0 -0
  148. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +0 -0
  149. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +0 -0
  150. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +0 -0
  151. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +0 -0
  152. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_debug.py +0 -0
  153. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +0 -0
  154. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_enums.py +0 -0
  155. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_errors.py +0 -0
  156. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_exception.py +0 -0
  157. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_extending.py +0 -0
  158. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +0 -0
  159. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_forall.py +0 -0
  160. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +0 -0
  161. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +0 -0
  162. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_globals.py +0 -0
  163. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +0 -0
  164. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +0 -0
  165. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +0 -0
  166. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +0 -0
  167. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +0 -0
  168. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +0 -0
  169. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +0 -0
  170. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +0 -0
  171. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_lang.py +0 -0
  172. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +0 -0
  173. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +0 -0
  174. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +0 -0
  175. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +0 -0
  176. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +0 -0
  177. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_math.py +0 -0
  178. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +0 -0
  179. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +0 -0
  180. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +0 -0
  181. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +0 -0
  182. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +0 -0
  183. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +0 -0
  184. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +0 -0
  185. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_operator.py +0 -0
  186. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +0 -0
  187. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_overload.py +0 -0
  188. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_powi.py +0 -0
  189. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_print.py +0 -0
  190. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +0 -0
  191. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_random.py +0 -0
  192. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +0 -0
  193. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +0 -0
  194. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +0 -0
  195. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +0 -0
  196. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +0 -0
  197. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +0 -0
  198. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_sm.py +0 -0
  199. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +0 -0
  200. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +0 -0
  201. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_sync.py +0 -0
  202. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +0 -0
  203. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +0 -0
  204. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +0 -0
  205. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +0 -0
  206. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +0 -0
  207. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +0 -0
  208. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +0 -0
  209. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +0 -0
  210. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +0 -0
  211. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_warning.py +0 -0
  212. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +0 -0
  213. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudasim/__init__.py +0 -0
  214. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudasim/support.py +0 -0
  215. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +0 -0
  216. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/data/__init__.py +0 -0
  217. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/data/cuda_include.cu +0 -0
  218. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/data/error.cu +0 -0
  219. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/data/jitlink.cu +0 -0
  220. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -0
  221. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/data/warn.cu +0 -0
  222. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/__init__.py +0 -0
  223. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +0 -0
  224. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +0 -0
  225. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +0 -0
  226. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +0 -0
  227. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +0 -0
  228. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +0 -0
  229. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +0 -0
  230. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/test_random.py +0 -0
  231. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +0 -0
  232. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +0 -0
  233. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +0 -0
  234. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +0 -0
  235. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/nocuda/__init__.py +0 -0
  236. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +0 -0
  237. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +0 -0
  238. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/nocuda/test_import.py +0 -0
  239. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +0 -0
  240. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +0 -0
  241. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/nrt/__init__.py +0 -0
  242. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/nrt/mock_numpy.py +0 -0
  243. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/support.py +0 -0
  244. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/test_binary_generation/Makefile +0 -0
  245. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +0 -0
  246. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu +0 -0
  247. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu +0 -0
  248. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/types.py +0 -0
  249. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/ufuncs.py +0 -0
  250. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/utils.py +0 -0
  251. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/vector_types.py +0 -0
  252. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda/numba/cuda/vectorizers.py +0 -0
  253. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda.egg-info/dependency_links.txt +0 -0
  254. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda.egg-info/requires.txt +0 -0
  255. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/numba_cuda.egg-info/top_level.txt +0 -0
  256. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/pyproject.toml +0 -0
  257. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/setup.cfg +0 -0
  258. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/setup.py +0 -0
  259. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/site-packages/_numba_cuda_redirector.pth +0 -0
  260. {numba_cuda-0.4.0 → numba_cuda-0.6.0}/site-packages/_numba_cuda_redirector.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: numba-cuda
3
- Version: 0.4.0
3
+ Version: 0.6.0
4
4
  Summary: CUDA target for Numba
5
5
  Author: Anaconda Inc., NVIDIA Corporation
6
6
  License: BSD 2-clause
@@ -27,7 +27,19 @@ tracker](https://github.com/NVIDIA/numba-cuda/issues).
27
27
  To raise questions or initiate discussions, please use the [Numba Discourse
28
28
  forum](https://numba.discourse.group).
29
29
 
30
- ## Building from source
30
+ ## Installation with pip
31
+
32
+ ```shell
33
+ pip install numba-cuda
34
+ ```
35
+
36
+ ## Installation with Conda
37
+
38
+ ```shell
39
+ conda install -c conda-forge numba-cuda
40
+ ```
41
+
42
+ ## Installation from source
31
43
 
32
44
  Install as an editable install:
33
45
 
@@ -53,3 +65,9 @@ which will show a path like:
53
65
  ```
54
66
  <path to numba-cuda repo>/numba_cuda/numba/cuda/__init__.py
55
67
  ```
68
+
69
+ ## Contributing Guide
70
+
71
+ Review the
72
+ [CONTRIBUTING.md](https://github.com/NVIDIA/numba-cuda/blob/main/CONTRIBUTING.md)
73
+ file for information on how to contribute code and issues to the project.
@@ -12,7 +12,19 @@ tracker](https://github.com/NVIDIA/numba-cuda/issues).
12
12
  To raise questions or initiate discussions, please use the [Numba Discourse
13
13
  forum](https://numba.discourse.group).
14
14
 
15
- ## Building from source
15
+ ## Installation with pip
16
+
17
+ ```shell
18
+ pip install numba-cuda
19
+ ```
20
+
21
+ ## Installation with Conda
22
+
23
+ ```shell
24
+ conda install -c conda-forge numba-cuda
25
+ ```
26
+
27
+ ## Installation from source
16
28
 
17
29
  Install as an editable install:
18
30
 
@@ -38,3 +50,9 @@ which will show a path like:
38
50
  ```
39
51
  <path to numba-cuda repo>/numba_cuda/numba/cuda/__init__.py
40
52
  ```
53
+
54
+ ## Contributing Guide
55
+
56
+ Review the
57
+ [CONTRIBUTING.md](https://github.com/NVIDIA/numba-cuda/blob/main/CONTRIBUTING.md)
58
+ file for information on how to contribute code and issues to the project.
@@ -0,0 +1 @@
1
+ 0.6.0
@@ -1,14 +1,17 @@
1
1
  from llvmlite import ir
2
2
  from numba.core.typing.templates import ConcreteTemplate
3
+ from numba.core import ir as numba_ir
3
4
  from numba.core import (cgutils, types, typing, funcdesc, config, compiler,
4
5
  sigutils, utils)
5
6
  from numba.core.compiler import (sanitize_compile_result_entries, CompilerBase,
6
7
  DefaultPassBuilder, Flags, Option,
7
8
  CompileResult)
8
9
  from numba.core.compiler_lock import global_compiler_lock
9
- from numba.core.compiler_machinery import (LoweringPass,
10
+ from numba.core.compiler_machinery import (FunctionPass, LoweringPass,
10
11
  PassManager, register_pass)
12
+ from numba.core.interpreter import Interpreter
11
13
  from numba.core.errors import NumbaInvalidConfigWarning
14
+ from numba.core.untyped_passes import TranslateByteCode
12
15
  from numba.core.typed_passes import (IRLegalization, NativeLowering,
13
16
  AnnotateTypes)
14
17
  from warnings import warn
@@ -143,13 +146,74 @@ class CreateLibrary(LoweringPass):
143
146
  return True
144
147
 
145
148
 
149
+ class CUDABytecodeInterpreter(Interpreter):
150
+ # Based on the superclass implementation, but names the resulting variable
151
+ # "$bool<N>" instead of "bool<N>" - see Numba PR #9888:
152
+ # https://github.com/numba/numba/pull/9888
153
+ #
154
+ # This can be removed once that PR is available in an upstream Numba
155
+ # release.
156
+ def _op_JUMP_IF(self, inst, pred, iftrue):
157
+ brs = {
158
+ True: inst.get_jump_target(),
159
+ False: inst.next,
160
+ }
161
+ truebr = brs[iftrue]
162
+ falsebr = brs[not iftrue]
163
+
164
+ name = "$bool%s" % (inst.offset)
165
+ gv_fn = numba_ir.Global("bool", bool, loc=self.loc)
166
+ self.store(value=gv_fn, name=name)
167
+
168
+ callres = numba_ir.Expr.call(self.get(name), (self.get(pred),), (),
169
+ loc=self.loc)
170
+
171
+ pname = "$%spred" % (inst.offset)
172
+ predicate = self.store(value=callres, name=pname)
173
+ bra = numba_ir.Branch(cond=predicate, truebr=truebr, falsebr=falsebr,
174
+ loc=self.loc)
175
+ self.current_block.append(bra)
176
+
177
+
178
+ @register_pass(mutates_CFG=True, analysis_only=False)
179
+ class CUDATranslateBytecode(FunctionPass):
180
+ _name = "cuda_translate_bytecode"
181
+
182
+ def __init__(self):
183
+ FunctionPass.__init__(self)
184
+
185
+ def run_pass(self, state):
186
+ func_id = state['func_id']
187
+ bc = state['bc']
188
+ interp = CUDABytecodeInterpreter(func_id)
189
+ func_ir = interp.interpret(bc)
190
+ state['func_ir'] = func_ir
191
+ return True
192
+
193
+
146
194
  class CUDACompiler(CompilerBase):
147
195
  def define_pipelines(self):
148
196
  dpb = DefaultPassBuilder
149
197
  pm = PassManager('cuda')
150
198
 
151
199
  untyped_passes = dpb.define_untyped_pipeline(self.state)
152
- pm.passes.extend(untyped_passes.passes)
200
+
201
+ # Rather than replicating the whole untyped passes definition in
202
+ # numba-cuda, it seems cleaner to take the pass list and replace the
203
+ # TranslateBytecode pass with our own.
204
+
205
+ def replace_translate_pass(implementation, description):
206
+ if implementation is TranslateByteCode:
207
+ return (CUDATranslateBytecode, description)
208
+ else:
209
+ return (implementation, description)
210
+
211
+ cuda_untyped_passes = [
212
+ replace_translate_pass(implementation, description)
213
+ for implementation, description in untyped_passes.passes
214
+ ]
215
+
216
+ pm.passes.extend(cuda_untyped_passes)
153
217
 
154
218
  typed_passes = dpb.define_typed_pipeline(self.state)
155
219
  pm.passes.extend(typed_passes.passes)
@@ -352,6 +416,18 @@ def kernel_fixup(kernel, debug):
352
416
  kernel.return_value = ir.ReturnValue(kernel, ir.VoidType())
353
417
  kernel.args = kernel.args[1:]
354
418
 
419
+ # If debug metadata is present, remove the return value from it
420
+
421
+ if kernel_metadata := getattr(kernel, 'metadata', None):
422
+ if dbg_metadata := kernel_metadata.get('dbg', None):
423
+ for name, value in dbg_metadata.operands:
424
+ if name == "type":
425
+ type_metadata = value
426
+ for tm_name, tm_value in type_metadata.operands:
427
+ if tm_name == 'types':
428
+ types = tm_value
429
+ types.operands = types.operands[1:]
430
+
355
431
  # Mark as a kernel for NVVM
356
432
 
357
433
  nvvm.set_cuda_kernel(kernel)
@@ -570,16 +646,16 @@ def compile_ptx_for_current_device(pyfunc, sig, debug=None, lineinfo=False,
570
646
  abi=abi, abi_info=abi_info)
571
647
 
572
648
 
573
- def declare_device_function(name, restype, argtypes):
574
- return declare_device_function_template(name, restype, argtypes).key
649
+ def declare_device_function(name, restype, argtypes, link):
650
+ return declare_device_function_template(name, restype, argtypes, link).key
575
651
 
576
652
 
577
- def declare_device_function_template(name, restype, argtypes):
653
+ def declare_device_function_template(name, restype, argtypes, link):
578
654
  from .descriptor import cuda_target
579
655
  typingctx = cuda_target.typing_context
580
656
  targetctx = cuda_target.target_context
581
657
  sig = typing.signature(restype, *argtypes)
582
- extfn = ExternFunction(name, sig)
658
+ extfn = ExternFunction(name, sig, link)
583
659
 
584
660
  class device_function_template(ConcreteTemplate):
585
661
  key = extfn
@@ -593,7 +669,8 @@ def declare_device_function_template(name, restype, argtypes):
593
669
  return device_function_template
594
670
 
595
671
 
596
- class ExternFunction(object):
597
- def __init__(self, name, sig):
672
+ class ExternFunction:
673
+ def __init__(self, name, sig, link):
598
674
  self.name = name
599
675
  self.sig = sig
676
+ self.link = link
@@ -403,16 +403,20 @@ _genfp16_binary_operator(operator.itruediv)
403
403
 
404
404
 
405
405
  def _resolve_wrapped_unary(fname):
406
+ link = tuple()
406
407
  decl = declare_device_function_template(f'__numba_wrapper_{fname}',
407
408
  types.float16,
408
- (types.float16,))
409
+ (types.float16,),
410
+ link)
409
411
  return types.Function(decl)
410
412
 
411
413
 
412
414
  def _resolve_wrapped_binary(fname):
415
+ link = tuple()
413
416
  decl = declare_device_function_template(f'__numba_wrapper_{fname}',
414
417
  types.float16,
415
- (types.float16, types.float16,))
418
+ (types.float16, types.float16,),
419
+ link)
416
420
  return types.Function(decl)
417
421
 
418
422
 
@@ -2,8 +2,12 @@ from .mappings import FILE_EXTENSION_MAP
2
2
 
3
3
 
4
4
  class LinkableCode:
5
- """An object that can be passed in the `link` list argument to `@cuda.jit`
6
- kernels to supply code to be linked from memory."""
5
+ """An object that holds code to be linked from memory.
6
+
7
+ :param data: A buffer containing the data to link.
8
+ :param name: The name of the file to be referenced in any compilation or
9
+ linking errors that may be produced.
10
+ """
7
11
 
8
12
  def __init__(self, data, name=None):
9
13
  self.data = data
@@ -15,49 +19,49 @@ class LinkableCode:
15
19
 
16
20
 
17
21
  class PTXSource(LinkableCode):
18
- """PTX Source code in memory"""
22
+ """PTX source code in memory."""
19
23
 
20
24
  kind = FILE_EXTENSION_MAP["ptx"]
21
25
  default_name = "<unnamed-ptx>"
22
26
 
23
27
 
24
28
  class CUSource(LinkableCode):
25
- """CUDA C/C++ Source code in memory"""
29
+ """CUDA C/C++ source code in memory."""
26
30
 
27
31
  kind = "cu"
28
32
  default_name = "<unnamed-cu>"
29
33
 
30
34
 
31
35
  class Fatbin(LinkableCode):
32
- """A fatbin ELF in memory"""
36
+ """An ELF Fatbin in memory."""
33
37
 
34
38
  kind = FILE_EXTENSION_MAP["fatbin"]
35
39
  default_name = "<unnamed-fatbin>"
36
40
 
37
41
 
38
42
  class Cubin(LinkableCode):
39
- """A cubin ELF in memory"""
43
+ """An ELF Cubin in memory."""
40
44
 
41
45
  kind = FILE_EXTENSION_MAP["cubin"]
42
46
  default_name = "<unnamed-cubin>"
43
47
 
44
48
 
45
49
  class Archive(LinkableCode):
46
- """An archive of objects in memory"""
50
+ """An archive of objects in memory."""
47
51
 
48
52
  kind = FILE_EXTENSION_MAP["a"]
49
53
  default_name = "<unnamed-archive>"
50
54
 
51
55
 
52
56
  class Object(LinkableCode):
53
- """An object file in memory"""
57
+ """An object file in memory."""
54
58
 
55
59
  kind = FILE_EXTENSION_MAP["o"]
56
60
  default_name = "<unnamed-object>"
57
61
 
58
62
 
59
63
  class LTOIR(LinkableCode):
60
- """An LTOIR file in memory"""
64
+ """An LTOIR file in memory."""
61
65
 
62
66
  kind = "ltoir"
63
67
  default_name = "<unnamed-ltoir>"
@@ -314,7 +314,9 @@ COMPUTE_CAPABILITIES = (
314
314
  (6, 0), (6, 1), (6, 2),
315
315
  (7, 0), (7, 2), (7, 5),
316
316
  (8, 0), (8, 6), (8, 7), (8, 9),
317
- (9, 0)
317
+ (9, 0),
318
+ (10, 0), (10, 1),
319
+ (12, 0),
318
320
  )
319
321
 
320
322
  # Maps CTK version -> (min supported cc, max supported cc) inclusive
@@ -331,6 +333,9 @@ CTK_SUPPORTED = {
331
333
  (12, 2): ((5, 0), (9, 0)),
332
334
  (12, 3): ((5, 0), (9, 0)),
333
335
  (12, 4): ((5, 0), (9, 0)),
336
+ (12, 5): ((5, 0), (9, 0)),
337
+ (12, 6): ((5, 0), (9, 0)),
338
+ (12, 8): ((5, 0), (12, 0)),
334
339
  }
335
340
 
336
341
 
@@ -0,0 +1,44 @@
1
+ from llvmlite import ir
2
+ from numba.core import types
3
+ from numba.core.debuginfo import DIBuilder
4
+ from numba.cuda.types import GridGroup
5
+
6
+ _BYTE_SIZE = 8
7
+
8
+
9
class CUDADIBuilder(DIBuilder):
    """``DIBuilder`` that customises the debug type of some integer values.

    Only ``_var_type`` is overridden: integer LLVM types that represent a
    boolean or a ``GridGroup`` are described with a ``DIBasicType`` built
    here; every other case is delegated to the parent implementation.
    """

    def _var_type(self, lltype, size, datamodel=None):
        """Return debug type metadata for a variable.

        :param lltype: The LLVM type of the variable.
        :param size: The size of the variable; it is multiplied by
                     ``_BYTE_SIZE`` to obtain the size recorded in the
                     metadata.
        :param datamodel: Optional data model; when given, its ``fe_type``
                          supplies the type name and decides the encoding.
        :return: The ``DIBasicType`` metadata node for booleans and grid
                 groups, otherwise whatever the parent class returns.
        """
        # ``encoding`` stays None unless one of the special cases matches.
        encoding = None

        if isinstance(lltype, ir.IntType):
            if datamodel is not None:
                fe_type = datamodel.fe_type
                name = str(fe_type)
                if isinstance(fe_type, types.Boolean):
                    # Boolean type workaround until upstream Numba is fixed
                    encoding = "DW_ATE_boolean"
                elif isinstance(fe_type, GridGroup):
                    # GridGroup type should use numba.cuda implementation
                    encoding = "DW_ATE_unsigned"
            elif size == 1:
                # Without a data model, an integer of size 1 is treated as a
                # boolean and named after its LLVM type.
                name = str(lltype)
                encoding = "DW_ATE_boolean"

        if encoding is None:
            # For other cases, use upstream Numba implementation
            return super()._var_type(lltype, size, datamodel=datamodel)

        return self.module.add_debug_info('DIBasicType', {
            'name': name,
            'size': _BYTE_SIZE * size,
            'encoding': ir.DIToken(encoding),
        })
@@ -173,7 +173,7 @@ def jit(func_or_sig=None, device=False, inline=False, link=[], debug=None,
173
173
  return disp
174
174
 
175
175
 
176
- def declare_device(name, sig):
176
+ def declare_device(name, sig, link=None):
177
177
  """
178
178
  Declare the signature of a foreign function. Returns a descriptor that can
179
179
  be used to call the function from a Python kernel.
@@ -181,10 +181,17 @@ def declare_device(name, sig):
181
181
  :param name: The name of the foreign function.
182
182
  :type name: str
183
183
  :param sig: The Numba signature of the function.
184
+ :param link: External code to link when calling the function.
184
185
  """
186
+ if link is None:
187
+ link = tuple()
188
+ else:
189
+ if not isinstance(link, (list, tuple, set)):
190
+ link = (link,)
191
+
185
192
  argtypes, restype = sigutils.normalize_signature(sig)
186
193
  if restype is None:
187
194
  msg = 'Return type must be provided for device declarations'
188
195
  raise TypeError(msg)
189
196
 
190
- return declare_device_function(name, restype, argtypes)
197
+ return declare_device_function(name, restype, argtypes, link)
@@ -4,17 +4,19 @@ import re
4
4
  import sys
5
5
  import ctypes
6
6
  import functools
7
+ from collections import defaultdict
7
8
 
8
- from numba.core import config, serialize, sigutils, types, typing, utils
9
+ from numba.core import config, ir, serialize, sigutils, types, typing, utils
9
10
  from numba.core.caching import Cache, CacheImpl
10
11
  from numba.core.compiler_lock import global_compiler_lock
11
12
  from numba.core.dispatcher import Dispatcher
12
13
  from numba.core.errors import NumbaPerformanceWarning
13
14
  from numba.core.typing.typeof import Purpose, typeof
14
-
15
+ from numba.core.types.functions import Function
15
16
  from numba.cuda.api import get_current_device
16
17
  from numba.cuda.args import wrap_arg
17
- from numba.cuda.compiler import compile_cuda, CUDACompiler, kernel_fixup
18
+ from numba.cuda.compiler import (compile_cuda, CUDACompiler, kernel_fixup,
19
+ ExternFunction)
18
20
  from numba.cuda.cudadrv import driver
19
21
  from numba.cuda.cudadrv.devices import get_context
20
22
  from numba.cuda.descriptor import cuda_target
@@ -41,6 +43,55 @@ cuda_fp16_math_funcs = ['hsin', 'hcos',
41
43
  reshape_funcs = ['nocopy_empty_reshape', 'numba_attempt_nocopy_reshape']
42
44
 
43
45
 
46
def get_cres_link_objects(cres):
    """Return the set of linkable code objects a compile result needs.

    The set combines:

    * the ``link`` entries of every ``ExternFunction`` declaration that
      appears in the typemap of ``cres``, and
    * recursively, the link objects of each overload of a CUDA dispatcher
      that ``cres`` calls, looked up by the argument types of the call.

    :param cres: The compile result to inspect; its ``fndesc.typemap`` and
                 ``fndesc.calltypes`` are read.
    :return: A set of the objects to link.
    """
    fndesc = cres.fndesc

    # Group the signatures of all typed call expressions by the SSA name of
    # the callee.
    signatures_by_callee = defaultdict(list)
    for expr, sig in fndesc.calltypes.items():
        if isinstance(expr, ir.Expr) and expr.op == 'call':
            signatures_by_callee[expr.func.name].append(sig)

    required = set()

    for ssa_name, ty in tuple(fndesc.typemap.items()):
        # Calls into other dispatchers: pull in whatever each invoked
        # overload itself needs.
        if isinstance(ty, cuda_types.CUDADispatcher):
            for sig in signatures_by_callee.get(ssa_name, []):
                callee_cres = ty.dispatcher.overloads[sig.args]
                required.update(get_cres_link_objects(callee_cres))

        # ExternFunction declarations referenced directly by this function.
        if isinstance(ty, Function):
            key = ty.typing_key
            if isinstance(key, ExternFunction):
                required.update(key.link)

    return required
93
+
94
+
44
95
  class _Kernel(serialize.ReduceMixin):
45
96
  '''
46
97
  CUDA Kernel specialized for a given set of argument types. When called, this
@@ -158,6 +209,9 @@ class _Kernel(serialize.ReduceMixin):
158
209
 
159
210
  self.maybe_link_nrt(link, tgt_ctx, asm)
160
211
 
212
+ for obj in get_cres_link_objects(cres):
213
+ lib.add_linking_file(obj)
214
+
161
215
  for filepath in link:
162
216
  lib.add_linking_file(filepath)
163
217
 
@@ -256,7 +310,11 @@ class _Kernel(serialize.ReduceMixin):
256
310
  """
257
311
  cufunc = self._codelibrary.get_cufunc()
258
312
 
259
- if hasattr(self, "target_context") and self.target_context.enable_nrt:
313
+ if (
314
+ hasattr(self, "target_context")
315
+ and self.target_context.enable_nrt
316
+ and config.CUDA_NRT_STATS
317
+ ):
260
318
  rtsys.ensure_initialized()
261
319
  rtsys.set_memsys_to_module(cufunc.module)
262
320
  # We don't know which stream the kernel will be launched on, so
@@ -3,8 +3,7 @@ from functools import cached_property
3
3
  import llvmlite.binding as ll
4
4
  from llvmlite import ir
5
5
 
6
- from numba.core import (cgutils, config, debuginfo, itanium_mangler, types,
7
- typing, utils)
6
+ from numba.core import cgutils, config, itanium_mangler, types, typing
8
7
  from numba.core.dispatcher import Dispatcher
9
8
  from numba.core.base import BaseContext
10
9
  from numba.core.callconv import BaseCallConv, MinimalCallConv
@@ -12,7 +11,8 @@ from numba.core.typing import cmathdecl
12
11
  from numba.core import datamodel
13
12
 
14
13
  from .cudadrv import nvvm
15
- from numba.cuda import codegen, nvvmutils, ufuncs
14
+ from numba.cuda import codegen, ufuncs
15
+ from numba.cuda.debuginfo import CUDADIBuilder
16
16
  from numba.cuda.models import cuda_data_manager
17
17
 
18
18
  # -----------------------------------------------------------------------------
@@ -80,7 +80,7 @@ class CUDATargetContext(BaseContext):
80
80
 
81
81
  @property
82
82
  def DIBuilder(self):
83
- return debuginfo.DIBuilder
83
+ return CUDADIBuilder
84
84
 
85
85
  @property
86
86
  def enable_boundscheck(self):
@@ -150,136 +150,6 @@ class CUDATargetContext(BaseContext):
150
150
  return itanium_mangler.mangle(name, argtypes, abi_tags=abi_tags,
151
151
  uid=uid)
152
152
 
153
- def prepare_cuda_kernel(self, codelib, fndesc, debug, lineinfo,
154
- nvvm_options, filename, linenum,
155
- max_registers=None, lto=False):
156
- """
157
- Adapt a code library ``codelib`` with the numba compiled CUDA kernel
158
- with name ``fname`` and arguments ``argtypes`` for NVVM.
159
- A new library is created with a wrapper function that can be used as
160
- the kernel entry point for the given kernel.
161
-
162
- Returns the new code library and the wrapper function.
163
-
164
- Parameters:
165
-
166
- codelib: The CodeLibrary containing the device function to wrap
167
- in a kernel call.
168
- fndesc: The FunctionDescriptor of the source function.
169
- debug: Whether to compile with debug.
170
- lineinfo: Whether to emit line info.
171
- nvvm_options: Dict of NVVM options used when compiling the new library.
172
- filename: The source filename that the function is contained in.
173
- linenum: The source line that the function is on.
174
- max_registers: The max_registers argument for the code library.
175
- """
176
- kernel_name = itanium_mangler.prepend_namespace(
177
- fndesc.llvm_func_name, ns='cudapy',
178
- )
179
- library = self.codegen().create_library(f'{codelib.name}_kernel_',
180
- entry_name=kernel_name,
181
- nvvm_options=nvvm_options,
182
- max_registers=max_registers,
183
- lto=lto
184
- )
185
- library.add_linking_library(codelib)
186
- wrapper = self.generate_kernel_wrapper(library, fndesc, kernel_name,
187
- debug, lineinfo, filename,
188
- linenum)
189
- return library, wrapper
190
-
191
- def generate_kernel_wrapper(self, library, fndesc, kernel_name, debug,
192
- lineinfo, filename, linenum):
193
- """
194
- Generate the kernel wrapper in the given ``library``.
195
- The function being wrapped is described by ``fndesc``.
196
- The wrapper function is returned.
197
- """
198
-
199
- argtypes = fndesc.argtypes
200
- arginfo = self.get_arg_packer(argtypes)
201
- argtys = list(arginfo.argument_types)
202
- wrapfnty = ir.FunctionType(ir.VoidType(), argtys)
203
- wrapper_module = self.create_module("cuda.kernel.wrapper")
204
- fnty = ir.FunctionType(ir.IntType(32),
205
- [self.call_conv.get_return_type(types.pyobject)]
206
- + argtys)
207
- func = ir.Function(wrapper_module, fnty, fndesc.llvm_func_name)
208
-
209
- prefixed = itanium_mangler.prepend_namespace(func.name, ns='cudapy')
210
- wrapfn = ir.Function(wrapper_module, wrapfnty, prefixed)
211
- builder = ir.IRBuilder(wrapfn.append_basic_block(''))
212
-
213
- if debug or lineinfo:
214
- directives_only = lineinfo and not debug
215
- debuginfo = self.DIBuilder(module=wrapper_module,
216
- filepath=filename,
217
- cgctx=self,
218
- directives_only=directives_only)
219
- debuginfo.mark_subprogram(
220
- wrapfn, kernel_name, fndesc.args, argtypes, linenum,
221
- )
222
- debuginfo.mark_location(builder, linenum)
223
-
224
- # Define error handling variable
225
- def define_error_gv(postfix):
226
- name = wrapfn.name + postfix
227
- gv = cgutils.add_global_variable(wrapper_module, ir.IntType(32),
228
- name)
229
- gv.initializer = ir.Constant(gv.type.pointee, None)
230
- return gv
231
-
232
- gv_exc = define_error_gv("__errcode__")
233
- gv_tid = []
234
- gv_ctaid = []
235
- for i in 'xyz':
236
- gv_tid.append(define_error_gv("__tid%s__" % i))
237
- gv_ctaid.append(define_error_gv("__ctaid%s__" % i))
238
-
239
- callargs = arginfo.from_arguments(builder, wrapfn.args)
240
- status, _ = self.call_conv.call_function(
241
- builder, func, types.void, argtypes, callargs)
242
-
243
- if debug:
244
- # Check error status
245
- with cgutils.if_likely(builder, status.is_ok):
246
- builder.ret_void()
247
-
248
- with builder.if_then(builder.not_(status.is_python_exc)):
249
- # User exception raised
250
- old = ir.Constant(gv_exc.type.pointee, None)
251
-
252
- # Use atomic cmpxchg to prevent rewriting the error status
253
- # Only the first error is recorded
254
-
255
- xchg = builder.cmpxchg(gv_exc, old, status.code,
256
- 'monotonic', 'monotonic')
257
- changed = builder.extract_value(xchg, 1)
258
-
259
- # If the xchange is successful, save the thread ID.
260
- sreg = nvvmutils.SRegBuilder(builder)
261
- with builder.if_then(changed):
262
- for dim, ptr, in zip("xyz", gv_tid):
263
- val = sreg.tid(dim)
264
- builder.store(val, ptr)
265
-
266
- for dim, ptr, in zip("xyz", gv_ctaid):
267
- val = sreg.ctaid(dim)
268
- builder.store(val, ptr)
269
-
270
- builder.ret_void()
271
-
272
- nvvm.set_cuda_kernel(wrapfn)
273
- library.add_ir_module(wrapper_module)
274
- if debug or lineinfo:
275
- debuginfo.finalize()
276
- library.finalize()
277
-
278
- if config.DUMP_LLVM:
279
- utils.dump_llvm(fndesc, wrapper_module)
280
-
281
- return library.get_function(wrapfn.name)
282
-
283
153
  def make_constant_array(self, builder, aryty, arr):
284
154
  """
285
155
  Unlike the parent version. This returns a a pointer in the constant