numba-cuda 0.0.16__tar.gz → 0.0.18__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (245)
  1. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/PKG-INFO +1 -1
  2. numba_cuda-0.0.18/numba_cuda/VERSION +1 -0
  3. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/codegen.py +15 -3
  4. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/cudadrv/driver.py +209 -47
  5. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/cudadrv/enums.py +3 -0
  6. numba_cuda-0.0.18/numba_cuda/numba/cuda/cudadrv/linkable_code.py +63 -0
  7. numba_cuda-0.0.18/numba_cuda/numba/cuda/cudadrv/mappings.py +24 -0
  8. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/device_init.py +3 -0
  9. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/dispatcher.py +2 -2
  10. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/intrinsics.py +6 -1
  11. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/printimpl.py +11 -0
  12. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/target.py +4 -2
  13. numba_cuda-0.0.18/numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +199 -0
  14. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +44 -4
  15. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_print.py +20 -0
  16. numba_cuda-0.0.18/numba_cuda/numba/cuda/tests/test_binary_generation/Makefile +51 -0
  17. numba_cuda-0.0.18/numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +163 -0
  18. numba_cuda-0.0.18/numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu +19 -0
  19. numba_cuda-0.0.18/numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu +3 -0
  20. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda.egg-info/PKG-INFO +1 -1
  21. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda.egg-info/SOURCES.txt +7 -0
  22. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/pyproject.toml +1 -1
  23. numba_cuda-0.0.16/numba_cuda/VERSION +0 -1
  24. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/LICENSE +0 -0
  25. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/README.md +0 -0
  26. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/__init__.py +0 -0
  27. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/_version.py +0 -0
  28. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/__init__.py +0 -0
  29. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/api.py +0 -0
  30. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/api_util.py +0 -0
  31. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/args.py +0 -0
  32. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/cg.py +0 -0
  33. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/compiler.py +0 -0
  34. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/cpp_function_wrappers.cu +0 -0
  35. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/cuda_fp16.h +0 -0
  36. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/cuda_fp16.hpp +0 -0
  37. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/cuda_paths.py +0 -0
  38. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/cudadecl.py +0 -0
  39. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/cudadrv/__init__.py +0 -0
  40. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/cudadrv/devicearray.py +0 -0
  41. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/cudadrv/devices.py +0 -0
  42. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/cudadrv/drvapi.py +0 -0
  43. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/cudadrv/dummyarray.py +0 -0
  44. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/cudadrv/error.py +0 -0
  45. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/cudadrv/libs.py +0 -0
  46. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/cudadrv/ndarray.py +0 -0
  47. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/cudadrv/nvrtc.py +0 -0
  48. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/cudadrv/nvvm.py +0 -0
  49. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/cudadrv/rtapi.py +0 -0
  50. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/cudadrv/runtime.py +0 -0
  51. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/cudaimpl.py +0 -0
  52. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/cudamath.py +0 -0
  53. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/decorators.py +0 -0
  54. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/descriptor.py +0 -0
  55. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/deviceufunc.py +0 -0
  56. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/errors.py +0 -0
  57. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/extending.py +0 -0
  58. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/initialize.py +0 -0
  59. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/intrinsic_wrapper.py +0 -0
  60. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/kernels/__init__.py +0 -0
  61. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/kernels/reduction.py +0 -0
  62. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/kernels/transpose.py +0 -0
  63. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/libdevice.py +0 -0
  64. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/libdevicedecl.py +0 -0
  65. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/libdevicefuncs.py +0 -0
  66. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/libdeviceimpl.py +0 -0
  67. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/mathimpl.py +0 -0
  68. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/models.py +0 -0
  69. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/nvvmutils.py +0 -0
  70. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/random.py +0 -0
  71. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/simulator/__init__.py +0 -0
  72. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/simulator/api.py +0 -0
  73. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/simulator/compiler.py +0 -0
  74. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +0 -0
  75. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +0 -0
  76. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/simulator/cudadrv/devices.py +0 -0
  77. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/simulator/cudadrv/driver.py +0 -0
  78. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +0 -0
  79. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +0 -0
  80. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/simulator/cudadrv/error.py +0 -0
  81. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/simulator/cudadrv/libs.py +0 -0
  82. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +0 -0
  83. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +0 -0
  84. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/simulator/kernel.py +0 -0
  85. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/simulator/kernelapi.py +0 -0
  86. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/simulator/reduction.py +0 -0
  87. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/simulator/vector_types.py +0 -0
  88. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/simulator_init.py +0 -0
  89. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/stubs.py +0 -0
  90. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/testing.py +0 -0
  91. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/__init__.py +0 -0
  92. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/__init__.py +0 -0
  93. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +0 -0
  94. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +0 -0
  95. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +0 -0
  96. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +0 -0
  97. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +0 -0
  98. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +0 -0
  99. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +0 -0
  100. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +0 -0
  101. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +0 -0
  102. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +0 -0
  103. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +0 -0
  104. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +0 -0
  105. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_events.py +0 -0
  106. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +0 -0
  107. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_init.py +0 -0
  108. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +0 -0
  109. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +0 -0
  110. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +0 -0
  111. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +0 -0
  112. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +0 -0
  113. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +0 -0
  114. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +0 -0
  115. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +0 -0
  116. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +0 -0
  117. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +0 -0
  118. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +0 -0
  119. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +0 -0
  120. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +0 -0
  121. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/__init__.py +0 -0
  122. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +0 -0
  123. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +0 -0
  124. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +0 -0
  125. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -0
  126. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +0 -0
  127. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +0 -0
  128. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_array.py +0 -0
  129. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +0 -0
  130. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +0 -0
  131. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +0 -0
  132. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +0 -0
  133. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +0 -0
  134. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_caching.py +0 -0
  135. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_casting.py +0 -0
  136. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +0 -0
  137. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +0 -0
  138. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_complex.py +0 -0
  139. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +0 -0
  140. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +0 -0
  141. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +0 -0
  142. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +0 -0
  143. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +0 -0
  144. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +0 -0
  145. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +0 -0
  146. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_debug.py +0 -0
  147. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +0 -0
  148. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +0 -0
  149. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +0 -0
  150. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_enums.py +0 -0
  151. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_errors.py +0 -0
  152. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_exception.py +0 -0
  153. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_extending.py +0 -0
  154. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +0 -0
  155. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_forall.py +0 -0
  156. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +0 -0
  157. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +0 -0
  158. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_globals.py +0 -0
  159. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +0 -0
  160. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +0 -0
  161. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +0 -0
  162. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +0 -0
  163. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +0 -0
  164. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +0 -0
  165. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +0 -0
  166. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_lang.py +0 -0
  167. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +0 -0
  168. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +0 -0
  169. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +0 -0
  170. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +0 -0
  171. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +0 -0
  172. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_math.py +0 -0
  173. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +0 -0
  174. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +0 -0
  175. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +0 -0
  176. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +0 -0
  177. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +0 -0
  178. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +0 -0
  179. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +0 -0
  180. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_operator.py +0 -0
  181. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +0 -0
  182. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_overload.py +0 -0
  183. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_powi.py +0 -0
  184. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +0 -0
  185. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_random.py +0 -0
  186. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +0 -0
  187. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +0 -0
  188. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +0 -0
  189. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +0 -0
  190. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +0 -0
  191. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +0 -0
  192. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_sm.py +0 -0
  193. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +0 -0
  194. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_sync.py +0 -0
  195. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +0 -0
  196. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +0 -0
  197. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +0 -0
  198. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +0 -0
  199. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +0 -0
  200. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +0 -0
  201. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +0 -0
  202. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +0 -0
  203. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +0 -0
  204. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_warning.py +0 -0
  205. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +0 -0
  206. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudasim/__init__.py +0 -0
  207. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudasim/support.py +0 -0
  208. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +0 -0
  209. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/data/__init__.py +0 -0
  210. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/data/cuda_include.cu +0 -0
  211. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/data/error.cu +0 -0
  212. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/data/jitlink.cu +0 -0
  213. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -0
  214. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/data/warn.cu +0 -0
  215. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/doc_examples/__init__.py +0 -0
  216. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +0 -0
  217. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +0 -0
  218. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +0 -0
  219. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +0 -0
  220. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +0 -0
  221. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +0 -0
  222. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +0 -0
  223. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +0 -0
  224. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/doc_examples/test_random.py +0 -0
  225. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +0 -0
  226. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +0 -0
  227. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +0 -0
  228. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +0 -0
  229. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/nocuda/__init__.py +0 -0
  230. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +0 -0
  231. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +0 -0
  232. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/nocuda/test_import.py +0 -0
  233. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +0 -0
  234. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +0 -0
  235. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/types.py +0 -0
  236. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/ufuncs.py +0 -0
  237. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/vector_types.py +0 -0
  238. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda/numba/cuda/vectorizers.py +0 -0
  239. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda.egg-info/dependency_links.txt +0 -0
  240. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda.egg-info/requires.txt +0 -0
  241. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/numba_cuda.egg-info/top_level.txt +0 -0
  242. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/setup.cfg +0 -0
  243. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/setup.py +0 -0
  244. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/site-packages/_numba_cuda_redirector.pth +0 -0
  245. {numba_cuda-0.0.16 → numba_cuda-0.0.18}/site-packages/_numba_cuda_redirector.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: numba-cuda
3
- Version: 0.0.16
3
+ Version: 0.0.18
4
4
  Summary: CUDA target for Numba
5
5
  Author: Anaconda Inc., NVIDIA Corporation
6
6
  License: BSD 2-clause
@@ -0,0 +1 @@
1
+ 0.0.18
@@ -59,8 +59,15 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
59
59
  get_cufunc), which may be of different compute capabilities.
60
60
  """
61
61
 
62
- def __init__(self, codegen, name, entry_name=None, max_registers=None,
63
- nvvm_options=None):
62
+ def __init__(
63
+ self,
64
+ codegen,
65
+ name,
66
+ entry_name=None,
67
+ max_registers=None,
68
+ lto=False,
69
+ nvvm_options=None
70
+ ):
64
71
  """
65
72
  codegen:
66
73
  Codegen object.
@@ -71,6 +78,8 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
71
78
  kernel and not a device function.
72
79
  max_registers:
73
80
  The maximum register usage to aim for when linking.
81
+ lto:
82
+ Whether to enable link-time optimization.
74
83
  nvvm_options:
75
84
  Dict of options to pass to NVVM.
76
85
  """
@@ -103,6 +112,7 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
103
112
  self._cufunc_cache = {}
104
113
 
105
114
  self._max_registers = max_registers
115
+ self._lto = lto
106
116
  if nvvm_options is None:
107
117
  nvvm_options = {}
108
118
  self._nvvm_options = nvvm_options
@@ -178,7 +188,9 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
178
188
  if cubin:
179
189
  return cubin
180
190
 
181
- linker = driver.Linker.new(max_registers=self._max_registers, cc=cc)
191
+ linker = driver.Linker.new(
192
+ max_registers=self._max_registers, cc=cc, lto=self._lto
193
+ )
182
194
 
183
195
  if linker.lto:
184
196
  ltoir = self.get_ltoir(cc=cc)
@@ -10,7 +10,6 @@ subsequent deallocation could further corrupt the CUDA context and causes the
10
10
  system to freeze in some cases.
11
11
 
12
12
  """
13
-
14
13
  import sys
15
14
  import os
16
15
  import ctypes
@@ -19,6 +18,7 @@ import functools
19
18
  import warnings
20
19
  import logging
21
20
  import threading
21
+ import traceback
22
22
  import asyncio
23
23
  import pathlib
24
24
  from itertools import product
@@ -35,6 +35,8 @@ from numba.core import utils, serialize, config
35
35
  from .error import CudaSupportError, CudaDriverError
36
36
  from .drvapi import API_PROTOTYPES
37
37
  from .drvapi import cu_occupancy_b2d_size, cu_stream_callback_pyobj, cu_uuid
38
+ from .mappings import FILE_EXTENSION_MAP
39
+ from .linkable_code import LinkableCode
38
40
  from numba.cuda.cudadrv import enums, drvapi, nvrtc
39
41
 
40
42
  USE_NV_BINDING = config.CUDA_USE_NVIDIA_BINDING
@@ -56,6 +58,52 @@ _py_decref.argtypes = [ctypes.py_object]
56
58
  _py_incref.argtypes = [ctypes.py_object]
57
59
 
58
60
 
61
+ def _readenv(name, ctor, default):
62
+ value = os.environ.get(name)
63
+ if value is None:
64
+ return default() if callable(default) else default
65
+ try:
66
+ if ctor is bool:
67
+ return value.lower() in {'1', "true"}
68
+ return ctor(value)
69
+ except Exception:
70
+ warnings.warn(
71
+ f"Environment variable '{name}' is defined but its associated "
72
+ f"value '{value}' could not be parsed.\n"
73
+ "The parse failed with exception:\n"
74
+ f"{traceback.format_exc()}",
75
+ RuntimeWarning
76
+ )
77
+ return default
78
+
79
+
80
+ _MVC_ERROR_MESSAGE = (
81
+ "Minor version compatibility requires ptxcompiler and cubinlinker packages "
82
+ "to be available"
83
+ )
84
+
85
+ ENABLE_PYNVJITLINK = (
86
+ _readenv("NUMBA_CUDA_ENABLE_PYNVJITLINK", bool, False)
87
+ or getattr(config, "CUDA_ENABLE_PYNVJITLINK", False)
88
+ )
89
+ if not hasattr(config, "CUDA_ENABLE_PYNVJITLINK"):
90
+ config.CUDA_ENABLE_PYNVJITLINK = ENABLE_PYNVJITLINK
91
+
92
+ if ENABLE_PYNVJITLINK:
93
+ try:
94
+ from pynvjitlink.api import NvJitLinker, NvJitLinkError
95
+ except ImportError:
96
+ raise ImportError(
97
+ "Using pynvjitlink requires the pynvjitlink package to be available"
98
+ )
99
+
100
+ if config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY:
101
+ raise ValueError(
102
+ "Can't set CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY and "
103
+ "CUDA_ENABLE_PYNVJITLINK at the same time"
104
+ )
105
+
106
+
59
107
  def make_logger():
60
108
  logger = logging.getLogger(__name__)
61
109
  # is logging configured?
@@ -432,7 +480,7 @@ class Driver(object):
432
480
 
433
481
  def get_version(self):
434
482
  """
435
- Returns the CUDA Runtime version as a tuple (major, minor).
483
+ Returns the CUDA Driver version as a tuple (major, minor).
436
484
  """
437
485
  if USE_NV_BINDING:
438
486
  version = driver.cuDriverGetVersion()
@@ -2546,38 +2594,47 @@ def launch_kernel(cufunc_handle,
2546
2594
  extra)
2547
2595
 
2548
2596
 
2549
- if USE_NV_BINDING:
2550
- jitty = binding.CUjitInputType
2551
- FILE_EXTENSION_MAP = {
2552
- 'o': jitty.CU_JIT_INPUT_OBJECT,
2553
- 'ptx': jitty.CU_JIT_INPUT_PTX,
2554
- 'a': jitty.CU_JIT_INPUT_LIBRARY,
2555
- 'lib': jitty.CU_JIT_INPUT_LIBRARY,
2556
- 'cubin': jitty.CU_JIT_INPUT_CUBIN,
2557
- 'fatbin': jitty.CU_JIT_INPUT_FATBINARY,
2558
- }
2559
- else:
2560
- FILE_EXTENSION_MAP = {
2561
- 'o': enums.CU_JIT_INPUT_OBJECT,
2562
- 'ptx': enums.CU_JIT_INPUT_PTX,
2563
- 'a': enums.CU_JIT_INPUT_LIBRARY,
2564
- 'lib': enums.CU_JIT_INPUT_LIBRARY,
2565
- 'cubin': enums.CU_JIT_INPUT_CUBIN,
2566
- 'fatbin': enums.CU_JIT_INPUT_FATBINARY,
2567
- }
2568
-
2569
-
2570
2597
  class Linker(metaclass=ABCMeta):
2571
2598
  """Abstract base class for linkers"""
2572
2599
 
2573
2600
  @classmethod
2574
- def new(cls, max_registers=0, lineinfo=False, cc=None):
2575
- if config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY:
2576
- return MVCLinker(max_registers, lineinfo, cc)
2577
- elif USE_NV_BINDING:
2578
- return CudaPythonLinker(max_registers, lineinfo, cc)
2601
+ def new(cls,
2602
+ max_registers=0,
2603
+ lineinfo=False,
2604
+ cc=None,
2605
+ lto=None,
2606
+ additional_flags=None
2607
+ ):
2608
+
2609
+ driver_ver = driver.get_version()
2610
+ if (
2611
+ config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY
2612
+ and driver_ver >= (12, 0)
2613
+ ):
2614
+ raise ValueError(
2615
+ "Use CUDA_ENABLE_PYNVJITLINK for CUDA >= 12.0 MVC"
2616
+ )
2617
+ if config.CUDA_ENABLE_PYNVJITLINK and driver_ver < (12, 0):
2618
+ raise ValueError(
2619
+ "Enabling pynvjitlink requires CUDA 12."
2620
+ )
2621
+ if config.CUDA_ENABLE_PYNVJITLINK:
2622
+ linker = PyNvJitLinker
2623
+
2624
+ elif config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY:
2625
+ linker = MVCLinker
2626
+ else:
2627
+ if USE_NV_BINDING:
2628
+ linker = CudaPythonLinker
2629
+ else:
2630
+ linker = CtypesLinker
2631
+
2632
+ if linker is PyNvJitLinker:
2633
+ return linker(max_registers, lineinfo, cc, lto, additional_flags)
2634
+ elif additional_flags or lto:
2635
+ raise ValueError("LTO and additional flags require PyNvJitLinker")
2579
2636
  else:
2580
- return CtypesLinker(max_registers, lineinfo, cc)
2637
+ return linker(max_registers, lineinfo, cc)
2581
2638
 
2582
2639
  @abstractmethod
2583
2640
  def __init__(self, max_registers, lineinfo, cc):
@@ -2626,19 +2683,42 @@ class Linker(metaclass=ABCMeta):
2626
2683
  cu = f.read()
2627
2684
  self.add_cu(cu, os.path.basename(path))
2628
2685
 
2629
- def add_file_guess_ext(self, path):
2630
- """Add a file to the link, guessing its type from its extension."""
2631
- ext = os.path.splitext(path)[1][1:]
2632
- if ext == '':
2633
- raise RuntimeError("Don't know how to link file with no extension")
2634
- elif ext == 'cu':
2635
- self.add_cu_file(path)
2686
def add_file_guess_ext(self, path_or_code):
    """
    Add either a file on disk or an in-memory LinkableCode object to the
    link.

    A string is treated as a path whose extension decides how it is
    linked: ``.cu`` goes to ``add_cu_file``, anything else is looked up in
    FILE_EXTENSION_MAP and passed to ``add_file``. A LinkableCode object
    is dispatched on its ``kind`` to ``add_cu`` or ``add_data``.

    Raises RuntimeError for a path with a missing or unknown extension,
    and TypeError for an argument that is neither a string nor a
    LinkableCode.
    """
    if not isinstance(path_or_code, str):
        # Not a path, so it has to be code that is already in memory.
        if not isinstance(path_or_code, LinkableCode):
            raise TypeError(
                "Expected path to file or a LinkableCode object"
            )

        code = path_or_code
        if code.kind == "cu":
            self.add_cu(code.data, code.name)
        else:
            self.add_data(code.data, code.kind, code.name)
        return

    suffix = pathlib.Path(path_or_code).suffix
    if not suffix:
        raise RuntimeError(
            "Don't know how to link file with no extension"
        )

    if suffix == '.cu':
        self.add_cu_file(path_or_code)
        return

    # The map is keyed by the extension without its leading dot.
    kind = FILE_EXTENSION_MAP.get(suffix.lstrip('.'), None)
    if kind is None:
        raise RuntimeError(
            "Don't know how to link file with extension "
            f"{suffix}"
        )
    self.add_file(path_or_code, kind)
2642
2722
 
2643
2723
  @abstractmethod
2644
2724
  def complete(self):
@@ -2649,12 +2729,6 @@ class Linker(metaclass=ABCMeta):
2649
2729
  """
2650
2730
 
2651
2731
 
2652
- _MVC_ERROR_MESSAGE = (
2653
- "Minor version compatibility requires ptxcompiler and cubinlinker packages "
2654
- "to be available"
2655
- )
2656
-
2657
-
2658
2732
  class MVCLinker(Linker):
2659
2733
  """
2660
2734
  Linker supporting Minor Version Compatibility, backed by the cubinlinker
@@ -2930,6 +3004,94 @@ class CudaPythonLinker(Linker):
2930
3004
  return bytes(np.ctypeslib.as_array(cubin_ptr, shape=(size,)))
2931
3005
 
2932
3006
 
3007
class PyNvJitLinker(Linker):
    """
    Linker implementation that delegates every operation to an
    ``NvJitLinker`` object.

    The option strings (``-arch``, ``-maxrregcount``, ``-lineinfo``,
    ``-lto`` and any caller-supplied flags) are assembled once in the
    constructor and kept on ``self.options``. ``self.lto`` records the
    ``lto`` argument as given.
    """

    def __init__(
        self,
        max_registers=None,
        lineinfo=False,
        cc=None,
        lto=False,
        additional_flags=None,
    ):
        """
        Build the option list and create the underlying ``NvJitLinker``.

        ``cc`` is required and must be a list or tuple ``(major, minor)``.
        ``max_registers``, ``lineinfo`` and ``lto`` each add their flag
        only when truthy. ``additional_flags``, when not None, is appended
        to the options unchanged.

        Raises RuntimeError when ``cc`` is None and TypeError when it is
        not a two-element list or tuple.
        """
        if cc is None:
            raise RuntimeError("PyNvJitLinker requires CC to be specified")
        # Check the length too, so a malformed cc gets the documented
        # error instead of an IndexError or a silently ignored element.
        if not isinstance(cc, (list, tuple)) or len(cc) != 2:
            raise TypeError("`cc` must be a list or tuple of length 2")

        major, minor = cc
        sm_ver = f"{major * 10 + minor}"
        arch = f"-arch=sm_{sm_ver}"
        options = [arch]
        if max_registers:
            options.append(f"-maxrregcount={max_registers}")
        if lineinfo:
            options.append("-lineinfo")
        if lto:
            options.append("-lto")
        if additional_flags is not None:
            options.extend(additional_flags)

        self._linker = NvJitLinker(*options)
        self.lto = lto
        self.options = options

    @property
    def info_log(self):
        """The info log of the underlying linker."""
        return self._linker.info_log

    @property
    def error_log(self):
        """The error log of the underlying linker."""
        return self._linker.error_log

    def add_ptx(self, ptx, name="<cudapy-ptx>"):
        """Add PTX held in memory to the link."""
        self._linker.add_ptx(ptx, name)

    def add_fatbin(self, fatbin, name="<external-fatbin>"):
        """Add a fatbin held in memory to the link."""
        self._linker.add_fatbin(fatbin, name)

    def add_ltoir(self, ltoir, name="<external-ltoir>"):
        """Add LTOIR held in memory to the link."""
        self._linker.add_ltoir(ltoir, name)

    def add_object(self, obj, name="<external-object>"):
        """Add an object file held in memory to the link."""
        self._linker.add_object(obj, name)

    def add_file(self, path, kind):
        """
        Read ``path`` as bytes and add its contents as ``kind``.

        The file's base name is used as the name of the linked item.
        Raises LinkerError when the file does not exist.
        """
        try:
            with open(path, "rb") as f:
                data = f.read()
        except FileNotFoundError as e:
            raise LinkerError(f"{path} not found") from e

        name = pathlib.Path(path).name
        self.add_data(data, kind, name)

    def add_data(self, data, kind, name):
        """
        Add in-memory ``data`` to the link, picking the underlying linker
        method from ``kind`` (a FILE_EXTENSION_MAP value).

        Raises LinkerError for an unrecognised ``kind`` or when the
        underlying linker reports an NvJitLinkError.
        """
        if kind == FILE_EXTENSION_MAP["cubin"]:
            fn = self._linker.add_cubin
        elif kind == FILE_EXTENSION_MAP["fatbin"]:
            fn = self._linker.add_fatbin
        elif kind == FILE_EXTENSION_MAP["a"]:
            fn = self._linker.add_library
        elif kind == FILE_EXTENSION_MAP["ptx"]:
            return self.add_ptx(data, name)
        elif kind == FILE_EXTENSION_MAP["o"]:
            fn = self._linker.add_object
        elif kind == FILE_EXTENSION_MAP["ltoir"]:
            fn = self._linker.add_ltoir
        else:
            raise LinkerError(f"Don't know how to link {kind}")

        try:
            fn(data, name)
        except NvJitLinkError as e:
            # Carry the underlying message over so the LinkerError is
            # informative on its own.
            raise LinkerError(str(e)) from e

    def complete(self):
        """
        Finish the link and return the linked cubin.

        Raises LinkerError when the underlying linker reports an
        NvJitLinkError.
        """
        try:
            return self._linker.get_linked_cubin()
        except NvJitLinkError as e:
            raise LinkerError(str(e)) from e
3094
+
2933
3095
  # -----------------------------------------------------------------------------
2934
3096
 
2935
3097
 
@@ -309,6 +309,9 @@ CU_JIT_INPUT_OBJECT = 3
309
309
  # Applicable options: PTX compiler options, ::CU_JIT_FALLBACK_STRATEGY
310
310
  CU_JIT_INPUT_LIBRARY = 4
311
311
 
312
+ # LTO IR
313
+ CU_JIT_INPUT_NVVM = 5
314
+
312
315
  CU_JIT_NUM_INPUT_TYPES = 6
313
316
 
314
317
 
@@ -0,0 +1,63 @@
1
+ from .mappings import FILE_EXTENSION_MAP
2
+
3
+
4
class LinkableCode:
    """An object that can be passed in the `link` list argument to `@cuda.jit`
    kernels to supply code to be linked from memory.

    `data` holds the code itself. `name` is the explicitly supplied name,
    or `default_name` when none (or an empty one) was given.
    """

    # Fallback returned by `name`. Defined here so that the property also
    # works on this base class and on subclasses that do not set their own.
    default_name = "<unnamed-linkable-code>"

    def __init__(self, data, name=None):
        self.data = data
        self._name = name

    @property
    def name(self):
        """The name given at construction, else the class default."""
        return self._name or self.default_name
15
+
16
+
17
class PTXSource(LinkableCode):
    """In-memory PTX source code."""

    # Name reported when the object is created without one.
    default_name = "<unnamed-ptx>"
    # Input type the linker should treat the data as.
    kind = FILE_EXTENSION_MAP["ptx"]
22
+
23
+
24
class CUSource(LinkableCode):
    """In-memory CUDA C/C++ source code."""

    # Name reported when the object is created without one.
    default_name = "<unnamed-cu>"
    # A plain string rather than a FILE_EXTENSION_MAP entry.
    kind = "cu"
29
+
30
+
31
class Fatbin(LinkableCode):
    """In-memory fatbin ELF."""

    # Name reported when the object is created without one.
    default_name = "<unnamed-fatbin>"
    # Input type the linker should treat the data as.
    kind = FILE_EXTENSION_MAP["fatbin"]
36
+
37
+
38
class Cubin(LinkableCode):
    """In-memory cubin ELF."""

    # Name reported when the object is created without one.
    default_name = "<unnamed-cubin>"
    # Input type the linker should treat the data as.
    kind = FILE_EXTENSION_MAP["cubin"]
43
+
44
+
45
class Archive(LinkableCode):
    """In-memory archive of objects."""

    # Name reported when the object is created without one.
    default_name = "<unnamed-archive>"
    # Input type the linker should treat the data as.
    kind = FILE_EXTENSION_MAP["a"]
50
+
51
+
52
class Object(LinkableCode):
    """In-memory object file."""

    # Name reported when the object is created without one.
    default_name = "<unnamed-object>"
    # Input type the linker should treat the data as.
    kind = FILE_EXTENSION_MAP["o"]
57
+
58
+
59
class LTOIR(LinkableCode):
    """An LTOIR file in memory"""

    # Use the FILE_EXTENSION_MAP entry, as the sibling classes do. The
    # linker's add_data compares `kind` against FILE_EXTENSION_MAP["ltoir"],
    # so the bare string "ltoir" would never match and the object would be
    # rejected as an unknown input type.
    kind = FILE_EXTENSION_MAP["ltoir"]
    default_name = "<unnamed-ltoir>"
@@ -0,0 +1,24 @@
1
+ from numba import config
2
+ from . import enums
3
# Pick the namespace that supplies the CU_JIT_INPUT_* constants: the
# binding's CUjitInputType when the NVIDIA binding is enabled, otherwise
# the local enums module.
if config.CUDA_USE_NVIDIA_BINDING:
    from cuda import cuda
    jitty = cuda.CUjitInputType
    _jit_input_types = jitty
else:
    _jit_input_types = enums

# Maps a file extension (without the leading dot) to the JIT input type
# used when linking a file of that kind.
FILE_EXTENSION_MAP = {
    extension: getattr(_jit_input_types, constant_name)
    for extension, constant_name in (
        ('o', 'CU_JIT_INPUT_OBJECT'),
        ('ptx', 'CU_JIT_INPUT_PTX'),
        ('a', 'CU_JIT_INPUT_LIBRARY'),
        ('lib', 'CU_JIT_INPUT_LIBRARY'),
        ('cubin', 'CU_JIT_INPUT_CUBIN'),
        ('fatbin', 'CU_JIT_INPUT_FATBINARY'),
        ('ltoir', 'CU_JIT_INPUT_NVVM'),
    )
}
@@ -31,6 +31,9 @@ from .intrinsic_wrapper import (all_sync, any_sync, eq_sync, ballot_sync,
31
31
  shfl_xor_sync)
32
32
 
33
33
  from .kernels import reduction
34
+ from numba.cuda.cudadrv.linkable_code import (
35
+ Archive, CUSource, Cubin, Fatbin, LinkableCode, LTOIR, Object, PTXSource
36
+ )
34
37
 
35
38
  reduce = Reduce = reduction.Reduce
36
39
 
@@ -46,7 +46,7 @@ class _Kernel(serialize.ReduceMixin):
46
46
  @global_compiler_lock
47
47
  def __init__(self, py_func, argtypes, link=None, debug=False,
48
48
  lineinfo=False, inline=False, fastmath=False, extensions=None,
49
- max_registers=None, opt=True, device=False):
49
+ max_registers=None, lto=False, opt=True, device=False):
50
50
 
51
51
  if device:
52
52
  raise RuntimeError('Cannot compile a device function as a kernel')
@@ -94,7 +94,7 @@ class _Kernel(serialize.ReduceMixin):
94
94
  lib, kernel = tgt_ctx.prepare_cuda_kernel(cres.library, cres.fndesc,
95
95
  debug, lineinfo, nvvm_options,
96
96
  filename, linenum,
97
- max_registers)
97
+ max_registers, lto)
98
98
 
99
99
  if not link:
100
100
  link = []
@@ -4,7 +4,7 @@ from numba import cuda, types
4
4
  from numba.core import cgutils
5
5
  from numba.core.errors import RequireLiteralValue
6
6
  from numba.core.typing import signature
7
- from numba.core.extending import overload_attribute
7
+ from numba.core.extending import overload_attribute, overload_method
8
8
  from numba.cuda import nvvmutils
9
9
  from numba.cuda.extending import intrinsic
10
10
 
@@ -196,3 +196,8 @@ def syncthreads_or(typingctx, predicate):
196
196
  '''
197
197
  fname = 'llvm.nvvm.barrier0.or'
198
198
  return _syncthreads_predicate(typingctx, predicate, fname)
199
+
200
+
201
@overload_method(types.Integer, 'bit_count', target='cuda')
def integer_bit_count(i):
    """Overload ``bit_count`` on integer types for the CUDA target.

    The returned implementation calls ``cuda.popc`` on the integer.
    """
    def bit_count_impl(i):
        return cuda.popc(i)

    return bit_count_impl
@@ -63,6 +63,17 @@ def dim3_print_impl(ty, context, builder, val):
63
63
  return rawfmt, [x, y, z]
64
64
 
65
65
 
66
@print_item.register(types.Boolean)
def bool_print_impl(ty, context, builder, val):
    """Print implementation for boolean values.

    Returns the format string ``"%s"`` and a one-element argument list
    holding a pointer to the string constant "True" or "False", chosen
    according to ``val``.
    """
    text_if_true = context.insert_string_const_addrspace(builder, "True")
    text_if_false = context.insert_string_const_addrspace(builder, "False")
    # Start from "False" and overwrite the slot only when val is set.
    chosen_ptr = cgutils.alloca_once_value(builder, text_if_false)
    with builder.if_then(val):
        builder.store(text_if_true, chosen_ptr)
    return "%s", [builder.load(chosen_ptr)]
75
+
76
+
66
77
  @lower(print, types.VarArg(types.Any))
67
78
  def print_varargs(context, builder, sig, args):
68
79
  """This function is a generic 'print' wrapper for arbitrary types.
@@ -148,7 +148,7 @@ class CUDATargetContext(BaseContext):
148
148
 
149
149
  def prepare_cuda_kernel(self, codelib, fndesc, debug, lineinfo,
150
150
  nvvm_options, filename, linenum,
151
- max_registers=None):
151
+ max_registers=None, lto=False):
152
152
  """
153
153
  Adapt a code library ``codelib`` with the numba compiled CUDA kernel
154
154
  with name ``fname`` and arguments ``argtypes`` for NVVM.
@@ -175,7 +175,9 @@ class CUDATargetContext(BaseContext):
175
175
  library = self.codegen().create_library(f'{codelib.name}_kernel_',
176
176
  entry_name=kernel_name,
177
177
  nvvm_options=nvvm_options,
178
- max_registers=max_registers)
178
+ max_registers=max_registers,
179
+ lto=lto
180
+ )
179
181
  library.add_linking_library(codelib)
180
182
  wrapper = self.generate_kernel_wrapper(library, fndesc, kernel_name,
181
183
  debug, lineinfo, filename,