numba-cuda 0.0.18__tar.gz → 0.0.19__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (250)
  1. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/PKG-INFO +1 -1
  2. numba_cuda-0.0.19/numba_cuda/VERSION +1 -0
  3. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/cuda_paths.py +68 -0
  4. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/cudadrv/devicearray.py +4 -1
  5. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/cudadrv/libs.py +38 -0
  6. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/cudadrv/nvrtc.py +9 -4
  7. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/dispatcher.py +46 -6
  8. numba_cuda-0.0.19/numba_cuda/numba/cuda/runtime/nrt.cu +190 -0
  9. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/simulator/api.py +14 -0
  10. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/target.py +4 -0
  11. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_print.py +2 -2
  12. numba_cuda-0.0.19/numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +48 -0
  13. numba_cuda-0.0.19/numba_cuda/numba/cuda/tests/nrt/__init__.py +8 -0
  14. numba_cuda-0.0.19/numba_cuda/numba/cuda/tests/nrt/mock_numpy.py +42 -0
  15. numba_cuda-0.0.19/numba_cuda/numba/cuda/tests/nrt/test_nrt.py +110 -0
  16. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +8 -1
  17. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda.egg-info/PKG-INFO +1 -1
  18. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda.egg-info/SOURCES.txt +5 -0
  19. numba_cuda-0.0.18/numba_cuda/VERSION +0 -1
  20. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/LICENSE +0 -0
  21. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/README.md +0 -0
  22. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/__init__.py +0 -0
  23. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/_version.py +0 -0
  24. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/__init__.py +0 -0
  25. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/api.py +0 -0
  26. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/api_util.py +0 -0
  27. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/args.py +0 -0
  28. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/cg.py +0 -0
  29. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/codegen.py +0 -0
  30. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/compiler.py +0 -0
  31. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/cpp_function_wrappers.cu +0 -0
  32. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/cuda_fp16.h +0 -0
  33. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/cuda_fp16.hpp +0 -0
  34. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/cudadecl.py +0 -0
  35. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/cudadrv/__init__.py +0 -0
  36. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/cudadrv/devices.py +0 -0
  37. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/cudadrv/driver.py +0 -0
  38. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/cudadrv/drvapi.py +0 -0
  39. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/cudadrv/dummyarray.py +0 -0
  40. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/cudadrv/enums.py +0 -0
  41. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/cudadrv/error.py +0 -0
  42. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/cudadrv/linkable_code.py +0 -0
  43. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/cudadrv/mappings.py +0 -0
  44. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/cudadrv/ndarray.py +0 -0
  45. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/cudadrv/nvvm.py +0 -0
  46. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/cudadrv/rtapi.py +0 -0
  47. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/cudadrv/runtime.py +0 -0
  48. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/cudaimpl.py +0 -0
  49. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/cudamath.py +0 -0
  50. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/decorators.py +0 -0
  51. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/descriptor.py +0 -0
  52. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/device_init.py +0 -0
  53. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/deviceufunc.py +0 -0
  54. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/errors.py +0 -0
  55. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/extending.py +0 -0
  56. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/initialize.py +0 -0
  57. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/intrinsic_wrapper.py +0 -0
  58. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/intrinsics.py +0 -0
  59. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/kernels/__init__.py +0 -0
  60. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/kernels/reduction.py +0 -0
  61. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/kernels/transpose.py +0 -0
  62. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/libdevice.py +0 -0
  63. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/libdevicedecl.py +0 -0
  64. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/libdevicefuncs.py +0 -0
  65. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/libdeviceimpl.py +0 -0
  66. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/mathimpl.py +0 -0
  67. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/models.py +0 -0
  68. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/nvvmutils.py +0 -0
  69. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/printimpl.py +0 -0
  70. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/random.py +0 -0
  71. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/simulator/__init__.py +0 -0
  72. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/simulator/compiler.py +0 -0
  73. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +0 -0
  74. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +0 -0
  75. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/simulator/cudadrv/devices.py +0 -0
  76. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/simulator/cudadrv/driver.py +0 -0
  77. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +0 -0
  78. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +0 -0
  79. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/simulator/cudadrv/error.py +0 -0
  80. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/simulator/cudadrv/libs.py +0 -0
  81. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +0 -0
  82. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +0 -0
  83. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/simulator/kernel.py +0 -0
  84. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/simulator/kernelapi.py +0 -0
  85. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/simulator/reduction.py +0 -0
  86. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/simulator/vector_types.py +0 -0
  87. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/simulator_init.py +0 -0
  88. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/stubs.py +0 -0
  89. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/testing.py +0 -0
  90. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/__init__.py +0 -0
  91. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudadrv/__init__.py +0 -0
  92. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +0 -0
  93. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +0 -0
  94. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +0 -0
  95. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +0 -0
  96. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +0 -0
  97. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +0 -0
  98. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +0 -0
  99. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +0 -0
  100. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +0 -0
  101. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +0 -0
  102. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +0 -0
  103. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +0 -0
  104. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudadrv/test_events.py +0 -0
  105. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +0 -0
  106. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudadrv/test_init.py +0 -0
  107. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +0 -0
  108. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +0 -0
  109. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +0 -0
  110. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +0 -0
  111. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +0 -0
  112. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +0 -0
  113. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +0 -0
  114. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +0 -0
  115. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +0 -0
  116. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +0 -0
  117. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +0 -0
  118. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +0 -0
  119. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +0 -0
  120. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +0 -0
  121. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/__init__.py +0 -0
  122. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +0 -0
  123. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +0 -0
  124. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +0 -0
  125. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -0
  126. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +0 -0
  127. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +0 -0
  128. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_array.py +0 -0
  129. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +0 -0
  130. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +0 -0
  131. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +0 -0
  132. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +0 -0
  133. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +0 -0
  134. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_caching.py +0 -0
  135. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_casting.py +0 -0
  136. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +0 -0
  137. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +0 -0
  138. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_complex.py +0 -0
  139. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +0 -0
  140. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +0 -0
  141. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +0 -0
  142. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +0 -0
  143. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +0 -0
  144. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +0 -0
  145. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +0 -0
  146. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_debug.py +0 -0
  147. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +0 -0
  148. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +0 -0
  149. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +0 -0
  150. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_enums.py +0 -0
  151. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_errors.py +0 -0
  152. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_exception.py +0 -0
  153. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_extending.py +0 -0
  154. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +0 -0
  155. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_forall.py +0 -0
  156. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +0 -0
  157. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +0 -0
  158. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_globals.py +0 -0
  159. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +0 -0
  160. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +0 -0
  161. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +0 -0
  162. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +0 -0
  163. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +0 -0
  164. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +0 -0
  165. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +0 -0
  166. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +0 -0
  167. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_lang.py +0 -0
  168. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +0 -0
  169. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +0 -0
  170. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +0 -0
  171. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +0 -0
  172. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +0 -0
  173. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_math.py +0 -0
  174. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +0 -0
  175. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +0 -0
  176. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +0 -0
  177. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +0 -0
  178. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +0 -0
  179. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +0 -0
  180. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +0 -0
  181. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_operator.py +0 -0
  182. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +0 -0
  183. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_overload.py +0 -0
  184. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_powi.py +0 -0
  185. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +0 -0
  186. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_random.py +0 -0
  187. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +0 -0
  188. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +0 -0
  189. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +0 -0
  190. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +0 -0
  191. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +0 -0
  192. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +0 -0
  193. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_sm.py +0 -0
  194. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +0 -0
  195. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_sync.py +0 -0
  196. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +0 -0
  197. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +0 -0
  198. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +0 -0
  199. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +0 -0
  200. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +0 -0
  201. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +0 -0
  202. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +0 -0
  203. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +0 -0
  204. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +0 -0
  205. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_warning.py +0 -0
  206. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +0 -0
  207. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudasim/__init__.py +0 -0
  208. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudasim/support.py +0 -0
  209. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +0 -0
  210. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/data/__init__.py +0 -0
  211. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/data/cuda_include.cu +0 -0
  212. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/data/error.cu +0 -0
  213. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/data/jitlink.cu +0 -0
  214. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -0
  215. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/data/warn.cu +0 -0
  216. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/doc_examples/__init__.py +0 -0
  217. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +0 -0
  218. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +0 -0
  219. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +0 -0
  220. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +0 -0
  221. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +0 -0
  222. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +0 -0
  223. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +0 -0
  224. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +0 -0
  225. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/doc_examples/test_random.py +0 -0
  226. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +0 -0
  227. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +0 -0
  228. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +0 -0
  229. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +0 -0
  230. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/nocuda/__init__.py +0 -0
  231. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +0 -0
  232. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +0 -0
  233. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/nocuda/test_import.py +0 -0
  234. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +0 -0
  235. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +0 -0
  236. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/test_binary_generation/Makefile +0 -0
  237. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu +0 -0
  238. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu +0 -0
  239. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/types.py +0 -0
  240. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/ufuncs.py +0 -0
  241. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/vector_types.py +0 -0
  242. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda/numba/cuda/vectorizers.py +0 -0
  243. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda.egg-info/dependency_links.txt +0 -0
  244. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda.egg-info/requires.txt +0 -0
  245. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/numba_cuda.egg-info/top_level.txt +0 -0
  246. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/pyproject.toml +0 -0
  247. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/setup.cfg +0 -0
  248. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/setup.py +0 -0
  249. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/site-packages/_numba_cuda_redirector.pth +0 -0
  250. {numba_cuda-0.0.18 → numba_cuda-0.0.19}/site-packages/_numba_cuda_redirector.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: numba-cuda
3
- Version: 0.0.18
3
+ Version: 0.0.19
4
4
  Summary: CUDA target for Numba
5
5
  Author: Anaconda Inc., NVIDIA Corporation
6
6
  License: BSD 2-clause
@@ -0,0 +1 @@
1
+ 0.0.19
@@ -2,9 +2,11 @@ import sys
2
2
  import re
3
3
  import os
4
4
  from collections import namedtuple
5
+ import platform
5
6
 
6
7
  from numba.core.config import IS_WIN32
7
8
  from numba.misc.findlib import find_lib, find_file
9
+ from numba import config
8
10
 
9
11
 
10
12
  _env_path_tuple = namedtuple('_env_path_tuple', ['by', 'info'])
@@ -241,6 +243,7 @@ def get_cuda_paths():
241
243
  'libdevice': _get_libdevice_paths(),
242
244
  'cudalib_dir': _get_cudalib_dir(),
243
245
  'static_cudalib_dir': _get_static_cudalib_dir(),
246
+ 'include_dir': _get_include_dir(),
244
247
  }
245
248
  # Cache result
246
249
  get_cuda_paths._cached_result = d
@@ -256,3 +259,68 @@ def get_debian_pkg_libdevice():
256
259
  if not os.path.exists(pkg_libdevice_location):
257
260
  return None
258
261
  return pkg_libdevice_location
262
+
263
+
264
+ def get_current_cuda_target_name():
265
+ """Determine conda's CTK target folder based on system and machine arch.
266
+
267
+ The CTK conda package places headers according to the machine architecture. For example,
268
+ an `x86_64` machine places headers under `$CONDA_PREFIX/targets/x86_64-linux`, and
269
+ an `aarch64` machine places them under `$CONDA_PREFIX/targets/sbsa-linux`. Read more about the
270
+ nuances at cudart's conda feedstock:
271
+ https://github.com/conda-forge/cuda-cudart-feedstock/blob/main/recipe/meta.yaml#L8-L11 # noqa: E501
272
+ """
273
+ system = platform.system()
274
+ machine = platform.machine()
275
+
276
+ if system == "Linux":
277
+ arch_to_targets = {
278
+ 'x86_64': 'x86_64-linux',
279
+ 'aarch64': 'sbsa-linux'
280
+ }
281
+ elif system == "Windows":
282
+ arch_to_targets = {
283
+ 'AMD64': 'x64',
284
+ }
285
+ else:
286
+ arch_to_targets = {}
287
+
288
+ return arch_to_targets.get(machine, None)
289
+
290
+
291
+ def get_conda_include_dir():
292
+ """
293
+ Return the include directory in the current conda environment, if one
294
+ is active and it exists.
295
+ """
296
+ is_conda_env = os.path.exists(os.path.join(sys.prefix, 'conda-meta'))
297
+ if not is_conda_env:
298
+ return
299
+
300
+ if platform.system() == "Windows":
301
+ include_dir = os.path.join(
302
+ sys.prefix, 'Library', 'include'
303
+ )
304
+ elif target_name := get_current_cuda_target_name():
305
+ include_dir = os.path.join(
306
+ sys.prefix, 'targets', target_name, 'include'
307
+ )
308
+ else:
309
+ # A fallback for when the target cannot be determined,
310
+ # though usually this shouldn't happen.
311
+ include_dir = os.path.join(sys.prefix, 'include')
312
+
313
+ if os.path.exists(include_dir):
314
+ return include_dir
315
+ return
316
+
317
+
318
+ def _get_include_dir():
319
+ """Find the root include directory."""
320
+ options = [
321
+ ('Conda environment (NVIDIA package)', get_conda_include_dir()),
322
+ ('CUDA_INCLUDE_PATH Config Entry', config.CUDA_INCLUDE_PATH),
323
+ # TODO: add others
324
+ ]
325
+ by, include_dir = _find_valid_path(options)
326
+ return _env_path_tuple(by, include_dir)
@@ -876,7 +876,10 @@ def auto_device(obj, stream=0, copy=True, user_explicit=False):
876
876
  sentry_contiguous(obj)
877
877
  devobj = from_array_like(obj, stream=stream)
878
878
  if copy:
879
- if config.CUDA_WARN_ON_IMPLICIT_COPY:
879
+ if (
880
+ config.CUDA_WARN_ON_IMPLICIT_COPY
881
+ and not config.DISABLE_PERFORMANCE_WARNINGS
882
+ ):
880
883
  if (
881
884
  not user_explicit and
882
885
  (not isinstance(obj, DeviceNDArray)
@@ -18,6 +18,7 @@ from numba.misc.findlib import find_lib
18
18
  from numba.cuda.cuda_paths import get_cuda_paths
19
19
  from numba.cuda.cudadrv.driver import locate_driver_and_loader, load_driver
20
20
  from numba.cuda.cudadrv.error import CudaSupportError
21
+ from numba.core import config
21
22
 
22
23
 
23
24
  if sys.platform == 'win32':
@@ -60,6 +61,24 @@ def get_cudalib(lib, static=False):
60
61
  return max(candidates) if candidates else namepattern % lib
61
62
 
62
63
 
64
+ def get_cuda_include_dir():
65
+ """
66
+ Find the path to cuda include dir based on a list of default locations.
67
+ Note that this does not list the `CUDA_INCLUDE_PATH` entry in user
68
+ configuration.
69
+ """
70
+
71
+ return get_cuda_paths()['include_dir'].info
72
+
73
+
74
+ def check_cuda_include_dir(path):
75
+ if path is None or not os.path.exists(path):
76
+ raise FileNotFoundError(f"{path} not found")
77
+
78
+ if not os.path.exists(os.path.join(path, "cuda_runtime.h")):
79
+ raise FileNotFoundError(f"Unable to find cuda_runtime.h from {path}")
80
+
81
+
63
82
  def open_cudalib(lib):
64
83
  path = get_cudalib(lib)
65
84
  return ctypes.CDLL(path)
@@ -75,6 +94,8 @@ def _get_source_variable(lib, static=False):
75
94
  return get_cuda_paths()['nvvm'].by
76
95
  elif lib == 'libdevice':
77
96
  return get_cuda_paths()['libdevice'].by
97
+ elif lib == 'include_dir':
98
+ return get_cuda_paths()['include_dir'].by
78
99
  else:
79
100
  dir_type = 'static_cudalib_dir' if static else 'cudalib_dir'
80
101
  return get_cuda_paths()[dir_type].by
@@ -173,4 +194,21 @@ def test():
173
194
  print('\tERROR: failed to find %s:\n%s' % (lib, e))
174
195
  failed = True
175
196
 
197
+ # Check cuda include paths
198
+
199
+ print("Include directory configuration variable:")
200
+ print(f"\tCUDA_INCLUDE_PATH={config.CUDA_INCLUDE_PATH}")
201
+
202
+ where = _get_source_variable('include_dir')
203
+ print(f'Finding include directory from {where}')
204
+ include = get_cuda_include_dir()
205
+ print('\tLocated at', include)
206
+ try:
207
+ print('\tChecking include directory', end='...')
208
+ check_cuda_include_dir(include)
209
+ print('\tok')
210
+ except FileNotFoundError as e:
211
+ print('\tERROR: failed to find cuda include directory:\n%s' % e)
212
+ failed = True
213
+
176
214
  return not failed
@@ -1,9 +1,8 @@
1
1
  from ctypes import byref, c_char, c_char_p, c_int, c_size_t, c_void_p, POINTER
2
2
  from enum import IntEnum
3
- from numba.core import config
4
3
  from numba.cuda.cudadrv.error import (NvrtcError, NvrtcCompilationError,
5
4
  NvrtcSupportError)
6
-
5
+ from numba.cuda.cuda_paths import get_cuda_paths
7
6
  import functools
8
7
  import os
9
8
  import threading
@@ -233,12 +232,18 @@ def compile(src, name, cc):
233
232
  # being optimized away.
234
233
  major, minor = cc
235
234
  arch = f'--gpu-architecture=compute_{major}{minor}'
236
- include = f'-I{config.CUDA_INCLUDE_PATH}'
235
+
236
+ cuda_include = [
237
+ f"-I{get_cuda_paths()['include_dir'].info}",
238
+ ]
237
239
 
238
240
  cudadrv_path = os.path.dirname(os.path.abspath(__file__))
239
241
  numba_cuda_path = os.path.dirname(cudadrv_path)
240
242
  numba_include = f'-I{numba_cuda_path}'
241
- options = [arch, include, numba_include, '-rdc', 'true']
243
+ options = [arch, *cuda_include, numba_include, '-rdc', 'true']
244
+
245
+ if nvrtc.get_version() < (12, 0):
246
+ options += ["-std=c++17"]
242
247
 
243
248
  # Compile the program
244
249
  compile_error = nvrtc.compile_program(program, options)
@@ -1,5 +1,6 @@
1
1
  import numpy as np
2
2
  import os
3
+ import re
3
4
  import sys
4
5
  import ctypes
5
6
  import functools
@@ -43,6 +44,21 @@ class _Kernel(serialize.ReduceMixin):
43
44
  object launches the kernel on the device.
44
45
  '''
45
46
 
47
# Names of the NRT runtime entry points. maybe_link_nrt() joins these into a
# regex alternation and looks for matching `.extern .func` declarations in
# the generated PTX to decide whether runtime/nrt.cu must be linked.
NRT_functions = [
    "NRT_Allocate",
    "NRT_MemInfo_init",
    "NRT_MemInfo_new",
    "NRT_Free",
    "NRT_dealloc",
    "NRT_MemInfo_destroy",
    "NRT_MemInfo_call_dtor",
    "NRT_MemInfo_data_fast",
    "NRT_MemInfo_alloc_aligned",
    "NRT_Allocate_External",
    "NRT_decref",
    "NRT_incref",
]
61
+
46
62
  @global_compiler_lock
47
63
  def __init__(self, py_func, argtypes, link=None, debug=False,
48
64
  lineinfo=False, inline=False, fastmath=False, extensions=None,
@@ -105,16 +121,20 @@ class _Kernel(serialize.ReduceMixin):
105
121
  if self.cooperative:
106
122
  lib.needs_cudadevrt = True
107
123
 
124
+ basedir = os.path.dirname(os.path.abspath(__file__))
125
+ asm = lib.get_asm_str()
126
+
108
127
  res = [fn for fn in cuda_fp16_math_funcs
109
- if (f'__numba_wrapper_{fn}' in lib.get_asm_str())]
128
+ if (f'__numba_wrapper_{fn}' in asm)]
110
129
 
111
130
  if res:
112
131
  # Path to the source containing the foreign function
113
- basedir = os.path.dirname(os.path.abspath(__file__))
114
132
  functions_cu_path = os.path.join(basedir,
115
133
  'cpp_function_wrappers.cu')
116
134
  link.append(functions_cu_path)
117
135
 
136
+ link = self.maybe_link_nrt(link, tgt_ctx, asm)
137
+
118
138
  for filepath in link:
119
139
  lib.add_linking_file(filepath)
120
140
 
@@ -136,6 +156,25 @@ class _Kernel(serialize.ReduceMixin):
136
156
  self.lifted = []
137
157
  self.reload_init = []
138
158
 
159
def maybe_link_nrt(self, link, tgt_ctx, asm):
    """Append the NRT source file to *link* when the PTX needs it.

    Parameters:
        link: list of file paths to link; it is extended in place.
        tgt_ctx: target context; only its ``enable_nrt`` attribute is read.
        asm: PTX text of the compiled library.

    Returns:
        The same *link* list, with the path to ``runtime/nrt.cu`` appended
        if NRT is enabled and *asm* declares at least one of
        ``self.NRT_functions`` as an external function.
    """
    if not tgt_ctx.enable_nrt:
        return link

    # Matches declarations such as
    #   .extern .func (.param .b64 retval) NRT_Allocate ( ... ) ;
    # where the return-parameter group is optional.
    all_nrt = "|".join(self.NRT_functions)
    pattern = (
        r'\.extern\s+\.func\s+(?:\s*\(.+\)\s*)?('
        + all_nrt + r')\s*\([^)]*\)\s*;'
    )

    # Only the existence of a declaration matters, so stop at the first
    # match instead of collecting every match in the PTX.
    if re.search(pattern, asm) is None:
        return link

    basedir = os.path.dirname(os.path.abspath(__file__))
    link.append(os.path.join(basedir, 'runtime', 'nrt.cu'))
    return link
177
+
139
178
  @property
140
179
  def library(self):
141
180
  return self._codelibrary
@@ -385,7 +424,6 @@ class _Kernel(serialize.ReduceMixin):
385
424
 
386
425
  if isinstance(ty, types.Array):
387
426
  devary = wrap_arg(val).to_device(retr, stream)
388
-
389
427
  c_intp = ctypes.c_ssize_t
390
428
 
391
429
  meminfo = ctypes.c_void_p(0)
@@ -519,7 +557,10 @@ class _LaunchConfiguration:
519
557
  self.stream = stream
520
558
  self.sharedmem = sharedmem
521
559
 
522
- if config.CUDA_LOW_OCCUPANCY_WARNINGS:
560
+ if (
561
+ config.CUDA_LOW_OCCUPANCY_WARNINGS
562
+ and not config.DISABLE_PERFORMANCE_WARNINGS
563
+ ):
523
564
  # Warn when the grid has fewer than 128 blocks. This number is
524
565
  # chosen somewhat heuristically - ideally the minimum is 2 times
525
566
  # the number of SMs, but the number of SMs varies between devices -
@@ -708,8 +749,7 @@ class CUDADispatcher(Dispatcher, serialize.ReduceMixin):
708
749
  *args*.
709
750
  '''
710
751
  cc = get_current_device().compute_capability
711
- argtypes = tuple(
712
- [self.typingctx.resolve_argument_type(a) for a in args])
752
+ argtypes = tuple(self.typeof_pyval(a) for a in args)
713
753
  if self.specialized:
714
754
  raise RuntimeError('Dispatcher already specialized')
715
755
 
@@ -0,0 +1,190 @@
1
+ #ifndef _NRT_H
2
+ #define _NRT_H
3
+
4
+ #include <cuda/atomic>
5
+
6
+ typedef void (*NRT_dtor_function)(void* ptr, size_t size, void* info);
7
+ typedef void (*NRT_dealloc_func)(void* ptr, void* dealloc_info);
8
+
9
+ typedef struct MemInfo NRT_MemInfo;
10
+
11
+ extern "C" {
12
+ struct MemInfo {
13
+ cuda::atomic<size_t, cuda::thread_scope_device> refct;
14
+ NRT_dtor_function dtor;
15
+ void* dtor_info;
16
+ void* data;
17
+ size_t size;
18
+ };
19
+ }
20
+
21
+ // Globally needed variables
22
+ struct NRT_MemSys {
23
+ struct {
24
+ bool enabled;
25
+ cuda::atomic<size_t, cuda::thread_scope_device> alloc;
26
+ cuda::atomic<size_t, cuda::thread_scope_device> free;
27
+ cuda::atomic<size_t, cuda::thread_scope_device> mi_alloc;
28
+ cuda::atomic<size_t, cuda::thread_scope_device> mi_free;
29
+ } stats;
30
+ };
31
+
32
+ static __device__ void *nrt_allocate_meminfo_and_data_align(size_t size, unsigned align, NRT_MemInfo **mi);
33
+ static __device__ void *nrt_allocate_meminfo_and_data(size_t size, NRT_MemInfo **mi_out);
34
+ extern "C" __device__ void* NRT_Allocate_External(size_t size);
35
+
36
+ /* The Memory System object */
37
+ __device__ NRT_MemSys* TheMSys;
38
+
39
+ extern "C" __device__ void* NRT_Allocate(size_t size)
40
+ {
41
+ void* ptr = NULL;
42
+ ptr = malloc(size);
43
+ // if (TheMSys->stats.enabled) { TheMSys->stats.alloc++; }
44
+ return ptr;
45
+ }
46
+
47
+ extern "C" __device__ void NRT_MemInfo_init(NRT_MemInfo* mi,
48
+ void* data,
49
+ size_t size,
50
+ NRT_dtor_function dtor,
51
+ void* dtor_info)
52
+ // NRT_MemSys* TheMSys)
53
+ {
54
+ mi->refct = 1; /* starts with 1 refct */
55
+ mi->dtor = dtor;
56
+ mi->dtor_info = dtor_info;
57
+ mi->data = data;
58
+ mi->size = size;
59
+ // if (TheMSys->stats.enabled) { TheMSys->stats.mi_alloc++; }
60
+ }
61
+
62
+ extern "C"
63
+ __device__ NRT_MemInfo* NRT_MemInfo_new(
64
+ void* data, size_t size, NRT_dtor_function dtor, void* dtor_info)
65
+ {
66
+ NRT_MemInfo* mi = (NRT_MemInfo*)NRT_Allocate(sizeof(NRT_MemInfo));
67
+ if (mi != NULL) { NRT_MemInfo_init(mi, data, size, dtor, dtor_info); }
68
+ return mi;
69
+ }
70
+
71
+ extern "C" __device__ void NRT_Free(void* ptr)
72
+ {
73
+ free(ptr);
74
+ //if (TheMSys->stats.enabled) { TheMSys->stats.free++; }
75
+ }
76
+
77
+ extern "C" __device__ void NRT_dealloc(NRT_MemInfo* mi)
78
+ {
79
+ NRT_Free(mi);
80
+ }
81
+
82
+ extern "C" __device__ void NRT_MemInfo_destroy(NRT_MemInfo* mi)
83
+ {
84
+ NRT_dealloc(mi);
85
+ //if (TheMSys->stats.enabled) { TheMSys->stats.mi_free++; }
86
+ }
87
+ extern "C" __device__ void NRT_MemInfo_call_dtor(NRT_MemInfo* mi)
88
+ {
89
+ if (mi->dtor) /* We have a destructor */
90
+ mi->dtor(mi->data, mi->size, NULL);
91
+ /* Clear and release MemInfo */
92
+ NRT_MemInfo_destroy(mi);
93
+ }
94
+
95
+ extern "C" __device__ void* NRT_MemInfo_data_fast(NRT_MemInfo *mi)
96
+ {
97
+ return mi->data;
98
+ }
99
+
100
+ extern "C" __device__ NRT_MemInfo *NRT_MemInfo_alloc_aligned(size_t size, unsigned align) {
101
+ NRT_MemInfo *mi = NULL;
102
+ void *data = nrt_allocate_meminfo_and_data_align(size, align, &mi);
103
+ if (data == NULL) {
104
+ return NULL; /* return early as allocation failed */
105
+ }
106
+ //NRT_Debug(nrt_debug_print("NRT_MemInfo_alloc_aligned %p\n", data));
107
+ NRT_MemInfo_init(mi, data, size, NULL, NULL);
108
+ return mi;
109
+ }
110
+
111
+ static
112
+ __device__ void *nrt_allocate_meminfo_and_data_align(size_t size, unsigned align,
113
+ NRT_MemInfo **mi)
114
+ {
115
+ size_t offset = 0, intptr = 0, remainder = 0;
116
+ //NRT_Debug(nrt_debug_print("nrt_allocate_meminfo_and_data_align %p\n", allocator));
117
+ char *base = (char *)nrt_allocate_meminfo_and_data(size + 2 * align, mi);
118
+ if (base == NULL) {
119
+ return NULL; /* return early as allocation failed */
120
+ }
121
+ intptr = (size_t) base;
122
+ /*
123
+ * See if the allocation is aligned already...
124
+ * Check if align is a power of 2, if so the modulo can be avoided.
125
+ */
126
+ if((align & (align - 1)) == 0)
127
+ {
128
+ remainder = intptr & (align - 1);
129
+ }
130
+ else
131
+ {
132
+ remainder = intptr % align;
133
+ }
134
+ if (remainder == 0){ /* Yes */
135
+ offset = 0;
136
+ } else { /* No, move forward `offset` bytes */
137
+ offset = align - remainder;
138
+ }
139
+ return (void*)((char *)base + offset);
140
+ }
141
+
142
+ static
143
+ __device__ void *nrt_allocate_meminfo_and_data(size_t size, NRT_MemInfo **mi_out) {
144
+ NRT_MemInfo *mi = NULL;
145
+ //NRT_Debug(nrt_debug_print("nrt_allocate_meminfo_and_data %p\n", allocator));
146
+ char *base = (char *)NRT_Allocate_External(sizeof(NRT_MemInfo) + size);
147
+ if (base == NULL) {
148
+ *mi_out = NULL; /* set meminfo to NULL as allocation failed */
149
+ return NULL; /* return early as allocation failed */
150
+ }
151
+ mi = (NRT_MemInfo *) base;
152
+ *mi_out = mi;
153
+ return (void*)((char *)base + sizeof(NRT_MemInfo));
154
+ }
155
+
156
+ extern "C" __device__ void* NRT_Allocate_External(size_t size) {
157
+ void *ptr = NULL;
158
+ ptr = malloc(size);
159
+ //NRT_Debug(nrt_debug_print("NRT_Allocate_External bytes=%zu ptr=%p\n", size, ptr));
160
+
161
+ //if (TheMSys.stats.enabled)
162
+ //{
163
+ // TheMSys.stats.alloc++;
164
+ //}
165
+ return ptr;
166
+ }
167
+
168
+
169
+ /*
170
+ c++ version of the NRT_decref function that usually is added to
171
+ the final kernel link in PTX form by numba. This version may be
172
+ used by c++ APIs that accept ownership of live objects and must
173
+ manage them going forward.
174
+ */
175
+ extern "C" __device__ void NRT_decref(NRT_MemInfo* mi)
176
+ {
177
+ if (mi != NULL) {
178
+ mi->refct--;
179
+ if (mi->refct == 0) { NRT_MemInfo_call_dtor(mi); }
180
+ }
181
+ }
182
+
183
+ #endif
184
+
185
+ extern "C" __device__ void NRT_incref(NRT_MemInfo* mi)
186
+ {
187
+ if (mi != NULL) {
188
+ mi->refct++;
189
+ }
190
+ }
@@ -35,6 +35,20 @@ class stream(object):
35
35
  pass
36
36
 
37
37
 
38
# Default stream APIs. From the host's point of view the simulator executes
# synchronously, so every flavour of default stream can simply be the stream
# class itself.
default_stream = legacy_default_stream = per_thread_default_stream = stream
43
+
44
+
45
+ # There is no way to use external streams with the simulator. Since the
46
+ # implementation is not really using streams, we can't meaningfully interact
47
+ # with external ones.
48
def external_stream(ptr):
    """Reject external streams, which the simulator cannot wrap.

    Raises:
        RuntimeError: always, regardless of *ptr*.
    """
    message = "External streams are unsupported in the simulator"
    raise RuntimeError(message)
50
+
51
+
38
52
  def synchronize():
39
53
  pass
40
54
 
@@ -74,6 +74,10 @@ class CUDATargetContext(BaseContext):
74
74
  datamodel.default_manager
75
75
  )
76
76
 
77
@property
def enable_nrt(self):
    """Value of ``config.CUDA_ENABLE_NRT``, or False if it is not defined."""
    nrt_setting = getattr(config, 'CUDA_ENABLE_NRT', False)
    return nrt_setting
80
+
77
81
  @property
78
82
  def DIBuilder(self):
79
83
  return debuginfo.DIBuilder
@@ -126,8 +126,8 @@ class TestPrint(CUDATestCase):
126
126
 
127
127
def test_bool(self):
    """Printing booleans from a kernel yields exactly the expected lines."""
    output, _ = self.run_code(printbool_usecase)
    expected = ["True", "False", "True", "True", "False", "False"]
    # splitlines() treats both "\n" and "\r\n" as line breaks, so the check
    # is platform independent while still rejecting any extra output, which
    # an unanchored regex search would let through.
    self.assertEqual(output.strip().splitlines(), expected)
131
131
 
132
132
  def test_printempty(self):
133
133
  output, _ = self.run_code(printempty_usecase)
@@ -0,0 +1,48 @@
1
+ from numba.cuda.testing import (skip_on_cudasim, skip_unless_cudasim, unittest,
2
+ CUDATestCase)
3
+ from numba import cuda
4
+
5
+ # Basic tests that stream APIs execute on the hardware and in the simulator.
6
+ #
7
+ # Correctness of semantics is exercised elsewhere in the test suite (though we
8
+ # could improve the comprehensiveness of testing by adding more correctness
9
+ # tests here in future).
10
+
11
+
12
class TestStreamAPI(CUDATestCase):
    """Smoke tests: each stream constructor can be called on this target."""

    def test_stream_create_and_sync(self):
        new_stream = cuda.stream()
        new_stream.synchronize()

    def test_default_stream_create_and_sync(self):
        default = cuda.default_stream()
        default.synchronize()

    def test_legacy_default_stream_create_and_sync(self):
        legacy = cuda.legacy_default_stream()
        legacy.synchronize()

    def test_ptd_stream_create_and_sync(self):
        per_thread = cuda.per_thread_default_stream()
        per_thread.synchronize()

    @skip_on_cudasim("External streams are unsupported on the simulator")
    def test_external_stream_create(self):
        # The pointer is a dummy, so the resulting object is not a real
        # stream and must not be synchronized. Only check that the handle
        # carries the pointer value that was passed in.
        dummy_ptr = 0x12345678
        external = cuda.external_stream(dummy_ptr)
        self.assertEqual(dummy_ptr, external.handle.value)

    @skip_unless_cudasim("External streams are usable with hardware")
    def test_external_stream_simulator_unavailable(self):
        dummy_ptr = 0x12345678
        expected_msg = "External streams are unsupported in the simulator"
        with self.assertRaisesRegex(RuntimeError, expected_msg):
            cuda.external_stream(dummy_ptr)
45
+
46
+
47
+ if __name__ == '__main__':
48
+ unittest.main()
@@ -0,0 +1,8 @@
1
+ from numba.cuda.testing import ensure_supported_ccs_initialized
2
+ from numba.cuda.tests import load_testsuite
3
+ import os
4
+
5
+
6
def load_tests(loader, tests, pattern):
    """Build the test suite for this package directory.

    ``ensure_supported_ccs_initialized()`` is called first, then the suite is
    loaded from the directory containing this file. *tests* and *pattern*
    are accepted but not used.
    """
    ensure_supported_ccs_initialized()
    package_dir = os.path.dirname(__file__)
    return load_testsuite(loader, package_dir)
@@ -0,0 +1,42 @@
1
+
2
+ from numba.core import errors, types
3
+ from numba.core.extending import overload
4
+ from numba.np.arrayobj import (_check_const_str_dtype, is_nonelike,
5
+ ty_parse_dtype, ty_parse_shape, numpy_empty_nd)
6
+
7
+
8
+ # Typical tests for allocation use array construction (e.g. np.zeros, np.empty,
9
+ # etc.) to induce allocations. These don't work in the CUDA target because they
10
+ # need keyword arguments, which are presently not supported properly in the
11
+ # CUDA target.
12
+ #
13
+ # To work around this, we can define our own function, that works like
14
+ # the desired one, except that it uses only positional arguments.
15
+ #
16
+ # Once the CUDA target supports keyword arguments, this workaround will no
17
+ # longer be necessary and the tests in this module should be switched to use
18
+ # the relevant NumPy functions instead.
19
def cuda_empty(shape, dtype):
    """Positional-argument stand-in for an array-allocating function.

    The pure-Python body does nothing and returns None; the compiled
    behaviour is supplied by the ``@overload`` registered for this function.
    """
    return None
21
+
22
+
23
@overload(cuda_empty)
def ol_cuda_empty(shape, dtype):
    """Typing-time overload of ``cuda_empty``.

    Resolves the element type and dimensionality from the argument types and
    returns an implementation that calls ``numpy_empty_nd`` with a C-layout
    array return type.

    Raises:
        errors.TypingError: if the dtype or the shape cannot be parsed.
    """
    _check_const_str_dtype("empty", dtype)

    # The Python ``float`` type (directly or as a typed function) and a
    # missing dtype all select double precision.
    if (
        dtype is float
        or (isinstance(dtype, types.Function) and dtype.typing_key is float)
        or is_nonelike(dtype)
    ):
        element_type = types.double
    else:
        element_type = ty_parse_dtype(dtype)

    n_dims = ty_parse_shape(shape)

    if element_type is None or n_dims is None:
        msg = f"Cannot parse input types to function np.empty({shape}, {dtype})"
        raise errors.TypingError(msg)

    retty = types.Array(dtype=element_type, ndim=n_dims, layout='C')

    def impl(shape, dtype):
        return numpy_empty_nd(shape, dtype, retty)

    return impl