numba-cuda 0.3.0__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (260) hide show
  1. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/PKG-INFO +21 -3
  2. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/README.md +19 -1
  3. numba_cuda-0.5.0/numba_cuda/VERSION +1 -0
  4. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/compiler.py +7 -6
  5. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/cudadecl.py +6 -2
  6. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/cudadrv/devicearray.py +4 -1
  7. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/cudadrv/driver.py +1 -20
  8. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/cudadrv/linkable_code.py +13 -9
  9. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/cudadrv/nvrtc.py +5 -1
  10. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/cudadrv/nvvm.py +6 -1
  11. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/decorators.py +9 -2
  12. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/dispatcher.py +22 -3
  13. numba_cuda-0.5.0/numba_cuda/numba/cuda/runtime/__init__.py +1 -0
  14. numba_cuda-0.5.0/numba_cuda/numba/cuda/runtime/memsys.cu +94 -0
  15. numba_cuda-0.5.0/numba_cuda/numba/cuda/runtime/memsys.cuh +17 -0
  16. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/runtime/nrt.cu +19 -22
  17. numba_cuda-0.5.0/numba_cuda/numba/cuda/runtime/nrt.py +318 -0
  18. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/testing.py +11 -1
  19. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/__init__.py +1 -0
  20. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +31 -0
  21. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +145 -11
  22. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +10 -7
  23. numba_cuda-0.5.0/numba_cuda/numba/cuda/tests/nrt/mock_numpy.py +146 -0
  24. numba_cuda-0.5.0/numba_cuda/numba/cuda/tests/nrt/test_nrt.py +232 -0
  25. numba_cuda-0.5.0/numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +114 -0
  26. numba_cuda-0.5.0/numba_cuda/numba/cuda/tests/support.py +11 -0
  27. numba_cuda-0.5.0/numba_cuda/numba/cuda/utils.py +22 -0
  28. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda.egg-info/PKG-INFO +21 -3
  29. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda.egg-info/SOURCES.txt +7 -0
  30. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/pyproject.toml +1 -1
  31. numba_cuda-0.3.0/numba_cuda/VERSION +0 -1
  32. numba_cuda-0.3.0/numba_cuda/numba/cuda/tests/nrt/mock_numpy.py +0 -42
  33. numba_cuda-0.3.0/numba_cuda/numba/cuda/tests/nrt/test_nrt.py +0 -110
  34. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/LICENSE +0 -0
  35. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/__init__.py +0 -0
  36. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/_version.py +0 -0
  37. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/__init__.py +0 -0
  38. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/api.py +0 -0
  39. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/api_util.py +0 -0
  40. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/args.py +0 -0
  41. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/cg.py +0 -0
  42. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/codegen.py +0 -0
  43. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/cpp_function_wrappers.cu +0 -0
  44. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/cuda_fp16.h +0 -0
  45. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/cuda_fp16.hpp +0 -0
  46. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/cuda_paths.py +0 -0
  47. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/cudadrv/__init__.py +0 -0
  48. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/cudadrv/devices.py +0 -0
  49. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/cudadrv/drvapi.py +0 -0
  50. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/cudadrv/dummyarray.py +0 -0
  51. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/cudadrv/enums.py +0 -0
  52. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/cudadrv/error.py +0 -0
  53. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/cudadrv/libs.py +0 -0
  54. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/cudadrv/mappings.py +0 -0
  55. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/cudadrv/ndarray.py +0 -0
  56. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/cudadrv/rtapi.py +0 -0
  57. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/cudadrv/runtime.py +0 -0
  58. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/cudaimpl.py +0 -0
  59. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/cudamath.py +0 -0
  60. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/descriptor.py +0 -0
  61. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/device_init.py +0 -0
  62. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/deviceufunc.py +0 -0
  63. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/errors.py +0 -0
  64. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/extending.py +0 -0
  65. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/initialize.py +0 -0
  66. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/intrinsic_wrapper.py +0 -0
  67. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/intrinsics.py +0 -0
  68. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/kernels/__init__.py +0 -0
  69. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/kernels/reduction.py +0 -0
  70. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/kernels/transpose.py +0 -0
  71. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/libdevice.py +0 -0
  72. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/libdevicedecl.py +0 -0
  73. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/libdevicefuncs.py +0 -0
  74. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/libdeviceimpl.py +0 -0
  75. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/mathimpl.py +0 -0
  76. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/models.py +0 -0
  77. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/nvvmutils.py +0 -0
  78. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/printimpl.py +0 -0
  79. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/random.py +0 -0
  80. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/reshape_funcs.cu +0 -0
  81. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/simulator/__init__.py +0 -0
  82. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/simulator/api.py +0 -0
  83. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/simulator/compiler.py +0 -0
  84. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +0 -0
  85. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +0 -0
  86. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/simulator/cudadrv/devices.py +0 -0
  87. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/simulator/cudadrv/driver.py +0 -0
  88. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +0 -0
  89. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +0 -0
  90. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/simulator/cudadrv/error.py +0 -0
  91. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/simulator/cudadrv/libs.py +0 -0
  92. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +0 -0
  93. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +0 -0
  94. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/simulator/kernel.py +0 -0
  95. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/simulator/kernelapi.py +0 -0
  96. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/simulator/reduction.py +0 -0
  97. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/simulator/vector_types.py +0 -0
  98. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/simulator_init.py +0 -0
  99. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/stubs.py +0 -0
  100. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/target.py +0 -0
  101. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudadrv/__init__.py +0 -0
  102. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +0 -0
  103. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +0 -0
  104. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +0 -0
  105. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +0 -0
  106. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +0 -0
  107. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +0 -0
  108. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +0 -0
  109. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +0 -0
  110. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +0 -0
  111. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +0 -0
  112. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +0 -0
  113. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudadrv/test_events.py +0 -0
  114. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +0 -0
  115. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudadrv/test_init.py +0 -0
  116. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +0 -0
  117. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +0 -0
  118. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +0 -0
  119. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +0 -0
  120. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +0 -0
  121. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +0 -0
  122. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +0 -0
  123. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +0 -0
  124. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +0 -0
  125. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +0 -0
  126. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +0 -0
  127. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +0 -0
  128. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +0 -0
  129. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +0 -0
  130. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/__init__.py +0 -0
  131. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +0 -0
  132. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +0 -0
  133. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +0 -0
  134. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -0
  135. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +0 -0
  136. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +0 -0
  137. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_array.py +0 -0
  138. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +0 -0
  139. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +0 -0
  140. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +0 -0
  141. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +0 -0
  142. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +0 -0
  143. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_caching.py +0 -0
  144. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_casting.py +0 -0
  145. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +0 -0
  146. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +0 -0
  147. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_complex.py +0 -0
  148. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +0 -0
  149. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +0 -0
  150. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +0 -0
  151. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +0 -0
  152. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +0 -0
  153. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +0 -0
  154. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +0 -0
  155. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_debug.py +0 -0
  156. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +0 -0
  157. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +0 -0
  158. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_enums.py +0 -0
  159. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_errors.py +0 -0
  160. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_exception.py +0 -0
  161. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_extending.py +0 -0
  162. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +0 -0
  163. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_forall.py +0 -0
  164. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +0 -0
  165. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +0 -0
  166. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_globals.py +0 -0
  167. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +0 -0
  168. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +0 -0
  169. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +0 -0
  170. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +0 -0
  171. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +0 -0
  172. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +0 -0
  173. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +0 -0
  174. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +0 -0
  175. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_lang.py +0 -0
  176. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +0 -0
  177. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +0 -0
  178. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +0 -0
  179. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +0 -0
  180. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +0 -0
  181. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_math.py +0 -0
  182. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +0 -0
  183. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +0 -0
  184. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +0 -0
  185. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +0 -0
  186. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +0 -0
  187. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +0 -0
  188. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +0 -0
  189. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_operator.py +0 -0
  190. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +0 -0
  191. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_overload.py +0 -0
  192. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_powi.py +0 -0
  193. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_print.py +0 -0
  194. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +0 -0
  195. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_random.py +0 -0
  196. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +0 -0
  197. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +0 -0
  198. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +0 -0
  199. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +0 -0
  200. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +0 -0
  201. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +0 -0
  202. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_sm.py +0 -0
  203. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +0 -0
  204. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +0 -0
  205. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_sync.py +0 -0
  206. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +0 -0
  207. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +0 -0
  208. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +0 -0
  209. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +0 -0
  210. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +0 -0
  211. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +0 -0
  212. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +0 -0
  213. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +0 -0
  214. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +0 -0
  215. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_warning.py +0 -0
  216. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +0 -0
  217. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudasim/__init__.py +0 -0
  218. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudasim/support.py +0 -0
  219. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +0 -0
  220. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/data/__init__.py +0 -0
  221. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/data/cuda_include.cu +0 -0
  222. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/data/error.cu +0 -0
  223. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/data/jitlink.cu +0 -0
  224. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -0
  225. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/data/warn.cu +0 -0
  226. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/doc_examples/__init__.py +0 -0
  227. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +0 -0
  228. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +0 -0
  229. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +0 -0
  230. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +0 -0
  231. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +0 -0
  232. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +0 -0
  233. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +0 -0
  234. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/doc_examples/test_random.py +0 -0
  235. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +0 -0
  236. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +0 -0
  237. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +0 -0
  238. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +0 -0
  239. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/nocuda/__init__.py +0 -0
  240. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +0 -0
  241. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +0 -0
  242. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/nocuda/test_import.py +0 -0
  243. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +0 -0
  244. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +0 -0
  245. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/nrt/__init__.py +0 -0
  246. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/test_binary_generation/Makefile +0 -0
  247. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +0 -0
  248. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu +0 -0
  249. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu +0 -0
  250. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/types.py +0 -0
  251. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/ufuncs.py +0 -0
  252. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/vector_types.py +0 -0
  253. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda/numba/cuda/vectorizers.py +0 -0
  254. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda.egg-info/dependency_links.txt +0 -0
  255. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda.egg-info/requires.txt +0 -0
  256. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/numba_cuda.egg-info/top_level.txt +0 -0
  257. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/setup.cfg +0 -0
  258. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/setup.py +0 -0
  259. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/site-packages/_numba_cuda_redirector.pth +0 -0
  260. {numba_cuda-0.3.0 → numba_cuda-0.5.0}/site-packages/_numba_cuda_redirector.py +0 -0
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: numba-cuda
3
- Version: 0.3.0
3
+ Version: 0.5.0
4
4
  Summary: CUDA target for Numba
5
5
  Author: Anaconda Inc., NVIDIA Corporation
6
6
  License: BSD 2-clause
@@ -27,7 +27,19 @@ tracker](https://github.com/NVIDIA/numba-cuda/issues).
27
27
  To raise questions or initiate discussions, please use the [Numba Discourse
28
28
  forum](https://numba.discourse.group).
29
29
 
30
- ## Building from source
30
+ ## Installation with pip
31
+
32
+ ```shell
33
+ pip install numba-cuda
34
+ ```
35
+
36
+ ## Installation with Conda
37
+
38
+ ```shell
39
+ conda install -c conda-forge numba-cuda
40
+ ```
41
+
42
+ ## Installation from source
31
43
 
32
44
  Install as an editable install:
33
45
 
@@ -53,3 +65,9 @@ which will show a path like:
53
65
  ```
54
66
  <path to numba-cuda repo>/numba_cuda/numba/cuda/__init__.py
55
67
  ```
68
+
69
+ ## Contributing Guide
70
+
71
+ Review the
72
+ [CONTRIBUTING.md](https://github.com/NVIDIA/numba-cuda/blob/main/CONTRIBUTING.md)
73
+ file for information on how to contribute code and issues to the project.
@@ -12,7 +12,19 @@ tracker](https://github.com/NVIDIA/numba-cuda/issues).
12
12
  To raise questions or initiate discussions, please use the [Numba Discourse
13
13
  forum](https://numba.discourse.group).
14
14
 
15
- ## Building from source
15
+ ## Installation with pip
16
+
17
+ ```shell
18
+ pip install numba-cuda
19
+ ```
20
+
21
+ ## Installation with Conda
22
+
23
+ ```shell
24
+ conda install -c conda-forge numba-cuda
25
+ ```
26
+
27
+ ## Installation from source
16
28
 
17
29
  Install as an editable install:
18
30
 
@@ -38,3 +50,9 @@ which will show a path like:
38
50
  ```
39
51
  <path to numba-cuda repo>/numba_cuda/numba/cuda/__init__.py
40
52
  ```
53
+
54
+ ## Contributing Guide
55
+
56
+ Review the
57
+ [CONTRIBUTING.md](https://github.com/NVIDIA/numba-cuda/blob/main/CONTRIBUTING.md)
58
+ file for information on how to contribute code and issues to the project.
@@ -0,0 +1 @@
1
+ 0.5.0
@@ -570,16 +570,16 @@ def compile_ptx_for_current_device(pyfunc, sig, debug=None, lineinfo=False,
570
570
  abi=abi, abi_info=abi_info)
571
571
 
572
572
 
573
- def declare_device_function(name, restype, argtypes):
574
- return declare_device_function_template(name, restype, argtypes).key
573
+ def declare_device_function(name, restype, argtypes, link):
574
+ return declare_device_function_template(name, restype, argtypes, link).key
575
575
 
576
576
 
577
- def declare_device_function_template(name, restype, argtypes):
577
+ def declare_device_function_template(name, restype, argtypes, link):
578
578
  from .descriptor import cuda_target
579
579
  typingctx = cuda_target.typing_context
580
580
  targetctx = cuda_target.target_context
581
581
  sig = typing.signature(restype, *argtypes)
582
- extfn = ExternFunction(name, sig)
582
+ extfn = ExternFunction(name, sig, link)
583
583
 
584
584
  class device_function_template(ConcreteTemplate):
585
585
  key = extfn
@@ -593,7 +593,8 @@ def declare_device_function_template(name, restype, argtypes):
593
593
  return device_function_template
594
594
 
595
595
 
596
- class ExternFunction(object):
597
- def __init__(self, name, sig):
596
+ class ExternFunction:
597
+ def __init__(self, name, sig, link):
598
598
  self.name = name
599
599
  self.sig = sig
600
+ self.link = link
@@ -403,16 +403,20 @@ _genfp16_binary_operator(operator.itruediv)
403
403
 
404
404
 
405
405
  def _resolve_wrapped_unary(fname):
406
+ link = tuple()
406
407
  decl = declare_device_function_template(f'__numba_wrapper_{fname}',
407
408
  types.float16,
408
- (types.float16,))
409
+ (types.float16,),
410
+ link)
409
411
  return types.Function(decl)
410
412
 
411
413
 
412
414
  def _resolve_wrapped_binary(fname):
415
+ link = tuple()
413
416
  decl = declare_device_function_template(f'__numba_wrapper_{fname}',
414
417
  types.float16,
415
- (types.float16, types.float16,))
418
+ (types.float16, types.float16,),
419
+ link)
416
420
  return types.Function(decl)
417
421
 
418
422
 
@@ -570,10 +570,13 @@ class DeviceNDArray(DeviceNDArrayBase):
570
570
  '''
571
571
  return self._dummy.is_c_contig
572
572
 
573
- def __array__(self, dtype=None):
573
+ def __array__(self, dtype=None, copy=None):
574
574
  """
575
575
  :return: an `numpy.ndarray`, so copies to the host.
576
576
  """
577
+ if copy is False:
578
+ msg = "`copy=False` is not supported. A copy is always created."
579
+ raise ValueError(msg)
577
580
  if dtype:
578
581
  return self.copy_to_host().__array__(dtype)
579
582
  else:
@@ -18,7 +18,6 @@ import functools
18
18
  import warnings
19
19
  import logging
20
20
  import threading
21
- import traceback
22
21
  import asyncio
23
22
  import pathlib
24
23
  import subprocess
@@ -40,6 +39,7 @@ from .drvapi import API_PROTOTYPES
40
39
  from .drvapi import cu_occupancy_b2d_size, cu_stream_callback_pyobj, cu_uuid
41
40
  from .mappings import FILE_EXTENSION_MAP
42
41
  from .linkable_code import LinkableCode, LTOIR, Fatbin, Object
42
+ from numba.cuda.utils import _readenv
43
43
  from numba.cuda.cudadrv import enums, drvapi, nvrtc
44
44
 
45
45
  try:
@@ -66,25 +66,6 @@ _py_decref.argtypes = [ctypes.py_object]
66
66
  _py_incref.argtypes = [ctypes.py_object]
67
67
 
68
68
 
69
- def _readenv(name, ctor, default):
70
- value = os.environ.get(name)
71
- if value is None:
72
- return default() if callable(default) else default
73
- try:
74
- if ctor is bool:
75
- return value.lower() in {'1', "true"}
76
- return ctor(value)
77
- except Exception:
78
- warnings.warn(
79
- f"Environment variable '{name}' is defined but its associated "
80
- f"value '{value}' could not be parsed.\n"
81
- "The parse failed with exception:\n"
82
- f"{traceback.format_exc()}",
83
- RuntimeWarning
84
- )
85
- return default
86
-
87
-
88
69
  _MVC_ERROR_MESSAGE = (
89
70
  "Minor version compatibility requires ptxcompiler and cubinlinker packages "
90
71
  "to be available"
@@ -2,8 +2,12 @@ from .mappings import FILE_EXTENSION_MAP
2
2
 
3
3
 
4
4
  class LinkableCode:
5
- """An object that can be passed in the `link` list argument to `@cuda.jit`
6
- kernels to supply code to be linked from memory."""
5
+ """An object that holds code to be linked from memory.
6
+
7
+ :param data: A buffer containing the data to link.
8
+ :param name: The name of the file to be referenced in any compilation or
9
+ linking errors that may be produced.
10
+ """
7
11
 
8
12
  def __init__(self, data, name=None):
9
13
  self.data = data
@@ -15,49 +19,49 @@ class LinkableCode:
15
19
 
16
20
 
17
21
  class PTXSource(LinkableCode):
18
- """PTX Source code in memory"""
22
+ """PTX source code in memory."""
19
23
 
20
24
  kind = FILE_EXTENSION_MAP["ptx"]
21
25
  default_name = "<unnamed-ptx>"
22
26
 
23
27
 
24
28
  class CUSource(LinkableCode):
25
- """CUDA C/C++ Source code in memory"""
29
+ """CUDA C/C++ source code in memory."""
26
30
 
27
31
  kind = "cu"
28
32
  default_name = "<unnamed-cu>"
29
33
 
30
34
 
31
35
  class Fatbin(LinkableCode):
32
- """A fatbin ELF in memory"""
36
+ """An ELF Fatbin in memory."""
33
37
 
34
38
  kind = FILE_EXTENSION_MAP["fatbin"]
35
39
  default_name = "<unnamed-fatbin>"
36
40
 
37
41
 
38
42
  class Cubin(LinkableCode):
39
- """A cubin ELF in memory"""
43
+ """An ELF Cubin in memory."""
40
44
 
41
45
  kind = FILE_EXTENSION_MAP["cubin"]
42
46
  default_name = "<unnamed-cubin>"
43
47
 
44
48
 
45
49
  class Archive(LinkableCode):
46
- """An archive of objects in memory"""
50
+ """An archive of objects in memory."""
47
51
 
48
52
  kind = FILE_EXTENSION_MAP["a"]
49
53
  default_name = "<unnamed-archive>"
50
54
 
51
55
 
52
56
  class Object(LinkableCode):
53
- """An object file in memory"""
57
+ """An object file in memory."""
54
58
 
55
59
  kind = FILE_EXTENSION_MAP["o"]
56
60
  default_name = "<unnamed-object>"
57
61
 
58
62
 
59
63
  class LTOIR(LinkableCode):
60
- """An LTOIR file in memory"""
64
+ """An LTOIR file in memory."""
61
65
 
62
66
  kind = "ltoir"
63
67
  default_name = "<unnamed-ltoir>"
@@ -266,7 +266,11 @@ def compile(src, name, cc, ltoir=False):
266
266
  cudadrv_path = os.path.dirname(os.path.abspath(__file__))
267
267
  numba_cuda_path = os.path.dirname(cudadrv_path)
268
268
  numba_include = f'-I{numba_cuda_path}'
269
- options = [arch, *cuda_include, numba_include, '-rdc', 'true']
269
+
270
+ nrt_path = os.path.join(numba_cuda_path, "runtime")
271
+ nrt_include = f'-I{nrt_path}'
272
+
273
+ options = [arch, *cuda_include, numba_include, nrt_include, '-rdc', 'true']
270
274
 
271
275
  if ltoir:
272
276
  options.append("-dlto")
@@ -314,7 +314,9 @@ COMPUTE_CAPABILITIES = (
314
314
  (6, 0), (6, 1), (6, 2),
315
315
  (7, 0), (7, 2), (7, 5),
316
316
  (8, 0), (8, 6), (8, 7), (8, 9),
317
- (9, 0)
317
+ (9, 0),
318
+ (10, 0), (10, 1),
319
+ (12, 0),
318
320
  )
319
321
 
320
322
  # Maps CTK version -> (min supported cc, max supported cc) inclusive
@@ -331,6 +333,9 @@ CTK_SUPPORTED = {
331
333
  (12, 2): ((5, 0), (9, 0)),
332
334
  (12, 3): ((5, 0), (9, 0)),
333
335
  (12, 4): ((5, 0), (9, 0)),
336
+ (12, 5): ((5, 0), (9, 0)),
337
+ (12, 6): ((5, 0), (9, 0)),
338
+ (12, 8): ((5, 0), (12, 0)),
334
339
  }
335
340
 
336
341
 
@@ -173,7 +173,7 @@ def jit(func_or_sig=None, device=False, inline=False, link=[], debug=None,
173
173
  return disp
174
174
 
175
175
 
176
- def declare_device(name, sig):
176
+ def declare_device(name, sig, link=None):
177
177
  """
178
178
  Declare the signature of a foreign function. Returns a descriptor that can
179
179
  be used to call the function from a Python kernel.
@@ -181,10 +181,17 @@ def declare_device(name, sig):
181
181
  :param name: The name of the foreign function.
182
182
  :type name: str
183
183
  :param sig: The Numba signature of the function.
184
+ :param link: External code to link when calling the function.
184
185
  """
186
+ if link is None:
187
+ link = tuple()
188
+ else:
189
+ if not isinstance(link, (list, tuple, set)):
190
+ link = (link,)
191
+
185
192
  argtypes, restype = sigutils.normalize_signature(sig)
186
193
  if restype is None:
187
194
  msg = 'Return type must be provided for device declarations'
188
195
  raise TypeError(msg)
189
196
 
190
- return declare_device_function(name, restype, argtypes)
197
+ return declare_device_function(name, restype, argtypes, link)
@@ -11,16 +11,18 @@ from numba.core.compiler_lock import global_compiler_lock
11
11
  from numba.core.dispatcher import Dispatcher
12
12
  from numba.core.errors import NumbaPerformanceWarning
13
13
  from numba.core.typing.typeof import Purpose, typeof
14
-
14
+ from numba.core.types.functions import Function
15
15
  from numba.cuda.api import get_current_device
16
16
  from numba.cuda.args import wrap_arg
17
- from numba.cuda.compiler import compile_cuda, CUDACompiler, kernel_fixup
17
+ from numba.cuda.compiler import (compile_cuda, CUDACompiler, kernel_fixup,
18
+ ExternFunction)
18
19
  from numba.cuda.cudadrv import driver
19
20
  from numba.cuda.cudadrv.devices import get_context
20
21
  from numba.cuda.descriptor import cuda_target
21
22
  from numba.cuda.errors import (missing_launch_config_msg,
22
23
  normalize_kernel_dimensions)
23
24
  from numba.cuda import types as cuda_types
25
+ from numba.cuda.runtime.nrt import rtsys
24
26
 
25
27
  from numba import cuda
26
28
  from numba import _dispatcher
@@ -157,6 +159,16 @@ class _Kernel(serialize.ReduceMixin):
157
159
 
158
160
  self.maybe_link_nrt(link, tgt_ctx, asm)
159
161
 
162
+ for k, v in cres.fndesc.typemap.items():
163
+ if not isinstance(v, Function):
164
+ continue
165
+
166
+ if not isinstance(v.typing_key, ExternFunction):
167
+ continue
168
+
169
+ for obj in v.typing_key.link:
170
+ lib.add_linking_file(obj)
171
+
160
172
  for filepath in link:
161
173
  lib.add_linking_file(filepath)
162
174
 
@@ -253,7 +265,14 @@ class _Kernel(serialize.ReduceMixin):
253
265
  """
254
266
  Force binding to current CUDA context
255
267
  """
256
- self._codelibrary.get_cufunc()
268
+ cufunc = self._codelibrary.get_cufunc()
269
+
270
+ if hasattr(self, "target_context") and self.target_context.enable_nrt:
271
+ rtsys.ensure_initialized()
272
+ rtsys.set_memsys_to_module(cufunc.module)
273
+ # We don't know which stream the kernel will be launched on, so
274
+ # we force synchronize here.
275
+ cuda.synchronize()
257
276
 
258
277
  @property
259
278
  def regs_per_thread(self):
@@ -0,0 +1 @@
1
+ from numba.cuda.runtime.nrt import rtsys # noqa: F401
@@ -0,0 +1,94 @@
1
+ #include "memsys.cuh"
2
+
3
+ __device__ size_t memsys_size = sizeof(NRT_MemSys);
4
+
5
namespace detail
{
    // Guard used by the NRT_MemSys_* kernels before they dereference TheMSys:
    // raises a device-side assertion failure when the pointer is still null.
    __device__ void check_memsys()
    {
        const bool memsys_unset = (TheMSys == nullptr);
        if (memsys_unset)
        {
            assert(false && "TheMSys pointer is null. Please use NRT_MemSys_set to set pointer first.");
        }
    }
} // namespace detail
15
+
16
// Kernel: records `memsys_ptr` in the TheMSys device global so that the other
// NRT_MemSys_* kernels in this file can reach the memory-system object.
// No thread-index guard: every launched thread performs the same store.
extern "C" __global__ void NRT_MemSys_set(NRT_MemSys *memsys_ptr)
{
    ::TheMSys = memsys_ptr;
}
20
+
21
// Kernel: copies the four statistics counters into `managed_memsys`, in the
// order alloc, free, mi_alloc, mi_free (indices 0..3).
// NOTE(review): the parameter name suggests a managed-memory buffer with room
// for at least four uint64_t values -- confirm against the host-side caller.
extern "C" __global__ void NRT_MemSys_read(uint64_t *managed_memsys)
{
    detail::check_memsys();

    auto &counters = TheMSys->stats;
    managed_memsys[0] = counters.alloc.load();
    managed_memsys[1] = counters.free.load();
    managed_memsys[2] = counters.mi_alloc.load();
    managed_memsys[3] = counters.mi_free.load();
}
29
+
30
// Kernel: writes the current `alloc` counter to managed_result[0].
extern "C" __global__ void NRT_MemSys_read_alloc(uint64_t *managed_result)
{
    detail::check_memsys();

    const auto alloc_count = TheMSys->stats.alloc.load();
    *managed_result = alloc_count;
}
35
+
36
// Kernel: writes the current `free` counter to managed_result[0].
extern "C" __global__ void NRT_MemSys_read_free(uint64_t *managed_result)
{
    detail::check_memsys();

    const auto free_count = TheMSys->stats.free.load();
    *managed_result = free_count;
}
41
+
42
// Kernel: writes the current `mi_alloc` counter to managed_result[0].
extern "C" __global__ void NRT_MemSys_read_mi_alloc(uint64_t *managed_result)
{
    detail::check_memsys();

    const auto mi_alloc_count = TheMSys->stats.mi_alloc.load();
    *managed_result = mi_alloc_count;
}
47
+
48
// Kernel: writes the current `mi_free` counter to managed_result[0].
extern "C" __global__ void NRT_MemSys_read_mi_free(uint64_t *managed_result)
{
    detail::check_memsys();

    const auto mi_free_count = TheMSys->stats.mi_free.load();
    *managed_result = mi_free_count;
}
53
+
54
// Kernel: puts the statistics block of the object behind TheMSys into its
// initial state -- collection switched off and all four counters at zero.
// TheMSys must already have been set (checked via detail::check_memsys).
extern "C" __global__ void NRT_MemSys_init(void)
{
    detail::check_memsys();

    auto &stats = TheMSys->stats;
    stats.enabled = false;
    stats.alloc.store(0);
    stats.free.store(0);
    stats.mi_alloc.store(0);
    stats.mi_free.store(0);
}
63
+
64
// Kernel: switches statistics collection on by setting stats.enabled.
extern "C" __global__ void NRT_MemSys_enable_stats(void)
{
    detail::check_memsys();

    auto &stats = TheMSys->stats;
    stats.enabled = true;
}
69
+
70
// Kernel: switches statistics collection off by clearing stats.enabled.
extern "C" __global__ void NRT_MemSys_disable_stats(void)
{
    detail::check_memsys();

    auto &stats = TheMSys->stats;
    stats.enabled = false;
}
75
+
76
// Kernel: reports the stats.enabled flag through `enabled`, converted from
// bool to uint8_t.
extern "C" __global__ void NRT_MemSys_stats_enabled(uint8_t *enabled)
{
    detail::check_memsys();

    const bool stats_on = TheMSys->stats.enabled;
    enabled[0] = static_cast<uint8_t>(stats_on);
}
81
+
82
// Kernel: debugging aid that prints the statistics block behind TheMSys with
// device-side printf, or a notice when TheMSys has not been set. Unlike the
// other NRT_MemSys_* kernels it tolerates a null TheMSys instead of asserting.
//
// The counters are size_t. They are widened to unsigned long long and printed
// with %llu so that the format specifier matches the argument width on every
// platform (unsigned long, as implied by %lu, is only 32 bits wide on LLP64
// targets such as Windows).
extern "C" __global__ void NRT_MemSys_print(void)
{
    if (TheMSys == nullptr)
    {
        printf("TheMsys is null.\n");
        return;
    }

    const auto &stats = TheMSys->stats;
    printf("TheMSys->stats.enabled %d\n", static_cast<int>(stats.enabled));
    printf("TheMSys->stats.alloc %llu\n",
           static_cast<unsigned long long>(stats.alloc.load()));
    printf("TheMSys->stats.free %llu\n",
           static_cast<unsigned long long>(stats.free.load()));
    printf("TheMSys->stats.mi_alloc %llu\n",
           static_cast<unsigned long long>(stats.mi_alloc.load()));
    printf("TheMSys->stats.mi_free %llu\n",
           static_cast<unsigned long long>(stats.mi_free.load()));
}
@@ -0,0 +1,17 @@
1
// Guard against multiple inclusion: this header defines a type and a
// __device__ variable, so including it twice in one translation unit would
// otherwise be a redefinition error.
#pragma once

#include <cuda/atomic>

// Memory-system bookkeeping object for the device-side runtime.
struct NRT_MemSys {
    struct {
        // Whether the counters below are being maintained.
        bool enabled;
        // Event counters, atomic at device scope.
        cuda::atomic<size_t, cuda::thread_scope_device> alloc;
        cuda::atomic<size_t, cuda::thread_scope_device> free;
        cuda::atomic<size_t, cuda::thread_scope_device> mi_alloc;
        cuda::atomic<size_t, cuda::thread_scope_device> mi_free;
    } stats;
};

/* The Memory System object: device-global pointer, defined (not merely
 * declared) by every translation unit that includes this header. */
__device__ NRT_MemSys* TheMSys;

// Kernel that assigns TheMSys; each including .cu file provides the definition.
extern "C" __global__ void NRT_MemSys_set(NRT_MemSys *memsys_ptr);
@@ -3,6 +3,8 @@
3
3
 
4
4
  #include <cuda/atomic>
5
5
 
6
+ #include "memsys.cuh"
7
+
6
8
  typedef void (*NRT_dtor_function)(void* ptr, size_t size, void* info);
7
9
  typedef void (*NRT_dealloc_func)(void* ptr, void* dealloc_info);
8
10
 
@@ -18,29 +20,21 @@ struct MemInfo {
18
20
  };
19
21
  }
20
22
 
21
- // Globally needed variables
22
- struct NRT_MemSys {
23
- struct {
24
- bool enabled;
25
- cuda::atomic<size_t, cuda::thread_scope_device> alloc;
26
- cuda::atomic<size_t, cuda::thread_scope_device> free;
27
- cuda::atomic<size_t, cuda::thread_scope_device> mi_alloc;
28
- cuda::atomic<size_t, cuda::thread_scope_device> mi_free;
29
- } stats;
30
- };
23
+ extern "C" __global__ void NRT_MemSys_set(NRT_MemSys *memsys_ptr)
24
+ {
25
+ TheMSys = memsys_ptr;
26
+ }
31
27
 
32
28
  static __device__ void *nrt_allocate_meminfo_and_data_align(size_t size, unsigned align, NRT_MemInfo **mi);
33
29
  static __device__ void *nrt_allocate_meminfo_and_data(size_t size, NRT_MemInfo **mi_out);
34
30
  extern "C" __device__ void* NRT_Allocate_External(size_t size);
35
31
 
36
- /* The Memory System object */
37
- __device__ NRT_MemSys* TheMSys;
38
-
39
32
  extern "C" __device__ void* NRT_Allocate(size_t size)
40
33
  {
41
34
  void* ptr = NULL;
42
35
  ptr = malloc(size);
43
- // if (TheMSys->stats.enabled) { TheMSys->stats.alloc++; }
36
+ if (TheMSys && TheMSys->stats.enabled) {
37
+ TheMSys->stats.alloc.fetch_add(1, cuda::memory_order_relaxed); }
44
38
  return ptr;
45
39
  }
46
40
 
@@ -49,14 +43,14 @@ extern "C" __device__ void NRT_MemInfo_init(NRT_MemInfo* mi,
49
43
  size_t size,
50
44
  NRT_dtor_function dtor,
51
45
  void* dtor_info)
52
- // NRT_MemSys* TheMSys)
53
46
  {
54
47
  mi->refct = 1; /* starts with 1 refct */
55
48
  mi->dtor = dtor;
56
49
  mi->dtor_info = dtor_info;
57
50
  mi->data = data;
58
51
  mi->size = size;
59
- // if (TheMSys->stats.enabled) { TheMSys->stats.mi_alloc++; }
52
+ if (TheMSys && TheMSys->stats.enabled) {
53
+ TheMSys->stats.mi_alloc.fetch_add(1, cuda::memory_order_relaxed); }
60
54
  }
61
55
 
62
56
  extern "C"
@@ -71,7 +65,8 @@ __device__ NRT_MemInfo* NRT_MemInfo_new(
71
65
  extern "C" __device__ void NRT_Free(void* ptr)
72
66
  {
73
67
  free(ptr);
74
- //if (TheMSys->stats.enabled) { TheMSys->stats.free++; }
68
+ if (TheMSys && TheMSys->stats.enabled) {
69
+ TheMSys->stats.free.fetch_add(1, cuda::memory_order_relaxed); }
75
70
  }
76
71
 
77
72
  extern "C" __device__ void NRT_dealloc(NRT_MemInfo* mi)
@@ -82,8 +77,10 @@ extern "C" __device__ void NRT_dealloc(NRT_MemInfo* mi)
82
77
  extern "C" __device__ void NRT_MemInfo_destroy(NRT_MemInfo* mi)
83
78
  {
84
79
  NRT_dealloc(mi);
85
- //if (TheMSys->stats.enabled) { TheMSys->stats.mi_free++; }
80
+ if (TheMSys && TheMSys->stats.enabled) {
81
+ TheMSys->stats.mi_free.fetch_add(1, cuda::memory_order_relaxed); }
86
82
  }
83
+
87
84
  extern "C" __device__ void NRT_MemInfo_call_dtor(NRT_MemInfo* mi)
88
85
  {
89
86
  if (mi->dtor) /* We have a destructor */
@@ -158,10 +155,10 @@ extern "C" __device__ void* NRT_Allocate_External(size_t size) {
158
155
  ptr = malloc(size);
159
156
  //NRT_Debug(nrt_debug_print("NRT_Allocate_External bytes=%zu ptr=%p\n", size, ptr));
160
157
 
161
- //if (TheMSys.stats.enabled)
162
- //{
163
- // TheMSys.stats.alloc++;
164
- //}
158
+ if (TheMSys && TheMSys->stats.enabled)
159
+ {
160
+ TheMSys->stats.alloc.fetch_add(1, cuda::memory_order_relaxed);
161
+ }
165
162
  return ptr;
166
163
  }
167
164