numba-cuda 0.10.1__tar.gz → 0.11.0__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (274)
  1. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/PKG-INFO +1 -1
  2. numba_cuda-0.11.0/numba_cuda/VERSION +1 -0
  3. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/codegen.py +69 -2
  4. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/compiler.py +20 -15
  5. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/cudadecl.py +15 -5
  6. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/cudadrv/driver.py +103 -20
  7. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/cudadrv/linkable_code.py +10 -2
  8. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/cudaimpl.py +103 -11
  9. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/decorators.py +3 -1
  10. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/dispatcher.py +23 -63
  11. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/runtime/nrt.cu +2 -17
  12. numba_cuda-0.11.0/numba_cuda/numba/cuda/runtime/nrt.cuh +41 -0
  13. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/runtime/nrt.py +13 -1
  14. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/stubs.py +23 -11
  15. numba_cuda-0.11.0/numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +236 -0
  16. numba_cuda-0.11.0/numba_cuda/numba/cuda/tests/cudapy/test_extending.py +304 -0
  17. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/nrt/test_nrt.py +122 -3
  18. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/test_binary_generation/Makefile +11 -0
  19. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +5 -2
  20. numba_cuda-0.11.0/numba_cuda/numba/cuda/tests/test_binary_generation/nrt_extern.cu +7 -0
  21. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu +4 -0
  22. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/utils.py +7 -0
  23. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda.egg-info/PKG-INFO +1 -1
  24. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda.egg-info/SOURCES.txt +3 -0
  25. numba_cuda-0.10.1/numba_cuda/VERSION +0 -1
  26. numba_cuda-0.10.1/numba_cuda/numba/cuda/tests/cudapy/test_extending.py +0 -164
  27. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/LICENSE +0 -0
  28. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/README.md +0 -0
  29. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/__init__.py +0 -0
  30. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/_version.py +0 -0
  31. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/__init__.py +0 -0
  32. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/api.py +0 -0
  33. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/api_util.py +0 -0
  34. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/args.py +0 -0
  35. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/cg.py +0 -0
  36. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/cpp_function_wrappers.cu +0 -0
  37. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/cuda_bf16.py +0 -0
  38. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/cuda_paths.py +0 -0
  39. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/cudadrv/__init__.py +0 -0
  40. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/cudadrv/devicearray.py +0 -0
  41. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/cudadrv/devices.py +0 -0
  42. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/cudadrv/drvapi.py +0 -0
  43. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/cudadrv/dummyarray.py +0 -0
  44. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/cudadrv/enums.py +0 -0
  45. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/cudadrv/error.py +0 -0
  46. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/cudadrv/libs.py +0 -0
  47. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/cudadrv/mappings.py +0 -0
  48. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/cudadrv/ndarray.py +0 -0
  49. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/cudadrv/nvrtc.py +0 -0
  50. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/cudadrv/nvvm.py +0 -0
  51. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/cudadrv/rtapi.py +0 -0
  52. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/cudadrv/runtime.py +0 -0
  53. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/cudamath.py +0 -0
  54. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/debuginfo.py +0 -0
  55. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/descriptor.py +0 -0
  56. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/device_init.py +0 -0
  57. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/deviceufunc.py +0 -0
  58. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/errors.py +0 -0
  59. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/extending.py +0 -0
  60. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/include/11/cuda_bf16.h +0 -0
  61. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/include/11/cuda_bf16.hpp +0 -0
  62. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/include/11/cuda_fp16.h +0 -0
  63. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/include/11/cuda_fp16.hpp +0 -0
  64. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/include/12/cuda_bf16.h +0 -0
  65. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +0 -0
  66. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/include/12/cuda_fp16.h +0 -0
  67. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +0 -0
  68. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/initialize.py +0 -0
  69. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/intrinsic_wrapper.py +0 -0
  70. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/intrinsics.py +0 -0
  71. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/kernels/__init__.py +0 -0
  72. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/kernels/reduction.py +0 -0
  73. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/kernels/transpose.py +0 -0
  74. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/libdevice.py +0 -0
  75. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/libdevicedecl.py +0 -0
  76. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/libdevicefuncs.py +0 -0
  77. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/libdeviceimpl.py +0 -0
  78. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/locks.py +0 -0
  79. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/lowering.py +0 -0
  80. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/mathimpl.py +0 -0
  81. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/models.py +0 -0
  82. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/nvvmutils.py +0 -0
  83. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/printimpl.py +0 -0
  84. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/random.py +0 -0
  85. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/reshape_funcs.cu +0 -0
  86. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/runtime/__init__.py +0 -0
  87. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/runtime/memsys.cu +0 -0
  88. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/runtime/memsys.cuh +0 -0
  89. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/simulator/__init__.py +0 -0
  90. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/simulator/api.py +0 -0
  91. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/simulator/compiler.py +0 -0
  92. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +0 -0
  93. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +0 -0
  94. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/simulator/cudadrv/devices.py +0 -0
  95. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/simulator/cudadrv/driver.py +0 -0
  96. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +0 -0
  97. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +0 -0
  98. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/simulator/cudadrv/error.py +0 -0
  99. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/simulator/cudadrv/libs.py +0 -0
  100. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +0 -0
  101. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +0 -0
  102. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/simulator/kernel.py +0 -0
  103. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/simulator/kernelapi.py +0 -0
  104. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/simulator/reduction.py +0 -0
  105. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/simulator/vector_types.py +0 -0
  106. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/simulator_init.py +0 -0
  107. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/target.py +0 -0
  108. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/testing.py +0 -0
  109. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/__init__.py +0 -0
  110. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudadrv/__init__.py +0 -0
  111. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +0 -0
  112. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +0 -0
  113. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +0 -0
  114. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +0 -0
  115. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +0 -0
  116. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +0 -0
  117. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +0 -0
  118. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +0 -0
  119. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +0 -0
  120. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +0 -0
  121. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +0 -0
  122. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +0 -0
  123. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudadrv/test_events.py +0 -0
  124. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +0 -0
  125. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudadrv/test_init.py +0 -0
  126. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +0 -0
  127. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +0 -0
  128. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +0 -0
  129. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +0 -0
  130. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +0 -0
  131. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +0 -0
  132. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +0 -0
  133. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +0 -0
  134. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +0 -0
  135. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +0 -0
  136. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +0 -0
  137. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +0 -0
  138. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +0 -0
  139. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +0 -0
  140. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +0 -0
  141. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/__init__.py +0 -0
  142. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +0 -0
  143. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +0 -0
  144. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +0 -0
  145. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -0
  146. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +0 -0
  147. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +0 -0
  148. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_array.py +0 -0
  149. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +0 -0
  150. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +0 -0
  151. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +0 -0
  152. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +0 -0
  153. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +0 -0
  154. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +0 -0
  155. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_caching.py +0 -0
  156. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_casting.py +0 -0
  157. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +0 -0
  158. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +0 -0
  159. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_complex.py +0 -0
  160. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +0 -0
  161. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +0 -0
  162. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +0 -0
  163. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +0 -0
  164. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +0 -0
  165. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +0 -0
  166. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +0 -0
  167. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_debug.py +0 -0
  168. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +0 -0
  169. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +0 -0
  170. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +0 -0
  171. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_enums.py +0 -0
  172. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_errors.py +0 -0
  173. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_exception.py +0 -0
  174. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +0 -0
  175. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_forall.py +0 -0
  176. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +0 -0
  177. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +0 -0
  178. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_globals.py +0 -0
  179. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +0 -0
  180. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +0 -0
  181. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +0 -0
  182. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +0 -0
  183. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_inline.py +0 -0
  184. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +0 -0
  185. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +0 -0
  186. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +0 -0
  187. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +0 -0
  188. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_lang.py +0 -0
  189. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +0 -0
  190. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +0 -0
  191. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +0 -0
  192. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +0 -0
  193. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +0 -0
  194. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_math.py +0 -0
  195. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +0 -0
  196. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +0 -0
  197. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +0 -0
  198. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +0 -0
  199. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +0 -0
  200. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +0 -0
  201. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +0 -0
  202. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_operator.py +0 -0
  203. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +0 -0
  204. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_overload.py +0 -0
  205. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_powi.py +0 -0
  206. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_print.py +0 -0
  207. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +0 -0
  208. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_random.py +0 -0
  209. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +0 -0
  210. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +0 -0
  211. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +0 -0
  212. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +0 -0
  213. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +0 -0
  214. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +0 -0
  215. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_sm.py +0 -0
  216. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +0 -0
  217. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +0 -0
  218. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_sync.py +0 -0
  219. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +0 -0
  220. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +0 -0
  221. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +0 -0
  222. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +0 -0
  223. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +0 -0
  224. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +0 -0
  225. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +0 -0
  226. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +0 -0
  227. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +0 -0
  228. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_warning.py +0 -0
  229. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +0 -0
  230. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudasim/__init__.py +0 -0
  231. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudasim/support.py +0 -0
  232. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +0 -0
  233. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/data/__init__.py +0 -0
  234. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/data/cuda_include.cu +0 -0
  235. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/data/error.cu +0 -0
  236. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/data/jitlink.cu +0 -0
  237. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -0
  238. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/data/warn.cu +0 -0
  239. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/doc_examples/__init__.py +0 -0
  240. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +0 -0
  241. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +0 -0
  242. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +0 -0
  243. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +0 -0
  244. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +0 -0
  245. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +0 -0
  246. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +0 -0
  247. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +0 -0
  248. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/doc_examples/test_random.py +0 -0
  249. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +0 -0
  250. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +0 -0
  251. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +0 -0
  252. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +0 -0
  253. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/nocuda/__init__.py +0 -0
  254. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +0 -0
  255. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +0 -0
  256. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/nocuda/test_import.py +0 -0
  257. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +0 -0
  258. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +0 -0
  259. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/nrt/__init__.py +0 -0
  260. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +0 -0
  261. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/support.py +0 -0
  262. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu +0 -0
  263. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/types.py +0 -0
  264. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/ufuncs.py +0 -0
  265. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/vector_types.py +0 -0
  266. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda/numba/cuda/vectorizers.py +0 -0
  267. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda.egg-info/dependency_links.txt +0 -0
  268. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda.egg-info/requires.txt +0 -0
  269. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/numba_cuda.egg-info/top_level.txt +0 -0
  270. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/pyproject.toml +0 -0
  271. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/setup.cfg +0 -0
  272. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/setup.py +0 -0
  273. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/site-packages/_numba_cuda_redirector.pth +0 -0
  274. {numba_cuda-0.10.1 → numba_cuda-0.11.0}/site-packages/_numba_cuda_redirector.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: numba-cuda
3
- Version: 0.10.1
3
+ Version: 0.11.0
4
4
  Summary: CUDA target for Numba
5
5
  Author: Anaconda Inc., NVIDIA Corporation
6
6
  License: BSD 2-clause
@@ -0,0 +1 @@
1
+ 0.11.0
@@ -5,6 +5,7 @@ from numba.core.codegen import Codegen, CodeLibrary
5
5
  from .cudadrv import devices, driver, nvvm, runtime
6
6
  from numba.cuda.cudadrv.libs import get_cudalib
7
7
  from numba.cuda.cudadrv.linkable_code import LinkableCode
8
+ from numba.cuda.runtime.nrt import NRT_LIBRARY
8
9
 
9
10
  import os
10
11
  import subprocess
@@ -57,6 +58,57 @@ def disassemble_cubin_for_cfg(cubin):
57
58
  return run_nvdisasm(cubin, flags)
58
59
 
59
60
 
61
+ class ExternalCodeLibrary(CodeLibrary):
62
+ """Holds code produced externally, for linking with generated code."""
63
+
64
+ def __init__(self, codegen, name):
65
+ super().__init__(codegen, name)
66
+ # Files to link
67
+ self._linking_files = set()
68
+ # Setup and teardown functions for the module.
69
+ # The order is determined by the order they are added to the codelib.
70
+ self._setup_functions = []
71
+ self._teardown_functions = []
72
+
73
+ @property
74
+ def modules(self):
75
+ # There are no LLVM IR modules in an ExternalCodeLibrary
76
+ return set()
77
+
78
+ def add_linking_file(self, path_or_obj):
79
+ # Adding new files after finalization is prohibited, in case the list
80
+ # of libraries has already been added to another code library; the
81
+ # newly-added files would be omitted from their linking process.
82
+ self._raise_if_finalized()
83
+
84
+ if isinstance(path_or_obj, LinkableCode):
85
+ if path_or_obj.setup_callback:
86
+ self._setup_functions.append(path_or_obj.setup_callback)
87
+ if path_or_obj.teardown_callback:
88
+ self._teardown_functions.append(path_or_obj.teardown_callback)
89
+
90
+ self._linking_files.add(path_or_obj)
91
+
92
+ def add_ir_module(self, module):
93
+ raise NotImplementedError("Cannot add LLVM IR to external code")
94
+
95
+ def add_linking_library(self, library):
96
+ raise NotImplementedError("Cannot add libraries to external code")
97
+
98
+ def finalize(self):
99
+ self._raise_if_finalized()
100
+ self._finalized = True
101
+
102
+ def get_asm_str(self):
103
+ raise NotImplementedError("No assembly for external code")
104
+
105
+ def get_llvm_str(self):
106
+ raise NotImplementedError("No LLVM IR for external code")
107
+
108
+ def get_function(self, name):
109
+ raise NotImplementedError("Cannot get function from external code")
110
+
111
+
60
112
  class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
61
113
  """
62
114
  The CUDACodeLibrary generates PTX, SASS, cubins for multiple different
@@ -297,6 +349,9 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
297
349
  self._raise_if_finalized()
298
350
 
299
351
  self._linking_libraries.add(library)
352
+ self._linking_files.update(library._linking_files)
353
+ self._setup_functions.extend(library._setup_functions)
354
+ self._teardown_functions.extend(library._teardown_functions)
300
355
 
301
356
  def add_linking_file(self, path_or_obj):
302
357
  if isinstance(path_or_obj, LinkableCode):
@@ -362,9 +417,17 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
362
417
  but loaded functions are discarded. They are recreated when needed
363
418
  after deserialization.
364
419
  """
420
+ nrt = False
365
421
  if self._linking_files:
366
- msg = "Cannot pickle CUDACodeLibrary with linking files"
367
- raise RuntimeError(msg)
422
+ if (
423
+ len(self._linking_files) == 1
424
+ and NRT_LIBRARY in self._linking_files
425
+ ):
426
+ nrt = True
427
+ else:
428
+ msg = "Cannot pickle CUDACodeLibrary with linking files"
429
+ raise RuntimeError(msg)
430
+
368
431
  if not self._finalized:
369
432
  raise RuntimeError("Cannot pickle unfinalized CUDACodeLibrary")
370
433
  return dict(
@@ -378,6 +441,7 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
378
441
  max_registers=self._max_registers,
379
442
  nvvm_options=self._nvvm_options,
380
443
  needs_cudadevrt=self.needs_cudadevrt,
444
+ nrt=nrt,
381
445
  )
382
446
 
383
447
  @classmethod
@@ -393,6 +457,7 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
393
457
  max_registers,
394
458
  nvvm_options,
395
459
  needs_cudadevrt,
460
+ nrt,
396
461
  ):
397
462
  """
398
463
  Rebuild an instance.
@@ -409,6 +474,8 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
409
474
  instance.needs_cudadevrt = needs_cudadevrt
410
475
 
411
476
  instance._finalized = True
477
+ if nrt:
478
+ instance._linking_files = {NRT_LIBRARY}
412
479
 
413
480
  return instance
414
481
 
@@ -1,5 +1,4 @@
1
1
  from llvmlite import ir
2
- from numba.core.typing.templates import ConcreteTemplate
3
2
  from numba.core import ir as numba_ir
4
3
  from numba.core import (
5
4
  cgutils,
@@ -37,6 +36,7 @@ from numba.core.typed_passes import (
37
36
  from warnings import warn
38
37
  from numba.cuda import nvvmutils
39
38
  from numba.cuda.api import get_current_device
39
+ from numba.cuda.codegen import ExternalCodeLibrary
40
40
  from numba.cuda.cudadrv import nvvm
41
41
  from numba.cuda.descriptor import cuda_target
42
42
  from numba.cuda.target import CUDACABICallConv
@@ -798,32 +798,37 @@ def compile_ptx_for_current_device(
798
798
 
799
799
 
800
800
  def declare_device_function(name, restype, argtypes, link):
801
- return declare_device_function_template(name, restype, argtypes, link).key
802
-
803
-
804
- def declare_device_function_template(name, restype, argtypes, link):
805
801
  from .descriptor import cuda_target
806
802
 
807
803
  typingctx = cuda_target.typing_context
808
804
  targetctx = cuda_target.target_context
809
805
  sig = typing.signature(restype, *argtypes)
810
- extfn = ExternFunction(name, sig, link)
811
806
 
812
- class device_function_template(ConcreteTemplate):
813
- key = extfn
814
- cases = [sig]
807
+ # extfn is the descriptor used to call the function from Python code, and
808
+ # is used as the key for typing and lowering.
809
+ extfn = ExternFunction(name, sig)
815
810
 
816
- fndesc = funcdesc.ExternalFunctionDescriptor(
817
- name=name, restype=restype, argtypes=argtypes
818
- )
811
+ # Typing
812
+ device_function_template = typing.make_concrete_template(name, extfn, [sig])
819
813
  typingctx.insert_user_function(extfn, device_function_template)
820
- targetctx.insert_user_function(extfn, fndesc)
814
+
815
+ # Lowering
816
+ lib = ExternalCodeLibrary(f"{name}_externals", targetctx.codegen())
817
+ for file in link:
818
+ lib.add_linking_file(file)
819
+
820
+ # ExternalFunctionDescriptor provides a lowering implementation for calling
821
+ # external functions
822
+ fndesc = funcdesc.ExternalFunctionDescriptor(name, restype, argtypes)
823
+ targetctx.insert_user_function(extfn, fndesc, libs=(lib,))
821
824
 
822
825
  return device_function_template
823
826
 
824
827
 
825
828
  class ExternFunction:
826
- def __init__(self, name, sig, link):
829
+ """A descriptor that can be used to call the external function from within
830
+ a Python kernel."""
831
+
832
+ def __init__(self, name, sig):
827
833
  self.name = name
828
834
  self.sig = sig
829
- self.link = link
@@ -1,5 +1,5 @@
1
1
  import operator
2
- from numba.core import types
2
+ from numba.core import errors, types
3
3
  from numba.core.typing.npydecl import (
4
4
  parse_dtype,
5
5
  parse_shape,
@@ -21,7 +21,7 @@ from numba.core.typing.templates import (
21
21
  from numba.cuda.types import dim3
22
22
  from numba.core.typeconv import Conversion
23
23
  from numba import cuda
24
- from numba.cuda.compiler import declare_device_function_template
24
+ from numba.cuda.compiler import declare_device_function
25
25
 
26
26
  registry = Registry()
27
27
  register = registry.register
@@ -33,7 +33,7 @@ register_number_classes(register_global)
33
33
 
34
34
  class Cuda_array_decl(CallableTemplate):
35
35
  def generic(self):
36
- def typer(shape, dtype):
36
+ def typer(shape, dtype, alignment=None):
37
37
  # Only integer literals and tuples of integer literals are valid
38
38
  # shapes
39
39
  if isinstance(shape, types.Integer):
@@ -47,6 +47,16 @@ class Cuda_array_decl(CallableTemplate):
47
47
  else:
48
48
  return None
49
49
 
50
+ if alignment is not None:
51
+ permitted = (types.IntegerLiteral, types.NoneType)
52
+ if not isinstance(alignment, permitted):
53
+ msg = "alignment must be a constant integer"
54
+ raise errors.RequireLiteralValue(msg)
55
+
56
+ # N.B. We don't use alignment for typing; it's not part of
57
+ # types.Array. The value supplied to the array declaration
58
+ # is handled in the lowering.
59
+
50
60
  ndim = parse_shape(shape)
51
61
  nb_dtype = parse_dtype(dtype)
52
62
  if nb_dtype is not None and ndim is not None:
@@ -412,7 +422,7 @@ _genfp16_binary_operator(operator.itruediv)
412
422
 
413
423
  def _resolve_wrapped_unary(fname):
414
424
  link = tuple()
415
- decl = declare_device_function_template(
425
+ decl = declare_device_function(
416
426
  f"__numba_wrapper_{fname}", types.float16, (types.float16,), link
417
427
  )
418
428
  return types.Function(decl)
@@ -420,7 +430,7 @@ def _resolve_wrapped_unary(fname):
420
430
 
421
431
  def _resolve_wrapped_binary(fname):
422
432
  link = tuple()
423
- decl = declare_device_function_template(
433
+ decl = declare_device_function(
424
434
  f"__numba_wrapper_{fname}",
425
435
  types.float16,
426
436
  (
@@ -49,7 +49,7 @@ from .drvapi import API_PROTOTYPES
49
49
  from .drvapi import cu_occupancy_b2d_size, cu_stream_callback_pyobj, cu_uuid
50
50
  from .mappings import FILE_EXTENSION_MAP
51
51
  from .linkable_code import LinkableCode, LTOIR, Fatbin, Object
52
- from numba.cuda.utils import _readenv
52
+ from numba.cuda.utils import _readenv, cached_file_read
53
53
  from numba.cuda.cudadrv import enums, drvapi, nvrtc
54
54
 
55
55
  try:
@@ -2797,13 +2797,16 @@ class Linker(metaclass=ABCMeta):
2797
2797
  ptx_name = os.path.splitext(name)[0] + ".ptx"
2798
2798
  self.add_ptx(ptx.encode(), ptx_name)
2799
2799
 
2800
+ @abstractmethod
2801
+ def add_data(self, data, kind, name):
2802
+ """Add in-memory data to the link"""
2803
+
2800
2804
  @abstractmethod
2801
2805
  def add_file(self, path, kind):
2802
2806
  """Add code from a file to the link"""
2803
2807
 
2804
2808
  def add_cu_file(self, path):
2805
- with open(path, "rb") as f:
2806
- cu = f.read()
2809
+ cu = cached_file_read(path, how="rb")
2807
2810
  self.add_cu(cu, os.path.basename(path))
2808
2811
 
2809
2812
  def add_file_guess_ext(self, path_or_code, ignore_nonlto=False):
@@ -2948,6 +2951,10 @@ class MVCLinker(Linker):
2948
2951
  except CubinLinkerError as e:
2949
2952
  raise LinkerError from e
2950
2953
 
2954
+ def add_data(self, data, kind, name):
2955
+ msg = "Adding in-memory data unsupported in the MVC linker"
2956
+ raise LinkerError(msg)
2957
+
2951
2958
  def add_file(self, path, kind):
2952
2959
  try:
2953
2960
  from cubinlinker import CubinLinkerError
@@ -2955,8 +2962,7 @@ class MVCLinker(Linker):
2955
2962
  raise ImportError(_MVC_ERROR_MESSAGE) from err
2956
2963
 
2957
2964
  try:
2958
- with open(path, "rb") as f:
2959
- data = f.read()
2965
+ data = cached_file_read(path, how="rb")
2960
2966
  except FileNotFoundError:
2961
2967
  raise LinkerError(f"{path} not found")
2962
2968
 
@@ -3046,17 +3052,32 @@ class CtypesLinker(Linker):
3046
3052
  def error_log(self):
3047
3053
  return self.linker_errors_buf.value.decode("utf8")
3048
3054
 
3049
- def add_ptx(self, ptx, name="<cudapy-ptx>"):
3050
- ptxbuf = c_char_p(ptx)
3051
- namebuf = c_char_p(name.encode("utf8"))
3052
- self._keep_alive += [ptxbuf, namebuf]
3055
+ def add_cubin(self, cubin, name="<unnamed-cubin>"):
3056
+ return self._add_data(enums.CU_JIT_INPUT_CUBIN, cubin, name)
3057
+
3058
+ def add_ptx(self, ptx, name="<unnamed-ptx>"):
3059
+ return self._add_data(enums.CU_JIT_INPUT_PTX, ptx, name)
3060
+
3061
+ def add_object(self, object_, name="<unnamed-object>"):
3062
+ return self._add_data(enums.CU_JIT_INPUT_OBJECT, object_, name)
3063
+
3064
+ def add_fatbin(self, fatbin, name="<unnamed-fatbin>"):
3065
+ return self._add_data(enums.CU_JIT_INPUT_FATBINARY, fatbin, name)
3066
+
3067
+ def add_library(self, library, name="<unnamed-library>"):
3068
+ return self._add_data(enums.CU_JIT_INPUT_LIBRARY, library, name)
3069
+
3070
+ def _add_data(self, input_type, data, name):
3071
+ data_buffer = c_char_p(data)
3072
+ name_buffer = c_char_p(name.encode("utf8"))
3073
+ self._keep_alive += [data_buffer, name_buffer]
3053
3074
  try:
3054
3075
  driver.cuLinkAddData(
3055
3076
  self.handle,
3056
- enums.CU_JIT_INPUT_PTX,
3057
- ptxbuf,
3058
- len(ptx),
3059
- namebuf,
3077
+ input_type,
3078
+ data_buffer,
3079
+ len(data),
3080
+ name_buffer,
3060
3081
  0,
3061
3082
  None,
3062
3083
  None,
@@ -3064,6 +3085,28 @@ class CtypesLinker(Linker):
3064
3085
  except CudaAPIError as e:
3065
3086
  raise LinkerError("%s\n%s" % (e, self.error_log))
3066
3087
 
3088
+ def add_data(self, data, kind, name=None):
3089
+ # We pass the name as **kwargs to ensure the default name for the input
3090
+ # type is used if none is supplied
3091
+ kws = {}
3092
+ if name is not None:
3093
+ kws["name"] = name
3094
+
3095
+ if kind == FILE_EXTENSION_MAP["cubin"]:
3096
+ self.add_cubin(data, **kws)
3097
+ elif kind == FILE_EXTENSION_MAP["fatbin"]:
3098
+ self.add_fatbin(data, **kws)
3099
+ elif kind == FILE_EXTENSION_MAP["a"]:
3100
+ self.add_library(data, **kws)
3101
+ elif kind == FILE_EXTENSION_MAP["ptx"]:
3102
+ self.add_ptx(data, **kws)
3103
+ elif kind == FILE_EXTENSION_MAP["o"]:
3104
+ self.add_object(data, **kws)
3105
+ elif kind == FILE_EXTENSION_MAP["ltoir"]:
3106
+ raise LinkerError("Ctypes linker cannot link LTO-IR")
3107
+ else:
3108
+ raise LinkerError(f"Don't know how to link {kind}")
3109
+
3067
3110
  def add_file(self, path, kind):
3068
3111
  pathbuf = c_char_p(path.encode("utf8"))
3069
3112
  self._keep_alive.append(pathbuf)
@@ -3151,17 +3194,58 @@ class CudaPythonLinker(Linker):
3151
3194
  def error_log(self):
3152
3195
  return self.linker_errors_buf.decode("utf8")
3153
3196
 
3154
- def add_ptx(self, ptx, name="<cudapy-ptx>"):
3155
- namebuf = name.encode("utf8")
3156
- self._keep_alive += [ptx, namebuf]
3197
+ def add_cubin(self, cubin, name="<unnamed-cubin>"):
3198
+ input_type = binding.CUjitInputType.CU_JIT_INPUT_CUBIN
3199
+ return self._add_data(input_type, cubin, name)
3200
+
3201
+ def add_ptx(self, ptx, name="<unnamed-ptx>"):
3202
+ input_type = binding.CUjitInputType.CU_JIT_INPUT_PTX
3203
+ return self._add_data(input_type, ptx, name)
3204
+
3205
+ def add_object(self, object_, name="<unnamed-object>"):
3206
+ input_type = binding.CUjitInputType.CU_JIT_INPUT_OBJECT
3207
+ return self._add_data(input_type, object_, name)
3208
+
3209
+ def add_fatbin(self, fatbin, name="<unnamed-fatbin>"):
3210
+ input_type = binding.CUjitInputType.CU_JIT_INPUT_FATBINARY
3211
+ return self._add_data(input_type, fatbin, name)
3212
+
3213
+ def add_library(self, library, name="<unnamed-library>"):
3214
+ input_type = binding.CUjitInputType.CU_JIT_INPUT_LIBRARY
3215
+ return self._add_data(input_type, library, name)
3216
+
3217
+ def _add_data(self, input_type, data, name):
3218
+ name_buffer = name.encode("utf8")
3219
+ self._keep_alive += [data, name_buffer]
3157
3220
  try:
3158
- input_ptx = binding.CUjitInputType.CU_JIT_INPUT_PTX
3159
3221
  driver.cuLinkAddData(
3160
- self.handle, input_ptx, ptx, len(ptx), namebuf, 0, [], []
3222
+ self.handle, input_type, data, len(data), name_buffer, 0, [], []
3161
3223
  )
3162
3224
  except CudaAPIError as e:
3163
3225
  raise LinkerError("%s\n%s" % (e, self.error_log))
3164
3226
 
3227
+ def add_data(self, data, kind, name=None):
3228
+ # We pass the name as **kwargs to ensure the default name for the input
3229
+ # type is used if none is supplied
3230
+ kws = {}
3231
+ if name is not None:
3232
+ kws["name"] = name
3233
+
3234
+ if kind == FILE_EXTENSION_MAP["cubin"]:
3235
+ self.add_cubin(data, **kws)
3236
+ elif kind == FILE_EXTENSION_MAP["fatbin"]:
3237
+ self.add_fatbin(data, **kws)
3238
+ elif kind == FILE_EXTENSION_MAP["a"]:
3239
+ self.add_library(data, **kws)
3240
+ elif kind == FILE_EXTENSION_MAP["ptx"]:
3241
+ self.add_ptx(data, **kws)
3242
+ elif kind == FILE_EXTENSION_MAP["o"]:
3243
+ self.add_object(data, **kws)
3244
+ elif kind == FILE_EXTENSION_MAP["ltoir"]:
3245
+ raise LinkerError("CudaPythonLinker cannot link LTO-IR")
3246
+ else:
3247
+ raise LinkerError(f"Don't know how to link {kind}")
3248
+
3165
3249
  def add_file(self, path, kind):
3166
3250
  pathbuf = path.encode("utf8")
3167
3251
  self._keep_alive.append(pathbuf)
@@ -3252,8 +3336,7 @@ class PyNvJitLinker(Linker):
3252
3336
 
3253
3337
  def add_file(self, path, kind):
3254
3338
  try:
3255
- with open(path, "rb") as f:
3256
- data = f.read()
3339
+ data = cached_file_read(path, "rb")
3257
3340
  except FileNotFoundError:
3258
3341
  raise LinkerError(f"{path} not found")
3259
3342
 
@@ -16,16 +16,24 @@ class LinkableCode:
16
16
  :param teardown_callback: A function called just prior to the unloading of
17
17
  a module that has this code object linked into
18
18
  it.
19
+ :param nrt: If True, assume this object contains NRT function calls and
20
+ add NRT source code to the final link.
19
21
  """
20
22
 
21
23
  def __init__(
22
- self, data, name=None, setup_callback=None, teardown_callback=None
24
+ self,
25
+ data,
26
+ name=None,
27
+ setup_callback=None,
28
+ teardown_callback=None,
29
+ nrt=False,
23
30
  ):
24
31
  if setup_callback and not callable(setup_callback):
25
32
  raise TypeError("setup_callback must be callable")
26
33
  if teardown_callback and not callable(teardown_callback):
27
34
  raise TypeError("teardown_callback must be callable")
28
35
 
36
+ self.nrt = nrt
29
37
  self._name = name
30
38
  self._data = data
31
39
  self.setup_callback = setup_callback
@@ -87,5 +95,5 @@ class Object(LinkableCode):
87
95
  class LTOIR(LinkableCode):
88
96
  """An LTOIR file in memory."""
89
97
 
90
- kind = "ltoir"
98
+ kind = FILE_EXTENSION_MAP["ltoir"]
91
99
  default_name = "<unnamed-ltoir>"
@@ -1,6 +1,7 @@
1
1
  from functools import reduce
2
2
  import operator
3
3
  import math
4
+ import struct
4
5
 
5
6
  from llvmlite import ir
6
7
  import llvmlite.binding as ll
@@ -92,10 +93,61 @@ def _get_unique_smem_id(name):
92
93
  return "{0}_{1}".format(name, _unique_smem_id)
93
94
 
94
95
 
96
+ def _validate_alignment(alignment: int):
97
+ """
98
+ Ensures that *alignment*, if not None, is a) greater than zero, b) a power
99
+ of two, and c) a multiple of the size of a pointer. If any of these
100
+ conditions are not met, a ValueError is raised. Otherwise, this
101
+ function returns None, indicating that the alignment is valid.
102
+ """
103
+ if alignment is None:
104
+ return
105
+ if not isinstance(alignment, int):
106
+ raise ValueError("Alignment must be an integer")
107
+ if alignment <= 0:
108
+ raise ValueError("Alignment must be positive")
109
+ if (alignment & (alignment - 1)) != 0:
110
+ raise ValueError("Alignment must be a power of 2")
111
+ pointer_size = struct.calcsize("P")
112
+ if (alignment % pointer_size) != 0:
113
+ msg = f"Alignment must be a multiple of {pointer_size}"
114
+ raise ValueError(msg)
115
+
116
+
117
+ def _try_extract_and_validate_alignment(sig: types.Tuple):
118
+ """
119
+ Extracts and validates the alignment from the supplied signature.
120
+
121
+ Returns the alignment if it is present and is an integer literal;
122
+ otherwise, returns None.
123
+
124
+ N.B. Currently, this routine assumes the signature has exactly
125
+ three arguments, with the alignment (if present) as the third
126
+ argument, as is the case with the shared and local array
127
+ helper routines below.
128
+
129
+ If this routine is called from new places, you may need to
130
+ review this implicit assumption.
131
+ """
132
+ if len(sig.args) != 3:
133
+ return None
134
+
135
+ alignment_arg = sig.args[2]
136
+ if not isinstance(alignment_arg, types.IntegerLiteral):
137
+ return None
138
+
139
+ alignment_arg = alignment_arg.literal_value
140
+ _validate_alignment(alignment_arg)
141
+ return alignment_arg
142
+
143
+
95
144
  @lower(cuda.shared.array, types.IntegerLiteral, types.Any)
145
+ @lower(cuda.shared.array, types.IntegerLiteral, types.Any, types.IntegerLiteral)
146
+ @lower(cuda.shared.array, types.IntegerLiteral, types.Any, types.NoneType)
96
147
  def cuda_shared_array_integer(context, builder, sig, args):
97
148
  length = sig.args[0].literal_value
98
149
  dtype = parse_dtype(sig.args[1])
150
+ alignment = _try_extract_and_validate_alignment(sig)
99
151
  return _generic_array(
100
152
  context,
101
153
  builder,
@@ -104,14 +156,17 @@ def cuda_shared_array_integer(context, builder, sig, args):
104
156
  symbol_name=_get_unique_smem_id("_cudapy_smem"),
105
157
  addrspace=nvvm.ADDRSPACE_SHARED,
106
158
  can_dynsized=True,
159
+ alignment=alignment,
107
160
  )
108
161
 
109
162
 
110
- @lower(cuda.shared.array, types.Tuple, types.Any)
111
- @lower(cuda.shared.array, types.UniTuple, types.Any)
163
+ @lower(cuda.shared.array, types.BaseTuple, types.Any)
164
+ @lower(cuda.shared.array, types.BaseTuple, types.Any, types.IntegerLiteral)
165
+ @lower(cuda.shared.array, types.BaseTuple, types.Any, types.NoneType)
112
166
  def cuda_shared_array_tuple(context, builder, sig, args):
113
167
  shape = [s.literal_value for s in sig.args[0]]
114
168
  dtype = parse_dtype(sig.args[1])
169
+ alignment = _try_extract_and_validate_alignment(sig)
115
170
  return _generic_array(
116
171
  context,
117
172
  builder,
@@ -120,13 +175,17 @@ def cuda_shared_array_tuple(context, builder, sig, args):
120
175
  symbol_name=_get_unique_smem_id("_cudapy_smem"),
121
176
  addrspace=nvvm.ADDRSPACE_SHARED,
122
177
  can_dynsized=True,
178
+ alignment=alignment,
123
179
  )
124
180
 
125
181
 
126
182
  @lower(cuda.local.array, types.IntegerLiteral, types.Any)
183
+ @lower(cuda.local.array, types.IntegerLiteral, types.Any, types.IntegerLiteral)
184
+ @lower(cuda.local.array, types.IntegerLiteral, types.Any, types.NoneType)
127
185
  def cuda_local_array_integer(context, builder, sig, args):
128
186
  length = sig.args[0].literal_value
129
187
  dtype = parse_dtype(sig.args[1])
188
+ alignment = _try_extract_and_validate_alignment(sig)
130
189
  return _generic_array(
131
190
  context,
132
191
  builder,
@@ -135,14 +194,17 @@ def cuda_local_array_integer(context, builder, sig, args):
135
194
  symbol_name="_cudapy_lmem",
136
195
  addrspace=nvvm.ADDRSPACE_LOCAL,
137
196
  can_dynsized=False,
197
+ alignment=alignment,
138
198
  )
139
199
 
140
200
 
141
- @lower(cuda.local.array, types.Tuple, types.Any)
142
- @lower(cuda.local.array, types.UniTuple, types.Any)
143
- def ptx_lmem_alloc_array(context, builder, sig, args):
201
+ @lower(cuda.local.array, types.BaseTuple, types.Any)
202
+ @lower(cuda.local.array, types.BaseTuple, types.Any, types.IntegerLiteral)
203
+ @lower(cuda.local.array, types.BaseTuple, types.Any, types.NoneType)
204
+ def cuda_local_array_tuple(context, builder, sig, args):
144
205
  shape = [s.literal_value for s in sig.args[0]]
145
206
  dtype = parse_dtype(sig.args[1])
207
+ alignment = _try_extract_and_validate_alignment(sig)
146
208
  return _generic_array(
147
209
  context,
148
210
  builder,
@@ -151,6 +213,7 @@ def ptx_lmem_alloc_array(context, builder, sig, args):
151
213
  symbol_name="_cudapy_lmem",
152
214
  addrspace=nvvm.ADDRSPACE_LOCAL,
153
215
  can_dynsized=False,
216
+ alignment=alignment,
154
217
  )
155
218
 
156
219
 
@@ -966,7 +1029,14 @@ def ptx_nanosleep(context, builder, sig, args):
966
1029
 
967
1030
 
968
1031
  def _generic_array(
969
- context, builder, shape, dtype, symbol_name, addrspace, can_dynsized=False
1032
+ context,
1033
+ builder,
1034
+ shape,
1035
+ dtype,
1036
+ symbol_name,
1037
+ addrspace,
1038
+ can_dynsized=False,
1039
+ alignment=None,
970
1040
  ):
971
1041
  elemcount = reduce(operator.mul, shape, 1)
972
1042
 
@@ -994,6 +1064,14 @@ def _generic_array(
994
1064
  # NVVM is smart enough to only use local memory if no register is
995
1065
  # available
996
1066
  dataptr = cgutils.alloca_once(builder, laryty, name=symbol_name)
1067
+
1068
+ # If the caller has specified a custom alignment, just set the align
1069
+ # attribute on the alloca IR directly. We don't do any additional
1070
+ # hand-holding here like checking the underlying data type's alignment
1071
+ # or rounding up to the next power of 2--those checks will have already
1072
+ # been done by the time we see the alignment value.
1073
+ if alignment is not None:
1074
+ dataptr.align = alignment
997
1075
  else:
998
1076
  lmod = builder.module
999
1077
 
@@ -1001,11 +1079,25 @@ def _generic_array(
1001
1079
  gvmem = cgutils.add_global_variable(
1002
1080
  lmod, laryty, symbol_name, addrspace
1003
1081
  )
1004
- # Specify alignment to avoid misalignment bug
1005
- align = context.get_abi_sizeof(lldtype)
1006
- # Alignment is required to be a power of 2 for shared memory. If it is
1007
- # not a power of 2 (e.g. for a Record array) then round up accordingly.
1008
- gvmem.align = 1 << (align - 1).bit_length()
1082
+
1083
+ # If the caller hasn't specified a custom alignment, obtain the
1084
+ # underlying dtype alignment from the ABI and then round it up to
1085
+ # a power of two. Otherwise, just use the caller's alignment.
1086
+ #
1087
+ # N.B. The caller *could* provide a valid-but-smaller-than-natural
1088
+ # alignment here; we'll assume the caller knows what they're
1089
+ # doing and let that through without error.
1090
+
1091
+ if alignment is None:
1092
+ abi_alignment = context.get_abi_alignment(lldtype)
1093
+ # Alignment is required to be a power of 2 for shared memory.
1094
+ # If it is not a power of 2 (e.g. for a Record array) then round
1095
+ # up accordingly.
1096
+ actual_alignment = 1 << (abi_alignment - 1).bit_length()
1097
+ else:
1098
+ actual_alignment = alignment
1099
+
1100
+ gvmem.align = actual_alignment
1009
1101
 
1010
1102
  if dynamic_smem:
1011
1103
  gvmem.linkage = "external"
@@ -250,4 +250,6 @@ def declare_device(name, sig, link=None):
250
250
  msg = "Return type must be provided for device declarations"
251
251
  raise TypeError(msg)
252
252
 
253
- return declare_device_function(name, restype, argtypes, link)
253
+ template = declare_device_function(name, restype, argtypes, link)
254
+
255
+ return template.key