numba-cuda 0.9.0__py3-none-any.whl → 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. numba_cuda/VERSION +1 -1
  2. numba_cuda/numba/cuda/compiler.py +14 -1
  3. numba_cuda/numba/cuda/cuda_bf16.py +5155 -0
  4. numba_cuda/numba/cuda/cuda_paths.py +2 -0
  5. numba_cuda/numba/cuda/cudadecl.py +0 -42
  6. numba_cuda/numba/cuda/cudadrv/linkable_code.py +11 -2
  7. numba_cuda/numba/cuda/cudadrv/nvrtc.py +10 -3
  8. numba_cuda/numba/cuda/cudaimpl.py +0 -63
  9. numba_cuda/numba/cuda/debuginfo.py +92 -2
  10. numba_cuda/numba/cuda/decorators.py +13 -1
  11. numba_cuda/numba/cuda/device_init.py +4 -5
  12. numba_cuda/numba/cuda/extending.py +54 -0
  13. numba_cuda/numba/cuda/include/11/cuda_bf16.h +3749 -0
  14. numba_cuda/numba/cuda/include/11/cuda_bf16.hpp +2683 -0
  15. numba_cuda/numba/cuda/{cuda_fp16.h → include/11/cuda_fp16.h} +550 -387
  16. numba_cuda/numba/cuda/{cuda_fp16.hpp → include/11/cuda_fp16.hpp} +465 -316
  17. numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
  18. numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
  19. numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
  20. numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
  21. numba_cuda/numba/cuda/intrinsic_wrapper.py +0 -39
  22. numba_cuda/numba/cuda/intrinsics.py +172 -1
  23. numba_cuda/numba/cuda/lowering.py +43 -0
  24. numba_cuda/numba/cuda/stubs.py +0 -11
  25. numba_cuda/numba/cuda/target.py +28 -0
  26. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +4 -2
  27. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +1 -1
  28. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +257 -0
  29. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +1 -1
  30. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +46 -0
  31. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +18 -0
  32. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +4 -2
  33. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +59 -0
  34. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +1 -1
  35. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +50 -5
  36. numba_cuda/numba/cuda/vector_types.py +3 -1
  37. numba_cuda/numba/cuda/vectorizers.py +1 -1
  38. {numba_cuda-0.9.0.dist-info → numba_cuda-0.10.0.dist-info}/METADATA +1 -1
  39. {numba_cuda-0.9.0.dist-info → numba_cuda-0.10.0.dist-info}/RECORD +42 -32
  40. {numba_cuda-0.9.0.dist-info → numba_cuda-0.10.0.dist-info}/WHEEL +1 -1
  41. {numba_cuda-0.9.0.dist-info → numba_cuda-0.10.0.dist-info}/licenses/LICENSE +0 -0
  42. {numba_cuda-0.9.0.dist-info → numba_cuda-0.10.0.dist-info}/top_level.txt +0 -0
numba_cuda/VERSION CHANGED
@@ -1 +1 @@
1
- 0.9.0
1
+ 0.10.0
numba_cuda/numba/cuda/compiler.py CHANGED
@@ -40,6 +40,7 @@ from numba.cuda.api import get_current_device
40
40
  from numba.cuda.cudadrv import nvvm
41
41
  from numba.cuda.descriptor import cuda_target
42
42
  from numba.cuda.target import CUDACABICallConv
43
+ from numba.cuda import lowering
43
44
 
44
45
 
45
46
  def _nvvm_options_type(x):
@@ -163,6 +164,18 @@ class CreateLibrary(LoweringPass):
163
164
  return True
164
165
 
165
166
 
167
+ @register_pass(mutates_CFG=True, analysis_only=False)
168
+ class CUDANativeLowering(NativeLowering):
169
+ """Lowering pass for a CUDA native function IR described solely in terms of
170
+ Numba's standard `numba.core.ir` nodes."""
171
+
172
+ _name = "cuda_native_lowering"
173
+
174
+ @property
175
+ def lowering_class(self):
176
+ return lowering.CUDALower
177
+
178
+
166
179
  class CUDABytecodeInterpreter(Interpreter):
167
180
  # Based on the superclass implementation, but names the resulting variable
168
181
  # "$bool<N>" instead of "bool<N>" - see Numba PR #9888:
@@ -251,7 +264,7 @@ class CUDACompiler(CompilerBase):
251
264
 
252
265
  # lower
253
266
  pm.add_pass(CreateLibrary, "create library")
254
- pm.add_pass(NativeLowering, "native lowering")
267
+ pm.add_pass(CUDANativeLowering, "cuda native lowering")
255
268
  pm.add_pass(CUDABackend, "cuda backend")
256
269
 
257
270
  pm.finalize()