numba-cuda 0.9.0__py3-none-any.whl → 0.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- numba_cuda/VERSION +1 -1
- numba_cuda/numba/cuda/compiler.py +14 -1
- numba_cuda/numba/cuda/cuda_bf16.py +5155 -0
- numba_cuda/numba/cuda/cuda_paths.py +2 -0
- numba_cuda/numba/cuda/cudadecl.py +0 -42
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +11 -2
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +10 -3
- numba_cuda/numba/cuda/cudaimpl.py +0 -63
- numba_cuda/numba/cuda/debuginfo.py +92 -2
- numba_cuda/numba/cuda/decorators.py +13 -1
- numba_cuda/numba/cuda/device_init.py +4 -5
- numba_cuda/numba/cuda/extending.py +54 -0
- numba_cuda/numba/cuda/include/11/cuda_bf16.h +3749 -0
- numba_cuda/numba/cuda/include/11/cuda_bf16.hpp +2683 -0
- numba_cuda/numba/cuda/{cuda_fp16.h → include/11/cuda_fp16.h} +550 -387
- numba_cuda/numba/cuda/{cuda_fp16.hpp → include/11/cuda_fp16.hpp} +465 -316
- numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/intrinsic_wrapper.py +0 -39
- numba_cuda/numba/cuda/intrinsics.py +172 -1
- numba_cuda/numba/cuda/lowering.py +43 -0
- numba_cuda/numba/cuda/stubs.py +0 -11
- numba_cuda/numba/cuda/target.py +28 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +4 -2
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +257 -0
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +46 -0
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +18 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +4 -2
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +59 -0
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +50 -5
- numba_cuda/numba/cuda/vector_types.py +3 -1
- numba_cuda/numba/cuda/vectorizers.py +1 -1
- {numba_cuda-0.9.0.dist-info → numba_cuda-0.10.0.dist-info}/METADATA +1 -1
- {numba_cuda-0.9.0.dist-info → numba_cuda-0.10.0.dist-info}/RECORD +42 -32
- {numba_cuda-0.9.0.dist-info → numba_cuda-0.10.0.dist-info}/WHEEL +1 -1
- {numba_cuda-0.9.0.dist-info → numba_cuda-0.10.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.9.0.dist-info → numba_cuda-0.10.0.dist-info}/top_level.txt +0 -0
numba_cuda/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.9.0
|
1
|
+
0.10.0
|
numba_cuda/numba/cuda/compiler.py
CHANGED
@@ -40,6 +40,7 @@ from numba.cuda.api import get_current_device
|
|
40
40
|
from numba.cuda.cudadrv import nvvm
|
41
41
|
from numba.cuda.descriptor import cuda_target
|
42
42
|
from numba.cuda.target import CUDACABICallConv
|
43
|
+
from numba.cuda import lowering
|
43
44
|
|
44
45
|
|
45
46
|
def _nvvm_options_type(x):
|
@@ -163,6 +164,18 @@ class CreateLibrary(LoweringPass):
|
|
163
164
|
return True
|
164
165
|
|
165
166
|
|
167
|
+
@register_pass(mutates_CFG=True, analysis_only=False)
|
168
|
+
class CUDANativeLowering(NativeLowering):
|
169
|
+
"""Lowering pass for a CUDA native function IR described solely in terms of
|
170
|
+
Numba's standard `numba.core.ir` nodes."""
|
171
|
+
|
172
|
+
_name = "cuda_native_lowering"
|
173
|
+
|
174
|
+
@property
|
175
|
+
def lowering_class(self):
|
176
|
+
return lowering.CUDALower
|
177
|
+
|
178
|
+
|
166
179
|
class CUDABytecodeInterpreter(Interpreter):
|
167
180
|
# Based on the superclass implementation, but names the resulting variable
|
168
181
|
# "$bool<N>" instead of "bool<N>" - see Numba PR #9888:
|
@@ -251,7 +264,7 @@ class CUDACompiler(CompilerBase):
|
|
251
264
|
|
252
265
|
# lower
|
253
266
|
pm.add_pass(CreateLibrary, "create library")
|
254
|
-
pm.add_pass(NativeLowering, "native lowering")
|
267
|
+
pm.add_pass(CUDANativeLowering, "cuda native lowering")
|
255
268
|
pm.add_pass(CUDABackend, "cuda backend")
|
256
269
|
|
257
270
|
pm.finalize()
|