PyPI - numba-cuda - Versions diffs - 0.16.0__py3-none-any.whl → 0.18.0__py3-none-any.whl - Mend

numba-cuda 0.16.0__py3-none-any.whl → 0.18.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (70)

numba_cuda/VERSION +1 -1
numba_cuda/numba/cuda/__init__.py +0 -8
numba_cuda/numba/cuda/_internal/cuda_fp16.py +14225 -0
numba_cuda/numba/cuda/api_util.py +6 -0
numba_cuda/numba/cuda/cgutils.py +1291 -0
numba_cuda/numba/cuda/codegen.py +32 -14
numba_cuda/numba/cuda/compiler.py +113 -10
numba_cuda/numba/cuda/core/caching.py +741 -0
numba_cuda/numba/cuda/core/callconv.py +338 -0
numba_cuda/numba/cuda/core/codegen.py +168 -0
numba_cuda/numba/cuda/core/compiler.py +205 -0
numba_cuda/numba/cuda/core/typed_passes.py +139 -0
numba_cuda/numba/cuda/cuda_paths.py +1 -1
numba_cuda/numba/cuda/cudadecl.py +0 -268
numba_cuda/numba/cuda/cudadrv/devicearray.py +3 -0
numba_cuda/numba/cuda/cudadrv/devices.py +4 -6
numba_cuda/numba/cuda/cudadrv/driver.py +105 -50
numba_cuda/numba/cuda/cudadrv/nvvm.py +1 -1
numba_cuda/numba/cuda/cudaimpl.py +4 -178
numba_cuda/numba/cuda/debuginfo.py +469 -3
numba_cuda/numba/cuda/device_init.py +0 -1
numba_cuda/numba/cuda/dispatcher.py +311 -14
numba_cuda/numba/cuda/extending.py +2 -1
numba_cuda/numba/cuda/fp16.py +348 -0
numba_cuda/numba/cuda/intrinsics.py +1 -1
numba_cuda/numba/cuda/libdeviceimpl.py +2 -1
numba_cuda/numba/cuda/lowering.py +1833 -8
numba_cuda/numba/cuda/mathimpl.py +2 -90
numba_cuda/numba/cuda/memory_management/nrt.py +1 -1
numba_cuda/numba/cuda/nvvmutils.py +2 -1
numba_cuda/numba/cuda/printimpl.py +2 -1
numba_cuda/numba/cuda/serialize.py +264 -0
numba_cuda/numba/cuda/simulator/__init__.py +2 -0
numba_cuda/numba/cuda/simulator/dispatcher.py +7 -0
numba_cuda/numba/cuda/stubs.py +0 -308
numba_cuda/numba/cuda/target.py +13 -5
numba_cuda/numba/cuda/testing.py +156 -5
numba_cuda/numba/cuda/tests/complex_usecases.py +113 -0
numba_cuda/numba/cuda/tests/core/serialize_usecases.py +110 -0
numba_cuda/numba/cuda/tests/core/test_serialize.py +359 -0
numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +16 -5
numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +5 -1
numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +33 -0
numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +1 -1
numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +2 -2
numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +1 -0
numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +1 -1
numba_cuda/numba/cuda/tests/cudapy/test_caching.py +5 -10
numba_cuda/numba/cuda/tests/cudapy/test_complex.py +1 -1
numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +1 -5
numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +381 -0
numba_cuda/numba/cuda/tests/cudapy/test_enums.py +1 -1
numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1 -1
numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +94 -24
numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +37 -23
numba_cuda/numba/cuda/tests/cudapy/test_operator.py +43 -27
numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +2 -5
numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +26 -9
numba_cuda/numba/cuda/tests/cudapy/test_warning.py +27 -2
numba_cuda/numba/cuda/tests/enum_usecases.py +56 -0
numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +1 -2
numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +1 -1
numba_cuda/numba/cuda/utils.py +785 -0
numba_cuda/numba/cuda/vector_types.py +1 -1
{numba_cuda-0.16.0.dist-info → numba_cuda-0.18.0.dist-info}/METADATA +18 -4
{numba_cuda-0.16.0.dist-info → numba_cuda-0.18.0.dist-info}/RECORD +69 -56
numba_cuda/numba/cuda/cpp_function_wrappers.cu +0 -46
{numba_cuda-0.16.0.dist-info → numba_cuda-0.18.0.dist-info}/WHEEL +0 -0
{numba_cuda-0.16.0.dist-info → numba_cuda-0.18.0.dist-info}/licenses/LICENSE +0 -0
{numba_cuda-0.16.0.dist-info → numba_cuda-0.18.0.dist-info}/top_level.txt +0 -0

numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py CHANGED Viewed

@@ -2,7 +2,6 @@ import numpy as np
 from numba import vectorize, guvectorize
 from numba import cuda
-from numba.cuda.cudadrv import driver
 from numba.cuda.testing import unittest, ContextResettingTestCase, ForeignArray
 from numba.cuda.testing import skip_on_cudasim, skip_if_external_memmgr
 from numba.tests.support import linux_only, override_config
@@ -32,10 +31,7 @@ class TestCudaArrayInterface(ContextResettingTestCase):
         self.assertPointersEqual(wrapped, d_arr)
     def get_stream_value(self, stream):
-        if driver.USE_NV_BINDING:
-            return int(stream.handle)
-        else:
-            return stream.handle.value
+        return stream.handle.value
     @skip_if_external_memmgr("Ownership not relevant with external memmgr")
     def test_ownership(self):

numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py CHANGED Viewed

@@ -403,6 +403,387 @@ class TestCudaDebugInfo(CUDATestCase):
         match = re.compile(pat6).search(llvm_ir)
         self.assertIsNotNone(match, msg=llvm_ir)
+    def test_DW_LANG(self):
+        @cuda.jit(debug=True)
+        def foo():
+            """
+            CHECK: distinct !DICompileUnit
+            CHECK-SAME: emissionKind: FullDebug
+            CHECK-SAME: isOptimized: true
+            CHECK-SAME: language: DW_LANG_C_plus_plus
+            CHECK-SAME: producer: "clang (Numba)"
+            """
+            pass
+        foo[1, 1]()
+        llvm_ir = foo.inspect_llvm()[tuple()]
+        self.assertFileCheckMatches(llvm_ir, foo.__doc__)
+    def test_DILocation(self):
+        """Tests that DILocation information is reasonable.
+        The kernel `foo` produces LLVM like:
+        define function() {
+        entry:
+          alloca
+          store 0 to alloca
+          <arithmetic for doing the operations on b, c, d>
+          setup for print
+          branch
+        other_labels:
+        ... <elided>
+        }
+        The following checks that:
+        * the alloca and store have no !dbg
+        * the arithmetic occurs in the order defined and with !dbg
+        * that the !dbg entries are monotonically increasing in value with
+          source line number
+        """
+        sig = (types.float64,)
+        @cuda.jit(sig, debug=True)
+        def foo(a):
+            """
+            CHECK-LABEL: define void @{{.+}}foo
+            CHECK: entry:
+            CHECK: %[[VAL_0:.*]] = alloca double
+            CHECK-NOT: !dbg
+            CHECK: store double 0.0, double* %[[VAL_0]]
+            CHECK-NOT: !dbg
+            CHECK: %[[VAL_1:.*]] = alloca double
+            CHECK-NOT: !dbg
+            CHECK: store double 0.0, double* %[[VAL_1]]
+            CHECK-NOT: !dbg
+            CHECK: %[[VAL_2:.*]] = alloca double
+            CHECK-NOT: !dbg
+            CHECK: store double 0.0, double* %[[VAL_2]]
+            CHECK-NOT: !dbg
+            CHECK: %[[VAL_3:.*]] = alloca double
+            CHECK-NOT: !dbg
+            CHECK: store double 0.0, double* %[[VAL_3]]
+            CHECK-NOT: !dbg
+            CHECK: %[[VAL_4:.*]] = alloca double
+            CHECK-NOT: !dbg
+            CHECK: store double 0.0, double* %[[VAL_4]]
+            CHECK-NOT: !dbg
+            CHECK: %[[VAL_5:.*]] = alloca double
+            CHECK-NOT: !dbg
+            CHECK: store double 0.0, double* %[[VAL_5]]
+            CHECK-NOT: !dbg
+            CHECK: %[[VAL_6:.*]] = alloca i8*
+            CHECK-NOT: !dbg
+            CHECK: store i8* null, i8** %[[VAL_6]]
+            CHECK-NOT: !dbg
+            CHECK: %[[VAL_7:.*]] = alloca i8*
+            CHECK-NOT: !dbg
+            CHECK: store i8* null, i8** %[[VAL_7]]
+            CHECK-NOT: !dbg
+            CHECK: br label %"[[ENTRY:.+]]"
+            CHECK-NOT: !dbg
+            CHECK: [[ENTRY]]:
+            CHECK: fadd{{.+}} !dbg ![[DBGADD:[0-9]+]]
+            CHECK: fmul{{.+}} !dbg ![[DBGMUL:[0-9]+]]
+            CHECK: fdiv{{.+}} !dbg ![[DBGDIV:[0-9]+]]
+            CHECK: ![[DBGADD]] = !DILocation
+            CHECK: ![[DBGMUL]] = !DILocation
+            CHECK: ![[DBGDIV]] = !DILocation
+            """
+            b = a + 1.23
+            c = b * 2.34
+            a = b / c
+        ir = foo.inspect_llvm()[sig]
+        self.assertFileCheckMatches(ir, foo.__doc__)
+    def test_DITypes(self):
+        """Tests that DITypes are emitted for the types used in the kernel."""
+        sig = (
+            types.float32,
+            types.float64,
+            types.int8,
+            types.int16,
+            types.int32,
+            types.int64,
+            types.uint8,
+            types.uint16,
+            types.uint32,
+            types.uint64,
+            types.complex64,
+            types.complex128,
+        )
+        @cuda.jit(sig, debug=True)
+        def foo(a, b, c, d, e, f, g, h, i, j, k, l):
+            """
+            CHECK: [[DBG1:.+]] = !DIBasicType(encoding: DW_ATE_boolean, name: "i8", size: 8)
+            CHECK: [[DBG2:.+]] = !DIDerivedType(
+            CHECK-SAME: baseType: [[DBG1]]
+            CHECK-SAME: size: 64
+            CHECK-SAME: tag: DW_TAG_pointer_type
+            CHECK: [[DBG3:.+]] = !DIDerivedType(
+            CHECK-SAME: baseType: [[DBG2]]
+            CHECK-SAME: size: 64
+            CHECK-SAME: tag: DW_TAG_pointer_type
+            CHECK: [[DBG4:.+]] = !DIBasicType(encoding: DW_ATE_float, name: "float32", size: 32)
+            CHECK: [[DBG5:.+]] = !DIBasicType(encoding: DW_ATE_float, name: "float64", size: 64)
+            CHECK: [[DBG6:.+]] = !DIBasicType(encoding: DW_ATE_signed, name: "int8", size: 8)
+            CHECK: [[DBG7:.+]] = !DIBasicType(encoding: DW_ATE_signed, name: "int16", size: 16)
+            CHECK: [[DBG8:.+]] = !DIBasicType(encoding: DW_ATE_signed, name: "int32", size: 32)
+            CHECK: [[DBG9:.+]] = !DIBasicType(encoding: DW_ATE_signed, name: "int64", size: 64)
+            CHECK: [[DBG10:.+]] = !DIBasicType(encoding: DW_ATE_unsigned, name: "uint8", size: 8)
+            CHECK: [[DBG11:.+]] = !DIBasicType(encoding: DW_ATE_unsigned, name: "uint16", size: 16)
+            CHECK: [[DBG12:.+]] = !DIBasicType(encoding: DW_ATE_unsigned, name: "uint32", size: 32)
+            CHECK: [[DBG13:.+]] = !DIBasicType(encoding: DW_ATE_unsigned, name: "uint64", size: 64)
+            CHECK: [[DBG14:.+]] = !DIBasicType(encoding: DW_ATE_float, name: "float", size: 32)
+            CHECK: [[DBG15:.+]] = !DIDerivedType(
+            CHECK-SAME: baseType: [[DBG14]]
+            CHECK-SAME: name: "real"
+            CHECK-SAME: offset: 0
+            CHECK-SAME: size: 32
+            CHECK-SAME: tag: DW_TAG_member
+            CHECK: [[DBG16:.+]] = !DIBasicType(encoding: DW_ATE_float, name: "float", size: 32)
+            CHECK: [[DBG17:.+]] = !DIDerivedType(
+            CHECK-SAME: baseType: [[DBG16]]
+            CHECK-SAME: name: "imag"
+            CHECK-SAME: offset: 32
+            CHECK-SAME: size: 32
+            CHECK-SAME: tag: DW_TAG_member
+            CHECK: [[DBG18:.+]] = !{ [[DBG15]], [[DBG17]] }
+            CHECK: [[DBG19:.+]] = distinct !DICompositeType(
+            CHECK-SAME: elements: [[DBG18]]
+            CHECK-SAME: name: "complex64 ({float, float})"
+            CHECK-SAME: size: 64
+            CHECK-SAME: tag: DW_TAG_structure_type
+            CHECK: [[DBG20:.+]] = !DIBasicType(encoding: DW_ATE_float, name: "double", size: 64)
+            CHECK: [[DBG21:.+]] = !DIDerivedType(
+            CHECK-SAME: baseType: [[DBG20]]
+            CHECK-SAME: name: "real"
+            CHECK-SAME: offset: 0
+            CHECK-SAME: size: 64
+            CHECK-SAME: tag: DW_TAG_member
+            CHECK: [[DBG22:.+]] = !DIBasicType(encoding: DW_ATE_float, name: "double", size: 64)
+            CHECK: [[DBG23:.+]] = !DIDerivedType(
+            CHECK-SAME: baseType: [[DBG22]]
+            CHECK-SAME: name: "imag"
+            CHECK-SAME: offset: 64
+            CHECK-SAME: size: 64
+            CHECK-SAME: tag: DW_TAG_member
+            CHECK: [[DBG24:.+]] = !{ [[DBG21]], [[DBG23]] }
+            CHECK: [[DBG25:.+]] = distinct !DICompositeType(
+            CHECK-SAME: elements: [[DBG24]]
+            CHECK-SAME: name: "complex128 ({double, double})"
+            CHECK-SAME: size: 128
+            CHECK-SAME: tag: DW_TAG_structure_type
+            CHECK: [[DBG32:.+]] = !DIBasicType(encoding: DW_ATE_float, name: "float32", size: 32)
+            CHECK: [[DBG33:.+]] = !DILocalVariable(
+            CHECK-SAME: name: "a"
+            CHECK-SAME: type: [[DBG32]]
+            CHECK: [[DBG34:.+]] = !DIExpression()
+            CHECK: [[DBG35:.+]] = !DIBasicType(encoding: DW_ATE_float, name: "float64", size: 64)
+            CHECK: [[DBG36:.+]] = !DILocalVariable(
+            CHECK-SAME: name: "b"
+            CHECK-SAME: type: [[DBG35]]
+            CHECK: [[DBG37:.+]] = !DIBasicType(encoding: DW_ATE_signed, name: "int8", size: 8)
+            CHECK: [[DBG38:.+]] = !DILocalVariable(
+            CHECK-SAME: name: "c"
+            CHECK-SAME: type: [[DBG37]]
+            CHECK: [[DBG39:.+]] = !DIBasicType(encoding: DW_ATE_signed, name: "int16", size: 16)
+            CHECK: [[DBG40:.+]] = !DILocalVariable(
+            CHECK-SAME: name: "d"
+            CHECK-SAME: type: [[DBG39]]
+            CHECK: [[DBG41:.+]] = !DIBasicType(encoding: DW_ATE_signed, name: "int32", size: 32)
+            CHECK: [[DBG42:.+]] = !DILocalVariable(
+            CHECK-SAME: name: "e"
+            CHECK-SAME: type: [[DBG41]]
+            CHECK: [[DBG43:.+]] = !DIBasicType(encoding: DW_ATE_signed, name: "int64", size: 64)
+            CHECK: [[DBG44:.+]] = !DILocalVariable(
+            CHECK-SAME: name: "f"
+            CHECK-SAME: type: [[DBG43]]
+            CHECK: [[DBG45:.+]] = !DIBasicType(encoding: DW_ATE_unsigned, name: "uint8", size: 8)
+            CHECK: [[DBG46:.+]] = !DILocalVariable(
+            CHECK-SAME: name: "g"
+            CHECK-SAME: type: [[DBG45]]
+            CHECK: [[DBG47:.+]] = !DIBasicType(encoding: DW_ATE_unsigned, name: "uint16", size: 16)
+            CHECK: [[DBG48:.+]] = !DILocalVariable(
+            CHECK-SAME: name: "h"
+            CHECK-SAME: type: [[DBG47]]
+            CHECK: [[DBG49:.+]] = !DIBasicType(encoding: DW_ATE_unsigned, name: "uint32", size: 32)
+            CHECK: [[DBG50:.+]] = !DILocalVariable(
+            CHECK-SAME: name: "i"
+            CHECK-SAME: type: [[DBG49]]
+            CHECK: [[DBG51:.+]] = !DIBasicType(encoding: DW_ATE_unsigned, name: "uint64", size: 64)
+            CHECK: [[DBG52:.+]] = !DILocalVariable(
+            CHECK-SAME: name: "j"
+            CHECK-SAME: type: [[DBG51]]
+            CHECK: [[DBG53:.+]] = !DIBasicType(encoding: DW_ATE_float, name: "float", size: 32)
+            CHECK: [[DBG54:.+]] = !DIDerivedType(
+            CHECK-SAME: baseType: [[DBG53]]
+            CHECK-SAME: name: "real"
+            CHECK-SAME: offset: 0
+            CHECK-SAME: size: 32
+            CHECK-SAME: tag: DW_TAG_member
+            CHECK: [[DBG55:.+]] = !DIBasicType(encoding: DW_ATE_float, name: "float", size: 32)
+            CHECK: [[DBG56:.+]] = !DIDerivedType(
+            CHECK-SAME: baseType: [[DBG55]]
+            CHECK-SAME: name: "imag"
+            CHECK-SAME: offset: 32
+            CHECK-SAME: size: 32
+            CHECK-SAME: tag: DW_TAG_member
+            CHECK: [[DBG57:.+]] = !{ [[DBG54]], [[DBG56]] }
+            CHECK: [[DBG58:.+]] = distinct !DICompositeType(
+            CHECK-SAME: elements: [[DBG57]]
+            CHECK-SAME: name: "complex64 ({float, float})"
+            CHECK-SAME: size: 64
+            CHECK-SAME: tag: DW_TAG_structure_type
+            CHECK: [[DBG59:.+]] = !DILocalVariable(
+            CHECK-SAME: name: "k"
+            CHECK-SAME: type: [[DBG58]]
+            CHECK: [[DBG60:.+]] = !DIBasicType(encoding: DW_ATE_float, name: "double", size: 64)
+            CHECK: [[DBG61:.+]] = !DIDerivedType(
+            CHECK-SAME: baseType: [[DBG60]]
+            CHECK-SAME: name: "real"
+            CHECK-SAME: offset: 0
+            CHECK-SAME: size: 64
+            CHECK-SAME: tag: DW_TAG_member
+            CHECK: [[DBG62:.+]] = !DIBasicType(encoding: DW_ATE_float, name: "double", size: 64)
+            CHECK: [[DBG63:.+]] = !DIDerivedType(
+            CHECK-SAME: baseType: [[DBG62]]
+            CHECK-SAME: name: "imag"
+            CHECK-SAME: offset: 64
+            CHECK-SAME: size: 64
+            CHECK-SAME: tag: DW_TAG_member
+            CHECK: [[DBG64:.+]] = !{ [[DBG61]], [[DBG63]] }
+            CHECK: [[DBG65:.+]] = distinct !DICompositeType(
+            CHECK-SAME: elements: [[DBG64]]
+            CHECK-SAME: name: "complex128 ({double, double})"
+            CHECK-SAME: size: 128
+            CHECK-SAME: tag: DW_TAG_structure_type
+            CHECK: [[DBG66:.+]] = !DILocalVariable(
+            CHECK-SAME: name: "l"
+            CHECK-SAME: type: [[DBG65]]
+            """
+            pass
+        ir = foo.inspect_llvm()[sig]
+        self.assertFileCheckMatches(ir, foo.__doc__)
+    def test_arrays(self):
+        """Tests that arrays are emitted as DIBasicType."""
+        sig = (types.float32[::1],)
+        @cuda.jit(sig, debug=True)
+        def foo(a):
+            """
+            CHECK: distinct !DICompileUnit
+            CHECK: distinct !DISubprogram
+            CHECK: [[DBG127:.+]] = !DIBasicType(encoding: DW_ATE_boolean, name: "i8", size: 8)
+            CHECK: [[DBG128:.+]] = !DIDerivedType(
+            CHECK-SAME: baseType: [[DBG127]]
+            CHECK-SAME: size: 64
+            CHECK-SAME: tag: DW_TAG_pointer_type
+            CHECK-SAME: )
+            CHECK: [[DBG129:.+]] = !DIDerivedType(
+            CHECK-SAME: baseType: [[DBG128]]
+            CHECK-SAME: name: "meminfo"
+            CHECK-SAME: offset: 0
+            CHECK-SAME: size: 64
+            CHECK-SAME: tag: DW_TAG_member
+            CHECK-SAME: )
+            CHECK: [[DBG130:.+]] = !DIBasicType(encoding: DW_ATE_boolean, name: "i8", size: 8)
+            CHECK: [[DBG131:.+]] = !DIDerivedType(
+            CHECK-SAME: baseType: [[DBG130]]
+            CHECK-SAME: size: 64
+            CHECK-SAME: tag: DW_TAG_pointer_type
+            CHECK-SAME: )
+            CHECK: [[DBG132:.+]] = !DIDerivedType(
+            CHECK-SAME: baseType: [[DBG131]]
+            CHECK-SAME: name: "parent"
+            CHECK-SAME: offset: 64
+            CHECK-SAME: size: 64
+            CHECK-SAME: tag: DW_TAG_member
+            CHECK-SAME: )
+            CHECK: [[DBG133:.+]] = !DIBasicType(encoding: DW_ATE_signed, name: "int64", size: 64)
+            CHECK: [[DBG134:.+]] = !DIDerivedType(
+            CHECK-SAME: baseType: [[DBG133]]
+            CHECK-SAME: name: "nitems"
+            CHECK-SAME: offset: 128
+            CHECK-SAME: size: 64
+            CHECK-SAME: tag: DW_TAG_member
+            CHECK-SAME: )
+            CHECK: [[DBG135:.+]] = !DIBasicType(encoding: DW_ATE_signed, name: "int64", size: 64)
+            CHECK: [[DBG136:.+]] = !DIDerivedType(
+            CHECK-SAME: baseType: [[DBG135]]
+            CHECK-SAME: name: "itemsize"
+            CHECK-SAME: offset: 192
+            CHECK-SAME: size: 64
+            CHECK-SAME: tag: DW_TAG_member
+            CHECK-SAME: )
+            CHECK: [[DBG137:.+]] = !DIBasicType(encoding: DW_ATE_float, name: "float32", size: 32)
+            CHECK: [[DBG138:.+]] = !DIDerivedType(
+            CHECK-SAME: baseType: [[DBG137]]
+            CHECK-SAME: size: 64
+            CHECK-SAME: tag: DW_TAG_pointer_type
+            CHECK-SAME: )
+            CHECK: [[DBG139:.+]] = !DIDerivedType(
+            CHECK-SAME: baseType: [[DBG138]]
+            CHECK-SAME: name: "data"
+            CHECK-SAME: offset: 256
+            CHECK-SAME: size: 64
+            CHECK-SAME: tag: DW_TAG_member
+            CHECK-SAME: )
+            CHECK: [[DBG140:.+]] = !DIBasicType(encoding: DW_ATE_unsigned, name: "i64", size: 64)
+            CHECK: [[DBG141:.+]] = !DICompositeType(
+            CHECK-SAME: baseType: [[DBG140]]
+            CHECK-SAME: identifier: "[1 x i64]"
+            CHECK-SAME: name: "UniTuple(int64 x 1) ([1 x i64])"
+            CHECK-SAME: tag: DW_TAG_array_type
+            CHECK-SAME: )
+            CHECK: [[DBG142:.+]] = !DIDerivedType(
+            CHECK-SAME: baseType: [[DBG141]]
+            CHECK-SAME: name: "shape"
+            CHECK-SAME: offset: 320
+            CHECK-SAME: size: 64
+            CHECK-SAME: tag: DW_TAG_member
+            CHECK-SAME: )
+            CHECK: [[DBG143:.+]] = !DIBasicType(encoding: DW_ATE_unsigned, name: "i64", size: 64)
+            CHECK: [[DBG144:.+]] = !DICompositeType(
+            CHECK-SAME: baseType: [[DBG143]]
+            CHECK-SAME: identifier: "[1 x i64]"
+            CHECK-SAME: name: "UniTuple(int64 x 1) ([1 x i64])"
+            CHECK-SAME: size: 64
+            CHECK-SAME: tag: DW_TAG_array_type
+            CHECK-SAME: )
+            CHECK: [[DBG145:.+]] = !DIDerivedType(
+            CHECK-SAME: baseType: [[DBG144]]
+            CHECK-SAME: name: "strides"
+            CHECK-SAME: offset: 384
+            CHECK-SAME: size: 64
+            CHECK-SAME: tag: DW_TAG_member
+            CHECK-SAME: )
+            CHECK: [[DBG146:.+]] = !{ [[DBG129]], [[DBG132]], [[DBG134]], [[DBG136]], [[DBG139]], [[DBG142]], [[DBG145]] }
+            CHECK: [[DBG147:.+]] = distinct !DICompositeType(
+            CHECK-SAME: elements: [[DBG146]]
+            CHECK-SAME: identifier: "{i8*, i8*, i64, i64, float*, [1 x i64], [1 x i64]}"
+            CHECK-SAME: name: "array(float32, 1d, C) ({i8*, i8*, i64, i64, float*, [1 x i64], [1 x i64]})"
+            CHECK-SAME: size: 448
+            CHECK-SAME: tag: DW_TAG_structure_type
+            CHECK-SAME: )
+            CHECK: !DILocalVariable(
+            CHECK-SAME: name: "a"
+            CHECK-SAME: type: [[DBG147]]
+            CHECK-SAME: )
+            """
+            pass
+        ir = foo.inspect_llvm()[sig]
+        self.assertFileCheckMatches(ir, foo.__doc__)
 if __name__ == "__main__":
     unittest.main()

numba_cuda/numba/cuda/tests/cudapy/test_enums.py CHANGED Viewed

@@ -8,7 +8,7 @@ from numba import int16, int32
 from numba import cuda, vectorize, njit
 from numba.core import types
 from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
-from numba.tests.enum_usecases import (
+from numba.cuda.tests.enum_usecases import (
     Color,
     Shape,
     Planet,

numba_cuda/numba/cuda/tests/cudapy/test_extending.py CHANGED Viewed

@@ -36,7 +36,7 @@ def sum_intervals(i, j):
 if not config.ENABLE_CUDASIM:
-    from numba.core import cgutils
+    from numba.cuda import cgutils
     from numba.core.extending import (
         lower_builtin,
         models,

numba_cuda/numba/cuda/tests/cudapy/test_inspect.py CHANGED Viewed

@@ -1,12 +1,18 @@
+import re
+import cffi
 import numpy as np
 from io import StringIO
 from numba import cuda, float32, float64, int32, intp
+from numba.types import float16, CPointer
+from numba.cuda import declare_device
 from numba.cuda.testing import unittest, CUDATestCase
 from numba.cuda.testing import (
     skip_on_cudasim,
     skip_with_nvdisasm,
     skip_without_nvdisasm,
+    skip_if_nvjitlink_missing,
 )
@@ -21,6 +27,17 @@ class TestInspect(CUDATestCase):
         @cuda.jit(sig)
         def foo(x, y):
+            """
+            // LLVM: define void
+            // LLVM-SAME: foo
+            // LLVM-LABEL: entry:
+            // LLVM-NEXT:         br label %"[[VAL_0:.*]]"
+            // LLVM-NEXT:       [[VAL_0]]:
+            // LLVM-NEXT:         ret void
+            // ASM: Generated by NVIDIA NVVM Compiler
+            // ASM: foo
+            """
             pass
         file = StringIO()
@@ -31,28 +48,43 @@ class TestInspect(CUDATestCase):
         # Signature in annotation
         self.assertIn("(float32, int32)", typeanno)
         file.close()
-        # Function name in LLVM
-        llvm = foo.inspect_llvm(sig)
-        self.assertIn("foo", llvm)
-        # Kernel in LLVM
-        self.assertIn("define void @", llvm)
-        asm = foo.inspect_asm(sig)
-        # Function name in PTX
-        self.assertIn("foo", asm)
-        # NVVM inserted comments in PTX
-        self.assertIn("Generated by NVIDIA NVVM Compiler", asm)
+        self.assertFileCheckLLVM(foo, sig)
+        self.assertFileCheckAsm(foo, sig)
     def test_polytyped(self):
         @cuda.jit
         def foo(x, y):
+            """
+            // LLVM: define void
+            // LLVM-SAME: foo
+            // LLVM_INT-SAME: i64
+            // LLVM_INT-SAME: i64
+            // LLVM_FLOAT-SAME: double
+            // LLVM_FLOAT-SAME: double
+            // ASM: Generated by NVIDIA NVVM Compiler
+            // ASM: .visible
+            // ASM-SAME: .entry
+            // ASM-SAME: foo
+            """
             pass
         foo[1, 1](1, 1)
         foo[1, 1](1.2, 2.4)
+        int_sig = (intp, intp)
+        float_sig = (float64, float64)
+        self.assertFileCheckLLVM(
+            foo, int_sig, check_prefixes=["LLVM", "LLVM_INT"]
+        )
+        self.assertFileCheckAsm(foo, int_sig, check_prefixes=["ASM"])
+        self.assertFileCheckLLVM(
+            foo, float_sig, check_prefixes=["LLVM", "LLVM_FLOAT"]
+        )
+        self.assertFileCheckAsm(foo, float_sig, check_prefixes=["ASM"])
         file = StringIO()
         foo.inspect_types(file=file)
         typeanno = file.getvalue()
@@ -70,14 +102,6 @@ class TestInspect(CUDATestCase):
         self.assertIn((intp, intp), llvmirs)
         self.assertIn((float64, float64), llvmirs)
-        # Function name in LLVM
-        self.assertIn("foo", llvmirs[intp, intp])
-        self.assertIn("foo", llvmirs[float64, float64])
-        # Kernels in LLVM
-        self.assertIn("define void @", llvmirs[intp, intp])
-        self.assertIn("define void @", llvmirs[float64, float64])
         asmdict = foo.inspect_asm()
         # Signature in assembly dict
@@ -88,10 +112,6 @@ class TestInspect(CUDATestCase):
         self.assertIn((intp, intp), asmdict)
         self.assertIn((float64, float64), asmdict)
-        # NVVM inserted in PTX
-        self.assertIn("foo", asmdict[intp, intp])
-        self.assertIn("foo", asmdict[float64, float64])
     def _test_inspect_sass(self, kernel, name, sass):
         # Ensure function appears in output
         seen_function = False
@@ -108,6 +128,56 @@ class TestInspect(CUDATestCase):
         self.assertIn("BRA", sass)  # Branch
         self.assertIn("EXIT", sass)  # Exit program
+    @skip_on_cudasim("Simulator does not generate code to be inspected")
+    @skip_if_nvjitlink_missing("nvJitLink is required for LTO")
+    def test_inspect_lto_asm(self):
+        ffi = cffi.FFI()
+        ext = cuda.CUSource("""
+            #include <cuda_fp16.h>
+            extern "C"
+            __device__ int add_f2_f2(__half * res, __half * a, __half *b) {
+                *res = *a + *b;
+                return 0;
+            }
+            """)
+        add = declare_device(
+            "add_f2_f2",
+            float16(CPointer(float16), CPointer(float16)),
+            link=ext,
+        )
+        @cuda.jit
+        def k(arr):
+            local_arr = cuda.local.array(shape=1, dtype=np.float16)
+            local_arr2 = cuda.local.array(shape=1, dtype=np.float16)
+            local_arr[0] = 1
+            local_arr2[0] = 2
+            ptr = ffi.from_buffer(local_arr)
+            ptr2 = ffi.from_buffer(local_arr2)
+            arr[0] = add(ptr, ptr2)
+        arr = np.array([0], dtype=np.float16)
+        k[1, 1](arr)
+        allasms = k.inspect_asm()
+        asm = next(iter(allasms.values()))
+        regex = re.compile(r"call(.|\n)*add_f2_f2")
+        self.assertRegex(asm, regex)
+        all_ext_asms = k.inspect_lto_ptx()
+        lto_asm = next(iter(all_ext_asms.values()))
+        self.assertIn("add.f16", lto_asm)
+        self.assertNotIn("call", lto_asm)
+        np.testing.assert_equal(arr[0], np.float16(1) + np.float16(2))
     @skip_without_nvdisasm("nvdisasm needed for inspect_sass()")
     def test_inspect_sass_eager(self):
         sig = (float32[::1], int32[::1])

numba-cuda 0.16.0__py3-none-any.whl → 0.18.0__py3-none-any.whl

numba-cuda 0.16.0__py3-none-any.whl → 0.18.0__py3-none-any.whl