PyPI - numba-cuda - Versions diffs - 0.17.0__py3-none-any.whl → 0.18.0__py3-none-any.whl - Mend

numba-cuda 0.17.0__py3-none-any.whl → 0.18.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (62)

numba_cuda/VERSION +1 -1
numba_cuda/numba/cuda/__init__.py +0 -8
numba_cuda/numba/cuda/_internal/cuda_fp16.py +14225 -0
numba_cuda/numba/cuda/api_util.py +6 -0
numba_cuda/numba/cuda/cgutils.py +1291 -0
numba_cuda/numba/cuda/codegen.py +32 -14
numba_cuda/numba/cuda/compiler.py +113 -10
numba_cuda/numba/cuda/core/caching.py +741 -0
numba_cuda/numba/cuda/core/callconv.py +338 -0
numba_cuda/numba/cuda/core/codegen.py +168 -0
numba_cuda/numba/cuda/core/compiler.py +205 -0
numba_cuda/numba/cuda/core/typed_passes.py +139 -0
numba_cuda/numba/cuda/cudadecl.py +0 -268
numba_cuda/numba/cuda/cudadrv/devicearray.py +3 -0
numba_cuda/numba/cuda/cudadrv/driver.py +2 -1
numba_cuda/numba/cuda/cudadrv/nvvm.py +1 -1
numba_cuda/numba/cuda/cudaimpl.py +4 -178
numba_cuda/numba/cuda/debuginfo.py +469 -3
numba_cuda/numba/cuda/device_init.py +0 -1
numba_cuda/numba/cuda/dispatcher.py +309 -11
numba_cuda/numba/cuda/extending.py +2 -1
numba_cuda/numba/cuda/fp16.py +348 -0
numba_cuda/numba/cuda/intrinsics.py +1 -1
numba_cuda/numba/cuda/libdeviceimpl.py +2 -1
numba_cuda/numba/cuda/lowering.py +1833 -8
numba_cuda/numba/cuda/mathimpl.py +2 -90
numba_cuda/numba/cuda/nvvmutils.py +2 -1
numba_cuda/numba/cuda/printimpl.py +2 -1
numba_cuda/numba/cuda/serialize.py +264 -0
numba_cuda/numba/cuda/simulator/__init__.py +2 -0
numba_cuda/numba/cuda/simulator/dispatcher.py +7 -0
numba_cuda/numba/cuda/stubs.py +0 -308
numba_cuda/numba/cuda/target.py +13 -5
numba_cuda/numba/cuda/testing.py +156 -5
numba_cuda/numba/cuda/tests/complex_usecases.py +113 -0
numba_cuda/numba/cuda/tests/core/serialize_usecases.py +110 -0
numba_cuda/numba/cuda/tests/core/test_serialize.py +359 -0
numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +33 -0
numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +2 -2
numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +1 -0
numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +1 -1
numba_cuda/numba/cuda/tests/cudapy/test_caching.py +5 -10
numba_cuda/numba/cuda/tests/cudapy/test_complex.py +1 -1
numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +381 -0
numba_cuda/numba/cuda/tests/cudapy/test_enums.py +1 -1
numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1 -1
numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +94 -24
numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +37 -23
numba_cuda/numba/cuda/tests/cudapy/test_operator.py +43 -27
numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +26 -9
numba_cuda/numba/cuda/tests/cudapy/test_warning.py +27 -2
numba_cuda/numba/cuda/tests/enum_usecases.py +56 -0
numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +1 -2
numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +1 -1
numba_cuda/numba/cuda/utils.py +785 -0
numba_cuda/numba/cuda/vector_types.py +1 -1
{numba_cuda-0.17.0.dist-info → numba_cuda-0.18.0.dist-info}/METADATA +18 -4
{numba_cuda-0.17.0.dist-info → numba_cuda-0.18.0.dist-info}/RECORD +61 -48
numba_cuda/numba/cuda/cpp_function_wrappers.cu +0 -46
{numba_cuda-0.17.0.dist-info → numba_cuda-0.18.0.dist-info}/WHEEL +0 -0
{numba_cuda-0.17.0.dist-info → numba_cuda-0.18.0.dist-info}/licenses/LICENSE +0 -0
{numba_cuda-0.17.0.dist-info → numba_cuda-0.18.0.dist-info}/top_level.txt +0 -0

numba_cuda/numba/cuda/stubs.py CHANGED Viewed

@@ -542,314 +542,6 @@ class nanosleep(Stub):
     _description_ = "<nansleep()>"
-# -------------------------------------------------------------------------------
-# Floating point 16
-class fp16(Stub):
-    """Namespace for fp16 operations"""
-    _description_ = "<fp16>"
-    class hadd(Stub):
-        """hadd(a, b)
-        Perform fp16 addition, (a + b) in round to nearest mode. Supported
-        on fp16 operands only.
-        Returns the fp16 result of the addition.
-        """
-    class hsub(Stub):
-        """hsub(a, b)
-        Perform fp16 subtraction, (a - b) in round to nearest mode. Supported
-        on fp16 operands only.
-        Returns the fp16 result of the subtraction.
-        """
-    class hmul(Stub):
-        """hmul(a, b)
-        Perform fp16 multiplication, (a * b) in round to nearest mode. Supported
-        on fp16 operands only.
-        Returns the fp16 result of the multiplication.
-        """
-    class hdiv(Stub):
-        """hdiv(a, b)
-        Perform fp16 division, (a / b) in round to nearest mode. Supported
-        on fp16 operands only.
-        Returns the fp16 result of the division
-        """
-    class hfma(Stub):
-        """hfma(a, b, c)
-        Perform fp16 multiply and accumulate, (a * b) + c in round to nearest
-        mode. Supported on fp16 operands only.
-        Returns the fp16 result of the multiplication.
-        """
-    class hneg(Stub):
-        """hneg(a)
-        Perform fp16 negation, -(a). Supported on fp16 operands only.
-        Returns the fp16 result of the negation.
-        """
-    class habs(Stub):
-        """habs(a)
-        Perform fp16 absolute value, |a|. Supported on fp16 operands only.
-        Returns the fp16 result of the absolute value.
-        """
-    class hsin(Stub):
-        """hsin(a)
-        Calculate sine in round to nearest even mode. Supported on fp16
-        operands only.
-        Returns the sine result.
-        """
-    class hcos(Stub):
-        """hsin(a)
-        Calculate cosine in round to nearest even mode. Supported on fp16
-        operands only.
-        Returns the cosine result.
-        """
-    class hlog(Stub):
-        """hlog(a)
-        Calculate natural logarithm in round to nearest even mode. Supported
-        on fp16 operands only.
-        Returns the natural logarithm result.
-        """
-    class hlog10(Stub):
-        """hlog10(a)
-        Calculate logarithm base 10 in round to nearest even mode. Supported
-        on fp16 operands only.
-        Returns the logarithm base 10 result.
-        """
-    class hlog2(Stub):
-        """hlog2(a)
-        Calculate logarithm base 2 in round to nearest even mode. Supported
-        on fp16 operands only.
-        Returns the logarithm base 2 result.
-        """
-    class hexp(Stub):
-        """hexp(a)
-        Calculate natural exponential, exp(a), in round to nearest mode.
-        Supported on fp16 operands only.
-        Returns the natural exponential result.
-        """
-    class hexp10(Stub):
-        """hexp10(a)
-        Calculate exponential base 10 (10 ** a) in round to nearest mode.
-        Supported on fp16 operands only.
-        Returns the exponential base 10 result.
-        """
-    class hexp2(Stub):
-        """hexp2(a)
-        Calculate exponential base 2 (2 ** a) in round to nearest mode.
-        Supported on fp16 operands only.
-        Returns the exponential base 2 result.
-        """
-    class hfloor(Stub):
-        """hfloor(a)
-        Calculate the floor, the largest integer less than or equal to 'a'.
-        Supported on fp16 operands only.
-        Returns the floor result.
-        """
-    class hceil(Stub):
-        """hceil(a)
-        Calculate the ceil, the smallest integer greater than or equal to 'a'.
-        Supported on fp16 operands only.
-        Returns the ceil result.
-        """
-    class hsqrt(Stub):
-        """hsqrt(a)
-        Calculate the square root of the input argument in round to nearest
-        mode. Supported on fp16 operands only.
-        Returns the square root result.
-        """
-    class hrsqrt(Stub):
-        """hrsqrt(a)
-        Calculate the reciprocal square root of the input argument in round
-        to nearest even mode. Supported on fp16 operands only.
-        Returns the reciprocal square root result.
-        """
-    class hrcp(Stub):
-        """hrcp(a)
-        Calculate the reciprocal of the input argument in round to nearest
-        even mode. Supported on fp16 operands only.
-        Returns the reciprocal result.
-        """
-    class hrint(Stub):
-        """hrint(a)
-        Round the input argument to nearest integer value. Supported on fp16
-        operands only.
-        Returns the rounded result.
-        """
-    class htrunc(Stub):
-        """htrunc(a)
-        Truncate the input argument to its integer portion. Supported
-        on fp16 operands only.
-        Returns the truncated result.
-        """
-    class heq(Stub):
-        """heq(a, b)
-        Perform fp16 comparison, (a == b). Supported
-        on fp16 operands only.
-        Returns True if a and b are equal and False otherwise.
-        """
-    class hne(Stub):
-        """hne(a, b)
-        Perform fp16 comparison, (a != b). Supported
-        on fp16 operands only.
-        Returns True if a and b are not equal and False otherwise.
-        """
-    class hge(Stub):
-        """hge(a, b)
-        Perform fp16 comparison, (a >= b). Supported
-        on fp16 operands only.
-        Returns True if a is >= b and False otherwise.
-        """
-    class hgt(Stub):
-        """hgt(a, b)
-        Perform fp16 comparison, (a > b). Supported
-        on fp16 operands only.
-        Returns True if a is > b and False otherwise.
-        """
-    class hle(Stub):
-        """hle(a, b)
-        Perform fp16 comparison, (a <= b). Supported
-        on fp16 operands only.
-        Returns True if a is <= b and False otherwise.
-        """
-    class hlt(Stub):
-        """hlt(a, b)
-        Perform fp16 comparison, (a < b). Supported
-        on fp16 operands only.
-        Returns True if a is < b and False otherwise.
-        """
-    class hmax(Stub):
-        """hmax(a, b)
-        Perform fp16 maximum operation, max(a,b) Supported
-        on fp16 operands only.
-        Returns a if a is greater than b, returns b otherwise.
-        """
-    class hmin(Stub):
-        """hmin(a, b)
-        Perform fp16 minimum operation, min(a,b). Supported
-        on fp16 operands only.
-        Returns a if a is less than b, returns b otherwise.
-        """
 # -------------------------------------------------------------------------------
 # vector types

numba_cuda/numba/cuda/target.py CHANGED Viewed

@@ -3,9 +3,8 @@ from functools import cached_property
 import llvmlite.binding as ll
 from llvmlite import ir
 import warnings
+from numba.cuda import cgutils
 from numba.core import (
-    cgutils,
     compiler,
     config,
     itanium_mangler,
@@ -17,7 +16,7 @@ from numba.core.compiler_lock import global_compiler_lock
 from numba.core.dispatcher import Dispatcher
 from numba.core.errors import NumbaWarning
 from numba.core.base import BaseContext
-from numba.core.callconv import BaseCallConv, MinimalCallConv
+from numba.cuda.core.callconv import BaseCallConv, MinimalCallConv
 from numba.core.typing import cmathdecl
 from numba.core import datamodel
@@ -33,7 +32,7 @@ from numba.cuda.models import cuda_data_manager
 class CUDATypingContext(typing.BaseContext):
     def load_additional_registries(self):
-        from . import cudadecl, cudamath, libdevicedecl, vector_types
+        from . import cudadecl, cudamath, fp16, libdevicedecl, vector_types
         from numba.core.typing import enumdecl, cffi_utils
         self.install_registry(cudadecl.registry)
@@ -43,6 +42,7 @@ class CUDATypingContext(typing.BaseContext):
         self.install_registry(libdevicedecl.registry)
         self.install_registry(enumdecl.registry)
         self.install_registry(vector_types.typing_registry)
+        self.install_registry(fp16.typing_registry)
     def resolve_value_type(self, val):
         # treat other dispatcher object as another device function
@@ -148,7 +148,14 @@ class CUDATargetContext(BaseContext):
         from numba.misc import cffiimpl
         from numba.np import arrayobj  # noqa: F401
         from numba.np import npdatetime  # noqa: F401
-        from . import cudaimpl, printimpl, libdeviceimpl, mathimpl, vector_types
+        from . import (
+            cudaimpl,
+            fp16,
+            printimpl,
+            libdeviceimpl,
+            mathimpl,
+            vector_types,
+        )
         # fix for #8940
         from numba.np.unsafe import ndarray  # noqa F401
@@ -160,6 +167,7 @@ class CUDATargetContext(BaseContext):
         self.install_registry(cmathimpl.registry)
         self.install_registry(mathimpl.registry)
         self.install_registry(vector_types.impl_registry)
+        self.install_registry(fp16.target_registry)
     def codegen(self):
         return self._internal_codegen

numba_cuda/numba/cuda/testing.py CHANGED Viewed

@@ -1,25 +1,44 @@
 import os
 import platform
 import shutil
-from numba.tests.support import SerialMixin
+import pytest
+from datetime import datetime
+from numba.core.utils import PYVERSION
 from numba.cuda.cuda_paths import get_conda_ctk
 from numba.cuda.cudadrv import driver, devices, libs
+from numba.cuda.dispatcher import CUDADispatcher
 from numba.core import config
 from numba.tests.support import TestCase
 from pathlib import Path
+from typing import Iterable, Union
+from io import StringIO
 import unittest
+if PYVERSION >= (3, 10):
+    from filecheck.matcher import Matcher
+    from filecheck.options import Options
+    from filecheck.parser import Parser, pattern_for_opts
+    from filecheck.finput import FInput
 numba_cuda_dir = Path(__file__).parent
 test_data_dir = numba_cuda_dir / "tests" / "data"
-class CUDATestCase(SerialMixin, TestCase):
+@pytest.mark.usefixtures("initialize_from_pytest_config")
+class CUDATestCase(TestCase):
     """
     For tests that use a CUDA device. Test methods in a CUDATestCase must not
     be run out of module order, because the ContextResettingTestCase may reset
     the context and destroy resources used by a normal CUDATestCase if any of
     its tests are run between tests from a CUDATestCase.
+    Methods assertFileCheckAsm and assertFileCheckLLVM will inspect a
+    CUDADispatcher and assert that the compilation artifacts match the
+    FileCheck checks given in the kernel's docstring.
+    Method assertFileCheckMatches can be used to assert that a given string
+    matches FileCheck checks, and is not specific to CUDADispatcher.
     """
     def setUp(self):
@@ -35,6 +54,134 @@ class CUDATestCase(SerialMixin, TestCase):
         config.CUDA_LOW_OCCUPANCY_WARNINGS = self._low_occupancy_warnings
         config.CUDA_WARN_ON_IMPLICIT_COPY = self._warn_on_implicit_copy
+    Signature = Union[tuple[type, ...], None]
+    def _getIRContents(
+        self,
+        ir_result: Union[dict[Signature, str], str],
+        signature: Union[Signature, None] = None,
+    ) -> Iterable[str]:
+        if isinstance(ir_result, str):
+            assert signature is None, (
+                "Cannot use signature because the kernel was only compiled for one signature"
+            )
+            return [ir_result]
+        if signature is None:
+            return list(ir_result.values())
+        return [ir_result[signature]]
+    def assertFileCheckAsm(
+        self,
+        ir_producer: CUDADispatcher,
+        signature: Union[tuple[type, ...], None] = None,
+        check_prefixes: tuple[str] = ("ASM",),
+        **extra_filecheck_options,
+    ) -> None:
+        """
+        Assert that the assembly output of the given CUDADispatcher matches
+        the FileCheck checks given in the kernel's docstring.
+        """
+        ir_contents = self._getIRContents(ir_producer.inspect_asm(), signature)
+        assert ir_contents, "No assembly output found for the given signature."
+        assert ir_producer.__doc__ is not None, (
+            "Kernel docstring is required. To pass checks explicitly, use assertFileCheckMatches."
+        )
+        check_patterns = ir_producer.__doc__
+        for ir_content in ir_contents:
+            self.assertFileCheckMatches(
+                ir_content,
+                check_patterns=check_patterns,
+                check_prefixes=check_prefixes,
+                **extra_filecheck_options,
+            )
+    def assertFileCheckLLVM(
+        self,
+        ir_producer: CUDADispatcher,
+        signature: Union[tuple[type, ...], None] = None,
+        check_prefixes: tuple[str] = ("LLVM",),
+        **extra_filecheck_options,
+    ) -> None:
+        """
+        Assert that the LLVM IR output of the given CUDADispatcher matches
+        the FileCheck checks given in the kernel's docstring.
+        """
+        ir_contents = self._getIRContents(ir_producer.inspect_llvm(), signature)
+        assert ir_contents, "No LLVM IR output found for the given signature."
+        assert ir_producer.__doc__ is not None, (
+            "Kernel docstring is required. To pass checks explicitly, use assertFileCheckMatches."
+        )
+        check_patterns = ir_producer.__doc__
+        for ir_content in ir_contents:
+            assert ir_content, (
+                "LLVM IR content is empty for the given signature."
+            )
+            self.assertFileCheckMatches(
+                ir_content,
+                check_patterns=check_patterns,
+                check_prefixes=check_prefixes,
+                **extra_filecheck_options,
+            )
+    def assertFileCheckMatches(
+        self,
+        ir_content: str,
+        check_patterns: str,
+        check_prefixes: tuple[str] = ("CHECK",),
+        **extra_filecheck_options,
+    ) -> None:
+        """
+        Assert that the given string matches the passed FileCheck checks.
+        Args:
+            ir_content: The string to check against.
+            check_patterns: The FileCheck checks to use.
+            check_prefixes: The prefixes to use for the FileCheck checks.
+            extra_filecheck_options: Extra options to pass to FileCheck.
+        """
+        if PYVERSION < (3, 10):
+            self.skipTest("FileCheck requires Python 3.10 or later")
+        opts = Options(
+            match_filename="-",
+            check_prefixes=list(check_prefixes),
+            **extra_filecheck_options,
+        )
+        input_file = FInput(fname="-", content=ir_content)
+        parser = Parser(opts, StringIO(check_patterns), *pattern_for_opts(opts))
+        matcher = Matcher(opts, input_file, parser)
+        matcher.stderr = StringIO()
+        result = matcher.run()
+        if result != 0:
+            dump_instructions = ""
+            if self._dump_failed_filechecks:
+                dump_directory = Path(
+                    datetime.now().strftime("numba-ir-%Y_%m_%d_%H_%M_%S")
+                )
+                if not dump_directory.exists():
+                    dump_directory.mkdir(parents=True, exist_ok=True)
+                base_path = self.id().replace(".", "_")
+                ir_dump = dump_directory / Path(base_path).with_suffix(".ll")
+                checks_dump = dump_directory / Path(base_path).with_suffix(
+                    ".checks"
+                )
+                with (
+                    open(ir_dump, "w") as ir_file,
+                    open(checks_dump, "w") as checks_file,
+                ):
+                    _ = ir_file.write(ir_content + "\n")
+                    _ = checks_file.write(check_patterns)
+                    dump_instructions = f"Reproduce with:\n\nfilecheck --check-prefixes={','.join(check_prefixes)} {checks_dump} --input-file={ir_dump}"
+            self.fail(
+                f"FileCheck failed:\n{matcher.stderr.getvalue()}\n\n"
+                + f"Check prefixes:\n{check_prefixes}\n\n"
+                + f"Check patterns:\n{check_patterns}\n"
+                + f"IR:\n{ir_content}\n\n"
+                + dump_instructions
+            )
 class ContextResettingTestCase(CUDATestCase):
     """
@@ -127,8 +274,8 @@ def skip_if_mvc_enabled(reason):
 def skip_if_mvc_libraries_unavailable(fn):
     libs_available = False
     try:
-        import cubinlinker  # noqa: F401
-        import ptxcompiler  # noqa: F401
+        import cubinlinker  # noqa: F401 # type: ignore
+        import ptxcompiler  # noqa: F401 # type: ignore
         libs_available = True
     except ImportError:
@@ -189,6 +336,10 @@ def skip_if_cudadevrt_missing(fn):
     return unittest.skipIf(cudadevrt_missing(), "cudadevrt missing")(fn)
+def skip_if_nvjitlink_missing(reason):
+    return unittest.skipIf(not driver._have_nvjitlink(), reason)
 class ForeignArray(object):
     """
     Class for emulating an array coming from another library through the CUDA

numba_cuda/numba/cuda/tests/complex_usecases.py ADDED Viewed

@@ -0,0 +1,113 @@
+import cmath
+def div_usecase(x, y):
+    return x / y
+def real_usecase(x):
+    return x.real
+def imag_usecase(x):
+    return x.imag
+def conjugate_usecase(x):
+    return x.conjugate()
+def acos_usecase(x):
+    return cmath.acos(x)
+def cos_usecase(x):
+    return cmath.cos(x)
+def asin_usecase(x):
+    return cmath.asin(x)
+def sin_usecase(x):
+    return cmath.sin(x)
+def atan_usecase(x):
+    return cmath.atan(x)
+def tan_usecase(x):
+    return cmath.tan(x)
+def acosh_usecase(x):
+    return cmath.acosh(x)
+def cosh_usecase(x):
+    return cmath.cosh(x)
+def asinh_usecase(x):
+    return cmath.asinh(x)
+def sinh_usecase(x):
+    return cmath.sinh(x)
+def atanh_usecase(x):
+    return cmath.atanh(x)
+def tanh_usecase(x):
+    return cmath.tanh(x)
+def exp_usecase(x):
+    return cmath.exp(x)
+def isfinite_usecase(x):
+    return cmath.isfinite(x)
+def isinf_usecase(x):
+    return cmath.isinf(x)
+def isnan_usecase(x):
+    return cmath.isnan(x)
+def log_usecase(x):
+    return cmath.log(x)
+def log_base_usecase(x, base):
+    return cmath.log(x, base)
+def log10_usecase(x):
+    return cmath.log10(x)
+def phase_usecase(x):
+    return cmath.phase(x)
+def polar_usecase(x):
+    return cmath.polar(x)
+def polar_as_complex_usecase(x):
+    return complex(*cmath.polar(x))
+def rect_usecase(r, phi):
+    return cmath.rect(r, phi)
+def sqrt_usecase(x):
+    return cmath.sqrt(x)

numba-cuda 0.17.0__py3-none-any.whl → 0.18.0__py3-none-any.whl

numba-cuda 0.17.0__py3-none-any.whl → 0.18.0__py3-none-any.whl