numba-cuda 0.10.0__py3-none-any.whl → 0.10.1__py3-none-any.whl

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
numba_cuda/VERSION CHANGED
@@ -1 +1 @@
-0.10.0
+0.10.1
numba_cuda/numba/cuda/compiler.py CHANGED
@@ -278,7 +278,7 @@ def compile_cuda(
     args,
     debug=False,
     lineinfo=False,
-    inline=False,
+    forceinline=False,
     fastmath=False,
     nvvm_options=None,
     cc=None,
@@ -316,7 +316,7 @@ def compile_cuda(
     else:
         flags.error_model = "numpy"
 
-    if inline:
+    if forceinline:
         flags.forceinline = True
     if fastmath:
         flags.fastmath = True
@@ -574,6 +574,7 @@ def compile(
     abi="c",
     abi_info=None,
     output="ptx",
+    forceinline=False,
 ):
     """Compile a Python function to PTX or LTO-IR for a given set of argument
     types.
@@ -614,6 +615,11 @@ def compile(
     :type abi_info: dict
     :param output: Type of output to generate, either ``"ptx"`` or ``"ltoir"``.
     :type output: str
+    :param forceinline: Enables inlining at the NVVM IR level when set to
+                        ``True``. This is accomplished by adding the
+                        ``alwaysinline`` function attribute to the function
+                        definition. This is only valid when the output is
+                        ``"ltoir"``.
     :return: (code, resty): The compiled code and inferred return type
     :rtype: tuple
     """
@@ -626,6 +632,12 @@ def compile(
     if output not in ("ptx", "ltoir"):
         raise NotImplementedError(f"Unsupported output type: {output}")
 
+    if forceinline and not device:
+        raise ValueError("Cannot force-inline kernels")
+
+    if forceinline and output != "ltoir":
+        raise ValueError("Can only designate forced inlining in LTO-IR")
+
     debug = config.CUDA_DEBUGINFO_DEFAULT if debug is None else debug
     opt = (config.OPT != 0) if opt is None else opt
 
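For reference, a minimal sketch of how these guards surface through the public `cuda.compile` API (the `add_one` helper and its signature are illustrative, not from the package; actually producing LTO-IR requires a toolchain that supports it):

```python
from numba import cuda, types

def add_one(x):
    x[0] += 1

args = (types.float32[::1],)

# Valid: forceinline requires a device function compiled to LTO-IR
ltoir, resty = cuda.compile(
    add_one, args, device=True, output="ltoir", forceinline=True
)

# Raises ValueError("Cannot force-inline kernels"): device defaults to False
# cuda.compile(add_one, args, output="ltoir", forceinline=True)

# Raises ValueError("Can only designate forced inlining in LTO-IR"):
# output defaults to "ptx"
# cuda.compile(add_one, args, device=True, forceinline=True)
```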
@@ -660,6 +672,7 @@ def compile(
         fastmath=fastmath,
         nvvm_options=nvvm_options,
         cc=cc,
+        forceinline=forceinline,
     )
     resty = cres.signature.return_type
 
@@ -699,6 +712,7 @@ def compile_for_current_device(
     abi="c",
     abi_info=None,
     output="ptx",
+    forceinline=False,
 ):
     """Compile a Python function to PTX or LTO-IR for a given signature for the
     current device's compute capability. This calls :func:`compile` with an
@@ -716,6 +730,7 @@ def compile_for_current_device(
         abi=abi,
         abi_info=abi_info,
         output=output,
+        forceinline=forceinline,
     )
 
 
@@ -730,6 +745,7 @@ def compile_ptx(
     opt=None,
     abi="numba",
     abi_info=None,
+    forceinline=False,
 ):
     """Compile a Python function to PTX for a given signature. See
     :func:`compile`. The defaults for this function are to compile a kernel
@@ -747,6 +763,7 @@ def compile_ptx(
         abi=abi,
         abi_info=abi_info,
         output="ptx",
+        forceinline=forceinline,
     )
 
 
@@ -760,6 +777,7 @@ def compile_ptx_for_current_device(
     opt=None,
     abi="numba",
     abi_info=None,
+    forceinline=False,
 ):
     """Compile a Python function to PTX for a given signature for the current
     device's compute capability. See :func:`compile_ptx`."""
@@ -775,6 +793,7 @@ def compile_ptx_for_current_device(
         opt=opt,
         abi=abi,
         abi_info=abi_info,
+        forceinline=forceinline,
     )
 
 
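Note that `compile_ptx` and `compile_ptx_for_current_device` forward `forceinline` but hard-code `output="ptx"` when calling `compile`, so by the guard shown earlier, passing `forceinline=True` through these entry points should raise. A sketch under that reading (the `set_zero` helper mirrors the new tests below):

```python
from numba import cuda, types

def set_zero(a):
    a[0] = 0

try:
    cuda.compile_ptx(
        set_zero, (types.float32[::1],), device=True, forceinline=True
    )
except ValueError as e:
    print(e)  # Can only designate forced inlining in LTO-IR
```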
numba_cuda/numba/cuda/decorators.py CHANGED
@@ -17,6 +17,7 @@ def jit(
     func_or_sig=None,
     device=False,
     inline="never",
+    forceinline=False,
     link=[],
     debug=None,
     opt=None,
@@ -39,6 +40,14 @@ def jit(
        .. note:: A kernel cannot have any return value.
     :param device: Indicates whether this is a device function.
     :type device: bool
+    :param inline: Enables inlining at the Numba IR level when set to
+        ``"always"``. See `Notes on Inlining
+        <https://numba.readthedocs.io/en/stable/developer/inlining.html>`_.
+    :type inline: str
+    :param forceinline: Enables inlining at the NVVM IR level when set to
+        ``True``. This is accomplished by adding the ``alwaysinline`` function
+        attribute to the function definition.
+    :type forceinline: bool
     :param link: A list of files containing PTX or CUDA C/C++ source to link
        with the function
     :type link: list
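To make the distinction concrete, a small sketch contrasting the two levels (function names are illustrative; the pattern follows the new tests later in this diff):

```python
import numpy as np
from numba import cuda

# Numba IR inlining: the call is folded away before lowering to NVVM IR
@cuda.jit(device=True, inline="always")
def set_first(a):
    a[0] = 0

# NVVM IR inlining: the call survives Numba IR, but the compiled
# definition is tagged with the alwaysinline attribute
@cuda.jit(device=True, forceinline=True)
def set_second(a):
    a[1] = 0

@cuda.jit
def kernel(a):
    set_first(a)
    set_second(a)

a = np.ones(2, dtype=np.float32)
kernel[1, 1](a)
```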
@@ -85,7 +94,9 @@ def jit(
         DeprecationWarning(
             "Passing bool to inline argument is deprecated, please refer to "
             "Numba's documentation on inlining: "
-            "https://numba.readthedocs.io/en/stable/developer/inlining.html"
+            "https://numba.readthedocs.io/en/stable/developer/inlining.html. "
+            "You may have wanted the forceinline argument instead, to force "
+            "inlining at the NVVM IR level."
         )
 
         inline = "always" if inline else "never"
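A sketch of the migration this warning steers users toward (function names are illustrative):

```python
from numba import cuda

# Deprecated: a bool is coerced to "always"/"never" and warns
@cuda.jit(device=True, inline=True)
def old_style(a):
    a[0] = 0

# Preferred spellings, depending on which level of inlining is intended:
@cuda.jit(device=True, inline="always")   # Numba IR level
def numba_ir_inline(a):
    a[0] = 0

@cuda.jit(device=True, forceinline=True)  # NVVM IR level
def nvvm_ir_inline(a):
    a[0] = 0
```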
@@ -140,6 +151,7 @@ def jit(
         targetoptions["fastmath"] = fastmath
         targetoptions["device"] = device
         targetoptions["inline"] = inline
+        targetoptions["forceinline"] = forceinline
         targetoptions["extensions"] = extensions
 
         disp = CUDADispatcher(func, targetoptions=targetoptions)
@@ -182,6 +194,7 @@ def jit(
             func,
             device=device,
             inline=inline,
+            forceinline=forceinline,
             debug=debug,
             opt=opt,
             lineinfo=lineinfo,
@@ -206,6 +219,7 @@ def jit(
         targetoptions["fastmath"] = fastmath
         targetoptions["device"] = device
         targetoptions["inline"] = inline
+        targetoptions["forceinline"] = forceinline
         targetoptions["extensions"] = extensions
         disp = CUDADispatcher(func_or_sig, targetoptions=targetoptions)
 
numba_cuda/numba/cuda/dispatcher.py CHANGED
@@ -137,6 +137,7 @@ class _Kernel(serialize.ReduceMixin):
         debug=False,
         lineinfo=False,
         inline=False,
+        forceinline=False,
         fastmath=False,
         extensions=None,
         max_registers=None,
@@ -182,7 +183,7 @@ class _Kernel(serialize.ReduceMixin):
             self.argtypes,
             debug=self.debug,
             lineinfo=lineinfo,
-            inline=inline,
+            forceinline=forceinline,
             fastmath=fastmath,
             nvvm_options=nvvm_options,
             cc=cc,
@@ -1073,7 +1074,7 @@ class CUDADispatcher(Dispatcher, serialize.ReduceMixin):
         with self._compiling_counter:
             debug = self.targetoptions.get("debug")
             lineinfo = self.targetoptions.get("lineinfo")
-            inline = self.targetoptions.get("inline")
+            forceinline = self.targetoptions.get("forceinline")
             fastmath = self.targetoptions.get("fastmath")
 
             nvvm_options = {
@@ -1091,7 +1092,7 @@ class CUDADispatcher(Dispatcher, serialize.ReduceMixin):
                 args,
                 debug=debug,
                 lineinfo=lineinfo,
-                inline=inline,
+                forceinline=forceinline,
                 fastmath=fastmath,
                 nvvm_options=nvvm_options,
                 cc=cc,
numba_cuda/numba/cuda/tests/cudapy/test_inline.py CHANGED
@@ -8,8 +8,8 @@ from numba.cuda.testing import (
 )
 
 
+@skip_on_cudasim("Cudasim does not support inline and forceinline")
 class TestCudaInline(CUDATestCase):
-    @skip_on_cudasim("Cudasim does not support inline")
     def _test_call_inline(self, inline):
         """Test @cuda.jit(inline=...)"""
         a = np.ones(2, dtype=np.int32)
@@ -42,6 +42,9 @@ class TestCudaInline(CUDATestCase):
         # check that call was not inlined
         self.assertIsNotNone(match, msg=llvm_ir)
 
+        # alwaysinline should not be in the IR when the inline kwarg is used
+        self.assertNotIn("alwaysinline", llvm_ir)
+
     def test_call_inline_always(self):
         self._test_call_inline("always")
 
@@ -54,6 +57,100 @@ class TestCudaInline(CUDATestCase):
     def test_call_inline_false(self):
         self._test_call_inline(False)
 
+    def _test_call_forceinline(self, forceinline):
+        """Test @cuda.jit(forceinline=...)"""
+        a = np.ones(2, dtype=np.int32)
+
+        sig = (types.int32[::1],)
+
+        @cuda.jit(forceinline=forceinline)
+        def set_zero(a):
+            a[0] = 0
+
+        @cuda.jit(sig)
+        def call_set_zero(a):
+            set_zero(a)
+
+        call_set_zero[1, 2](a)
+
+        expected = np.arange(2, dtype=np.int32)
+        self.assertTrue(np.all(a == expected))
+
+        llvm_ir = call_set_zero.inspect_llvm(sig)
+        pat = r"call [a-zA-Z0-9]* @"
+        match = re.compile(pat).search(llvm_ir)
+
+        # Check that call was not inlined at the Numba IR level - the call
+        # should still be present in the IR
+        self.assertIsNotNone(match)
+
+        # Check the definition of set_zero - it is a definition where the
+        # name does not include an underscore just before "set_zero", because
+        # that would match the "call_set_zero" definition
+        pat = r"define.*[^_]set_zero.*"
+        match = re.compile(pat).search(llvm_ir)
+        self.assertIsNotNone(match)
+        if forceinline:
+            self.assertIn("alwaysinline", match.group())
+        else:
+            self.assertNotIn("alwaysinline", match.group())
+
+        # The kernel, "call_set_zero", should never have "alwaysinline" set
+        pat = r"define.*call_set_zero.*"
+        match = re.compile(pat).search(llvm_ir)
+        self.assertIsNotNone(match)
+        self.assertNotIn("alwaysinline", match.group())
+
+    def test_call_forceinline_true(self):
+        self._test_call_forceinline(True)
+
+    def test_call_forceinline_false(self):
+        self._test_call_forceinline(False)
+
+    def test_compile_forceinline_ltoir_only(self):
+        def set_zero(a):
+            a[0] = 0
+
+        args = (types.float32[::1],)
+        msg = r"Can only designate forced inlining in LTO-IR"
+        with self.assertRaisesRegex(ValueError, msg):
+            cuda.compile(
+                set_zero,
+                args,
+                device=True,
+                forceinline=True,
+            )
+
+    def _compile_set_zero(self, forceinline):
+        def set_zero(a):
+            a[0] = 0
+
+        args = (types.float32[::1],)
+        ltoir, resty = cuda.compile(
+            set_zero,
+            args,
+            device=True,
+            output="ltoir",
+            forceinline=forceinline,
+        )
+
+        # Sanity check
+        self.assertEqual(resty, types.none)
+
+        return ltoir
+
+    def test_compile_forceinline(self):
+        ltoir_noinline = self._compile_set_zero(False)
+        ltoir_forceinline = self._compile_set_zero(True)
+
+        # As LTO-IR is opaque, the best we can do is check that changing the
+        # flag resulted in a change in the generated LTO-IR in some way.
+        self.assertNotEqual(
+            ltoir_noinline,
+            ltoir_forceinline,
+            "forceinline flag appeared to have no effect on LTO-IR",
+        )
+
 
 if __name__ == "__main__":
     unittest.main()
numba_cuda-0.10.1.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: numba-cuda
-Version: 0.10.0
+Version: 0.10.1
 Summary: CUDA target for Numba
 Author: Anaconda Inc., NVIDIA Corporation
 License: BSD 2-clause
numba_cuda-0.10.1.dist-info/RECORD CHANGED
@@ -1,6 +1,6 @@
 _numba_cuda_redirector.pth,sha256=cmfMMmV0JPh3yEpl4bGeM9AuXiVVMSo6Z_b7RaQL3XE,30
 _numba_cuda_redirector.py,sha256=n_r8MYbu5-vcXMnLJW147k8DnFXXvgb7nPIXnlXwTyQ,2659
-numba_cuda/VERSION,sha256=3CT-tb01CE2K4ypOr77BI1JwfUZiQB_LzJu9aWzed6k,7
+numba_cuda/VERSION,sha256=9NQ54LUjIIoJ0ThiwWggzDAo_ZRBcxDOHVOjHRTWosQ,7
 numba_cuda/__init__.py,sha256=atXeUvJKR3JHcAiCFbXCVOJQUHgB1TulmsqSL_9RT3Q,114
 numba_cuda/_version.py,sha256=nzrrJXi85d18m6SPdsPsetJNClDETkmF1MrEhGLYDBs,734
 numba_cuda/numba/cuda/__init__.py,sha256=3siqMXEKqa9ezQ8RxPC3KMdebUjgJt-EKxxV4CX9818,607
@@ -9,7 +9,7 @@ numba_cuda/numba/cuda/api_util.py,sha256=jK8oUD3zf_D5IX7vbjc3uY_5kmOxwgEqO2m_lDH
 numba_cuda/numba/cuda/args.py,sha256=UlTHTJpwPeCtnW0Bb-Wetm5UO9TPR-PCgIt5ys8b8tQ,1894
 numba_cuda/numba/cuda/cg.py,sha256=azz1sIT_jXQfJEZfDjBeqboJc6Pu_NtrZxfE7D1eQLQ,1484
 numba_cuda/numba/cuda/codegen.py,sha256=4hAdztvCcpwVbWcl9b5zK9xu04f7mVMNAgekpfc-8uw,14049
-numba_cuda/numba/cuda/compiler.py,sha256=v2QWta2uKlkbgEMKYKKzQpU6sOS1sQxfn3FpkbYlwHA,24511
+numba_cuda/numba/cuda/compiler.py,sha256=sFreZM07D8zp4QyUBL2IKoBtDjzdxj80wN4KUgEQOS8,25283
 numba_cuda/numba/cuda/cpp_function_wrappers.cu,sha256=8lUPmU6FURxphzEqkPLZRPYBCEK_wmDtHq2voPkckfs,950
 numba_cuda/numba/cuda/cuda_bf16.py,sha256=RfnWMV2_zSAW9FLN4JqfW6GfmWR8ZVO16e9Bw3jZnto,152203
 numba_cuda/numba/cuda/cuda_paths.py,sha256=kMIJ_1yV2qtcKEM5rCgSDJ3Gz7bgxbfAWh54E5cDndg,15872
@@ -17,11 +17,11 @@ numba_cuda/numba/cuda/cudadecl.py,sha256=4DhYDnKg95AKsmDHetJvL1rfdvhnuz9PKS1Ncf4
 numba_cuda/numba/cuda/cudaimpl.py,sha256=-a5dvGHORH4RypGliHqXvwG3Rc0CAJVntYGxoYHmbpc,35656
 numba_cuda/numba/cuda/cudamath.py,sha256=wbGjlyGVwcUAoQjgXIaAaasLdVuDSKHkf6KyID5IYBw,3979
 numba_cuda/numba/cuda/debuginfo.py,sha256=tWlRAC1-AsSQp0pG9kXQY9tlVdZPA-nDUJsrvru4eaM,4504
-numba_cuda/numba/cuda/decorators.py,sha256=t1W2eyqvaNHAiVZFe-lxNQpO4dSTOX1tjmkc1VtDFvo,8707
+numba_cuda/numba/cuda/decorators.py,sha256=kqzbv7eEQSyQg2G_XtIyKIfvmm354jw2vZDlOmK-t9s,9454
 numba_cuda/numba/cuda/descriptor.py,sha256=t1rSVJSCAlVACC5_Un3FQ7iubdTTBe-euqz88cvs2tI,985
 numba_cuda/numba/cuda/device_init.py,sha256=Rtwd6hQMHMLMkj6MXtndbWYFJfkIaRe0MwOIJF2nzhU,3449
 numba_cuda/numba/cuda/deviceufunc.py,sha256=zj9BbLiZD-dPttHew4olw8ANgR2nXnXEE9qjCeGLrQI,30731
-numba_cuda/numba/cuda/dispatcher.py,sha256=_lEKvUcystUwgMvEyT3lCuvi41OULn0VE3H36HQ21o8,44369
+numba_cuda/numba/cuda/dispatcher.py,sha256=uX6ltCDQq9mIBqSHV6Ci-2mJtuAmeZXBb3yWp8gXZ2U,44426
 numba_cuda/numba/cuda/errors.py,sha256=WRso1Q_jCoWP5yrDBMhihRhhVtVo1-7KdN8QVE9j46o,1712
 numba_cuda/numba/cuda/extending.py,sha256=VwuU5F0AQFlJsqaiwoWk-6Itihew1FsjVT_BVjhY8Us,2278
 numba_cuda/numba/cuda/initialize.py,sha256=0SnpjccQEYiWITIyfAJx833H1yhYFFDY42EpnwYyMn8,487
@@ -173,7 +173,7 @@ numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py,sha256=Rl35HQdN6J3ZPjSLIz2mFJx
 numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py,sha256=vAP2ggp2arBqJS8kNbGeC5jrZuYzLtFstgvxX0PI-I0,5322
 numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py,sha256=1USofSlavYFaVhP8oep5oJ-CLzXxYwkI3EtOkY6jrVw,2610
 numba_cuda/numba/cuda/tests/cudapy/test_idiv.py,sha256=tTy7hN2LJ4897UzO3EUxjuUzbBcs9QITHJu3s_eknq0,1054
-numba_cuda/numba/cuda/tests/cudapy/test_inline.py,sha256=APWMZgfuYwWZWTM6AOpJNkrRLpYoe7Yx3AvbLRp-erY,1492
+numba_cuda/numba/cuda/tests/cudapy/test_inline.py,sha256=T7DHquV_4HuX5fFQQS3kcZzgifTzwYbMFiY7SgQzoLA,4584
 numba_cuda/numba/cuda/tests/cudapy/test_inspect.py,sha256=L9-62nPmiWC90PST5EZrnGdAcrsbhMS_mbEkwdDkFQ0,4901
 numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py,sha256=uQ0S_XXds-F9Z5GhuFYzRVXu5XYD1ULa-y55Wi92i5I,36726
 numba_cuda/numba/cuda/tests/cudapy/test_ipc.py,sha256=bNT6UZgsgeVWyzBrlKXucQW6IKcD6NEmbwV5cFhf-7I,10553
@@ -256,8 +256,8 @@ numba_cuda/numba/cuda/tests/test_binary_generation/Makefile,sha256=P2WzCc5d64JGq
 numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py,sha256=SE5FrbZdkVrnzS0R62YPPyH25r6Jevd2nuB6HRJ3PZ0,5011
 numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu,sha256=cUf-t6ZM9MK_x7X_aKwsrKW1LdR97XcpR-qnYr5faOE,453
 numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu,sha256=q3oxZziT8KDodeNcEBiWULH6vMrHCWucmJmtrg8C0d0,128
-numba_cuda-0.10.0.dist-info/licenses/LICENSE,sha256=eHeYE-XjASmwbxfsP5AImgfzRwZurZGqH1f6OFwJ4io,1326
-numba_cuda-0.10.0.dist-info/METADATA,sha256=PsCSJol5Cminr99rBE-G11R0TWXJ8hDzmD7L8pr3BN0,1859
-numba_cuda-0.10.0.dist-info/WHEEL,sha256=GHB6lJx2juba1wDgXDNlMTyM13ckjBMKf-OnwgKOCtA,91
-numba_cuda-0.10.0.dist-info/top_level.txt,sha256=C50SsH-8tXDmt7I0Y3nlJYhS5s6pqWflCPdobe9vx2M,11
-numba_cuda-0.10.0.dist-info/RECORD,,
+numba_cuda-0.10.1.dist-info/licenses/LICENSE,sha256=eHeYE-XjASmwbxfsP5AImgfzRwZurZGqH1f6OFwJ4io,1326
+numba_cuda-0.10.1.dist-info/METADATA,sha256=nP_9oLjsU48Y-dOmumPuN2JsiapA9t5ViCU_paTk7Uw,1859
+numba_cuda-0.10.1.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
+numba_cuda-0.10.1.dist-info/top_level.txt,sha256=C50SsH-8tXDmt7I0Y3nlJYhS5s6pqWflCPdobe9vx2M,11
+numba_cuda-0.10.1.dist-info/RECORD,,
numba_cuda-0.10.1.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (80.3.0)
+Generator: setuptools (80.3.1)
 Root-Is-Purelib: true
 Tag: py3-none-any