numba_cuda-0.23.0-cp313-cp313-win_amd64.whl → numba_cuda-0.24.0-cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- numba_cuda/VERSION +1 -1
- numba_cuda/numba/cuda/__init__.py +4 -1
- numba_cuda/numba/cuda/_compat.py +47 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cpp +8 -2
- numba_cuda/numba/cuda/cext/_hashtable.cpp +5 -0
- numba_cuda/numba/cuda/cext/_helperlib.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_pymodule.h +1 -1
- numba_cuda/numba/cuda/cext/_typeconv.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_typeof.cpp +56 -8
- numba_cuda/numba/cuda/cext/mviewbuf.c +7 -1
- numba_cuda/numba/cuda/cext/mviewbuf.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +4 -5
- numba_cuda/numba/cuda/codegen.py +4 -2
- numba_cuda/numba/cuda/compiler.py +5 -5
- numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +1 -1
- numba_cuda/numba/cuda/core/base.py +6 -10
- numba_cuda/numba/cuda/core/bytecode.py +21 -13
- numba_cuda/numba/cuda/core/byteflow.py +336 -90
- numba_cuda/numba/cuda/core/compiler.py +3 -4
- numba_cuda/numba/cuda/core/compiler_machinery.py +3 -3
- numba_cuda/numba/cuda/core/config.py +5 -7
- numba_cuda/numba/cuda/core/controlflow.py +17 -9
- numba_cuda/numba/cuda/core/inline_closurecall.py +11 -10
- numba_cuda/numba/cuda/core/interpreter.py +255 -96
- numba_cuda/numba/cuda/core/ir_utils.py +8 -17
- numba_cuda/numba/cuda/core/pythonapi.py +3 -0
- numba_cuda/numba/cuda/core/rewrites/static_binop.py +1 -1
- numba_cuda/numba/cuda/core/ssa.py +2 -2
- numba_cuda/numba/cuda/core/transforms.py +4 -6
- numba_cuda/numba/cuda/core/typed_passes.py +1 -1
- numba_cuda/numba/cuda/core/typeinfer.py +3 -3
- numba_cuda/numba/cuda/core/untyped_passes.py +11 -10
- numba_cuda/numba/cuda/cpython/unicode.py +2 -2
- numba_cuda/numba/cuda/cpython/unicode_support.py +1 -3
- numba_cuda/numba/cuda/cudadrv/devicearray.py +4 -4
- numba_cuda/numba/cuda/cudadrv/driver.py +13 -11
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +71 -32
- numba_cuda/numba/cuda/debuginfo.py +10 -79
- numba_cuda/numba/cuda/deviceufunc.py +3 -6
- numba_cuda/numba/cuda/dispatcher.py +5 -19
- numba_cuda/numba/cuda/libdeviceimpl.py +1 -2
- numba_cuda/numba/cuda/lowering.py +0 -28
- numba_cuda/numba/cuda/memory_management/nrt.py +1 -1
- numba_cuda/numba/cuda/np/arrayobj.py +7 -9
- numba_cuda/numba/cuda/np/numpy_support.py +7 -10
- numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +4 -3
- numba_cuda/numba/cuda/testing.py +4 -8
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +66 -4
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +2 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +26 -4
- numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +61 -9
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +6 -0
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +12 -1
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +13 -0
- numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +12 -7
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +37 -35
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +8 -7
- numba_cuda/numba/cuda/tests/support.py +11 -0
- numba_cuda/numba/cuda/types/cuda_functions.py +1 -1
- numba_cuda/numba/cuda/typing/asnumbatype.py +37 -2
- numba_cuda/numba/cuda/typing/typeof.py +9 -16
- {numba_cuda-0.23.0.dist-info → numba_cuda-0.24.0.dist-info}/METADATA +4 -13
- {numba_cuda-0.23.0.dist-info → numba_cuda-0.24.0.dist-info}/RECORD +74 -73
- {numba_cuda-0.23.0.dist-info → numba_cuda-0.24.0.dist-info}/WHEEL +0 -0
- {numba_cuda-0.23.0.dist-info → numba_cuda-0.24.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.23.0.dist-info → numba_cuda-0.24.0.dist-info}/licenses/LICENSE.numba +0 -0
- {numba_cuda-0.23.0.dist-info → numba_cuda-0.24.0.dist-info}/top_level.txt +0 -0
numba_cuda/numba/cuda/core/pythonapi.py
@@ -875,6 +875,9 @@ class PythonAPI(object):
                 self.py_hash_t.as_pointer(),
             ],
         )
+        # `_PySet_NextEntry` returns a borrowed reference to the key, which is
+        # generally not expected for iterators--which is the place where this
+        # is used internally. Perhaps we should revisit this at some point
         fn = self._get_function(fnty, name="_PySet_NextEntry")
         return self.builder.call(fn, (set, posptr, keyptr, hashptr))
 
numba_cuda/numba/cuda/core/rewrites/static_binop.py
@@ -27,7 +27,7 @@ class DetectStaticBinops(Rewrite):
                     and expr.static_rhs is ir.UNDEFINED
                 ):
                     self.static_rhs[expr] = func_ir.infer_constant(expr.rhs)
-            except errors.ConstantInferenceError:
+            except errors.ConstantInferenceError:  # noqa: PERF203
                 continue
 
         return len(self.static_lhs) > 0 or len(self.static_rhs) > 0
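This `# noqa: PERF203` suppression recurs throughout the release (in `IterLoopCanonicalization`, `RewriteDynamicRaises`, `load_driver`, and `_DispatcherBase` below). PERF203 is Ruff's try-except-in-loop lint; in these spots the exception path is part of the logic, so the lint is silenced rather than the loop restructured. A minimal sketch of the flagged pattern, with made-up values:

    items = ["1", "2", "oops", "4"]
    parsed = []
    for item in items:
        try:
            parsed.append(int(item))
        except ValueError:  # noqa: PERF203 -- the skip is intentional
            continue
    assert parsed == [1, 2, 4]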
numba_cuda/numba/cuda/core/ssa.py
@@ -113,8 +113,8 @@ def _iterated_domfronts(cfg):
     keep_going = True
     while keep_going:
         keep_going = False
-        for k, vs in domfronts.items():
-            inner = reduce(operator.or_, [domfronts[v] for v in vs], set())
+        for vs in domfronts.values():
+            inner = reduce(operator.or_, map(domfronts.__getitem__, vs), set())
             if inner.difference(vs):
                 vs |= inner
                 keep_going = True
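The same fix pattern (Ruff's PERF102, incorrect-dict-iterator) appears in several passes below: when a loop never uses the key, iterating `.values()` avoids unpacking a key/value tuple on every iteration. A minimal before/after sketch with hypothetical data:

    domfronts = {"a": {"b"}, "b": {"c"}, "c": set()}

    # Before: .items() unpacks a (key, value) pair the loop never uses.
    for _k, vs in domfronts.items():
        pass

    # After: iterate the values directly.
    for vs in domfronts.values():
        pass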
numba_cuda/numba/cuda/core/transforms.py
@@ -436,9 +436,7 @@ def with_lifting(func_ir, typingctx, targetctx, flags, locals):
     # the kind of contextmanager
     sub_irs = []
     for blk_start, blk_end in withs:
-        body_blocks = []
-        for node in _cfg_nodes_in_region(cfg, blk_start, blk_end):
-            body_blocks.append(node)
+        body_blocks = _cfg_nodes_in_region(cfg, blk_start, blk_end).copy()
         _legalize_with_head(blocks[blk_start])
         # Find the contextmanager
         cmkind, extra = _get_with_contextmanager(func_ir, blocks, blk_start)
@@ -631,9 +629,9 @@ def find_setupwiths(func_ir):
                 # add all its targets to the to_visit stack, unless we
                 # have seen them already
                 if ir_utils.is_terminator(stmt):
-                    for t in stmt.get_targets():
-                        if t not in seen:
-                            to_visit.append(t)
+                    to_visit.extend(
+                        t for t in stmt.get_targets() if t not in seen
+                    )
 
     return setup_with_to_pop_blocks_map
 
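This rewrite, and the similar ones in `_gettyperecord_impl`, `array_core`, `GUFuncEngine`, and `libdevice_implement_multiple_returns` below, follow Ruff's PERF401/PERF402 guidance: replace a manual append loop with `extend`, a comprehension, or `map`. The shape of the change, on stand-in data:

    targets = ["b2", "b3", "b1"]
    seen = {"b1"}
    to_visit = []

    # Equivalent to: for t in targets: if t not in seen: to_visit.append(t)
    to_visit.extend(t for t in targets if t not in seen)
    assert to_visit == ["b2", "b3"]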
numba_cuda/numba/cuda/core/typed_passes.py
@@ -164,7 +164,7 @@ class BaseTypeInference(FunctionPass):
         retstmts = []
         caststmts = {}
         argvars = set()
-        for _, blk in interp.blocks.items():
+        for blk in interp.blocks.values():
             for inst in blk.body:
                 if isinstance(inst, ir.return_types):
                     retstmts.append(inst.value.name)
numba_cuda/numba/cuda/core/typeinfer.py
@@ -1464,9 +1464,9 @@ https://numba.readthedocs.io/en/stable/user/troubleshoot.html#my-code-has-an-unt
             interped = msg % (name, atype, loc.strformat())
             return interped
 
-        problem_str = []
-        for xtype in rettypes:
-            problem_str.append(_termcolor.errmsg(check_type(xtype)))
+        problem_str = [
+            _termcolor.errmsg(check_type(xtype)) for xtype in rettypes
+        ]
 
         raise TypingError(
             "Can't unify return type from the "
numba_cuda/numba/cuda/core/untyped_passes.py
@@ -632,7 +632,7 @@ class MakeFunctionToJitFunction(FunctionPass):
     def run_pass(self, state):
         func_ir = state.func_ir
         mutated = False
-        for _, blk in func_ir.blocks.items():
+        for blk in func_ir.blocks.values():
             for stmt in blk.body:
                 if isinstance(stmt, ir.assign_types):
                     if isinstance(stmt.value, ir.expr_types):
@@ -696,7 +696,7 @@ class TransformLiteralUnrollConstListToTuple(FunctionPass):
     def run_pass(self, state):
         mutated = False
         func_ir = state.func_ir
-        for _, blk in func_ir.blocks.items():
+        for blk in func_ir.blocks.values():
             calls = [_ for _ in blk.find_exprs("call")]
             for call in calls:
                 glbl = guard(get_definition, func_ir, call.func)
@@ -1123,16 +1123,17 @@ class MixedContainerUnroller(FunctionPass):
         )
         keys = [k for k in data.keys()]
 
-        elifs = []
-        for i in range(1, len(keys)):
-            elifs.append(elif_tplt % ",".join(map(str, data[keys[i]])))
+        elifs = [
+            elif_tplt % ",".join(map(str, data[keys[i]]))
+            for i in range(1, len(keys))
+        ]
         src = b % (",".join(map(str, data[keys[0]])), "".join(elifs))
         wstr = src
         l = {}
         exec(wstr, {}, l)
         bfunc = l["foo"]
         branches = compile_to_numba_ir(bfunc, {})
-        for _, blk in branches.blocks.items():
+        for blk in branches.blocks.values():
             for stmt in blk.body:
                 if isinstance(stmt, ir.assign_types):
                     if isinstance(stmt.value, ir.global_types):
@@ -1173,7 +1174,7 @@ class MixedContainerUnroller(FunctionPass):
         """This finds loops which are compliant with the form:
         for i in range(len(literal_unroll(<something>>)))"""
         unroll_loops = {}
-        for _, loop in loops.items():
+        for loop in loops.values():
             # TODO: check the loop head has literal_unroll, if it does but
             # does not conform to the following then raise
 
@@ -1605,7 +1606,7 @@ class IterLoopCanonicalization(FunctionPass):
         for x in induction_vars:
             try:  # there's not always an alias, e.g. loop from inlined closure
                 tmp.add(func_ir.get_assignee(x, loop.header))
-            except ValueError:
+            except ValueError:  # noqa: PERF203
                 pass
         induction_vars |= tmp
         induction_var_names = set([x.name for x in induction_vars])
@@ -1639,7 +1640,7 @@ class IterLoopCanonicalization(FunctionPass):
         loops = cfg.loops()
 
         mutated = False
-        for _, loop in loops.items():
+        for loop in loops.values():
             stat = self.assess_loop(loop, func_ir, state.typemap)
             if stat:
                 if self._DEBUG:
@@ -1979,7 +1980,7 @@ class RewriteDynamicRaises(FunctionPass):
             try:
                 const = func_ir.infer_constant(exc_arg)
                 exc_args.append(const)
-            except consts.ConstantInferenceError:
+            except consts.ConstantInferenceError:  # noqa: PERF203
                 exc_args.append(exc_arg)
         loc = raise_.loc
 
numba_cuda/numba/cuda/cpython/unicode.py
@@ -394,7 +394,7 @@ def _set_code_point(a, i, ch):
     )
 
 
-if PYVERSION in ((3, 12), (3, 13)):
+if PYVERSION in ((3, 12), (3, 13), (3, 14)):
 
     @register_jitable
     def _pick_kind(kind1, kind2):
@@ -442,7 +442,7 @@ def _pick_ascii(is_ascii1, is_ascii2):
         return types.uint32(0)
 
 
-if PYVERSION in ((3, 12), (3, 13)):
+if PYVERSION in ((3, 12), (3, 13), (3, 14)):
 
     @register_jitable
     def _kind_to_byte_width(kind):
numba_cuda/numba/cuda/cpython/unicode_support.py
@@ -125,9 +125,7 @@ def _gettyperecord_impl(typingctx, codepoint):
 
         byref = [upper, lower, title, decimal, digit, flags]
         builder.call(fn, [args[0]] + byref)
-        buf = []
-        for x in byref:
-            buf.append(builder.load(x))
+        buf = list(map(builder.load, byref))
 
         res = context.make_tuple(builder, signature.return_type, tuple(buf))
         return impl_ret_untracked(context, builder, signature.return_type, res)
numba_cuda/numba/cuda/cudadrv/devicearray.py
@@ -852,10 +852,10 @@ def array_core(ary):
     """
     if not ary.strides or not ary.size:
         return ary
-    core_index = []
-    for stride in ary.strides:
-        core_index.append(0 if stride == 0 else slice(None))
-    return ary[tuple(core_index)]
+    core_index = tuple(
+        0 if stride == 0 else slice(None) for stride in ary.strides
+    )
+    return ary[core_index]
 
 
 def is_contiguous(ary):
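`array_core` collapses broadcast (stride-0) axes to a single index so the resulting view covers each underlying element exactly once; the comprehension builds the same index tuple the removed append loop did. A NumPy illustration of the index it computes (a host-side stand-in for the device array):

    import numpy as np

    a = np.broadcast_to(np.arange(3, dtype=np.int64), (4, 3))  # strides == (0, 8)
    core_index = tuple(0 if stride == 0 else slice(None) for stride in a.strides)
    assert core_index == (0, slice(None))
    assert a[core_index].shape == (3,)  # the broadcast axis is collapsed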
numba_cuda/numba/cuda/cudadrv/driver.py
@@ -54,18 +54,16 @@ from numba.cuda.utils import cached_file_read
 from numba.cuda.cudadrv import enums, drvapi, nvrtc
 
 from cuda.bindings import driver as binding
-from cuda.core.experimental import (
+from numba.cuda._compat import (
     Linker,
     LinkerOptions,
     ObjectCode,
-)
-
-from cuda.bindings.utils import get_cuda_native_handle
-from cuda.core.experimental import (
     Stream as ExperimentalStream,
     Device as ExperimentalDevice,
 )
 
+from cuda.bindings.utils import get_cuda_native_handle
+
 
 # There is no definition of the default stream in the Nvidia bindings (nor
 # is there at the C/C++ level), so we define it here so we don't need to
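driver.py, nvrtc.py, and dispatcher.py now route their `cuda.core.experimental` imports through the new `numba/cuda/_compat.py` (+47 lines, contents not shown in this diff). Presumably it centralizes those imports behind one stable module; the following is a hypothetical sketch only, not the actual file:

    # Hypothetical sketch of numba/cuda/_compat.py; the real module is not
    # shown in this diff. Based on the names imported from it elsewhere, it
    # re-exports the cuda.core.experimental API from a single location.
    from cuda.core.experimental import (
        Device,
        Linker,
        LinkerOptions,
        ObjectCode,
        Program,
        ProgramOptions,
        LaunchConfig,
        Stream,
        launch,
    )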
@@ -184,7 +182,7 @@ def load_driver(dlloader, candidates):
     for path in candidates:
         try:
             dll = dlloader(path)
-        except OSError as e:
+        except OSError as e:  # noqa: PERF203
             # Problem opening the DLL
             path_not_exist.append(not os.path.isfile(path))
             driver_load_error.append(e)
@@ -375,10 +373,10 @@ class Driver(object):
             return getattr(self.lib, fname)
 
         for variant in variants:
-            try:
-                return getattr(self.lib, f"{fname}{variant}")
-            except AttributeError:
-                pass
+            if (
+                value := getattr(self.lib, f"{fname}{variant}", None)
+            ) is not None:
+                return value
 
         # Not found.
         # Delay missing function error to use
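The variant lookup in `Driver.__getattr__` drops a per-iteration try/except (another PERF203 case) in favor of `getattr` with a `None` default tested via the walrus operator. The transformation on a stand-in object:

    from types import SimpleNamespace

    lib = SimpleNamespace(cuMemAlloc_v2="v2 impl")
    fname, variants = "cuMemAlloc", ("_v3", "_v2", "")

    value = None
    for variant in variants:
        # Replaces: try: return getattr(...) / except AttributeError: pass
        if (value := getattr(lib, f"{fname}{variant}", None)) is not None:
            break
    assert value == "v2 impl"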
@@ -2305,7 +2303,11 @@ class _Linker:
         lto=None,
         additional_flags=None,
     ):
-        arch = f"sm_{cc[0]}{cc[1]}"
+        if len(cc) == 3:
+            arch = f"sm_{cc[0]}{cc[1]}{cc[2]}"
+        else:
+            arch = f"sm_{cc[0]}{cc[1]}"
+
         self.max_registers = max_registers if max_registers else None
         self.lineinfo = lineinfo
         self.cc = cc
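`_Linker` now accepts a three-element compute capability whose last element is an architecture suffix, yielding target strings in the style nvcc uses for arch-specific and family-specific builds. Illustrative values (the helper name here is ours, not the package's):

    def _sm_from_cc(cc):
        # Mirrors the branch added to _Linker.__init__.
        if len(cc) == 3:
            return f"sm_{cc[0]}{cc[1]}{cc[2]}"
        return f"sm_{cc[0]}{cc[1]}"

    assert _sm_from_cc((8, 0)) == "sm_80"
    assert _sm_from_cc((9, 0, "a")) == "sm_90a"    # arch-specific
    assert _sm_from_cc((10, 0, "f")) == "sm_100f"  # family-specific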
numba_cuda/numba/cuda/cudadrv/nvrtc.py
@@ -12,7 +12,7 @@ import os
 import warnings
 import functools
 
-from cuda.core.experimental import Program, ProgramOptions
+from numba.cuda._compat import Program, ProgramOptions
 from cuda.bindings import nvrtc as bindings_nvrtc
 
 NVRTC_EXTRA_SEARCH_PATHS = _readenv(
@@ -30,6 +30,44 @@ def _get_nvrtc_version():
     return (major, minor)
 
 
+def _verify_cc_tuple(cc):
+    version = _get_nvrtc_version()
+    ver_str = lambda version: ".".join(str(v) for v in version)
+
+    if len(cc) == 3:
+        cc, arch = (cc[0], cc[1]), cc[2]
+    else:
+        arch = ""
+
+    if arch not in ("", "a", "f"):
+        raise ValueError(
+            f"Invalid architecture suffix '{arch}' in compute capability "
+            f"{ver_str(cc)}{arch}. Expected '', 'a', or 'f'."
+        )
+
+    supported_ccs = get_supported_ccs()
+    try:
+        found = max(filter(lambda v: v <= cc, [v for v in supported_ccs]))
+    except ValueError:
+        raise RuntimeError(
+            f"Device compute capability {ver_str(cc)} is less than the "
+            f"minimum supported by NVRTC {ver_str(version)}. Supported "
+            "compute capabilities are "
+            f"{', '.join([ver_str(v) for v in supported_ccs])}."
+        )
+
+    if found != cc:
+        found = (found[0], found[1], arch)
+        warnings.warn(
+            f"Device compute capability {ver_str(cc)} is not supported by "
+            f"NVRTC {ver_str(version)}. Using {ver_str(found)} instead."
+        )
+    else:
+        found = (cc[0], cc[1], arch)
+
+    return found
+
+
 def compile(src, name, cc, ltoir=False, lineinfo=False, debug=False):
     """
     Compile a CUDA C/C++ source to PTX or LTOIR for a given compute capability.
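Taken together, `_verify_cc_tuple` normalizes `cc` into a `(major, minor, arch)` triple: it validates the suffix against '', 'a', and 'f', clamps to the closest capability NVRTC supports (warning when it does), and errors out below the minimum. Expected outcomes, illustrative only since they depend on the installed NVRTC:

    # Illustrative only -- results depend on the NVRTC version installed.
    # _verify_cc_tuple((9, 0))       -> (9, 0, "")
    # _verify_cc_tuple((9, 0, "a"))  -> (9, 0, "a")
    # _verify_cc_tuple((9, 0, "x"))  -> ValueError (suffix must be "", "a", "f")
    # _verify_cc_tuple((9, 9))       -> warns, then returns the closest
    #                                   supported capability, e.g. (9, 0, "")
    # _verify_cc_tuple((1, 0))       -> RuntimeError (below the NVRTC minimum)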
@@ -38,7 +76,8 @@ def compile(src, name, cc, ltoir=False, lineinfo=False, debug=False):
     :type src: str
     :param name: The filename of the source (for information only)
     :type name: str
-    :param cc: A tuple ``(major, minor)`` of the compute capability
+    :param cc: A tuple ``(major, minor)`` or ``(major, minor, arch)`` of the
+        compute capability
     :type cc: tuple
     :param ltoir: Compile into LTOIR if True, otherwise into PTX
     :type ltoir: bool
@@ -49,34 +88,18 @@ def compile(src, name, cc, ltoir=False, lineinfo=False, debug=False):
     :return: The compiled PTX or LTOIR and compilation log
     :rtype: tuple
     """
+    found = _verify_cc_tuple(cc)
     version = _get_nvrtc_version()
 
-    ver_str = lambda version: ".".join(str(v) for v in version)
-    supported_ccs = get_supported_ccs()
-    try:
-        found = max(filter(lambda v: v <= cc, [v for v in supported_ccs]))
-    except ValueError:
-        raise RuntimeError(
-            f"Device compute capability {ver_str(cc)} is less than the "
-            f"minimum supported by NVRTC {ver_str(version)}. Supported "
-            "compute capabilities are "
-            f"{', '.join([ver_str(v) for v in supported_ccs])}."
-        )
-
-    if found != cc:
-        warnings.warn(
-            f"Device compute capability {ver_str(cc)} is not supported by "
-            f"NVRTC {ver_str(version)}. Using {ver_str(found)} instead."
-        )
-
     # Compilation options:
     # - Compile for the current device's compute capability.
     # - The CUDA include path is added.
     # - Relocatable Device Code (rdc) is needed to prevent device functions
     #   being optimized away.
-    major, minor = found
+    major, minor = found[0], found[1]
+    cc_arch = found[2] if len(found) == 3 else ""
 
-    arch = f"sm_{major}{minor}"
+    arch = f"sm_{major}{minor}{cc_arch}"
 
     cuda_include_dir = get_cuda_paths()["include_dir"].info
     cuda_includes = [f"{cuda_include_dir}"]
@@ -156,7 +179,7 @@ def compile(src, name, cc, ltoir=False, lineinfo=False, debug=False):
     return result, log
 
 
-def find_closest_arch(mycc):
+def find_closest_arch(cc):
     """
     Given a compute capability, return the closest compute capability supported
     by the CUDA toolkit.
@@ -166,17 +189,17 @@ def find_closest_arch(mycc):
     """
     supported_ccs = get_supported_ccs()
 
-    for i, cc in enumerate(supported_ccs):
-        if cc == mycc:
+    for i, supported_cc in enumerate(supported_ccs):
+        if supported_cc == cc:
             # Matches
-            return cc
-        elif cc > mycc:
+            return supported_cc
+        elif supported_cc > cc:
             # Exceeded
             if i == 0:
                 # CC lower than supported
                 msg = (
                     "GPU compute capability %d.%d is not supported"
-                    "(requires >=%d.%d)" % (mycc + cc)
+                    "(requires >=%d.%d)" % (cc + supported_cc)
                 )
                 raise CCSupportError(msg)
             else:
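With the rename (`mycc` → `cc`, loop variable → `supported_cc`), the search reads as intended: walk the ascending list of supported capabilities, return an exact match, otherwise fall back to the highest entry not exceeding the request. A self-contained sketch of that scan over a hypothetical supported list:

    def closest(cc, supported):  # `supported` sorted ascending, as in the source
        for i, supported_cc in enumerate(supported):
            if supported_cc == cc:
                return supported_cc          # exact match
            elif supported_cc > cc:
                if i == 0:
                    raise ValueError("below the minimum supported capability")
                return supported[i - 1]      # highest entry not exceeding cc
        return supported[-1]                 # above everything: pick the highest

    ccs = [(7, 5), (8, 0), (8, 9), (9, 0)]
    assert closest((8, 9), ccs) == (8, 9)
    assert closest((8, 6), ccs) == (8, 0)
    assert closest((12, 0), ccs) == (9, 0)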
@@ -187,13 +210,29 @@ def find_closest_arch(mycc):
     return supported_ccs[-1]  # Choose the highest
 
 
-def get_arch_option(major, minor):
+def get_arch_option(major, minor, arch=""):
     """Matches with the closest architecture option"""
     if config.FORCE_CUDA_CC:
-        arch = config.FORCE_CUDA_CC
+        fcc = config.FORCE_CUDA_CC
+        major, minor = fcc[0], fcc[1]
+        if len(fcc) == 3:
+            arch = fcc[2]
+        else:
+            arch = ""
     else:
-        arch = find_closest_arch((major, minor))
-    return "compute_%d%d" % arch
+        new_major, new_minor = find_closest_arch((major, minor))
+        if (new_major, new_minor) != (major, minor):
+            # If we picked a different major / minor, then using an
+            # arch-specific version is invalid
+            if arch != "":
+                raise ValueError(
+                    f"Can't use arch-specific compute_{major}{minor}{arch} with "
+                    "closest found compute capability "
+                    f"compute_{new_major}{new_minor}"
+                )
+            major, minor = new_major, new_minor
+
+    return f"compute_{major}{minor}{arch}"
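`get_arch_option` now threads the suffix through to the `compute_XY` string, lets `config.FORCE_CUDA_CC` override all three components, and refuses to remap an arch-specific request onto a different capability. Illustrative outcomes, depending on the installed toolkit:

    # Illustrative only -- results depend on the toolkit and FORCE_CUDA_CC.
    # get_arch_option(8, 9)            -> "compute_89"
    # get_arch_option(9, 0, arch="a")  -> "compute_90a"
    # get_arch_option(9, 1, arch="a")  -> ValueError if the closest supported
    #                                     capability is 9.0, because compute_91a
    #                                     can't be remapped to compute_90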
numba_cuda/numba/cuda/debuginfo.py
@@ -646,11 +646,6 @@ class CUDADIBuilder(DIBuilder):
         super().__init__(module, filepath, cgctx, directives_only)
         # Cache for local variable metadata type and line deduplication
         self._vartypelinemap = {}
-        # Variable address space dictionary
-        self._var_addrspace_map = {}
-
-    def _set_addrspace_map(self, map):
-        self._var_addrspace_map = map
 
     def _var_type(self, lltype, size, datamodel=None):
         is_bool = False
@@ -826,64 +821,6 @@ class CUDADIBuilder(DIBuilder):
                 is_distinct=True,
             )
 
-        # Check if there's actually address space info to handle
-        addrspace = getattr(self, "_addrspace", None)
-        if (
-            isinstance(lltype, ir.LiteralStructType)
-            and datamodel is not None
-            and datamodel.inner_models()
-            and addrspace not in (None, 0)
-        ):
-            # Process struct with datamodel that has address space info
-            meta = []
-            offset = 0
-            for element, field, model in zip(
-                lltype.elements, datamodel._fields, datamodel.inner_models()
-            ):
-                size_field = self.cgctx.get_abi_sizeof(element)
-                if isinstance(element, ir.PointerType) and field == "data":
-                    # Create pointer type with correct address space
-                    pointee_size = self.cgctx.get_abi_sizeof(element.pointee)
-                    pointee_model = getattr(model, "_pointee_model", None)
-                    pointee_type = self._var_type(
-                        element.pointee, pointee_size, datamodel=pointee_model
-                    )
-                    meta_ptr = {
-                        "tag": ir.DIToken("DW_TAG_pointer_type"),
-                        "baseType": pointee_type,
-                        "size": _BYTE_SIZE * size_field,
-                    }
-                    dwarf_addrclass = self.get_dwarf_address_class(addrspace)
-                    if dwarf_addrclass is not None:
-                        meta_ptr["dwarfAddressSpace"] = int(dwarf_addrclass)
-                    basetype = m.add_debug_info("DIDerivedType", meta_ptr)
-                else:
-                    basetype = self._var_type(
-                        element, size_field, datamodel=model
-                    )
-                derived_type = m.add_debug_info(
-                    "DIDerivedType",
-                    {
-                        "tag": ir.DIToken("DW_TAG_member"),
-                        "name": field,
-                        "baseType": basetype,
-                        "size": _BYTE_SIZE * size_field,
-                        "offset": offset,
-                    },
-                )
-                meta.append(derived_type)
-                offset += _BYTE_SIZE * size_field
-
-            return m.add_debug_info(
-                "DICompositeType",
-                {
-                    "tag": ir.DIToken("DW_TAG_structure_type"),
-                    "name": f"{datamodel.fe_type}",
-                    "elements": m.add_metadata(meta),
-                    "size": offset,
-                },
-                is_distinct=True,
-            )
         # For other cases, use upstream Numba implementation
         return super()._var_type(lltype, size, datamodel=datamodel)
 
@@ -936,22 +873,16 @@ class CUDADIBuilder(DIBuilder):
             # to llvm.dbg.value
             return
         else:
-            # Set address space info for use when building the variable type
-            self._addrspace = self._var_addrspace_map.get(name)
-            try:
-                return super().mark_variable(
-                    builder,
-                    allocavalue,
-                    name,
-                    lltype,
-                    size,
-                    line,
-                    datamodel,
-                    argidx,
-                )
-            finally:
-                # Clean up address space info
-                self._addrspace = None
+            return super().mark_variable(
+                builder,
+                allocavalue,
+                name,
+                lltype,
+                size,
+                line,
+                datamodel,
+                argidx,
+            )
 
     def update_variable(
         self,
numba_cuda/numba/cuda/deviceufunc.py
@@ -682,12 +682,9 @@ class GUFuncEngine(object):
             inner_shapes.append(inner_shape)
 
         # solve output shape
-        oshapes = []
-        for outsig in self.sout:
-            oshape = []
-            for sym in outsig:
-                oshape.append(symbolmap[sym])
-            oshapes.append(tuple(oshape))
+        oshapes = [
+            tuple(map(symbolmap.__getitem__, outsig)) for outsig in self.sout
+        ]
 
         # find the biggest outershape as looping dimension
         sizes = [reduce(operator.mul, s, 1) for s in outer_shapes]
numba_cuda/numba/cuda/dispatcher.py
@@ -15,7 +15,7 @@ import uuid
 import re
 from warnings import warn
 
-from cuda.core.experimental import launch, LaunchConfig
+from numba.cuda._compat import launch, LaunchConfig
 
 from numba.cuda.core import errors
 from numba.cuda import serialize, utils
@@ -41,7 +41,7 @@ from numba.cuda.compiler import (
 from numba.cuda.core import sigutils, config, entrypoints
 from numba.cuda.flags import Flags
 from numba.cuda.cudadrv import driver, nvvm
-
+
 from numba.cuda.locks import module_init_lock
 from numba.cuda.core.caching import Cache, CacheImpl, NullCache
 from numba.cuda.descriptor import cuda_target
@@ -858,7 +858,7 @@ class _DispatcherBase(_dispatcher.Dispatcher):
         for cres in overloads.values():
             try:
                 targetctx.remove_user_function(cres.entry_point)
-            except KeyError:
+            except KeyError:  # noqa: PERF203
                 pass
 
         return finalizer
@@ -1626,21 +1626,7 @@ class CUDADispatcher(serialize.ReduceMixin, _MemoMixin, _DispatcherBase):
     def typeof_pyval(self, val):
         # Based on _DispatcherBase.typeof_pyval, but differs from it to support
         # the CUDA Array Interface.
-        try:
-            return typeof(val, Purpose.argument)
-        except ValueError:
-            if (
-                interface := getattr(val, "__cuda_array_interface__")
-            ) is not None:
-                # When typing, we don't need to synchronize on the array's
-                # stream - this is done when the kernel is launched.
-
-                return typeof(
-                    cuda.from_cuda_array_interface(interface, sync=False),
-                    Purpose.argument,
-                )
-            else:
-                raise
+        return typeof(val, Purpose.argument)
 
     def specialize(self, *args):
         """
@@ -2104,7 +2090,7 @@ class CUDADispatcher(serialize.ReduceMixin, _MemoMixin, _DispatcherBase):
         if file is None:
             file = sys.stdout
 
-        for _, defn in self.overloads.items():
+        for defn in self.overloads.values():
             defn.inspect_types(file=file)
 
     @classmethod
numba_cuda/numba/cuda/libdeviceimpl.py
@@ -69,8 +69,7 @@ def libdevice_implement_multiple_returns(func, retty, prototype_args):
         tuple_args = []
         if retty != types.void:
             tuple_args.append(ret)
-        for arg in virtual_args:
-            tuple_args.append(builder.load(arg))
+        tuple_args.extend(map(builder.load, virtual_args))
 
         if isinstance(nb_retty, types.UniTuple):
             return cgutils.pack_array(builder, tuple_args)