numba-cuda 0.4.0__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
numba_cuda/VERSION CHANGED
@@ -1 +1 @@
- 0.4.0
+ 0.6.0
numba_cuda/numba/cuda/compiler.py CHANGED
@@ -1,14 +1,17 @@
  from llvmlite import ir
  from numba.core.typing.templates import ConcreteTemplate
+ from numba.core import ir as numba_ir
  from numba.core import (cgutils, types, typing, funcdesc, config, compiler,
                          sigutils, utils)
  from numba.core.compiler import (sanitize_compile_result_entries, CompilerBase,
                                   DefaultPassBuilder, Flags, Option,
                                   CompileResult)
  from numba.core.compiler_lock import global_compiler_lock
- from numba.core.compiler_machinery import (LoweringPass,
+ from numba.core.compiler_machinery import (FunctionPass, LoweringPass,
                                             PassManager, register_pass)
+ from numba.core.interpreter import Interpreter
  from numba.core.errors import NumbaInvalidConfigWarning
+ from numba.core.untyped_passes import TranslateByteCode
  from numba.core.typed_passes import (IRLegalization, NativeLowering,
                                       AnnotateTypes)
  from warnings import warn
@@ -143,13 +146,74 @@ class CreateLibrary(LoweringPass):
          return True


+ class CUDABytecodeInterpreter(Interpreter):
+     # Based on the superclass implementation, but names the resulting variable
+     # "$bool<N>" instead of "bool<N>" - see Numba PR #9888:
+     # https://github.com/numba/numba/pull/9888
+     #
+     # This can be removed once that PR is available in an upstream Numba
+     # release.
+     def _op_JUMP_IF(self, inst, pred, iftrue):
+         brs = {
+             True: inst.get_jump_target(),
+             False: inst.next,
+         }
+         truebr = brs[iftrue]
+         falsebr = brs[not iftrue]
+
+         name = "$bool%s" % (inst.offset)
+         gv_fn = numba_ir.Global("bool", bool, loc=self.loc)
+         self.store(value=gv_fn, name=name)
+
+         callres = numba_ir.Expr.call(self.get(name), (self.get(pred),), (),
+                                      loc=self.loc)
+
+         pname = "$%spred" % (inst.offset)
+         predicate = self.store(value=callres, name=pname)
+         bra = numba_ir.Branch(cond=predicate, truebr=truebr, falsebr=falsebr,
+                               loc=self.loc)
+         self.current_block.append(bra)
+
+
+ @register_pass(mutates_CFG=True, analysis_only=False)
+ class CUDATranslateBytecode(FunctionPass):
+     _name = "cuda_translate_bytecode"
+
+     def __init__(self):
+         FunctionPass.__init__(self)
+
+     def run_pass(self, state):
+         func_id = state['func_id']
+         bc = state['bc']
+         interp = CUDABytecodeInterpreter(func_id)
+         func_ir = interp.interpret(bc)
+         state['func_ir'] = func_ir
+         return True
+
+
  class CUDACompiler(CompilerBase):
      def define_pipelines(self):
          dpb = DefaultPassBuilder
          pm = PassManager('cuda')

          untyped_passes = dpb.define_untyped_pipeline(self.state)
-         pm.passes.extend(untyped_passes.passes)
+
+         # Rather than replicating the whole untyped passes definition in
+         # numba-cuda, it seems cleaner to take the pass list and replace the
+         # TranslateBytecode pass with our own.
+
+         def replace_translate_pass(implementation, description):
+             if implementation is TranslateByteCode:
+                 return (CUDATranslateBytecode, description)
+             else:
+                 return (implementation, description)
+
+         cuda_untyped_passes = [
+             replace_translate_pass(implementation, description)
+             for implementation, description in untyped_passes.passes
+         ]
+
+         pm.passes.extend(cuda_untyped_passes)

          typed_passes = dpb.define_typed_pipeline(self.state)
          pm.passes.extend(typed_passes.passes)
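
The pass substitution above is a plain list rewrite over the (pass class, description) tuples that make up a Numba pipeline. A minimal standalone sketch of the same pattern (`replacement` is a hypothetical stand-in for CUDATranslateBytecode):

```python
from numba.core.untyped_passes import TranslateByteCode

def swap_translate_pass(passes, replacement):
    # Return a copy of an untyped pass list with TranslateByteCode
    # swapped for `replacement` (e.g. a custom FunctionPass subclass).
    return [
        (replacement, description) if implementation is TranslateByteCode
        else (implementation, description)
        for implementation, description in passes
    ]
```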
@@ -352,6 +416,18 @@ def kernel_fixup(kernel, debug):
      kernel.return_value = ir.ReturnValue(kernel, ir.VoidType())
      kernel.args = kernel.args[1:]

+     # If debug metadata is present, remove the return value from it
+
+     if kernel_metadata := getattr(kernel, 'metadata', None):
+         if dbg_metadata := kernel_metadata.get('dbg', None):
+             for name, value in dbg_metadata.operands:
+                 if name == "type":
+                     type_metadata = value
+                     for tm_name, tm_value in type_metadata.operands:
+                         if tm_name == 'types':
+                             types = tm_value
+                             types.operands = types.operands[1:]
+
      # Mark as a kernel for NVVM

      nvvm.set_cuda_kernel(kernel)
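
The metadata edit above leans on llvmlite's representation of debug info: a debug node's `operands` are (name, value) pairs, and the `types` node of a DISubroutineType is a plain metadata tuple whose first slot holds the return type. A minimal sketch of those shapes (a toy module, not the real kernel IR; written to my understanding of llvmlite's API):

```python
from llvmlite import ir

m = ir.Module(name="sketch")
i32 = m.add_debug_info('DIBasicType', {
    'name': 'int32',
    'size': 32,
    'encoding': ir.DIToken('DW_ATE_signed'),
})
# Flexible node of types; by convention slot 0 is the return type.
types_node = m.add_metadata([i32, i32])
subroutine = m.add_debug_info('DISubroutineType', {'types': types_node})

# Dropping the return slot, as kernel_fixup now does for void kernels:
for name, value in subroutine.operands:
    if name == 'types':
        value.operands = value.operands[1:]
```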
@@ -570,16 +646,16 @@ def compile_ptx_for_current_device(pyfunc, sig, debug=None, lineinfo=False,
                         abi=abi, abi_info=abi_info)


- def declare_device_function(name, restype, argtypes):
-     return declare_device_function_template(name, restype, argtypes).key
+ def declare_device_function(name, restype, argtypes, link):
+     return declare_device_function_template(name, restype, argtypes, link).key


- def declare_device_function_template(name, restype, argtypes):
+ def declare_device_function_template(name, restype, argtypes, link):
      from .descriptor import cuda_target
      typingctx = cuda_target.typing_context
      targetctx = cuda_target.target_context
      sig = typing.signature(restype, *argtypes)
-     extfn = ExternFunction(name, sig)
+     extfn = ExternFunction(name, sig, link)

      class device_function_template(ConcreteTemplate):
          key = extfn
@@ -593,7 +669,8 @@ def declare_device_function_template(name, restype, argtypes):
      return device_function_template


- class ExternFunction(object):
-     def __init__(self, name, sig):
+ class ExternFunction:
+     def __init__(self, name, sig, link):
          self.name = name
          self.sig = sig
+         self.link = link
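
The descriptor returned by `cuda.declare_device` (see the decorators.py hunk later in this diff) is this template key, i.e. an ExternFunction instance; the dispatcher reads its `link` attribute when collecting code to link. An illustrative look at the attributes, based on the tests added in this release:

```python
from numba import cuda

decl = cuda.declare_device('f1', 'int32(float32[:])')
decl.name   # 'f1'
decl.sig    # the normalized Numba signature
decl.link   # () -- an empty tuple when no link argument is given
```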
numba_cuda/numba/cuda/cudadecl.py CHANGED
@@ -403,16 +403,20 @@ _genfp16_binary_operator(operator.itruediv)


  def _resolve_wrapped_unary(fname):
+     link = tuple()
      decl = declare_device_function_template(f'__numba_wrapper_{fname}',
                                              types.float16,
-                                             (types.float16,))
+                                             (types.float16,),
+                                             link)
      return types.Function(decl)


  def _resolve_wrapped_binary(fname):
+     link = tuple()
      decl = declare_device_function_template(f'__numba_wrapper_{fname}',
                                              types.float16,
-                                             (types.float16, types.float16,))
+                                             (types.float16, types.float16,),
+                                             link)
      return types.Function(decl)

numba_cuda/numba/cuda/cudadrv/linkable_code.py CHANGED
@@ -2,8 +2,12 @@ from .mappings import FILE_EXTENSION_MAP


  class LinkableCode:
-     """An object that can be passed in the `link` list argument to `@cuda.jit`
-     kernels to supply code to be linked from memory."""
+     """An object that holds code to be linked from memory.
+
+     :param data: A buffer containing the data to link.
+     :param name: The name of the file to be referenced in any compilation or
+                  linking errors that may be produced.
+     """

      def __init__(self, data, name=None):
          self.data = data
@@ -15,49 +19,49 @@ class LinkableCode:


  class PTXSource(LinkableCode):
-     """PTX Source code in memory"""
+     """PTX source code in memory."""

      kind = FILE_EXTENSION_MAP["ptx"]
      default_name = "<unnamed-ptx>"


  class CUSource(LinkableCode):
-     """CUDA C/C++ Source code in memory"""
+     """CUDA C/C++ source code in memory."""

      kind = "cu"
      default_name = "<unnamed-cu>"


  class Fatbin(LinkableCode):
-     """A fatbin ELF in memory"""
+     """An ELF Fatbin in memory."""

      kind = FILE_EXTENSION_MAP["fatbin"]
      default_name = "<unnamed-fatbin>"


  class Cubin(LinkableCode):
-     """A cubin ELF in memory"""
+     """An ELF Cubin in memory."""

      kind = FILE_EXTENSION_MAP["cubin"]
      default_name = "<unnamed-cubin>"


  class Archive(LinkableCode):
-     """An archive of objects in memory"""
+     """An archive of objects in memory."""

      kind = FILE_EXTENSION_MAP["a"]
      default_name = "<unnamed-archive>"


  class Object(LinkableCode):
-     """An object file in memory"""
+     """An object file in memory."""

      kind = FILE_EXTENSION_MAP["o"]
      default_name = "<unnamed-object>"


  class LTOIR(LinkableCode):
-     """An LTOIR file in memory"""
+     """An LTOIR file in memory."""

      kind = "ltoir"
      default_name = "<unnamed-ltoir>"
numba_cuda/numba/cuda/cudadrv/nvvm.py CHANGED
@@ -314,7 +314,9 @@ COMPUTE_CAPABILITIES = (
      (6, 0), (6, 1), (6, 2),
      (7, 0), (7, 2), (7, 5),
      (8, 0), (8, 6), (8, 7), (8, 9),
-     (9, 0)
+     (9, 0),
+     (10, 0), (10, 1),
+     (12, 0),
  )

  # Maps CTK version -> (min supported cc, max supported cc) inclusive
@@ -331,6 +333,9 @@ CTK_SUPPORTED = {
      (12, 2): ((5, 0), (9, 0)),
      (12, 3): ((5, 0), (9, 0)),
      (12, 4): ((5, 0), (9, 0)),
+     (12, 5): ((5, 0), (9, 0)),
+     (12, 6): ((5, 0), (9, 0)),
+     (12, 8): ((5, 0), (12, 0)),
  }
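
To read the table: each entry maps a CUDA toolkit version to the inclusive range of compute capabilities it can target, so support checks reduce to lexicographic tuple comparison. A sketch using an abbreviated copy of the mapping above:

```python
# Abbreviated from CTK_SUPPORTED above; values are (min cc, max cc), inclusive.
CTK_SUPPORTED = {
    (12, 6): ((5, 0), (9, 0)),
    (12, 8): ((5, 0), (12, 0)),
}

def ctk_supports_cc(ctk, cc):
    min_cc, max_cc = CTK_SUPPORTED[ctk]
    return min_cc <= cc <= max_cc

assert ctk_supports_cc((12, 8), (12, 0))       # newly supported in CTK 12.8
assert not ctk_supports_cc((12, 6), (10, 0))   # too new for CTK 12.6
```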
numba_cuda/numba/cuda/debuginfo.py ADDED
@@ -0,0 +1,44 @@
+ from llvmlite import ir
+ from numba.core import types
+ from numba.core.debuginfo import DIBuilder
+ from numba.cuda.types import GridGroup
+
+ _BYTE_SIZE = 8
+
+
+ class CUDADIBuilder(DIBuilder):
+
+     def _var_type(self, lltype, size, datamodel=None):
+         is_bool = False
+         is_grid_group = False
+
+         if isinstance(lltype, ir.IntType):
+             if datamodel is None:
+                 if size == 1:
+                     name = str(lltype)
+                     is_bool = True
+             else:
+                 name = str(datamodel.fe_type)
+                 if isinstance(datamodel.fe_type, types.Boolean):
+                     is_bool = True
+                 elif isinstance(datamodel.fe_type, GridGroup):
+                     is_grid_group = True
+
+         if is_bool or is_grid_group:
+             m = self.module
+             bitsize = _BYTE_SIZE * size
+             # Boolean type workaround until upstream Numba is fixed
+             if is_bool:
+                 ditok = "DW_ATE_boolean"
+             # GridGroup type should use numba.cuda implementation
+             elif is_grid_group:
+                 ditok = "DW_ATE_unsigned"
+
+             return m.add_debug_info('DIBasicType', {
+                 'name': name,
+                 'size': bitsize,
+                 'encoding': ir.DIToken(ditok),
+             })
+
+         # For other cases, use upstream Numba implementation
+         return super()._var_type(lltype, size, datamodel=datamodel)
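
The override ultimately emits a DIBasicType node carrying the desired DWARF encoding token. A standalone llvmlite illustration of the node produced for a Numba boolean (stored in one byte, hence size 8 bits):

```python
from llvmlite import ir

m = ir.Module(name="di_sketch")
di = m.add_debug_info('DIBasicType', {
    'name': 'bool',
    'size': 8,  # _BYTE_SIZE * size for a 1-byte value
    'encoding': ir.DIToken('DW_ATE_boolean'),
})
# str(m) now contains a line of the form:
# !DIBasicType(name: "bool", size: 8, encoding: DW_ATE_boolean)
```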
numba_cuda/numba/cuda/decorators.py CHANGED
@@ -173,7 +173,7 @@ def jit(func_or_sig=None, device=False, inline=False, link=[], debug=None,
      return disp


- def declare_device(name, sig):
+ def declare_device(name, sig, link=None):
      """
      Declare the signature of a foreign function. Returns a descriptor that can
      be used to call the function from a Python kernel.
@@ -181,10 +181,17 @@ def declare_device(name, sig):
      :param name: The name of the foreign function.
      :type name: str
      :param sig: The Numba signature of the function.
+     :param link: External code to link when calling the function.
      """
+     if link is None:
+         link = tuple()
+     else:
+         if not isinstance(link, (list, tuple, set)):
+             link = (link,)
+
      argtypes, restype = sigutils.normalize_signature(sig)
      if restype is None:
          msg = 'Return type must be provided for device declarations'
          raise TypeError(msg)

-     return declare_device_function(name, restype, argtypes)
+     return declare_device_function(name, restype, argtypes, link)
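
Because of the normalization above, `link` accepts a single linkable object as well as a list, tuple, or set of them; a bare object is wrapped in a one-element tuple. Illustrative calls (the in-memory source mirrors the `times2_cu` defined in the tests later in this diff):

```python
from numba import cuda

times2_cu = cuda.CUSource(
    'extern "C" __device__ int times2(int *out, int a)'
    '{ *out = a * 2; return 0; }')

# All accepted forms:
f = cuda.declare_device('times2', 'int32(int32)', link=times2_cu)
f = cuda.declare_device('times2', 'int32(int32)', link=[times2_cu])
f = cuda.declare_device('times2', 'int32(int32)', link=(times2_cu,))
```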
numba_cuda/numba/cuda/dispatcher.py CHANGED
@@ -4,17 +4,19 @@ import re
  import sys
  import ctypes
  import functools
+ from collections import defaultdict

- from numba.core import config, serialize, sigutils, types, typing, utils
+ from numba.core import config, ir, serialize, sigutils, types, typing, utils
  from numba.core.caching import Cache, CacheImpl
  from numba.core.compiler_lock import global_compiler_lock
  from numba.core.dispatcher import Dispatcher
  from numba.core.errors import NumbaPerformanceWarning
  from numba.core.typing.typeof import Purpose, typeof
-
+ from numba.core.types.functions import Function
  from numba.cuda.api import get_current_device
  from numba.cuda.args import wrap_arg
- from numba.cuda.compiler import compile_cuda, CUDACompiler, kernel_fixup
+ from numba.cuda.compiler import (compile_cuda, CUDACompiler, kernel_fixup,
+                                  ExternFunction)
  from numba.cuda.cudadrv import driver
  from numba.cuda.cudadrv.devices import get_context
  from numba.cuda.descriptor import cuda_target
@@ -41,6 +43,55 @@ cuda_fp16_math_funcs = ['hsin', 'hcos',
  reshape_funcs = ['nocopy_empty_reshape', 'numba_attempt_nocopy_reshape']


+ def get_cres_link_objects(cres):
+     """Given a compile result, return a set of all linkable code objects that
+     are required for it to be fully linked."""
+
+     link_objects = set()
+
+     # List of calls into declared device functions
+     device_func_calls = [
+         (name, v) for name, v in cres.fndesc.typemap.items() if (
+             isinstance(v, cuda_types.CUDADispatcher)
+         )
+     ]
+
+     # List of tuples with SSA name of calls and corresponding signature
+     call_signatures = [
+         (call.func.name, sig)
+         for call, sig in cres.fndesc.calltypes.items() if (
+             isinstance(call, ir.Expr) and call.op == 'call'
+         )
+     ]
+
+     # Map SSA names to all invoked signatures
+     call_signature_d = defaultdict(list)
+     for name, sig in call_signatures:
+         call_signature_d[name].append(sig)
+
+     # Add the link objects from the current function's callees
+     for name, v in device_func_calls:
+         for sig in call_signature_d.get(name, []):
+             called_cres = v.dispatcher.overloads[sig.args]
+             called_link_objects = get_cres_link_objects(called_cres)
+             link_objects.update(called_link_objects)
+
+     # From this point onwards, we are only interested in ExternFunction
+     # declarations - these are the calls made directly in this function to
+     # them.
+     for name, v in cres.fndesc.typemap.items():
+         if not isinstance(v, Function):
+             continue
+
+         if not isinstance(v.typing_key, ExternFunction):
+             continue
+
+         for obj in v.typing_key.link:
+             link_objects.add(obj)
+
+     return link_objects
+
+
  class _Kernel(serialize.ReduceMixin):
      '''
      CUDA Kernel specialized for a given set of argument types. When called, this
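
Note the traversal is transitive and set-based: an object reached through several call paths is linked once. A simplified model of the idea (hypothetical node objects, not Numba's real compile results):

```python
def collect_links(fn):
    # fn is a hypothetical node with `link` (its own linkable objects)
    # and `callees` (functions it calls); returns everything reachable.
    objects = set(fn.link)
    for callee in fn.callees:
        objects |= collect_links(callee)
    return objects
```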
@@ -158,6 +209,9 @@ class _Kernel(serialize.ReduceMixin):

          self.maybe_link_nrt(link, tgt_ctx, asm)

+         for obj in get_cres_link_objects(cres):
+             lib.add_linking_file(obj)
+
          for filepath in link:
              lib.add_linking_file(filepath)

@@ -256,7 +310,11 @@ class _Kernel(serialize.ReduceMixin):
          """
          cufunc = self._codelibrary.get_cufunc()

-         if hasattr(self, "target_context") and self.target_context.enable_nrt:
+         if (
+             hasattr(self, "target_context")
+             and self.target_context.enable_nrt
+             and config.CUDA_NRT_STATS
+         ):
              rtsys.ensure_initialized()
              rtsys.set_memsys_to_module(cufunc.module)
              # We don't know which stream the kernel will be launched on, so
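
With this change, NRT memsys statistics only attach to a module when `CUDA_NRT_STATS` is set in addition to NRT being enabled. In tests this is done with `override_config`, as the nrt test hunks later in this diff show:

```python
from numba.tests.support import override_config

with (
    override_config('CUDA_ENABLE_NRT', True),
    override_config('CUDA_NRT_STATS', True),
):
    ...  # launch kernels here; memsys statistics are collected
```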
numba_cuda/numba/cuda/target.py CHANGED
@@ -3,8 +3,7 @@ from functools import cached_property
  import llvmlite.binding as ll
  from llvmlite import ir

- from numba.core import (cgutils, config, debuginfo, itanium_mangler, types,
-                         typing, utils)
+ from numba.core import cgutils, config, itanium_mangler, types, typing
  from numba.core.dispatcher import Dispatcher
  from numba.core.base import BaseContext
  from numba.core.callconv import BaseCallConv, MinimalCallConv
@@ -12,7 +11,8 @@ from numba.core.typing import cmathdecl
  from numba.core import datamodel

  from .cudadrv import nvvm
- from numba.cuda import codegen, nvvmutils, ufuncs
+ from numba.cuda import codegen, ufuncs
+ from numba.cuda.debuginfo import CUDADIBuilder
  from numba.cuda.models import cuda_data_manager

  # -----------------------------------------------------------------------------
@@ -80,7 +80,7 @@ class CUDATargetContext(BaseContext):

      @property
      def DIBuilder(self):
-         return debuginfo.DIBuilder
+         return CUDADIBuilder

      @property
      def enable_boundscheck(self):
@@ -150,136 +150,6 @@ class CUDATargetContext(BaseContext):
          return itanium_mangler.mangle(name, argtypes, abi_tags=abi_tags,
                                        uid=uid)

-     def prepare_cuda_kernel(self, codelib, fndesc, debug, lineinfo,
-                             nvvm_options, filename, linenum,
-                             max_registers=None, lto=False):
-         """
-         Adapt a code library ``codelib`` with the numba compiled CUDA kernel
-         with name ``fname`` and arguments ``argtypes`` for NVVM.
-         A new library is created with a wrapper function that can be used as
-         the kernel entry point for the given kernel.
-
-         Returns the new code library and the wrapper function.
-
-         Parameters:
-
-         codelib:       The CodeLibrary containing the device function to wrap
-                        in a kernel call.
-         fndesc:        The FunctionDescriptor of the source function.
-         debug:         Whether to compile with debug.
-         lineinfo:      Whether to emit line info.
-         nvvm_options:  Dict of NVVM options used when compiling the new library.
-         filename:      The source filename that the function is contained in.
-         linenum:       The source line that the function is on.
-         max_registers: The max_registers argument for the code library.
-         """
-         kernel_name = itanium_mangler.prepend_namespace(
-             fndesc.llvm_func_name, ns='cudapy',
-         )
-         library = self.codegen().create_library(f'{codelib.name}_kernel_',
-                                                 entry_name=kernel_name,
-                                                 nvvm_options=nvvm_options,
-                                                 max_registers=max_registers,
-                                                 lto=lto
-                                                 )
-         library.add_linking_library(codelib)
-         wrapper = self.generate_kernel_wrapper(library, fndesc, kernel_name,
-                                                debug, lineinfo, filename,
-                                                linenum)
-         return library, wrapper
-
-     def generate_kernel_wrapper(self, library, fndesc, kernel_name, debug,
-                                 lineinfo, filename, linenum):
-         """
-         Generate the kernel wrapper in the given ``library``.
-         The function being wrapped is described by ``fndesc``.
-         The wrapper function is returned.
-         """
-
-         argtypes = fndesc.argtypes
-         arginfo = self.get_arg_packer(argtypes)
-         argtys = list(arginfo.argument_types)
-         wrapfnty = ir.FunctionType(ir.VoidType(), argtys)
-         wrapper_module = self.create_module("cuda.kernel.wrapper")
-         fnty = ir.FunctionType(ir.IntType(32),
-                                [self.call_conv.get_return_type(types.pyobject)]
-                                + argtys)
-         func = ir.Function(wrapper_module, fnty, fndesc.llvm_func_name)
-
-         prefixed = itanium_mangler.prepend_namespace(func.name, ns='cudapy')
-         wrapfn = ir.Function(wrapper_module, wrapfnty, prefixed)
-         builder = ir.IRBuilder(wrapfn.append_basic_block(''))
-
-         if debug or lineinfo:
-             directives_only = lineinfo and not debug
-             debuginfo = self.DIBuilder(module=wrapper_module,
-                                        filepath=filename,
-                                        cgctx=self,
-                                        directives_only=directives_only)
-             debuginfo.mark_subprogram(
-                 wrapfn, kernel_name, fndesc.args, argtypes, linenum,
-             )
-             debuginfo.mark_location(builder, linenum)
-
-         # Define error handling variable
-         def define_error_gv(postfix):
-             name = wrapfn.name + postfix
-             gv = cgutils.add_global_variable(wrapper_module, ir.IntType(32),
-                                              name)
-             gv.initializer = ir.Constant(gv.type.pointee, None)
-             return gv
-
-         gv_exc = define_error_gv("__errcode__")
-         gv_tid = []
-         gv_ctaid = []
-         for i in 'xyz':
-             gv_tid.append(define_error_gv("__tid%s__" % i))
-             gv_ctaid.append(define_error_gv("__ctaid%s__" % i))
-
-         callargs = arginfo.from_arguments(builder, wrapfn.args)
-         status, _ = self.call_conv.call_function(
-             builder, func, types.void, argtypes, callargs)
-
-         if debug:
-             # Check error status
-             with cgutils.if_likely(builder, status.is_ok):
-                 builder.ret_void()
-
-             with builder.if_then(builder.not_(status.is_python_exc)):
-                 # User exception raised
-                 old = ir.Constant(gv_exc.type.pointee, None)
-
-                 # Use atomic cmpxchg to prevent rewriting the error status
-                 # Only the first error is recorded
-
-                 xchg = builder.cmpxchg(gv_exc, old, status.code,
-                                        'monotonic', 'monotonic')
-                 changed = builder.extract_value(xchg, 1)
-
-                 # If the xchange is successful, save the thread ID.
-                 sreg = nvvmutils.SRegBuilder(builder)
-                 with builder.if_then(changed):
-                     for dim, ptr, in zip("xyz", gv_tid):
-                         val = sreg.tid(dim)
-                         builder.store(val, ptr)
-
-                     for dim, ptr, in zip("xyz", gv_ctaid):
-                         val = sreg.ctaid(dim)
-                         builder.store(val, ptr)
-
-                 builder.ret_void()
-
-         nvvm.set_cuda_kernel(wrapfn)
-         library.add_ir_module(wrapper_module)
-         if debug or lineinfo:
-             debuginfo.finalize()
-         library.finalize()
-
-         if config.DUMP_LLVM:
-             utils.dump_llvm(fndesc, wrapper_module)
-
-         return library.get_function(wrapfn.name)
-
      def make_constant_array(self, builder, aryty, arr):
          """
          Unlike the parent version. This returns a pointer in the constant
numba_cuda/numba/cuda/testing.py CHANGED
@@ -115,12 +115,22 @@ def skip_on_arm(reason):
  def skip_if_cuda_includes_missing(fn):
      # Skip when cuda.h is not available - generally this should indicate
      # whether the CUDA includes are available or not
-     cuda_h = os.path.join(config.CUDA_INCLUDE_PATH, 'cuda.h')
+     cuda_include_path = libs.get_cuda_include_dir()
+     cuda_h = os.path.join(cuda_include_path, 'cuda.h')
      cuda_h_file = (os.path.exists(cuda_h) and os.path.isfile(cuda_h))
      reason = 'CUDA include dir not available on this system'
      return unittest.skipUnless(cuda_h_file, reason)(fn)


+ def skip_if_curand_kernel_missing(fn):
+     cuda_include_path = libs.get_cuda_include_dir()
+     curand_kernel_h = os.path.join(cuda_include_path, 'curand_kernel.h')
+     curand_kernel_h_file = (os.path.exists(curand_kernel_h) and
+                             os.path.isfile(curand_kernel_h))
+     reason = 'curand_kernel.h not available on this system'
+     return unittest.skipUnless(curand_kernel_h_file, reason)(fn)
+
+
  def skip_if_mvc_enabled(reason):
      """Skip a test if Minor Version Compatibility is enabled"""
      return unittest.skipIf(config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY,
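
Usage matches the existing skip helpers; the new decorator guards tests whose linked source includes curand_kernel.h (see test_include_cuda_header later in this diff). An illustrative, hypothetical test class:

```python
from numba.cuda.testing import CUDATestCase, skip_if_curand_kernel_missing

class TestRNG(CUDATestCase):
    @skip_if_curand_kernel_missing
    def test_uses_curand(self):
        ...  # would link source that does #include <curand_kernel.h>
```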
numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py CHANGED
@@ -72,6 +72,57 @@ class TestCudaDebugInfo(CUDATestCase):
          def f(x):
              x[0] = 0

+     def test_issue_9888(self):
+         # Compiler created symbol should not be emitted in DILocalVariable
+         # See Numba Issue #9888 https://github.com/numba/numba/pull/9888
+         sig = (types.boolean,)
+
+         @cuda.jit(sig, debug=True, opt=False)
+         def f(cond):
+             if cond:
+                 x = 1  # noqa: F841
+             else:
+                 x = 0  # noqa: F841
+
+         llvm_ir = f.inspect_llvm(sig)
+         # A variable name starting with "bool" in the debug metadata
+         pat = r'!DILocalVariable\(.*name:\s+\"bool'
+         match = re.compile(pat).search(llvm_ir)
+         self.assertIsNone(match, msg=llvm_ir)
+
+     def test_bool_type(self):
+         sig = (types.int32, types.int32)
+
+         @cuda.jit("void(int32, int32)", debug=True, opt=False)
+         def f(x, y):
+             z = x == y  # noqa: F841
+
+         llvm_ir = f.inspect_llvm(sig)
+
+         # extract the metadata node id from `type` field of DILocalVariable
+         pat = r'!DILocalVariable\(.*name:\s+"z".*type:\s+!(\d+)'
+         match = re.compile(pat).search(llvm_ir)
+         self.assertIsNotNone(match, msg=llvm_ir)
+         mdnode_id = match.group(1)
+
+         # verify the DIBasicType has correct encoding attribute DW_ATE_boolean
+         pat = rf'!{mdnode_id}\s+=\s+!DIBasicType\(.*DW_ATE_boolean'
+         match = re.compile(pat).search(llvm_ir)
+         self.assertIsNotNone(match, msg=llvm_ir)
+
+     def test_grid_group_type(self):
+         sig = (types.int32,)
+
+         @cuda.jit(sig, debug=True, opt=False)
+         def f(x):
+             grid = cuda.cg.this_grid()  # noqa: F841
+
+         llvm_ir = f.inspect_llvm(sig)
+
+         pat = r'!DIBasicType\(.*DW_ATE_unsigned, name: "GridGroup", size: 64'
+         match = re.compile(pat).search(llvm_ir)
+         self.assertIsNotNone(match, msg=llvm_ir)
+
      @unittest.skip("Wrappers no longer exist")
      def test_wrapper_has_debuginfo(self):
          sig = (types.int32[::1],)
@@ -217,6 +268,36 @@ class TestCudaDebugInfo(CUDATestCase):
          three_device_fns(kernel_debug=False, leaf_debug=True)
          three_device_fns(kernel_debug=False, leaf_debug=False)

+     def test_kernel_args_types(self):
+         sig = (types.int32, types.int32)
+
+         @cuda.jit("void(int32, int32)", debug=True, opt=False)
+         def f(x, y):
+             z = x + y  # noqa: F841
+
+         llvm_ir = f.inspect_llvm(sig)
+
+         # extract the metadata node id from `types` field of DISubroutineType
+         pat = r'!DISubroutineType\(types:\s+!(\d+)\)'
+         match = re.compile(pat).search(llvm_ir)
+         self.assertIsNotNone(match, msg=llvm_ir)
+         mdnode_id = match.group(1)
+
+         # extract the metadata node ids from the flexible node of types
+         pat = rf'!{mdnode_id}\s+=\s+!{{\s+!(\d+),\s+!(\d+)\s+}}'
+         match = re.compile(pat).search(llvm_ir)
+         self.assertIsNotNone(match, msg=llvm_ir)
+         mdnode_id1 = match.group(1)
+         mdnode_id2 = match.group(2)
+
+         # verify each of the two metadata nodes match expected type
+         pat = rf'!{mdnode_id1}\s+=\s+!DIBasicType\(.*DW_ATE_signed,\s+name:\s+"int32"'  # noqa: E501
+         match = re.compile(pat).search(llvm_ir)
+         self.assertIsNotNone(match, msg=llvm_ir)
+         pat = rf'!{mdnode_id2}\s+=\s+!DIBasicType\(.*DW_ATE_signed,\s+name:\s+"int32"'  # noqa: E501
+         match = re.compile(pat).search(llvm_ir)
+         self.assertIsNotNone(match, msg=llvm_ir)
+

  if __name__ == '__main__':
      unittest.main()
numba_cuda/numba/cuda/tests/cudapy/test_device_func.py CHANGED
@@ -1,11 +1,14 @@
  import re
- import types
+ import cffi

  import numpy as np

- from numba.cuda.testing import unittest, skip_on_cudasim, CUDATestCase
- from numba import cuda, jit, float32, int32
+ from numba.cuda.testing import (skip_if_curand_kernel_missing, skip_on_cudasim,
+                                 test_data_dir, unittest, CUDATestCase)
+ from numba import cuda, jit, float32, int32, types
  from numba.core.errors import TypingError
+ from numba.tests.support import skip_unless_cffi
+ from types import ModuleType


  class TestDeviceFunc(CUDATestCase):
@@ -92,7 +95,7 @@ class TestDeviceFunc(CUDATestCase):
          def add(a, b):
              return a + b

-         mymod = types.ModuleType(name='mymod')
+         mymod = ModuleType(name='mymod')
          mymod.add = add
          del add

@@ -192,31 +195,287 @@ class TestDeviceFunc(CUDATestCase):

          self.assertEqual(0x04010203, x[0])

-     def _test_declare_device(self, decl):
+
+ times2_cu = cuda.CUSource("""
+ extern "C" __device__
+ int times2(int *out, int a)
+ {
+     *out = a * 2;
+     return 0;
+ }
+ """)
+
+ times3_cu = cuda.CUSource("""
+ extern "C" __device__
+ int times3(int *out, int a)
+ {
+     *out = a * 3;
+     return 0;
+ }
+ """)
+
+ times4_cu = cuda.CUSource("""
+ extern "C" __device__
+ int times2(int *out, int a);
+
+ extern "C" __device__
+ int times4(int *out, int a)
+ {
+     int tmp;
+     times2(&tmp, a);
+     *out = tmp * 2;
+     return 0;
+ }
+ """)
+
+ jitlink_user_cu = cuda.CUSource("""
+ extern "C" __device__
+ int array_mutator(void *out, int *a);
+
+ extern "C" __device__
+ int use_array_mutator(void *out, int *a) {
+     array_mutator(out, a);
+     return 0;
+ }
+ """)
+
+ rng_cu = cuda.CUSource("""
+ #include <curand_kernel.h>
+
+ extern "C" __device__
+ int random_number(unsigned int *out, unsigned long long seed)
+ {
+     // Initialize state
+     curandStateXORWOW_t state;
+     unsigned long long sequence = 1;
+     unsigned long long offset = 0;
+     curand_init(seed, sequence, offset, &state);
+
+     // Generate one random number
+     *out = curand(&state);
+
+     // Report no exception
+     return 0;
+ }""")
+
+
+ @skip_on_cudasim('External functions unsupported in the simulator')
+ class TestDeclareDevice(CUDATestCase):
+
+     def check_api(self, decl):
          self.assertEqual(decl.name, 'f1')
          self.assertEqual(decl.sig.args, (float32[:],))
          self.assertEqual(decl.sig.return_type, int32)

-     @skip_on_cudasim('cudasim does not check signatures')
      def test_declare_device_signature(self):
          f1 = cuda.declare_device('f1', int32(float32[:]))
-         self._test_declare_device(f1)
+         self.check_api(f1)

-     @skip_on_cudasim('cudasim does not check signatures')
      def test_declare_device_string(self):
          f1 = cuda.declare_device('f1', 'int32(float32[:])')
-         self._test_declare_device(f1)
+         self.check_api(f1)

-     @skip_on_cudasim('cudasim does not check signatures')
      def test_bad_declare_device_tuple(self):
          with self.assertRaisesRegex(TypeError, 'Return type'):
              cuda.declare_device('f1', (float32[:],))

-     @skip_on_cudasim('cudasim does not check signatures')
      def test_bad_declare_device_string(self):
          with self.assertRaisesRegex(TypeError, 'Return type'):
              cuda.declare_device('f1', '(float32[:],)')

+     def test_link_cu_source(self):
+         times2 = cuda.declare_device('times2', 'int32(int32)', link=times2_cu)
+
+         @cuda.jit
+         def kernel(r, x):
+             i = cuda.grid(1)
+             if i < len(r):
+                 r[i] = times2(x[i])
+
+         x = np.arange(10, dtype=np.int32)
+         r = np.empty_like(x)
+
+         kernel[1, 32](r, x)
+
+         np.testing.assert_equal(r, x * 2)
+
+     def _test_link_multiple_sources(self, link_type):
+         link = link_type([times2_cu, times4_cu])
+         times4 = cuda.declare_device('times4', 'int32(int32)', link=link)
+
+         @cuda.jit
+         def kernel(r, x):
+             i = cuda.grid(1)
+             if i < len(r):
+                 r[i] = times4(x[i])
+
+         x = np.arange(10, dtype=np.int32)
+         r = np.empty_like(x)
+
+         kernel[1, 32](r, x)
+
+         np.testing.assert_equal(r, x * 4)
+
+     def test_link_multiple_sources_set(self):
+         self._test_link_multiple_sources(set)
+
+     def test_link_multiple_sources_tuple(self):
+         self._test_link_multiple_sources(tuple)
+
+     def test_link_multiple_sources_list(self):
+         self._test_link_multiple_sources(list)
+
+     @skip_unless_cffi
+     def test_link_sources_in_memory_and_on_disk(self):
+         jitlink_cu = str(test_data_dir / "jitlink.cu")
+         link = [jitlink_cu, jitlink_user_cu]
+         sig = types.void(types.CPointer(types.int32))
+         ext_fn = cuda.declare_device("use_array_mutator", sig, link=link)
+
+         ffi = cffi.FFI()
+
+         @cuda.jit
+         def kernel(x):
+             ptr = ffi.from_buffer(x)
+             ext_fn(ptr)
+
+         x = np.arange(2, dtype=np.int32)
+         kernel[1, 1](x)
+
+         expected = np.ones(2, dtype=np.int32)
+         np.testing.assert_equal(x, expected)
+
+     @skip_if_curand_kernel_missing
+     def test_include_cuda_header(self):
+         sig = types.int32(types.uint64)
+         link = [rng_cu]
+         random_number = cuda.declare_device("random_number", sig, link=link)
+
+         @cuda.jit
+         def kernel(x, seed):
+             x[0] = random_number(seed)
+
+         x = np.zeros(1, dtype=np.uint32)
+         kernel[1, 1](x, 1)
+         np.testing.assert_equal(x[0], 323845807)
+
+     def test_declared_in_called_function(self):
+         times2 = cuda.declare_device('times2', 'int32(int32)', link=times2_cu)
+
+         @cuda.jit
+         def device_func(x):
+             return times2(x)
+
+         @cuda.jit
+         def kernel(r, x):
+             i = cuda.grid(1)
+             if i < len(r):
+                 r[i] = device_func(x[i])
+
+         x = np.arange(10, dtype=np.int32)
+         r = np.empty_like(x)
+
+         kernel[1, 32](r, x)
+
+         np.testing.assert_equal(r, x * 2)
+
+     def test_declared_in_called_function_twice(self):
+         times2 = cuda.declare_device('times2', 'int32(int32)', link=times2_cu)
+
+         @cuda.jit
+         def device_func_1(x):
+             return times2(x)
+
+         @cuda.jit
+         def device_func_2(x):
+             return device_func_1(x)
+
+         @cuda.jit
+         def kernel(r, x):
+             i = cuda.grid(1)
+             if i < len(r):
+                 r[i] = device_func_2(x[i])
+
+         x = np.arange(10, dtype=np.int32)
+         r = np.empty_like(x)
+
+         kernel[1, 32](r, x)
+
+         np.testing.assert_equal(r, x * 2)
+
+     def test_declared_in_called_function_two_calls(self):
+         times2 = cuda.declare_device('times2', 'int32(int32)', link=times2_cu)
+
+         @cuda.jit
+         def device_func(x):
+             return times2(x)
+
+         @cuda.jit
+         def kernel(r, x):
+             i = cuda.grid(1)
+             if i < len(r):
+                 r[i] = device_func(x[i]) + device_func(x[i] + i)
+
+         x = np.arange(10, dtype=np.int32)
+         r = np.empty_like(x)
+
+         kernel[1, 32](r, x)
+
+         np.testing.assert_equal(r, x * 6)
+
+     def test_call_declared_function_twice(self):
+         times2 = cuda.declare_device('times2', 'int32(int32)', link=times2_cu)
+
+         @cuda.jit
+         def kernel(r, x):
+             i = cuda.grid(1)
+             if i < len(r):
+                 r[i] = times2(x[i]) + times2(x[i] + i)
+
+         x = np.arange(10, dtype=np.int32)
+         r = np.empty_like(x)
+
+         kernel[1, 32](r, x)
+
+         np.testing.assert_equal(r, x * 6)
+
+     def test_declared_in_called_function_and_parent(self):
+         times2 = cuda.declare_device('times2', 'int32(int32)', link=times2_cu)
+
+         @cuda.jit
+         def device_func(x):
+             return times2(x)
+
+         @cuda.jit
+         def kernel(r, x):
+             i = cuda.grid(1)
+             if i < len(r):
+                 r[i] = device_func(x[i]) + times2(x[i])
+
+         x = np.arange(10, dtype=np.int32)
+         r = np.empty_like(x)
+
+         kernel[1, 32](r, x)
+
+         np.testing.assert_equal(r, x * 4)
+
+     def test_call_two_different_declared_functions(self):
+         times2 = cuda.declare_device('times2', 'int32(int32)', link=times2_cu)
+         times3 = cuda.declare_device('times3', 'int32(int32)', link=times3_cu)
+
+         @cuda.jit
+         def kernel(r, x):
+             i = cuda.grid(1)
+             if i < len(r):
+                 r[i] = times2(x[i]) + times3(x[i])
+
+         x = np.arange(10, dtype=np.int32)
+         r = np.empty_like(x)
+
+         kernel[1, 32](r, x)
+
+         np.testing.assert_equal(r, x * 5)
+

  if __name__ == '__main__':
      unittest.main()
numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py CHANGED
@@ -15,16 +15,18 @@ class TestFFI(CUDATestCase):
          import numpy as np
          import os

-         # Declaration of the foreign function
-         mul = cuda.declare_device('mul_f32_f32', 'float32(float32, float32)')
-
          # Path to the source containing the foreign function
          # (here assumed to be in a subdirectory called "ffi")
          basedir = os.path.dirname(os.path.abspath(__file__))
          functions_cu = os.path.join(basedir, 'ffi', 'functions.cu')

-         # Kernel that links in functions.cu and calls mul
-         @cuda.jit(link=[functions_cu])
+         # Declaration of the foreign function
+         mul = cuda.declare_device('mul_f32_f32', 'float32(float32, float32)',
+                                   link=functions_cu)
+
+         # A kernel that calls mul; functions.cu is linked automatically due to
+         # the call to mul.
+         @cuda.jit
          def multiply_vectors(r, x, y):
              i = cuda.grid(1)

@@ -54,14 +56,15 @@ class TestFFI(CUDATestCase):

          # magictoken.ex_from_buffer_decl.begin
          signature = 'float32(CPointer(float32), int32)'
-         sum_reduce = cuda.declare_device('sum_reduce', signature)
+         sum_reduce = cuda.declare_device('sum_reduce', signature,
+                                          link=functions_cu)
          # magictoken.ex_from_buffer_decl.end

          # magictoken.ex_from_buffer_kernel.begin
          import cffi
          ffi = cffi.FFI()

-         @cuda.jit(link=[functions_cu])
+         @cuda.jit
          def reduction_caller(result, array):
              array_ptr = ffi.from_buffer(array)
              result[()] = sum_reduce(array_ptr, len(array))
numba_cuda/numba/cuda/tests/nrt/test_nrt.py CHANGED
@@ -171,7 +171,10 @@ class TestNrtStatistics(CUDATestCase):
              arr = cuda_arange(5 * tmp[0])  # noqa: F841
              return None

-         with override_config('CUDA_ENABLE_NRT', True):
+         with (
+             override_config('CUDA_ENABLE_NRT', True),
+             override_config('CUDA_NRT_STATS', True)
+         ):
              # Switch on stats
              rtsys.memsys_enable_stats()
              # check the stats are on
numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py CHANGED
@@ -18,7 +18,10 @@ class TestNrtRefCt(EnableNRTStatsMixin, CUDATestCase):
          super(TestNrtRefCt, self).tearDown()

      def run(self, result=None):
-         with override_config("CUDA_ENABLE_NRT", True):
+         with (
+             override_config("CUDA_ENABLE_NRT", True),
+             override_config('CUDA_NRT_STATS', True)
+         ):
              super(TestNrtRefCt, self).run(result)

      def test_no_return(self):
numba_cuda-0.6.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: numba-cuda
- Version: 0.4.0
+ Version: 0.6.0
  Summary: CUDA target for Numba
  Author: Anaconda Inc., NVIDIA Corporation
  License: BSD 2-clause
@@ -27,7 +27,19 @@ tracker](https://github.com/NVIDIA/numba-cuda/issues).
  To raise questions or initiate discussions, please use the [Numba Discourse
  forum](https://numba.discourse.group).

- ## Building from source
+ ## Installation with pip
+
+ ```shell
+ pip install numba-cuda
+ ```
+
+ ## Installation with Conda
+
+ ```shell
+ conda install -c conda-forge numba-cuda
+ ```
+
+ ## Installation from source

  Install as an editable install:

@@ -53,3 +65,9 @@ which will show a path like:
  ```
  <path to numba-cuda repo>/numba_cuda/numba/cuda/__init__.py
  ```
+
+ ## Contributing Guide
+
+ Review the
+ [CONTRIBUTING.md](https://github.com/NVIDIA/numba-cuda/blob/main/CONTRIBUTING.md)
+ file for information on how to contribute code and issues to the project.
numba_cuda-0.6.0.dist-info/RECORD CHANGED
@@ -1,6 +1,6 @@
  _numba_cuda_redirector.pth,sha256=cmfMMmV0JPh3yEpl4bGeM9AuXiVVMSo6Z_b7RaQL3XE,30
  _numba_cuda_redirector.py,sha256=QKJmYICSQvjvph0Zw9OW015MsuKxIF28GPFjR35AXLM,2681
- numba_cuda/VERSION,sha256=QLjrQACpE6d5EJBTXykdPTaYdBYqie88nj1OiHobnnk,6
+ numba_cuda/VERSION,sha256=l6XW5UCmEg0Jw53bZn4Ojiusf8wv_vgTuC4I_WA2W84,6
  numba_cuda/__init__.py,sha256=atXeUvJKR3JHcAiCFbXCVOJQUHgB1TulmsqSL_9RT3Q,114
  numba_cuda/_version.py,sha256=jbdUsbR7sVllw0KxQNB0-FMd929CGg3kH2fhHdrlkuc,719
  numba_cuda/numba/cuda/__init__.py,sha256=idyVHOObC9lTYnp62v7rVprSacRM4d5F6vhXfG5ElTI,621
@@ -9,19 +9,20 @@ numba_cuda/numba/cuda/api_util.py,sha256=aQfUV2-4RM_oGVvckMjbMr5e3effOQNX04v1T0O
  numba_cuda/numba/cuda/args.py,sha256=HloHkw_PQal2DT-I70Xf_XbnGObS1jiUgcRrQ85Gq28,1978
  numba_cuda/numba/cuda/cg.py,sha256=9V1uZqyGOJX1aFd9c6GAPbLSqq83lE8LoP-vxxrKENY,1490
  numba_cuda/numba/cuda/codegen.py,sha256=ghdYBKZ3Mzk2UlLE64HkrAjb60PN9fibSNkWFRQuj4M,13184
- numba_cuda/numba/cuda/compiler.py,sha256=_0qfSjnLnF29B-t8NQRJt4FBUIKxZJE6xN47_G7oRio,21339
+ numba_cuda/numba/cuda/compiler.py,sha256=aWP_aunOOw8RZsTKf-S3YdH5MDkY6kLN5Xr5B2XgOfk,24214
  numba_cuda/numba/cuda/cpp_function_wrappers.cu,sha256=iv84_F6Q9kFjV_kclrQz1msh6Dud8mI3qNkswTid7Qc,953
  numba_cuda/numba/cuda/cuda_fp16.h,sha256=1IC0mdNdkvKbvAe0-f4uYVS7WFrVqOyI1nRUbBiqr6A,126844
  numba_cuda/numba/cuda/cuda_fp16.hpp,sha256=vJ7NUr2X2tKhAP7ojydAiCoOjVO6n4QGoXD6m9Srrlw,89130
  numba_cuda/numba/cuda/cuda_paths.py,sha256=C0gA72QLWUMfvXkFpw1WqqaFqfsQ7HM72hQVXG0A7RU,10023
- numba_cuda/numba/cuda/cudadecl.py,sha256=ZUssRdTvS4sVwvJWTmaRTvrMXMbkPZ_qVp8JMXoXFoc,23300
+ numba_cuda/numba/cuda/cudadecl.py,sha256=6h_Je6cXmfr4VjBowkr-OOGlsXei-QqGlcjU4Yv-m-4,23438
  numba_cuda/numba/cuda/cudaimpl.py,sha256=0oHjDwBC4JmfpwS1Fsn1bm5YWVru5vZvvnO414P4TS0,38840
  numba_cuda/numba/cuda/cudamath.py,sha256=EFNtdzEytAZuwijdRoFGzVKCeal76UzzaNy7wUFQx8I,3978
- numba_cuda/numba/cuda/decorators.py,sha256=qSpir16-jPYSe2YuRZ6g9INeobmsMNg6ab9IZpwJocM,7823
+ numba_cuda/numba/cuda/debuginfo.py,sha256=lMIs7UAOfkqUvD9sx-nNEY8qP9DhWF9X38xnW3yo_Qc,1433
+ numba_cuda/numba/cuda/decorators.py,sha256=MqmbEXVVgIV1G_feYtccKBRTDL0VALWf0LjbrVfJo4s,8041
  numba_cuda/numba/cuda/descriptor.py,sha256=rNMaurJkjNjIBmHPozDoLC35DMURE0fn_LtnXRmaG_w,985
  numba_cuda/numba/cuda/device_init.py,sha256=lP79tCsQ0Np9xcbjv_lXcH4JOiVZvV8nwg3INdETxsc,3586
  numba_cuda/numba/cuda/deviceufunc.py,sha256=yxAH71dpgJWK8okmCJm0FUV6z2AqdThCYOTZspT7z0M,30775
- numba_cuda/numba/cuda/dispatcher.py,sha256=cJH7Jm-U26PyU-M2Igevar_Q_c_k9R-A99InnRGPzX0,42444
+ numba_cuda/numba/cuda/dispatcher.py,sha256=j2nAjlqNAIAoQVCQ4ZQD--hQDsnFLXedlvaXdCMNKEc,44354
  numba_cuda/numba/cuda/errors.py,sha256=XwWHzCllx0DXU6BQdoRH0m3pznGxnTFOBTVYXMmCfqg,1724
  numba_cuda/numba/cuda/extending.py,sha256=URsyBYls2te-mgE0yvDY6akvawYCA0blBFfD7Lf9DO4,142
  numba_cuda/numba/cuda/initialize.py,sha256=TQGHGLQoq4ch4J6CLDcJdGsZzXM-g2kDgdyO1u-Rbhg,546
@@ -39,8 +40,8 @@ numba_cuda/numba/cuda/random.py,sha256=khX8iDdde_RTUPWhAqrxZacHRQAorFr7BokPuxRWz
  numba_cuda/numba/cuda/reshape_funcs.cu,sha256=H5UAa-VAvoxW9SQwJO88ZrDXC64nWALW3Ch4cHAAqO4,4325
  numba_cuda/numba/cuda/simulator_init.py,sha256=W_bPRtmPGOQVuiprbgt7ENnnnELv_LPCeLDIsfsvFZ8,460
  numba_cuda/numba/cuda/stubs.py,sha256=W3tozv4ganMnfbdFqyPjgQXYeX8GQhwx_xXgv8jk6iM,22270
- numba_cuda/numba/cuda/target.py,sha256=hBflzmxCGlmTugWT1sYhZj9f4HkQAMK2RQ9lO85pMW4,17052
- numba_cuda/numba/cuda/testing.py,sha256=E0wP2vfno1yWsl0v1zg31kpbU8FrKxTF-5y9Iv4WjA4,6412
+ numba_cuda/numba/cuda/target.py,sha256=MWpdHs2K17Lus4e318FNbR533q24MhovGS6Q1ob9x_4,11354
+ numba_cuda/numba/cuda/testing.py,sha256=tG1FBm_gqW4esDxCaecMvCRKvwYEg7Yu2Q60ARNnes0,6873
  numba_cuda/numba/cuda/types.py,sha256=WVfjcly_VUpG9FfKueiEPzZm2NV8Hg0XAFg3bNzPdVc,1314
  numba_cuda/numba/cuda/ufuncs.py,sha256=txw27IxG80W1Yo7e-XwL2AMcQo0fMnxMjBIMy-n5pCo,23317
  numba_cuda/numba/cuda/utils.py,sha256=JId22EI3KkQosW6Dafdaw43qU0xXXO_4JOENLap8klU,630
@@ -55,11 +56,11 @@ numba_cuda/numba/cuda/cudadrv/dummyarray.py,sha256=nXRngdr-k3h_BNGQuJUxmp89yGNWx
  numba_cuda/numba/cuda/cudadrv/enums.py,sha256=Wy5dzukTk4TnWCowg_PLceET_v2xEyiWLu9TyH8pXr8,23742
  numba_cuda/numba/cuda/cudadrv/error.py,sha256=zEIryW6aIy8GG4ypmTliB6RgY4Gy2n8ckz7I6W99LUM,524
  numba_cuda/numba/cuda/cudadrv/libs.py,sha256=Gk9zQ1CKcsZsWl-_9QneXeP9VH5q5R1I3Cx043UOytk,7240
- numba_cuda/numba/cuda/cudadrv/linkable_code.py,sha256=Q_YTv0apBo9t8pkMlKrthPPSVeLd376ZTmVDF5NtVVo,1328
+ numba_cuda/numba/cuda/cudadrv/linkable_code.py,sha256=bWBvnndrzWu24SXm7cilCwNFXShJgNmbMfj1Wzemito,1456
  numba_cuda/numba/cuda/cudadrv/mappings.py,sha256=-dTPHvAkDjdH6vS5OjgrB71AFuqKO6CRgf7hpOk2wiw,802
  numba_cuda/numba/cuda/cudadrv/ndarray.py,sha256=HtULWWFyDlgqvrH5459yyPTvU4UbUo2DSdtcNfvbH00,473
  numba_cuda/numba/cuda/cudadrv/nvrtc.py,sha256=XM9_Vllv7HzH5wZIR2lwFictyX68XDtNbyLkXlL6NTI,11003
- numba_cuda/numba/cuda/cudadrv/nvvm.py,sha256=v2hJJTAQeRmoG59-hnhgMEp5BSVA73QHtEoy636VKao,24107
+ numba_cuda/numba/cuda/cudadrv/nvvm.py,sha256=cAoQmZ0bO8i3wPTQq5D0UeMtfnXdGebqYpU4W0kUIEY,24237
  numba_cuda/numba/cuda/cudadrv/rtapi.py,sha256=WdeUoWzsYNYodx8kMRLVIjnNs0QzwpCihd2Q0AaqItE,226
  numba_cuda/numba/cuda/cudadrv/runtime.py,sha256=Tj9ACrzQqNmDSO6xfpzw12EsQknSywQ-ZGuWMbDdHnQ,4255
  numba_cuda/numba/cuda/kernels/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -145,8 +146,8 @@ numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py,sha256=73FCQbNaA
  numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py,sha256=y7cNQZOZJo5Sv16ql3E5QaRksw-U3RkXss9YDcNeiTk,2137
  numba_cuda/numba/cuda/tests/cudapy/test_datetime.py,sha256=2in1Cq8y9zAFoka7H72wF1D0awEd3n7bv56sUPgoNAQ,3508
  numba_cuda/numba/cuda/tests/cudapy/test_debug.py,sha256=3MYNiMe75rgBF1T0vsJ7r-nkW5jPvov_tDms9KXo2UU,3449
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py,sha256=8Tm1iD2x1BRryB1QY6qp6tdjJCE6Tx9p0LzcYwiExIU,7922
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py,sha256=aTRyZSOJB3sAShw0YAEgHILrR-TCuowW9KYjtlRErKM,6892
+ numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py,sha256=jI43jMbPS9Rbr3YI2mZBrDwH9MGjmyVlczv7QxxPoAs,10993
+ numba_cuda/numba/cuda/tests/cudapy/test_device_func.py,sha256=eDVymTQXTzW0WeAgTMDKYtOi1YAM310IUxGp3Y1ICjs,13162
  numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py,sha256=oX-l_L4H8rME1IolwhAyordSGJ152nnuqGAFdWjfgas,26587
  numba_cuda/numba/cuda/tests/cudapy/test_enums.py,sha256=0GWiwvZ1FTzSl1FfMxttkWaWrowASfXrSDT8XAR4ZHw,3560
  numba_cuda/numba/cuda/tests/cudapy/test_errors.py,sha256=jwHbNb2Ro5pbGOPFetmUhI-vG4s36OKCqMJ-lgWxHMY,2620
@@ -219,7 +220,7 @@ numba_cuda/numba/cuda/tests/data/warn.cu,sha256=6L-qsXJIxAr_n3hVMAz_EZ5j0skcJAfg
  numba_cuda/numba/cuda/tests/doc_examples/__init__.py,sha256=GdfSq6pRVSOQwmgNi7ZFQ5l0yg4-2gNar_0Rz0buUpM,157
  numba_cuda/numba/cuda/tests/doc_examples/test_cg.py,sha256=9UQAez1jp3vQ0BIfoRCnGJGP17nznNcon-XFR4grqzQ,2905
  numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py,sha256=DRzvoE2iCaISJb2lkshBkJyYBEfdpqZLRXG_N9XRaFk,2305
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py,sha256=RgZO7xYkJIlSIuJK4k3_APEJAekjkKy5wKOMFdfRoAM,2654
+ numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py,sha256=PoHbrTMFk8rewm7XH_8Vv1733sI-YHOzxoBI4nFhuBA,2773
  numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py,sha256=UH15R0DbMA4iHLmoZ0GtcttGCNctOUif-u2448JMmRo,5177
  numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py,sha256=hS-X_T7x3-BcBanazmnmGxJE_o1A9b9f_VGk0YlJP4o,6135
  numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py,sha256=_0snszis_UE7LxU5lw9ReNF19Dh5iV0yRy18mUWNd1c,3491
@@ -238,14 +239,14 @@ numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py,sha256=7kJOPHEcrjy_kTA
  numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py,sha256=n0_-xFaw6QqiZbhe55oy7lnEeOwqTvA55p5EUFiTpNw,2006
  numba_cuda/numba/cuda/tests/nrt/__init__.py,sha256=43EXdiXXRBd6yIcVGMrU9F_EJCD9Uw3mzOP3SB53AEE,260
  numba_cuda/numba/cuda/tests/nrt/mock_numpy.py,sha256=Cx2DGhm2bJheShP2Ja1w9YLlRTeAMM7u1UYHsPnTzA8,4552
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py,sha256=b3rtK018qslhUU5UsAAa3s-mjlnlfxAwTJmARTVD2j4,7650
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py,sha256=Wq46oICum9IXnbQ97vV8V7g-3U01PLQEQbaGSNdRuMg,3163
+ numba_cuda/numba/cuda/tests/nrt/test_nrt.py,sha256=wByXeagVoxsAu_pmfuYQ7vmeJt82h4VXwCBsDYQfsps,7727
+ numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py,sha256=SnVvTis8YyaqsElRaGQ-34dnWgGavvc2Ovm2xZ_PD3Q,3240
  numba_cuda/numba/cuda/tests/test_binary_generation/Makefile,sha256=P2WzCc5d64JGq6pJwHEwmKVmJOJxPBtsMTbnuzqYkik,2679
  numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py,sha256=V0raLZLGSiWbE_K-JluI0CnmNkXbhlMVj-TH7P1OV8E,5014
  numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu,sha256=cUf-t6ZM9MK_x7X_aKwsrKW1LdR97XcpR-qnYr5faOE,453
  numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu,sha256=q3oxZziT8KDodeNcEBiWULH6vMrHCWucmJmtrg8C0d0,128
- numba_cuda-0.4.0.dist-info/LICENSE,sha256=eHeYE-XjASmwbxfsP5AImgfzRwZurZGqH1f6OFwJ4io,1326
- numba_cuda-0.4.0.dist-info/METADATA,sha256=BWlfqEMCG0dlSXORk9sKzY7nT_YdQzk9eQ7fBX4rvlY,1496
- numba_cuda-0.4.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
- numba_cuda-0.4.0.dist-info/top_level.txt,sha256=C50SsH-8tXDmt7I0Y3nlJYhS5s6pqWflCPdobe9vx2M,11
- numba_cuda-0.4.0.dist-info/RECORD,,
+ numba_cuda-0.6.0.dist-info/LICENSE,sha256=eHeYE-XjASmwbxfsP5AImgfzRwZurZGqH1f6OFwJ4io,1326
+ numba_cuda-0.6.0.dist-info/METADATA,sha256=iNU56EXHsnAcAcwgNXglPh6H47Quz31_-6r9RevpJ_Q,1836
+ numba_cuda-0.6.0.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
+ numba_cuda-0.6.0.dist-info/top_level.txt,sha256=C50SsH-8tXDmt7I0Y3nlJYhS5s6pqWflCPdobe9vx2M,11
+ numba_cuda-0.6.0.dist-info/RECORD,,
numba_cuda-0.6.0.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (75.8.0)
+ Generator: setuptools (75.8.2)
  Root-Is-Purelib: true
  Tag: py3-none-any