numba-cuda 0.4.0__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
numba_cuda/VERSION CHANGED
@@ -1 +1 @@
- 0.4.0
+ 0.6.0
numba_cuda/numba/cuda/compiler.py CHANGED
@@ -1,14 +1,17 @@
  from llvmlite import ir
  from numba.core.typing.templates import ConcreteTemplate
+ from numba.core import ir as numba_ir
  from numba.core import (cgutils, types, typing, funcdesc, config, compiler,
                          sigutils, utils)
  from numba.core.compiler import (sanitize_compile_result_entries, CompilerBase,
                                   DefaultPassBuilder, Flags, Option,
                                   CompileResult)
  from numba.core.compiler_lock import global_compiler_lock
- from numba.core.compiler_machinery import (LoweringPass,
+ from numba.core.compiler_machinery import (FunctionPass, LoweringPass,
                                             PassManager, register_pass)
+ from numba.core.interpreter import Interpreter
  from numba.core.errors import NumbaInvalidConfigWarning
+ from numba.core.untyped_passes import TranslateByteCode
  from numba.core.typed_passes import (IRLegalization, NativeLowering,
                                       AnnotateTypes)
  from warnings import warn
@@ -143,13 +146,74 @@ class CreateLibrary(LoweringPass):
          return True


+ class CUDABytecodeInterpreter(Interpreter):
+     # Based on the superclass implementation, but names the resulting variable
+     # "$bool<N>" instead of "bool<N>" - see Numba PR #9888:
+     # https://github.com/numba/numba/pull/9888
+     #
+     # This can be removed once that PR is available in an upstream Numba
+     # release.
+     def _op_JUMP_IF(self, inst, pred, iftrue):
+         brs = {
+             True: inst.get_jump_target(),
+             False: inst.next,
+         }
+         truebr = brs[iftrue]
+         falsebr = brs[not iftrue]
+
+         name = "$bool%s" % (inst.offset)
+         gv_fn = numba_ir.Global("bool", bool, loc=self.loc)
+         self.store(value=gv_fn, name=name)
+
+         callres = numba_ir.Expr.call(self.get(name), (self.get(pred),), (),
+                                      loc=self.loc)
+
+         pname = "$%spred" % (inst.offset)
+         predicate = self.store(value=callres, name=pname)
+         bra = numba_ir.Branch(cond=predicate, truebr=truebr, falsebr=falsebr,
+                               loc=self.loc)
+         self.current_block.append(bra)
+
+
+ @register_pass(mutates_CFG=True, analysis_only=False)
+ class CUDATranslateBytecode(FunctionPass):
+     _name = "cuda_translate_bytecode"
+
+     def __init__(self):
+         FunctionPass.__init__(self)
+
+     def run_pass(self, state):
+         func_id = state['func_id']
+         bc = state['bc']
+         interp = CUDABytecodeInterpreter(func_id)
+         func_ir = interp.interpret(bc)
+         state['func_ir'] = func_ir
+         return True
+
+
  class CUDACompiler(CompilerBase):
      def define_pipelines(self):
          dpb = DefaultPassBuilder
          pm = PassManager('cuda')

          untyped_passes = dpb.define_untyped_pipeline(self.state)
-         pm.passes.extend(untyped_passes.passes)
+
+         # Rather than replicating the whole untyped passes definition in
+         # numba-cuda, it seems cleaner to take the pass list and replace the
+         # TranslateBytecode pass with our own.
+
+         def replace_translate_pass(implementation, description):
+             if implementation is TranslateByteCode:
+                 return (CUDATranslateBytecode, description)
+             else:
+                 return (implementation, description)
+
+         cuda_untyped_passes = [
+             replace_translate_pass(implementation, description)
+             for implementation, description in untyped_passes.passes
+         ]
+
+         pm.passes.extend(cuda_untyped_passes)

          typed_passes = dpb.define_typed_pipeline(self.state)
          pm.passes.extend(typed_passes.passes)
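
The pass substitution above is a plain list rewrite over the (pass class, description) tuples that make up a Numba pipeline. A minimal standalone sketch of the same pattern (`replacement` is a hypothetical stand-in for CUDATranslateBytecode):

```python
from numba.core.untyped_passes import TranslateByteCode

def swap_translate_pass(passes, replacement):
    # Return a copy of an untyped pass list with TranslateByteCode
    # swapped for `replacement` (e.g. a custom FunctionPass subclass).
    return [
        (replacement, description) if implementation is TranslateByteCode
        else (implementation, description)
        for implementation, description in passes
    ]
```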
@@ -352,6 +416,18 @@ def kernel_fixup(kernel, debug):
      kernel.return_value = ir.ReturnValue(kernel, ir.VoidType())
      kernel.args = kernel.args[1:]

+     # If debug metadata is present, remove the return value from it
+
+     if kernel_metadata := getattr(kernel, 'metadata', None):
+         if dbg_metadata := kernel_metadata.get('dbg', None):
+             for name, value in dbg_metadata.operands:
+                 if name == "type":
+                     type_metadata = value
+                     for tm_name, tm_value in type_metadata.operands:
+                         if tm_name == 'types':
+                             types = tm_value
+                             types.operands = types.operands[1:]
+
      # Mark as a kernel for NVVM

      nvvm.set_cuda_kernel(kernel)
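
The metadata edit above leans on llvmlite's representation of debug info: a debug node's `operands` are (name, value) pairs, and the `types` node of a DISubroutineType is a plain metadata tuple whose first slot holds the return type. A minimal sketch of those shapes (a toy module, not the real kernel IR; written to my understanding of llvmlite's API):

```python
from llvmlite import ir

m = ir.Module(name="sketch")
i32 = m.add_debug_info('DIBasicType', {
    'name': 'int32',
    'size': 32,
    'encoding': ir.DIToken('DW_ATE_signed'),
})
# Flexible node of types; by convention slot 0 is the return type.
types_node = m.add_metadata([i32, i32])
subroutine = m.add_debug_info('DISubroutineType', {'types': types_node})

# Dropping the return slot, as kernel_fixup now does for void kernels:
for name, value in subroutine.operands:
    if name == 'types':
        value.operands = value.operands[1:]
```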
@@ -570,16 +646,16 @@ def compile_ptx_for_current_device(pyfunc, sig, debug=None, lineinfo=False,
                         abi=abi, abi_info=abi_info)


- def declare_device_function(name, restype, argtypes):
-     return declare_device_function_template(name, restype, argtypes).key
+ def declare_device_function(name, restype, argtypes, link):
+     return declare_device_function_template(name, restype, argtypes, link).key


- def declare_device_function_template(name, restype, argtypes):
+ def declare_device_function_template(name, restype, argtypes, link):
      from .descriptor import cuda_target
      typingctx = cuda_target.typing_context
      targetctx = cuda_target.target_context
      sig = typing.signature(restype, *argtypes)
-     extfn = ExternFunction(name, sig)
+     extfn = ExternFunction(name, sig, link)

      class device_function_template(ConcreteTemplate):
          key = extfn
@@ -593,7 +669,8 @@ def declare_device_function_template(name, restype, argtypes):
      return device_function_template


- class ExternFunction(object):
-     def __init__(self, name, sig):
+ class ExternFunction:
+     def __init__(self, name, sig, link):
          self.name = name
          self.sig = sig
+         self.link = link
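
The descriptor returned by `cuda.declare_device` (see the decorators.py hunk later in this diff) is this template key, i.e. an ExternFunction instance; the dispatcher reads its `link` attribute when collecting code to link. An illustrative look at the attributes, based on the tests added in this release:

```python
from numba import cuda

decl = cuda.declare_device('f1', 'int32(float32[:])')
decl.name   # 'f1'
decl.sig    # the normalized Numba signature
decl.link   # () -- an empty tuple when no link argument is given
```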
numba_cuda/numba/cuda/cudadecl.py CHANGED
@@ -403,16 +403,20 @@ _genfp16_binary_operator(operator.itruediv)


  def _resolve_wrapped_unary(fname):
+     link = tuple()
      decl = declare_device_function_template(f'__numba_wrapper_{fname}',
                                              types.float16,
-                                             (types.float16,))
+                                             (types.float16,),
+                                             link)
      return types.Function(decl)


  def _resolve_wrapped_binary(fname):
+     link = tuple()
      decl = declare_device_function_template(f'__numba_wrapper_{fname}',
                                              types.float16,
-                                             (types.float16, types.float16,))
+                                             (types.float16, types.float16,),
+                                             link)
      return types.Function(decl)

numba_cuda/numba/cuda/cudadrv/linkable_code.py CHANGED
@@ -2,8 +2,12 @@ from .mappings import FILE_EXTENSION_MAP


  class LinkableCode:
-     """An object that can be passed in the `link` list argument to `@cuda.jit`
-     kernels to supply code to be linked from memory."""
+     """An object that holds code to be linked from memory.
+
+     :param data: A buffer containing the data to link.
+     :param name: The name of the file to be referenced in any compilation or
+                  linking errors that may be produced.
+     """

      def __init__(self, data, name=None):
          self.data = data
@@ -15,49 +19,49 @@ class LinkableCode:


  class PTXSource(LinkableCode):
-     """PTX Source code in memory"""
+     """PTX source code in memory."""

      kind = FILE_EXTENSION_MAP["ptx"]
      default_name = "<unnamed-ptx>"


  class CUSource(LinkableCode):
-     """CUDA C/C++ Source code in memory"""
+     """CUDA C/C++ source code in memory."""

      kind = "cu"
      default_name = "<unnamed-cu>"


  class Fatbin(LinkableCode):
-     """A fatbin ELF in memory"""
+     """An ELF Fatbin in memory."""

      kind = FILE_EXTENSION_MAP["fatbin"]
      default_name = "<unnamed-fatbin>"


  class Cubin(LinkableCode):
-     """A cubin ELF in memory"""
+     """An ELF Cubin in memory."""

      kind = FILE_EXTENSION_MAP["cubin"]
      default_name = "<unnamed-cubin>"


  class Archive(LinkableCode):
-     """An archive of objects in memory"""
+     """An archive of objects in memory."""

      kind = FILE_EXTENSION_MAP["a"]
      default_name = "<unnamed-archive>"


  class Object(LinkableCode):
-     """An object file in memory"""
+     """An object file in memory."""

      kind = FILE_EXTENSION_MAP["o"]
      default_name = "<unnamed-object>"


  class LTOIR(LinkableCode):
-     """An LTOIR file in memory"""
+     """An LTOIR file in memory."""

      kind = "ltoir"
      default_name = "<unnamed-ltoir>"
numba_cuda/numba/cuda/cudadrv/nvvm.py CHANGED
@@ -314,7 +314,9 @@ COMPUTE_CAPABILITIES = (
      (6, 0), (6, 1), (6, 2),
      (7, 0), (7, 2), (7, 5),
      (8, 0), (8, 6), (8, 7), (8, 9),
-     (9, 0)
+     (9, 0),
+     (10, 0), (10, 1),
+     (12, 0),
  )

  # Maps CTK version -> (min supported cc, max supported cc) inclusive
@@ -331,6 +333,9 @@ CTK_SUPPORTED = {
      (12, 2): ((5, 0), (9, 0)),
      (12, 3): ((5, 0), (9, 0)),
      (12, 4): ((5, 0), (9, 0)),
+     (12, 5): ((5, 0), (9, 0)),
+     (12, 6): ((5, 0), (9, 0)),
+     (12, 8): ((5, 0), (12, 0)),
  }
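
To read the table: each entry maps a CUDA toolkit version to the inclusive range of compute capabilities it can target, so support checks reduce to lexicographic tuple comparison. A sketch using an abbreviated copy of the mapping above:

```python
# Abbreviated from CTK_SUPPORTED above; values are (min cc, max cc), inclusive.
CTK_SUPPORTED = {
    (12, 6): ((5, 0), (9, 0)),
    (12, 8): ((5, 0), (12, 0)),
}

def ctk_supports_cc(ctk, cc):
    min_cc, max_cc = CTK_SUPPORTED[ctk]
    return min_cc <= cc <= max_cc

assert ctk_supports_cc((12, 8), (12, 0))       # newly supported in CTK 12.8
assert not ctk_supports_cc((12, 6), (10, 0))   # too new for CTK 12.6
```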
numba_cuda/numba/cuda/debuginfo.py ADDED
@@ -0,0 +1,44 @@
+ from llvmlite import ir
+ from numba.core import types
+ from numba.core.debuginfo import DIBuilder
+ from numba.cuda.types import GridGroup
+
+ _BYTE_SIZE = 8
+
+
+ class CUDADIBuilder(DIBuilder):
+
+     def _var_type(self, lltype, size, datamodel=None):
+         is_bool = False
+         is_grid_group = False
+
+         if isinstance(lltype, ir.IntType):
+             if datamodel is None:
+                 if size == 1:
+                     name = str(lltype)
+                     is_bool = True
+             else:
+                 name = str(datamodel.fe_type)
+                 if isinstance(datamodel.fe_type, types.Boolean):
+                     is_bool = True
+                 elif isinstance(datamodel.fe_type, GridGroup):
+                     is_grid_group = True
+
+         if is_bool or is_grid_group:
+             m = self.module
+             bitsize = _BYTE_SIZE * size
+             # Boolean type workaround until upstream Numba is fixed
+             if is_bool:
+                 ditok = "DW_ATE_boolean"
+             # GridGroup type should use numba.cuda implementation
+             elif is_grid_group:
+                 ditok = "DW_ATE_unsigned"
+
+             return m.add_debug_info('DIBasicType', {
+                 'name': name,
+                 'size': bitsize,
+                 'encoding': ir.DIToken(ditok),
+             })
+
+         # For other cases, use upstream Numba implementation
+         return super()._var_type(lltype, size, datamodel=datamodel)
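
The override ultimately emits a DIBasicType node carrying the desired DWARF encoding token. A standalone llvmlite illustration of the node produced for a Numba boolean (stored in one byte, hence size 8 bits):

```python
from llvmlite import ir

m = ir.Module(name="di_sketch")
di = m.add_debug_info('DIBasicType', {
    'name': 'bool',
    'size': 8,  # _BYTE_SIZE * size for a 1-byte value
    'encoding': ir.DIToken('DW_ATE_boolean'),
})
# str(m) now contains a line of the form:
# !DIBasicType(name: "bool", size: 8, encoding: DW_ATE_boolean)
```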
numba_cuda/numba/cuda/decorators.py CHANGED
@@ -173,7 +173,7 @@ def jit(func_or_sig=None, device=False, inline=False, link=[], debug=None,
      return disp


- def declare_device(name, sig):
+ def declare_device(name, sig, link=None):
      """
      Declare the signature of a foreign function. Returns a descriptor that can
      be used to call the function from a Python kernel.
@@ -181,10 +181,17 @@ def declare_device(name, sig):
      :param name: The name of the foreign function.
      :type name: str
      :param sig: The Numba signature of the function.
+     :param link: External code to link when calling the function.
      """
+     if link is None:
+         link = tuple()
+     else:
+         if not isinstance(link, (list, tuple, set)):
+             link = (link,)
+
      argtypes, restype = sigutils.normalize_signature(sig)
      if restype is None:
          msg = 'Return type must be provided for device declarations'
          raise TypeError(msg)

-     return declare_device_function(name, restype, argtypes)
+     return declare_device_function(name, restype, argtypes, link)
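
Because of the normalization above, `link` accepts a single linkable object as well as a list, tuple, or set of them; a bare object is wrapped in a one-element tuple. Illustrative calls (the in-memory source mirrors the `times2_cu` defined in the tests later in this diff):

```python
from numba import cuda

times2_cu = cuda.CUSource(
    'extern "C" __device__ int times2(int *out, int a)'
    '{ *out = a * 2; return 0; }')

# All accepted forms:
f = cuda.declare_device('times2', 'int32(int32)', link=times2_cu)
f = cuda.declare_device('times2', 'int32(int32)', link=[times2_cu])
f = cuda.declare_device('times2', 'int32(int32)', link=(times2_cu,))
```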
numba_cuda/numba/cuda/dispatcher.py CHANGED
@@ -4,17 +4,19 @@ import re
  import sys
  import ctypes
  import functools
+ from collections import defaultdict

- from numba.core import config, serialize, sigutils, types, typing, utils
+ from numba.core import config, ir, serialize, sigutils, types, typing, utils
  from numba.core.caching import Cache, CacheImpl
  from numba.core.compiler_lock import global_compiler_lock
  from numba.core.dispatcher import Dispatcher
  from numba.core.errors import NumbaPerformanceWarning
  from numba.core.typing.typeof import Purpose, typeof
-
+ from numba.core.types.functions import Function
  from numba.cuda.api import get_current_device
  from numba.cuda.args import wrap_arg
- from numba.cuda.compiler import compile_cuda, CUDACompiler, kernel_fixup
+ from numba.cuda.compiler import (compile_cuda, CUDACompiler, kernel_fixup,
+                                  ExternFunction)
  from numba.cuda.cudadrv import driver
  from numba.cuda.cudadrv.devices import get_context
  from numba.cuda.descriptor import cuda_target
@@ -41,6 +43,55 @@ cuda_fp16_math_funcs = ['hsin', 'hcos',
  reshape_funcs = ['nocopy_empty_reshape', 'numba_attempt_nocopy_reshape']


+ def get_cres_link_objects(cres):
+     """Given a compile result, return a set of all linkable code objects that
+     are required for it to be fully linked."""
+
+     link_objects = set()
+
+     # List of calls into declared device functions
+     device_func_calls = [
+         (name, v) for name, v in cres.fndesc.typemap.items() if (
+             isinstance(v, cuda_types.CUDADispatcher)
+         )
+     ]
+
+     # List of tuples with SSA name of calls and corresponding signature
+     call_signatures = [
+         (call.func.name, sig)
+         for call, sig in cres.fndesc.calltypes.items() if (
+             isinstance(call, ir.Expr) and call.op == 'call'
+         )
+     ]
+
+     # Map SSA names to all invoked signatures
+     call_signature_d = defaultdict(list)
+     for name, sig in call_signatures:
+         call_signature_d[name].append(sig)
+
+     # Add the link objects from the current function's callees
+     for name, v in device_func_calls:
+         for sig in call_signature_d.get(name, []):
+             called_cres = v.dispatcher.overloads[sig.args]
+             called_link_objects = get_cres_link_objects(called_cres)
+             link_objects.update(called_link_objects)
+
+     # From this point onwards, we are only interested in ExternFunction
+     # declarations - these are the calls made directly in this function to
+     # them.
+     for name, v in cres.fndesc.typemap.items():
+         if not isinstance(v, Function):
+             continue
+
+         if not isinstance(v.typing_key, ExternFunction):
+             continue
+
+         for obj in v.typing_key.link:
+             link_objects.add(obj)
+
+     return link_objects
+
+
  class _Kernel(serialize.ReduceMixin):
      '''
      CUDA Kernel specialized for a given set of argument types. When called, this
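
Note the traversal is transitive and set-based: an object reached through several call paths is linked once. A simplified model of the idea (hypothetical node objects, not Numba's real compile results):

```python
def collect_links(fn):
    # fn is a hypothetical node with `link` (its own linkable objects)
    # and `callees` (functions it calls); returns everything reachable.
    objects = set(fn.link)
    for callee in fn.callees:
        objects |= collect_links(callee)
    return objects
```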
@@ -158,6 +209,9 @@ class _Kernel(serialize.ReduceMixin):

          self.maybe_link_nrt(link, tgt_ctx, asm)

+         for obj in get_cres_link_objects(cres):
+             lib.add_linking_file(obj)
+
          for filepath in link:
              lib.add_linking_file(filepath)

@@ -256,7 +310,11 @@ class _Kernel(serialize.ReduceMixin):
          """
          cufunc = self._codelibrary.get_cufunc()

-         if hasattr(self, "target_context") and self.target_context.enable_nrt:
+         if (
+             hasattr(self, "target_context")
+             and self.target_context.enable_nrt
+             and config.CUDA_NRT_STATS
+         ):
              rtsys.ensure_initialized()
              rtsys.set_memsys_to_module(cufunc.module)
              # We don't know which stream the kernel will be launched on, so
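
With this change, NRT memsys statistics only attach to a module when `CUDA_NRT_STATS` is set in addition to NRT being enabled. In tests this is done with `override_config`, as the nrt test hunks later in this diff show:

```python
from numba.tests.support import override_config

with (
    override_config('CUDA_ENABLE_NRT', True),
    override_config('CUDA_NRT_STATS', True),
):
    ...  # launch kernels here; memsys statistics are collected
```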
numba_cuda/numba/cuda/target.py CHANGED
@@ -3,8 +3,7 @@ from functools import cached_property
  import llvmlite.binding as ll
  from llvmlite import ir

- from numba.core import (cgutils, config, debuginfo, itanium_mangler, types,
-                         typing, utils)
+ from numba.core import cgutils, config, itanium_mangler, types, typing
  from numba.core.dispatcher import Dispatcher
  from numba.core.base import BaseContext
  from numba.core.callconv import BaseCallConv, MinimalCallConv
@@ -12,7 +11,8 @@ from numba.core.typing import cmathdecl
  from numba.core import datamodel

  from .cudadrv import nvvm
- from numba.cuda import codegen, nvvmutils, ufuncs
+ from numba.cuda import codegen, ufuncs
+ from numba.cuda.debuginfo import CUDADIBuilder
  from numba.cuda.models import cuda_data_manager

  # -----------------------------------------------------------------------------
@@ -80,7 +80,7 @@ class CUDATargetContext(BaseContext):

      @property
      def DIBuilder(self):
-         return debuginfo.DIBuilder
+         return CUDADIBuilder

      @property
      def enable_boundscheck(self):
@@ -150,136 +150,6 @@ class CUDATargetContext(BaseContext):
          return itanium_mangler.mangle(name, argtypes, abi_tags=abi_tags,
                                        uid=uid)

-     def prepare_cuda_kernel(self, codelib, fndesc, debug, lineinfo,
-                             nvvm_options, filename, linenum,
-                             max_registers=None, lto=False):
-         """
-         Adapt a code library ``codelib`` with the numba compiled CUDA kernel
-         with name ``fname`` and arguments ``argtypes`` for NVVM.
-         A new library is created with a wrapper function that can be used as
-         the kernel entry point for the given kernel.
-
-         Returns the new code library and the wrapper function.
-
-         Parameters:
-
-         codelib:       The CodeLibrary containing the device function to wrap
-                        in a kernel call.
-         fndesc:        The FunctionDescriptor of the source function.
-         debug:         Whether to compile with debug.
-         lineinfo:      Whether to emit line info.
-         nvvm_options:  Dict of NVVM options used when compiling the new library.
-         filename:      The source filename that the function is contained in.
-         linenum:       The source line that the function is on.
-         max_registers: The max_registers argument for the code library.
-         """
-         kernel_name = itanium_mangler.prepend_namespace(
-             fndesc.llvm_func_name, ns='cudapy',
-         )
-         library = self.codegen().create_library(f'{codelib.name}_kernel_',
-                                                 entry_name=kernel_name,
-                                                 nvvm_options=nvvm_options,
-                                                 max_registers=max_registers,
-                                                 lto=lto
-                                                 )
-         library.add_linking_library(codelib)
-         wrapper = self.generate_kernel_wrapper(library, fndesc, kernel_name,
-                                                debug, lineinfo, filename,
-                                                linenum)
-         return library, wrapper
-
-     def generate_kernel_wrapper(self, library, fndesc, kernel_name, debug,
-                                 lineinfo, filename, linenum):
-         """
-         Generate the kernel wrapper in the given ``library``.
-         The function being wrapped is described by ``fndesc``.
-         The wrapper function is returned.
-         """
-
-         argtypes = fndesc.argtypes
-         arginfo = self.get_arg_packer(argtypes)
-         argtys = list(arginfo.argument_types)
-         wrapfnty = ir.FunctionType(ir.VoidType(), argtys)
-         wrapper_module = self.create_module("cuda.kernel.wrapper")
-         fnty = ir.FunctionType(ir.IntType(32),
-                                [self.call_conv.get_return_type(types.pyobject)]
-                                + argtys)
-         func = ir.Function(wrapper_module, fnty, fndesc.llvm_func_name)
-
-         prefixed = itanium_mangler.prepend_namespace(func.name, ns='cudapy')
-         wrapfn = ir.Function(wrapper_module, wrapfnty, prefixed)
-         builder = ir.IRBuilder(wrapfn.append_basic_block(''))
-
-         if debug or lineinfo:
-             directives_only = lineinfo and not debug
-             debuginfo = self.DIBuilder(module=wrapper_module,
-                                        filepath=filename,
-                                        cgctx=self,
-                                        directives_only=directives_only)
-             debuginfo.mark_subprogram(
-                 wrapfn, kernel_name, fndesc.args, argtypes, linenum,
-             )
-             debuginfo.mark_location(builder, linenum)
-
-         # Define error handling variable
-         def define_error_gv(postfix):
-             name = wrapfn.name + postfix
-             gv = cgutils.add_global_variable(wrapper_module, ir.IntType(32),
-                                              name)
-             gv.initializer = ir.Constant(gv.type.pointee, None)
-             return gv
-
-         gv_exc = define_error_gv("__errcode__")
-         gv_tid = []
-         gv_ctaid = []
-         for i in 'xyz':
-             gv_tid.append(define_error_gv("__tid%s__" % i))
-             gv_ctaid.append(define_error_gv("__ctaid%s__" % i))
-
-         callargs = arginfo.from_arguments(builder, wrapfn.args)
-         status, _ = self.call_conv.call_function(
-             builder, func, types.void, argtypes, callargs)
-
-         if debug:
-             # Check error status
-             with cgutils.if_likely(builder, status.is_ok):
-                 builder.ret_void()
-
-             with builder.if_then(builder.not_(status.is_python_exc)):
-                 # User exception raised
-                 old = ir.Constant(gv_exc.type.pointee, None)
-
-                 # Use atomic cmpxchg to prevent rewriting the error status
-                 # Only the first error is recorded
-
-                 xchg = builder.cmpxchg(gv_exc, old, status.code,
-                                        'monotonic', 'monotonic')
-                 changed = builder.extract_value(xchg, 1)
-
-                 # If the xchange is successful, save the thread ID.
-                 sreg = nvvmutils.SRegBuilder(builder)
-                 with builder.if_then(changed):
-                     for dim, ptr, in zip("xyz", gv_tid):
-                         val = sreg.tid(dim)
-                         builder.store(val, ptr)
-
-                     for dim, ptr, in zip("xyz", gv_ctaid):
-                         val = sreg.ctaid(dim)
-                         builder.store(val, ptr)
-
-                 builder.ret_void()
-
-         nvvm.set_cuda_kernel(wrapfn)
-         library.add_ir_module(wrapper_module)
-         if debug or lineinfo:
-             debuginfo.finalize()
-         library.finalize()
-
-         if config.DUMP_LLVM:
-             utils.dump_llvm(fndesc, wrapper_module)
-
-         return library.get_function(wrapfn.name)
-
      def make_constant_array(self, builder, aryty, arr):
          """
          Unlike the parent version. This returns a pointer in the constant
numba_cuda/numba/cuda/testing.py CHANGED
@@ -115,12 +115,22 @@ def skip_on_arm(reason):
  def skip_if_cuda_includes_missing(fn):
      # Skip when cuda.h is not available - generally this should indicate
      # whether the CUDA includes are available or not
-     cuda_h = os.path.join(config.CUDA_INCLUDE_PATH, 'cuda.h')
+     cuda_include_path = libs.get_cuda_include_dir()
+     cuda_h = os.path.join(cuda_include_path, 'cuda.h')
      cuda_h_file = (os.path.exists(cuda_h) and os.path.isfile(cuda_h))
      reason = 'CUDA include dir not available on this system'
      return unittest.skipUnless(cuda_h_file, reason)(fn)


+ def skip_if_curand_kernel_missing(fn):
+     cuda_include_path = libs.get_cuda_include_dir()
+     curand_kernel_h = os.path.join(cuda_include_path, 'curand_kernel.h')
+     curand_kernel_h_file = (os.path.exists(curand_kernel_h) and
+                             os.path.isfile(curand_kernel_h))
+     reason = 'curand_kernel.h not available on this system'
+     return unittest.skipUnless(curand_kernel_h_file, reason)(fn)
+
+
  def skip_if_mvc_enabled(reason):
      """Skip a test if Minor Version Compatibility is enabled"""
      return unittest.skipIf(config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY,
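
Usage matches the existing skip helpers; the new decorator guards tests whose linked source includes curand_kernel.h (see test_include_cuda_header later in this diff). An illustrative, hypothetical test class:

```python
from numba.cuda.testing import CUDATestCase, skip_if_curand_kernel_missing

class TestRNG(CUDATestCase):
    @skip_if_curand_kernel_missing
    def test_uses_curand(self):
        ...  # would link source that does #include <curand_kernel.h>
```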
numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py CHANGED
@@ -72,6 +72,57 @@ class TestCudaDebugInfo(CUDATestCase):
          def f(x):
              x[0] = 0

+     def test_issue_9888(self):
+         # Compiler created symbol should not be emitted in DILocalVariable
+         # See Numba Issue #9888 https://github.com/numba/numba/pull/9888
+         sig = (types.boolean,)
+
+         @cuda.jit(sig, debug=True, opt=False)
+         def f(cond):
+             if cond:
+                 x = 1  # noqa: F841
+             else:
+                 x = 0  # noqa: F841
+
+         llvm_ir = f.inspect_llvm(sig)
+         # A variable name starting with "bool" in the debug metadata
+         pat = r'!DILocalVariable\(.*name:\s+\"bool'
+         match = re.compile(pat).search(llvm_ir)
+         self.assertIsNone(match, msg=llvm_ir)
+
+     def test_bool_type(self):
+         sig = (types.int32, types.int32)
+
+         @cuda.jit("void(int32, int32)", debug=True, opt=False)
+         def f(x, y):
+             z = x == y  # noqa: F841
+
+         llvm_ir = f.inspect_llvm(sig)
+
+         # extract the metadata node id from `type` field of DILocalVariable
+         pat = r'!DILocalVariable\(.*name:\s+"z".*type:\s+!(\d+)'
+         match = re.compile(pat).search(llvm_ir)
+         self.assertIsNotNone(match, msg=llvm_ir)
+         mdnode_id = match.group(1)
+
+         # verify the DIBasicType has correct encoding attribute DW_ATE_boolean
+         pat = rf'!{mdnode_id}\s+=\s+!DIBasicType\(.*DW_ATE_boolean'
+         match = re.compile(pat).search(llvm_ir)
+         self.assertIsNotNone(match, msg=llvm_ir)
+
+     def test_grid_group_type(self):
+         sig = (types.int32,)
+
+         @cuda.jit(sig, debug=True, opt=False)
+         def f(x):
+             grid = cuda.cg.this_grid()  # noqa: F841
+
+         llvm_ir = f.inspect_llvm(sig)
+
+         pat = r'!DIBasicType\(.*DW_ATE_unsigned, name: "GridGroup", size: 64'
+         match = re.compile(pat).search(llvm_ir)
+         self.assertIsNotNone(match, msg=llvm_ir)
+
      @unittest.skip("Wrappers no longer exist")
      def test_wrapper_has_debuginfo(self):
          sig = (types.int32[::1],)
@@ -217,6 +268,36 @@ class TestCudaDebugInfo(CUDATestCase):
          three_device_fns(kernel_debug=False, leaf_debug=True)
          three_device_fns(kernel_debug=False, leaf_debug=False)

+     def test_kernel_args_types(self):
+         sig = (types.int32, types.int32)
+
+         @cuda.jit("void(int32, int32)", debug=True, opt=False)
+         def f(x, y):
+             z = x + y  # noqa: F841
+
+         llvm_ir = f.inspect_llvm(sig)
+
+         # extract the metadata node id from `types` field of DISubroutineType
+         pat = r'!DISubroutineType\(types:\s+!(\d+)\)'
+         match = re.compile(pat).search(llvm_ir)
+         self.assertIsNotNone(match, msg=llvm_ir)
+         mdnode_id = match.group(1)
+
+         # extract the metadata node ids from the flexible node of types
+         pat = rf'!{mdnode_id}\s+=\s+!{{\s+!(\d+),\s+!(\d+)\s+}}'
+         match = re.compile(pat).search(llvm_ir)
+         self.assertIsNotNone(match, msg=llvm_ir)
+         mdnode_id1 = match.group(1)
+         mdnode_id2 = match.group(2)
+
+         # verify each of the two metadata nodes match expected type
+         pat = rf'!{mdnode_id1}\s+=\s+!DIBasicType\(.*DW_ATE_signed,\s+name:\s+"int32"'  # noqa: E501
+         match = re.compile(pat).search(llvm_ir)
+         self.assertIsNotNone(match, msg=llvm_ir)
+         pat = rf'!{mdnode_id2}\s+=\s+!DIBasicType\(.*DW_ATE_signed,\s+name:\s+"int32"'  # noqa: E501
+         match = re.compile(pat).search(llvm_ir)
+         self.assertIsNotNone(match, msg=llvm_ir)
+

  if __name__ == '__main__':
      unittest.main()
numba_cuda/numba/cuda/tests/cudapy/test_device_func.py CHANGED
@@ -1,11 +1,14 @@
  import re
- import types
+ import cffi

  import numpy as np

- from numba.cuda.testing import unittest, skip_on_cudasim, CUDATestCase
- from numba import cuda, jit, float32, int32
+ from numba.cuda.testing import (skip_if_curand_kernel_missing, skip_on_cudasim,
+                                 test_data_dir, unittest, CUDATestCase)
+ from numba import cuda, jit, float32, int32, types
  from numba.core.errors import TypingError
+ from numba.tests.support import skip_unless_cffi
+ from types import ModuleType


  class TestDeviceFunc(CUDATestCase):
@@ -92,7 +95,7 @@ class TestDeviceFunc(CUDATestCase):
          def add(a, b):
              return a + b

-         mymod = types.ModuleType(name='mymod')
+         mymod = ModuleType(name='mymod')
          mymod.add = add
          del add

@@ -192,31 +195,287 @@ class TestDeviceFunc(CUDATestCase):

          self.assertEqual(0x04010203, x[0])

-     def _test_declare_device(self, decl):
+
+ times2_cu = cuda.CUSource("""
+ extern "C" __device__
+ int times2(int *out, int a)
+ {
+     *out = a * 2;
+     return 0;
+ }
+ """)
+
+ times3_cu = cuda.CUSource("""
+ extern "C" __device__
+ int times3(int *out, int a)
+ {
+     *out = a * 3;
+     return 0;
+ }
+ """)
+
+ times4_cu = cuda.CUSource("""
+ extern "C" __device__
+ int times2(int *out, int a);
+
+ extern "C" __device__
+ int times4(int *out, int a)
+ {
+     int tmp;
+     times2(&tmp, a);
+     *out = tmp * 2;
+     return 0;
+ }
+ """)
+
+ jitlink_user_cu = cuda.CUSource("""
+ extern "C" __device__
+ int array_mutator(void *out, int *a);
+
+ extern "C" __device__
+ int use_array_mutator(void *out, int *a) {
+     array_mutator(out, a);
+     return 0;
+ }
+ """)
+
+ rng_cu = cuda.CUSource("""
+ #include <curand_kernel.h>
+
+ extern "C" __device__
+ int random_number(unsigned int *out, unsigned long long seed)
+ {
+     // Initialize state
+     curandStateXORWOW_t state;
+     unsigned long long sequence = 1;
+     unsigned long long offset = 0;
+     curand_init(seed, sequence, offset, &state);
+
+     // Generate one random number
+     *out = curand(&state);
+
+     // Report no exception
+     return 0;
+ }""")
+
+
+ @skip_on_cudasim('External functions unsupported in the simulator')
+ class TestDeclareDevice(CUDATestCase):
+
+     def check_api(self, decl):
          self.assertEqual(decl.name, 'f1')
          self.assertEqual(decl.sig.args, (float32[:],))
          self.assertEqual(decl.sig.return_type, int32)

-     @skip_on_cudasim('cudasim does not check signatures')
      def test_declare_device_signature(self):
          f1 = cuda.declare_device('f1', int32(float32[:]))
-         self._test_declare_device(f1)
+         self.check_api(f1)

-     @skip_on_cudasim('cudasim does not check signatures')
      def test_declare_device_string(self):
          f1 = cuda.declare_device('f1', 'int32(float32[:])')
-         self._test_declare_device(f1)
+         self.check_api(f1)

-     @skip_on_cudasim('cudasim does not check signatures')
      def test_bad_declare_device_tuple(self):
          with self.assertRaisesRegex(TypeError, 'Return type'):
              cuda.declare_device('f1', (float32[:],))

-     @skip_on_cudasim('cudasim does not check signatures')
      def test_bad_declare_device_string(self):
          with self.assertRaisesRegex(TypeError, 'Return type'):
              cuda.declare_device('f1', '(float32[:],)')

+     def test_link_cu_source(self):
+         times2 = cuda.declare_device('times2', 'int32(int32)', link=times2_cu)
+
+         @cuda.jit
+         def kernel(r, x):
+             i = cuda.grid(1)
+             if i < len(r):
+                 r[i] = times2(x[i])
+
+         x = np.arange(10, dtype=np.int32)
+         r = np.empty_like(x)
+
+         kernel[1, 32](r, x)
+
+         np.testing.assert_equal(r, x * 2)
+
+     def _test_link_multiple_sources(self, link_type):
+         link = link_type([times2_cu, times4_cu])
+         times4 = cuda.declare_device('times4', 'int32(int32)', link=link)
+
+         @cuda.jit
+         def kernel(r, x):
+             i = cuda.grid(1)
+             if i < len(r):
+                 r[i] = times4(x[i])
+
+         x = np.arange(10, dtype=np.int32)
+         r = np.empty_like(x)
+
+         kernel[1, 32](r, x)
+
+         np.testing.assert_equal(r, x * 4)
+
+     def test_link_multiple_sources_set(self):
+         self._test_link_multiple_sources(set)
+
+     def test_link_multiple_sources_tuple(self):
+         self._test_link_multiple_sources(tuple)
+
+     def test_link_multiple_sources_list(self):
+         self._test_link_multiple_sources(list)
+
+     @skip_unless_cffi
+     def test_link_sources_in_memory_and_on_disk(self):
+         jitlink_cu = str(test_data_dir / "jitlink.cu")
+         link = [jitlink_cu, jitlink_user_cu]
+         sig = types.void(types.CPointer(types.int32))
+         ext_fn = cuda.declare_device("use_array_mutator", sig, link=link)
+
+         ffi = cffi.FFI()
+
+         @cuda.jit
+         def kernel(x):
+             ptr = ffi.from_buffer(x)
+             ext_fn(ptr)
+
+         x = np.arange(2, dtype=np.int32)
+         kernel[1, 1](x)
+
+         expected = np.ones(2, dtype=np.int32)
+         np.testing.assert_equal(x, expected)
+
+     @skip_if_curand_kernel_missing
+     def test_include_cuda_header(self):
+         sig = types.int32(types.uint64)
+         link = [rng_cu]
+         random_number = cuda.declare_device("random_number", sig, link=link)
+
+         @cuda.jit
+         def kernel(x, seed):
+             x[0] = random_number(seed)
+
+         x = np.zeros(1, dtype=np.uint32)
+         kernel[1, 1](x, 1)
+         np.testing.assert_equal(x[0], 323845807)
+
+     def test_declared_in_called_function(self):
+         times2 = cuda.declare_device('times2', 'int32(int32)', link=times2_cu)
+
+         @cuda.jit
+         def device_func(x):
+             return times2(x)
+
+         @cuda.jit
+         def kernel(r, x):
+             i = cuda.grid(1)
+             if i < len(r):
+                 r[i] = device_func(x[i])
+
+         x = np.arange(10, dtype=np.int32)
+         r = np.empty_like(x)
+
+         kernel[1, 32](r, x)
+
+         np.testing.assert_equal(r, x * 2)
+
+     def test_declared_in_called_function_twice(self):
+         times2 = cuda.declare_device('times2', 'int32(int32)', link=times2_cu)
+
+         @cuda.jit
+         def device_func_1(x):
+             return times2(x)
+
+         @cuda.jit
+         def device_func_2(x):
+             return device_func_1(x)
+
+         @cuda.jit
+         def kernel(r, x):
+             i = cuda.grid(1)
+             if i < len(r):
+                 r[i] = device_func_2(x[i])
+
+         x = np.arange(10, dtype=np.int32)
+         r = np.empty_like(x)
+
+         kernel[1, 32](r, x)
+
+         np.testing.assert_equal(r, x * 2)
+
+     def test_declared_in_called_function_two_calls(self):
+         times2 = cuda.declare_device('times2', 'int32(int32)', link=times2_cu)
+
+         @cuda.jit
+         def device_func(x):
+             return times2(x)
+
+         @cuda.jit
+         def kernel(r, x):
+             i = cuda.grid(1)
+             if i < len(r):
+                 r[i] = device_func(x[i]) + device_func(x[i] + i)
+
+         x = np.arange(10, dtype=np.int32)
+         r = np.empty_like(x)
+
+         kernel[1, 32](r, x)
+
+         np.testing.assert_equal(r, x * 6)
+
+     def test_call_declared_function_twice(self):
+         times2 = cuda.declare_device('times2', 'int32(int32)', link=times2_cu)
+
+         @cuda.jit
+         def kernel(r, x):
+             i = cuda.grid(1)
+             if i < len(r):
+                 r[i] = times2(x[i]) + times2(x[i] + i)
+
+         x = np.arange(10, dtype=np.int32)
+         r = np.empty_like(x)
+
+         kernel[1, 32](r, x)
+
+         np.testing.assert_equal(r, x * 6)
+
+     def test_declared_in_called_function_and_parent(self):
+         times2 = cuda.declare_device('times2', 'int32(int32)', link=times2_cu)
+
+         @cuda.jit
+         def device_func(x):
+             return times2(x)
+
+         @cuda.jit
+         def kernel(r, x):
+             i = cuda.grid(1)
+             if i < len(r):
+                 r[i] = device_func(x[i]) + times2(x[i])
+
+         x = np.arange(10, dtype=np.int32)
+         r = np.empty_like(x)
+
+         kernel[1, 32](r, x)
+
+         np.testing.assert_equal(r, x * 4)
+
+     def test_call_two_different_declared_functions(self):
+         times2 = cuda.declare_device('times2', 'int32(int32)', link=times2_cu)
+         times3 = cuda.declare_device('times3', 'int32(int32)', link=times3_cu)
+
+         @cuda.jit
+         def kernel(r, x):
+             i = cuda.grid(1)
+             if i < len(r):
+                 r[i] = times2(x[i]) + times3(x[i])
+
+         x = np.arange(10, dtype=np.int32)
+         r = np.empty_like(x)
+
+         kernel[1, 32](r, x)
+
+         np.testing.assert_equal(r, x * 5)
+

  if __name__ == '__main__':
      unittest.main()
numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py CHANGED
@@ -15,16 +15,18 @@ class TestFFI(CUDATestCase):
          import numpy as np
          import os

-         # Declaration of the foreign function
-         mul = cuda.declare_device('mul_f32_f32', 'float32(float32, float32)')
-
          # Path to the source containing the foreign function
          # (here assumed to be in a subdirectory called "ffi")
          basedir = os.path.dirname(os.path.abspath(__file__))
          functions_cu = os.path.join(basedir, 'ffi', 'functions.cu')

-         # Kernel that links in functions.cu and calls mul
-         @cuda.jit(link=[functions_cu])
+         # Declaration of the foreign function
+         mul = cuda.declare_device('mul_f32_f32', 'float32(float32, float32)',
+                                   link=functions_cu)
+
+         # A kernel that calls mul; functions.cu is linked automatically due to
+         # the call to mul.
+         @cuda.jit
          def multiply_vectors(r, x, y):
              i = cuda.grid(1)

@@ -54,14 +56,15 @@ class TestFFI(CUDATestCase):

          # magictoken.ex_from_buffer_decl.begin
          signature = 'float32(CPointer(float32), int32)'
-         sum_reduce = cuda.declare_device('sum_reduce', signature)
+         sum_reduce = cuda.declare_device('sum_reduce', signature,
+                                          link=functions_cu)
          # magictoken.ex_from_buffer_decl.end

          # magictoken.ex_from_buffer_kernel.begin
          import cffi
          ffi = cffi.FFI()

-         @cuda.jit(link=[functions_cu])
+         @cuda.jit
          def reduction_caller(result, array):
              array_ptr = ffi.from_buffer(array)
              result[()] = sum_reduce(array_ptr, len(array))
numba_cuda/numba/cuda/tests/nrt/test_nrt.py CHANGED
@@ -171,7 +171,10 @@ class TestNrtStatistics(CUDATestCase):
              arr = cuda_arange(5 * tmp[0])  # noqa: F841
              return None

-         with override_config('CUDA_ENABLE_NRT', True):
+         with (
+             override_config('CUDA_ENABLE_NRT', True),
+             override_config('CUDA_NRT_STATS', True)
+         ):
              # Switch on stats
              rtsys.memsys_enable_stats()
              # check the stats are on
numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py CHANGED
@@ -18,7 +18,10 @@ class TestNrtRefCt(EnableNRTStatsMixin, CUDATestCase):
          super(TestNrtRefCt, self).tearDown()

      def run(self, result=None):
-         with override_config("CUDA_ENABLE_NRT", True):
+         with (
+             override_config("CUDA_ENABLE_NRT", True),
+             override_config('CUDA_NRT_STATS', True)
+         ):
              super(TestNrtRefCt, self).run(result)

      def test_no_return(self):
numba_cuda-0.6.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: numba-cuda
- Version: 0.4.0
+ Version: 0.6.0
  Summary: CUDA target for Numba
  Author: Anaconda Inc., NVIDIA Corporation
  License: BSD 2-clause
@@ -27,7 +27,19 @@ tracker](https://github.com/NVIDIA/numba-cuda/issues).
  To raise questions or initiate discussions, please use the [Numba Discourse
  forum](https://numba.discourse.group).

- ## Building from source
+ ## Installation with pip
+
+ ```shell
+ pip install numba-cuda
+ ```
+
+ ## Installation with Conda
+
+ ```shell
+ conda install -c conda-forge numba-cuda
+ ```
+
+ ## Installation from source

  Install as an editable install:

@@ -53,3 +65,9 @@ which will show a path like:
  ```
  <path to numba-cuda repo>/numba_cuda/numba/cuda/__init__.py
  ```
+
+ ## Contributing Guide
+
+ Review the
+ [CONTRIBUTING.md](https://github.com/NVIDIA/numba-cuda/blob/main/CONTRIBUTING.md)
+ file for information on how to contribute code and issues to the project.
numba_cuda-0.6.0.dist-info/RECORD CHANGED
@@ -1,6 +1,6 @@
  _numba_cuda_redirector.pth,sha256=cmfMMmV0JPh3yEpl4bGeM9AuXiVVMSo6Z_b7RaQL3XE,30
  _numba_cuda_redirector.py,sha256=QKJmYICSQvjvph0Zw9OW015MsuKxIF28GPFjR35AXLM,2681
- numba_cuda/VERSION,sha256=QLjrQACpE6d5EJBTXykdPTaYdBYqie88nj1OiHobnnk,6
+ numba_cuda/VERSION,sha256=l6XW5UCmEg0Jw53bZn4Ojiusf8wv_vgTuC4I_WA2W84,6
  numba_cuda/__init__.py,sha256=atXeUvJKR3JHcAiCFbXCVOJQUHgB1TulmsqSL_9RT3Q,114
  numba_cuda/_version.py,sha256=jbdUsbR7sVllw0KxQNB0-FMd929CGg3kH2fhHdrlkuc,719
  numba_cuda/numba/cuda/__init__.py,sha256=idyVHOObC9lTYnp62v7rVprSacRM4d5F6vhXfG5ElTI,621
@@ -9,19 +9,20 @@ numba_cuda/numba/cuda/api_util.py,sha256=aQfUV2-4RM_oGVvckMjbMr5e3effOQNX04v1T0O
  numba_cuda/numba/cuda/args.py,sha256=HloHkw_PQal2DT-I70Xf_XbnGObS1jiUgcRrQ85Gq28,1978
  numba_cuda/numba/cuda/cg.py,sha256=9V1uZqyGOJX1aFd9c6GAPbLSqq83lE8LoP-vxxrKENY,1490
  numba_cuda/numba/cuda/codegen.py,sha256=ghdYBKZ3Mzk2UlLE64HkrAjb60PN9fibSNkWFRQuj4M,13184
- numba_cuda/numba/cuda/compiler.py,sha256=_0qfSjnLnF29B-t8NQRJt4FBUIKxZJE6xN47_G7oRio,21339
+ numba_cuda/numba/cuda/compiler.py,sha256=aWP_aunOOw8RZsTKf-S3YdH5MDkY6kLN5Xr5B2XgOfk,24214
  numba_cuda/numba/cuda/cpp_function_wrappers.cu,sha256=iv84_F6Q9kFjV_kclrQz1msh6Dud8mI3qNkswTid7Qc,953
  numba_cuda/numba/cuda/cuda_fp16.h,sha256=1IC0mdNdkvKbvAe0-f4uYVS7WFrVqOyI1nRUbBiqr6A,126844
  numba_cuda/numba/cuda/cuda_fp16.hpp,sha256=vJ7NUr2X2tKhAP7ojydAiCoOjVO6n4QGoXD6m9Srrlw,89130
  numba_cuda/numba/cuda/cuda_paths.py,sha256=C0gA72QLWUMfvXkFpw1WqqaFqfsQ7HM72hQVXG0A7RU,10023
- numba_cuda/numba/cuda/cudadecl.py,sha256=ZUssRdTvS4sVwvJWTmaRTvrMXMbkPZ_qVp8JMXoXFoc,23300
+ numba_cuda/numba/cuda/cudadecl.py,sha256=6h_Je6cXmfr4VjBowkr-OOGlsXei-QqGlcjU4Yv-m-4,23438
  numba_cuda/numba/cuda/cudaimpl.py,sha256=0oHjDwBC4JmfpwS1Fsn1bm5YWVru5vZvvnO414P4TS0,38840
  numba_cuda/numba/cuda/cudamath.py,sha256=EFNtdzEytAZuwijdRoFGzVKCeal76UzzaNy7wUFQx8I,3978
- numba_cuda/numba/cuda/decorators.py,sha256=qSpir16-jPYSe2YuRZ6g9INeobmsMNg6ab9IZpwJocM,7823
+ numba_cuda/numba/cuda/debuginfo.py,sha256=lMIs7UAOfkqUvD9sx-nNEY8qP9DhWF9X38xnW3yo_Qc,1433
+ numba_cuda/numba/cuda/decorators.py,sha256=MqmbEXVVgIV1G_feYtccKBRTDL0VALWf0LjbrVfJo4s,8041
  numba_cuda/numba/cuda/descriptor.py,sha256=rNMaurJkjNjIBmHPozDoLC35DMURE0fn_LtnXRmaG_w,985
  numba_cuda/numba/cuda/device_init.py,sha256=lP79tCsQ0Np9xcbjv_lXcH4JOiVZvV8nwg3INdETxsc,3586
  numba_cuda/numba/cuda/deviceufunc.py,sha256=yxAH71dpgJWK8okmCJm0FUV6z2AqdThCYOTZspT7z0M,30775
- numba_cuda/numba/cuda/dispatcher.py,sha256=cJH7Jm-U26PyU-M2Igevar_Q_c_k9R-A99InnRGPzX0,42444
+ numba_cuda/numba/cuda/dispatcher.py,sha256=j2nAjlqNAIAoQVCQ4ZQD--hQDsnFLXedlvaXdCMNKEc,44354
  numba_cuda/numba/cuda/errors.py,sha256=XwWHzCllx0DXU6BQdoRH0m3pznGxnTFOBTVYXMmCfqg,1724
  numba_cuda/numba/cuda/extending.py,sha256=URsyBYls2te-mgE0yvDY6akvawYCA0blBFfD7Lf9DO4,142
  numba_cuda/numba/cuda/initialize.py,sha256=TQGHGLQoq4ch4J6CLDcJdGsZzXM-g2kDgdyO1u-Rbhg,546
@@ -39,8 +40,8 @@ numba_cuda/numba/cuda/random.py,sha256=khX8iDdde_RTUPWhAqrxZacHRQAorFr7BokPuxRWz
  numba_cuda/numba/cuda/reshape_funcs.cu,sha256=H5UAa-VAvoxW9SQwJO88ZrDXC64nWALW3Ch4cHAAqO4,4325
  numba_cuda/numba/cuda/simulator_init.py,sha256=W_bPRtmPGOQVuiprbgt7ENnnnELv_LPCeLDIsfsvFZ8,460
  numba_cuda/numba/cuda/stubs.py,sha256=W3tozv4ganMnfbdFqyPjgQXYeX8GQhwx_xXgv8jk6iM,22270
- numba_cuda/numba/cuda/target.py,sha256=hBflzmxCGlmTugWT1sYhZj9f4HkQAMK2RQ9lO85pMW4,17052
- numba_cuda/numba/cuda/testing.py,sha256=E0wP2vfno1yWsl0v1zg31kpbU8FrKxTF-5y9Iv4WjA4,6412
+ numba_cuda/numba/cuda/target.py,sha256=MWpdHs2K17Lus4e318FNbR533q24MhovGS6Q1ob9x_4,11354
+ numba_cuda/numba/cuda/testing.py,sha256=tG1FBm_gqW4esDxCaecMvCRKvwYEg7Yu2Q60ARNnes0,6873
  numba_cuda/numba/cuda/types.py,sha256=WVfjcly_VUpG9FfKueiEPzZm2NV8Hg0XAFg3bNzPdVc,1314
  numba_cuda/numba/cuda/ufuncs.py,sha256=txw27IxG80W1Yo7e-XwL2AMcQo0fMnxMjBIMy-n5pCo,23317
  numba_cuda/numba/cuda/utils.py,sha256=JId22EI3KkQosW6Dafdaw43qU0xXXO_4JOENLap8klU,630
@@ -55,11 +56,11 @@ numba_cuda/numba/cuda/cudadrv/dummyarray.py,sha256=nXRngdr-k3h_BNGQuJUxmp89yGNWx
  numba_cuda/numba/cuda/cudadrv/enums.py,sha256=Wy5dzukTk4TnWCowg_PLceET_v2xEyiWLu9TyH8pXr8,23742
  numba_cuda/numba/cuda/cudadrv/error.py,sha256=zEIryW6aIy8GG4ypmTliB6RgY4Gy2n8ckz7I6W99LUM,524
  numba_cuda/numba/cuda/cudadrv/libs.py,sha256=Gk9zQ1CKcsZsWl-_9QneXeP9VH5q5R1I3Cx043UOytk,7240
- numba_cuda/numba/cuda/cudadrv/linkable_code.py,sha256=Q_YTv0apBo9t8pkMlKrthPPSVeLd376ZTmVDF5NtVVo,1328
+ numba_cuda/numba/cuda/cudadrv/linkable_code.py,sha256=bWBvnndrzWu24SXm7cilCwNFXShJgNmbMfj1Wzemito,1456
  numba_cuda/numba/cuda/cudadrv/mappings.py,sha256=-dTPHvAkDjdH6vS5OjgrB71AFuqKO6CRgf7hpOk2wiw,802
  numba_cuda/numba/cuda/cudadrv/ndarray.py,sha256=HtULWWFyDlgqvrH5459yyPTvU4UbUo2DSdtcNfvbH00,473
  numba_cuda/numba/cuda/cudadrv/nvrtc.py,sha256=XM9_Vllv7HzH5wZIR2lwFictyX68XDtNbyLkXlL6NTI,11003
- numba_cuda/numba/cuda/cudadrv/nvvm.py,sha256=v2hJJTAQeRmoG59-hnhgMEp5BSVA73QHtEoy636VKao,24107
+ numba_cuda/numba/cuda/cudadrv/nvvm.py,sha256=cAoQmZ0bO8i3wPTQq5D0UeMtfnXdGebqYpU4W0kUIEY,24237
  numba_cuda/numba/cuda/cudadrv/rtapi.py,sha256=WdeUoWzsYNYodx8kMRLVIjnNs0QzwpCihd2Q0AaqItE,226
  numba_cuda/numba/cuda/cudadrv/runtime.py,sha256=Tj9ACrzQqNmDSO6xfpzw12EsQknSywQ-ZGuWMbDdHnQ,4255
  numba_cuda/numba/cuda/kernels/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -145,8 +146,8 @@ numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py,sha256=73FCQbNaA
  numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py,sha256=y7cNQZOZJo5Sv16ql3E5QaRksw-U3RkXss9YDcNeiTk,2137
  numba_cuda/numba/cuda/tests/cudapy/test_datetime.py,sha256=2in1Cq8y9zAFoka7H72wF1D0awEd3n7bv56sUPgoNAQ,3508
  numba_cuda/numba/cuda/tests/cudapy/test_debug.py,sha256=3MYNiMe75rgBF1T0vsJ7r-nkW5jPvov_tDms9KXo2UU,3449
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py,sha256=8Tm1iD2x1BRryB1QY6qp6tdjJCE6Tx9p0LzcYwiExIU,7922
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py,sha256=aTRyZSOJB3sAShw0YAEgHILrR-TCuowW9KYjtlRErKM,6892
+ numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py,sha256=jI43jMbPS9Rbr3YI2mZBrDwH9MGjmyVlczv7QxxPoAs,10993
+ numba_cuda/numba/cuda/tests/cudapy/test_device_func.py,sha256=eDVymTQXTzW0WeAgTMDKYtOi1YAM310IUxGp3Y1ICjs,13162
  numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py,sha256=oX-l_L4H8rME1IolwhAyordSGJ152nnuqGAFdWjfgas,26587
  numba_cuda/numba/cuda/tests/cudapy/test_enums.py,sha256=0GWiwvZ1FTzSl1FfMxttkWaWrowASfXrSDT8XAR4ZHw,3560
  numba_cuda/numba/cuda/tests/cudapy/test_errors.py,sha256=jwHbNb2Ro5pbGOPFetmUhI-vG4s36OKCqMJ-lgWxHMY,2620
@@ -219,7 +220,7 @@ numba_cuda/numba/cuda/tests/data/warn.cu,sha256=6L-qsXJIxAr_n3hVMAz_EZ5j0skcJAfg
  numba_cuda/numba/cuda/tests/doc_examples/__init__.py,sha256=GdfSq6pRVSOQwmgNi7ZFQ5l0yg4-2gNar_0Rz0buUpM,157
  numba_cuda/numba/cuda/tests/doc_examples/test_cg.py,sha256=9UQAez1jp3vQ0BIfoRCnGJGP17nznNcon-XFR4grqzQ,2905
  numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py,sha256=DRzvoE2iCaISJb2lkshBkJyYBEfdpqZLRXG_N9XRaFk,2305
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py,sha256=RgZO7xYkJIlSIuJK4k3_APEJAekjkKy5wKOMFdfRoAM,2654
+ numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py,sha256=PoHbrTMFk8rewm7XH_8Vv1733sI-YHOzxoBI4nFhuBA,2773
  numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py,sha256=UH15R0DbMA4iHLmoZ0GtcttGCNctOUif-u2448JMmRo,5177
  numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py,sha256=hS-X_T7x3-BcBanazmnmGxJE_o1A9b9f_VGk0YlJP4o,6135
  numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py,sha256=_0snszis_UE7LxU5lw9ReNF19Dh5iV0yRy18mUWNd1c,3491
@@ -238,14 +239,14 @@ numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py,sha256=7kJOPHEcrjy_kTA
  numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py,sha256=n0_-xFaw6QqiZbhe55oy7lnEeOwqTvA55p5EUFiTpNw,2006
  numba_cuda/numba/cuda/tests/nrt/__init__.py,sha256=43EXdiXXRBd6yIcVGMrU9F_EJCD9Uw3mzOP3SB53AEE,260
  numba_cuda/numba/cuda/tests/nrt/mock_numpy.py,sha256=Cx2DGhm2bJheShP2Ja1w9YLlRTeAMM7u1UYHsPnTzA8,4552
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py,sha256=b3rtK018qslhUU5UsAAa3s-mjlnlfxAwTJmARTVD2j4,7650
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py,sha256=Wq46oICum9IXnbQ97vV8V7g-3U01PLQEQbaGSNdRuMg,3163
+ numba_cuda/numba/cuda/tests/nrt/test_nrt.py,sha256=wByXeagVoxsAu_pmfuYQ7vmeJt82h4VXwCBsDYQfsps,7727
+ numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py,sha256=SnVvTis8YyaqsElRaGQ-34dnWgGavvc2Ovm2xZ_PD3Q,3240
  numba_cuda/numba/cuda/tests/test_binary_generation/Makefile,sha256=P2WzCc5d64JGq6pJwHEwmKVmJOJxPBtsMTbnuzqYkik,2679
  numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py,sha256=V0raLZLGSiWbE_K-JluI0CnmNkXbhlMVj-TH7P1OV8E,5014
  numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu,sha256=cUf-t6ZM9MK_x7X_aKwsrKW1LdR97XcpR-qnYr5faOE,453
  numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu,sha256=q3oxZziT8KDodeNcEBiWULH6vMrHCWucmJmtrg8C0d0,128
- numba_cuda-0.4.0.dist-info/LICENSE,sha256=eHeYE-XjASmwbxfsP5AImgfzRwZurZGqH1f6OFwJ4io,1326
- numba_cuda-0.4.0.dist-info/METADATA,sha256=BWlfqEMCG0dlSXORk9sKzY7nT_YdQzk9eQ7fBX4rvlY,1496
- numba_cuda-0.4.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
- numba_cuda-0.4.0.dist-info/top_level.txt,sha256=C50SsH-8tXDmt7I0Y3nlJYhS5s6pqWflCPdobe9vx2M,11
- numba_cuda-0.4.0.dist-info/RECORD,,
+ numba_cuda-0.6.0.dist-info/LICENSE,sha256=eHeYE-XjASmwbxfsP5AImgfzRwZurZGqH1f6OFwJ4io,1326
+ numba_cuda-0.6.0.dist-info/METADATA,sha256=iNU56EXHsnAcAcwgNXglPh6H47Quz31_-6r9RevpJ_Q,1836
+ numba_cuda-0.6.0.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
+ numba_cuda-0.6.0.dist-info/top_level.txt,sha256=C50SsH-8tXDmt7I0Y3nlJYhS5s6pqWflCPdobe9vx2M,11
+ numba_cuda-0.6.0.dist-info/RECORD,,
numba_cuda-0.6.0.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (75.8.0)
+ Generator: setuptools (75.8.2)
  Root-Is-Purelib: true
  Tag: py3-none-any