numba-cuda 0.4.0__py3-none-any.whl → 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- numba_cuda/VERSION +1 -1
- numba_cuda/numba/cuda/compiler.py +85 -8
- numba_cuda/numba/cuda/cudadecl.py +6 -2
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +13 -9
- numba_cuda/numba/cuda/cudadrv/nvvm.py +6 -1
- numba_cuda/numba/cuda/debuginfo.py +44 -0
- numba_cuda/numba/cuda/decorators.py +9 -2
- numba_cuda/numba/cuda/dispatcher.py +62 -4
- numba_cuda/numba/cuda/target.py +4 -134
- numba_cuda/numba/cuda/testing.py +11 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +81 -0
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +270 -11
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +10 -7
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +4 -1
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +4 -1
- {numba_cuda-0.4.0.dist-info → numba_cuda-0.6.0.dist-info}/METADATA +20 -2
- {numba_cuda-0.4.0.dist-info → numba_cuda-0.6.0.dist-info}/RECORD +20 -19
- {numba_cuda-0.4.0.dist-info → numba_cuda-0.6.0.dist-info}/WHEEL +1 -1
- {numba_cuda-0.4.0.dist-info → numba_cuda-0.6.0.dist-info}/LICENSE +0 -0
- {numba_cuda-0.4.0.dist-info → numba_cuda-0.6.0.dist-info}/top_level.txt +0 -0
numba_cuda/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.6.0
|
@@ -1,14 +1,17 @@
|
|
1
1
|
from llvmlite import ir
|
2
2
|
from numba.core.typing.templates import ConcreteTemplate
|
3
|
+
from numba.core import ir as numba_ir
|
3
4
|
from numba.core import (cgutils, types, typing, funcdesc, config, compiler,
|
4
5
|
sigutils, utils)
|
5
6
|
from numba.core.compiler import (sanitize_compile_result_entries, CompilerBase,
|
6
7
|
DefaultPassBuilder, Flags, Option,
|
7
8
|
CompileResult)
|
8
9
|
from numba.core.compiler_lock import global_compiler_lock
|
9
|
-
from numba.core.compiler_machinery import (LoweringPass,
|
10
|
+
from numba.core.compiler_machinery import (FunctionPass, LoweringPass,
|
10
11
|
PassManager, register_pass)
|
12
|
+
from numba.core.interpreter import Interpreter
|
11
13
|
from numba.core.errors import NumbaInvalidConfigWarning
|
14
|
+
from numba.core.untyped_passes import TranslateByteCode
|
12
15
|
from numba.core.typed_passes import (IRLegalization, NativeLowering,
|
13
16
|
AnnotateTypes)
|
14
17
|
from warnings import warn
|
@@ -143,13 +146,74 @@ class CreateLibrary(LoweringPass):
|
|
143
146
|
return True
|
144
147
|
|
145
148
|
|
149
|
+
class CUDABytecodeInterpreter(Interpreter):
    """Bytecode interpreter used by the CUDA pipeline.

    Only ``_op_JUMP_IF`` is overridden. It is based on the superclass
    implementation, but names the variable holding the ``bool`` global
    "$bool<N>" instead of "bool<N>" - see Numba PR #9888:
    https://github.com/numba/numba/pull/9888

    This can be removed once that PR is available in an upstream Numba
    release.
    """

    def _op_JUMP_IF(self, inst, pred, iftrue):
        """Append a conditional branch on ``bool(pred)`` to the current block.

        :param inst: The jump instruction; supplies the jump target, the
                     fall-through instruction and the offset used to name
                     the temporaries.
        :param pred: Name of the variable holding the value to test.
        :param iftrue: Whether the jump is taken when the predicate is true.
        """
        # Both successors are looked up eagerly, then ordered by ``iftrue``.
        successors = {
            True: inst.get_jump_target(),
            False: inst.next,
        }
        taken = successors[iftrue]
        not_taken = successors[not iftrue]

        # Store the ``bool`` global under a "$"-prefixed name.
        bool_name = "$bool%s" % (inst.offset)
        self.store(
            value=numba_ir.Global("bool", bool, loc=self.loc),
            name=bool_name,
        )

        # predicate = bool(pred)
        bool_call = numba_ir.Expr.call(
            self.get(bool_name), (self.get(pred),), (), loc=self.loc
        )
        condition = self.store(value=bool_call,
                               name="$%spred" % (inst.offset))

        self.current_block.append(
            numba_ir.Branch(cond=condition, truebr=taken, falsebr=not_taken,
                            loc=self.loc)
        )
|
176
|
+
|
177
|
+
|
178
|
+
@register_pass(mutates_CFG=True, analysis_only=False)
class CUDATranslateBytecode(FunctionPass):
    """Function pass that builds the function IR from bytecode using
    ``CUDABytecodeInterpreter``."""

    _name = "cuda_translate_bytecode"

    def __init__(self):
        FunctionPass.__init__(self)

    def run_pass(self, state):
        """Interpret ``state['bc']`` for ``state['func_id']`` and store the
        resulting IR in ``state['func_ir']``.

        Always returns ``True``.
        """
        func_id, bytecode = state['func_id'], state['bc']
        state['func_ir'] = CUDABytecodeInterpreter(func_id).interpret(bytecode)
        return True
|
192
|
+
|
193
|
+
|
146
194
|
class CUDACompiler(CompilerBase):
|
147
195
|
def define_pipelines(self):
|
148
196
|
dpb = DefaultPassBuilder
|
149
197
|
pm = PassManager('cuda')
|
150
198
|
|
151
199
|
untyped_passes = dpb.define_untyped_pipeline(self.state)
|
152
|
-
|
200
|
+
|
201
|
+
# Rather than replicating the whole untyped passes definition in
|
202
|
+
# numba-cuda, it seems cleaner to take the pass list and replace the
|
203
|
+
# TranslateBytecode pass with our own.
|
204
|
+
|
205
|
+
def replace_translate_pass(implementation, description):
|
206
|
+
if implementation is TranslateByteCode:
|
207
|
+
return (CUDATranslateBytecode, description)
|
208
|
+
else:
|
209
|
+
return (implementation, description)
|
210
|
+
|
211
|
+
cuda_untyped_passes = [
|
212
|
+
replace_translate_pass(implementation, description)
|
213
|
+
for implementation, description in untyped_passes.passes
|
214
|
+
]
|
215
|
+
|
216
|
+
pm.passes.extend(cuda_untyped_passes)
|
153
217
|
|
154
218
|
typed_passes = dpb.define_typed_pipeline(self.state)
|
155
219
|
pm.passes.extend(typed_passes.passes)
|
@@ -352,6 +416,18 @@ def kernel_fixup(kernel, debug):
|
|
352
416
|
kernel.return_value = ir.ReturnValue(kernel, ir.VoidType())
|
353
417
|
kernel.args = kernel.args[1:]
|
354
418
|
|
419
|
+
# If debug metadata is present, remove the return value from it
|
420
|
+
|
421
|
+
if kernel_metadata := getattr(kernel, 'metadata', None):
|
422
|
+
if dbg_metadata := kernel_metadata.get('dbg', None):
|
423
|
+
for name, value in dbg_metadata.operands:
|
424
|
+
if name == "type":
|
425
|
+
type_metadata = value
|
426
|
+
for tm_name, tm_value in type_metadata.operands:
|
427
|
+
if tm_name == 'types':
|
428
|
+
types = tm_value
|
429
|
+
types.operands = types.operands[1:]
|
430
|
+
|
355
431
|
# Mark as a kernel for NVVM
|
356
432
|
|
357
433
|
nvvm.set_cuda_kernel(kernel)
|
@@ -570,16 +646,16 @@ def compile_ptx_for_current_device(pyfunc, sig, debug=None, lineinfo=False,
|
|
570
646
|
abi=abi, abi_info=abi_info)
|
571
647
|
|
572
648
|
|
573
|
-
def declare_device_function(name, restype, argtypes):
|
574
|
-
return declare_device_function_template(name, restype, argtypes).key
|
649
|
+
def declare_device_function(name, restype, argtypes, link):
    """Declare an external device function and return its template's key.

    All arguments are forwarded unchanged to
    ``declare_device_function_template``; the ``key`` attribute of the
    template it returns is the result.
    """
    template = declare_device_function_template(name, restype, argtypes,
                                                link)
    return template.key
|
575
651
|
|
576
652
|
|
577
|
-
def declare_device_function_template(name, restype, argtypes):
|
653
|
+
def declare_device_function_template(name, restype, argtypes, link):
|
578
654
|
from .descriptor import cuda_target
|
579
655
|
typingctx = cuda_target.typing_context
|
580
656
|
targetctx = cuda_target.target_context
|
581
657
|
sig = typing.signature(restype, *argtypes)
|
582
|
-
extfn = ExternFunction(name, sig)
|
658
|
+
extfn = ExternFunction(name, sig, link)
|
583
659
|
|
584
660
|
class device_function_template(ConcreteTemplate):
|
585
661
|
key = extfn
|
@@ -593,7 +669,8 @@ def declare_device_function_template(name, restype, argtypes):
|
|
593
669
|
return device_function_template
|
594
670
|
|
595
671
|
|
596
|
-
class ExternFunction
|
597
|
-
def __init__(self, name, sig):
|
672
|
+
class ExternFunction:
    """Plain record describing an externally-declared function.

    :param name: The name of the function.
    :param sig: The signature of the function.
    :param link: The code objects to link for the function; stored as given.
    """

    def __init__(self, name, sig, link):
        self.name, self.sig, self.link = name, sig, link
|
@@ -403,16 +403,20 @@ _genfp16_binary_operator(operator.itruediv)
|
|
403
403
|
|
404
404
|
|
405
405
|
def _resolve_wrapped_unary(fname):
|
406
|
+
link = tuple()
|
406
407
|
decl = declare_device_function_template(f'__numba_wrapper_{fname}',
|
407
408
|
types.float16,
|
408
|
-
(types.float16,)
|
409
|
+
(types.float16,),
|
410
|
+
link)
|
409
411
|
return types.Function(decl)
|
410
412
|
|
411
413
|
|
412
414
|
def _resolve_wrapped_binary(fname):
|
415
|
+
link = tuple()
|
413
416
|
decl = declare_device_function_template(f'__numba_wrapper_{fname}',
|
414
417
|
types.float16,
|
415
|
-
(types.float16, types.float16,)
|
418
|
+
(types.float16, types.float16,),
|
419
|
+
link)
|
416
420
|
return types.Function(decl)
|
417
421
|
|
418
422
|
|
@@ -2,8 +2,12 @@ from .mappings import FILE_EXTENSION_MAP
|
|
2
2
|
|
3
3
|
|
4
4
|
class LinkableCode:
|
5
|
-
"""An object that
|
6
|
-
|
5
|
+
"""An object that holds code to be linked from memory.
|
6
|
+
|
7
|
+
:param data: A buffer containing the data to link.
|
8
|
+
:param name: The name of the file to be referenced in any compilation or
|
9
|
+
linking errors that may be produced.
|
10
|
+
"""
|
7
11
|
|
8
12
|
def __init__(self, data, name=None):
|
9
13
|
self.data = data
|
@@ -15,49 +19,49 @@ class LinkableCode:
|
|
15
19
|
|
16
20
|
|
17
21
|
class PTXSource(LinkableCode):
|
18
|
-
"""PTX
|
22
|
+
"""PTX source code in memory."""
|
19
23
|
|
20
24
|
kind = FILE_EXTENSION_MAP["ptx"]
|
21
25
|
default_name = "<unnamed-ptx>"
|
22
26
|
|
23
27
|
|
24
28
|
class CUSource(LinkableCode):
|
25
|
-
"""CUDA C/C++
|
29
|
+
"""CUDA C/C++ source code in memory."""
|
26
30
|
|
27
31
|
kind = "cu"
|
28
32
|
default_name = "<unnamed-cu>"
|
29
33
|
|
30
34
|
|
31
35
|
class Fatbin(LinkableCode):
|
32
|
-
"""
|
36
|
+
"""An ELF Fatbin in memory."""
|
33
37
|
|
34
38
|
kind = FILE_EXTENSION_MAP["fatbin"]
|
35
39
|
default_name = "<unnamed-fatbin>"
|
36
40
|
|
37
41
|
|
38
42
|
class Cubin(LinkableCode):
|
39
|
-
"""
|
43
|
+
"""An ELF Cubin in memory."""
|
40
44
|
|
41
45
|
kind = FILE_EXTENSION_MAP["cubin"]
|
42
46
|
default_name = "<unnamed-cubin>"
|
43
47
|
|
44
48
|
|
45
49
|
class Archive(LinkableCode):
|
46
|
-
"""An archive of objects in memory"""
|
50
|
+
"""An archive of objects in memory."""
|
47
51
|
|
48
52
|
kind = FILE_EXTENSION_MAP["a"]
|
49
53
|
default_name = "<unnamed-archive>"
|
50
54
|
|
51
55
|
|
52
56
|
class Object(LinkableCode):
|
53
|
-
"""An object file in memory"""
|
57
|
+
"""An object file in memory."""
|
54
58
|
|
55
59
|
kind = FILE_EXTENSION_MAP["o"]
|
56
60
|
default_name = "<unnamed-object>"
|
57
61
|
|
58
62
|
|
59
63
|
class LTOIR(LinkableCode):
|
60
|
-
"""An LTOIR file in memory"""
|
64
|
+
"""An LTOIR file in memory."""
|
61
65
|
|
62
66
|
kind = "ltoir"
|
63
67
|
default_name = "<unnamed-ltoir>"
|
@@ -314,7 +314,9 @@ COMPUTE_CAPABILITIES = (
|
|
314
314
|
(6, 0), (6, 1), (6, 2),
|
315
315
|
(7, 0), (7, 2), (7, 5),
|
316
316
|
(8, 0), (8, 6), (8, 7), (8, 9),
|
317
|
-
(9, 0)
|
317
|
+
(9, 0),
|
318
|
+
(10, 0), (10, 1),
|
319
|
+
(12, 0),
|
318
320
|
)
|
319
321
|
|
320
322
|
# Maps CTK version -> (min supported cc, max supported cc) inclusive
|
@@ -331,6 +333,9 @@ CTK_SUPPORTED = {
|
|
331
333
|
(12, 2): ((5, 0), (9, 0)),
|
332
334
|
(12, 3): ((5, 0), (9, 0)),
|
333
335
|
(12, 4): ((5, 0), (9, 0)),
|
336
|
+
(12, 5): ((5, 0), (9, 0)),
|
337
|
+
(12, 6): ((5, 0), (9, 0)),
|
338
|
+
(12, 8): ((5, 0), (12, 0)),
|
334
339
|
}
|
335
340
|
|
336
341
|
|
@@ -0,0 +1,44 @@
|
|
1
|
+
from llvmlite import ir
|
2
|
+
from numba.core import types
|
3
|
+
from numba.core.debuginfo import DIBuilder
|
4
|
+
from numba.cuda.types import GridGroup
|
5
|
+
|
6
|
+
_BYTE_SIZE = 8
|
7
|
+
|
8
|
+
|
9
|
+
class CUDADIBuilder(DIBuilder):
    """Debug-info builder that special-cases boolean and ``GridGroup``
    integer types; everything else is delegated to the parent class."""

    def _var_type(self, lltype, size, datamodel=None):
        """Return debug-info type metadata for a variable.

        :param lltype: The LLVM type of the variable.
        :param size: The size of the variable; multiplied by ``_BYTE_SIZE``
                     to give the size recorded in the metadata.
        :param datamodel: Optional data model; when given, its ``fe_type``
                          supplies the type name and decides the encoding.
        """
        # ``None`` means "not a special case - defer to upstream Numba".
        encoding = None

        if isinstance(lltype, ir.IntType):
            if datamodel is None:
                # No data model available: a size of 1 is taken to be a
                # boolean.
                if size == 1:
                    name = str(lltype)
                    encoding = "DW_ATE_boolean"
            else:
                fe_type = datamodel.fe_type
                name = str(fe_type)
                if isinstance(fe_type, types.Boolean):
                    # Boolean type workaround until upstream Numba is fixed
                    encoding = "DW_ATE_boolean"
                elif isinstance(fe_type, GridGroup):
                    # GridGroup type should use numba.cuda implementation
                    encoding = "DW_ATE_unsigned"

        if encoding is None:
            # For other cases, use upstream Numba implementation
            return super()._var_type(lltype, size, datamodel=datamodel)

        return self.module.add_debug_info('DIBasicType', {
            'name': name,
            'size': _BYTE_SIZE * size,
            'encoding': ir.DIToken(encoding),
        })
|
@@ -173,7 +173,7 @@ def jit(func_or_sig=None, device=False, inline=False, link=[], debug=None,
|
|
173
173
|
return disp
|
174
174
|
|
175
175
|
|
176
|
-
def declare_device(name, sig):
|
176
|
+
def declare_device(name, sig, link=None):
|
177
177
|
"""
|
178
178
|
Declare the signature of a foreign function. Returns a descriptor that can
|
179
179
|
be used to call the function from a Python kernel.
|
@@ -181,10 +181,17 @@ def declare_device(name, sig):
|
|
181
181
|
:param name: The name of the foreign function.
|
182
182
|
:type name: str
|
183
183
|
:param sig: The Numba signature of the function.
|
184
|
+
:param link: External code to link when calling the function.
|
184
185
|
"""
|
186
|
+
if link is None:
|
187
|
+
link = tuple()
|
188
|
+
else:
|
189
|
+
if not isinstance(link, (list, tuple, set)):
|
190
|
+
link = (link,)
|
191
|
+
|
185
192
|
argtypes, restype = sigutils.normalize_signature(sig)
|
186
193
|
if restype is None:
|
187
194
|
msg = 'Return type must be provided for device declarations'
|
188
195
|
raise TypeError(msg)
|
189
196
|
|
190
|
-
return declare_device_function(name, restype, argtypes)
|
197
|
+
return declare_device_function(name, restype, argtypes, link)
|
@@ -4,17 +4,19 @@ import re
|
|
4
4
|
import sys
|
5
5
|
import ctypes
|
6
6
|
import functools
|
7
|
+
from collections import defaultdict
|
7
8
|
|
8
|
-
from numba.core import config, serialize, sigutils, types, typing, utils
|
9
|
+
from numba.core import config, ir, serialize, sigutils, types, typing, utils
|
9
10
|
from numba.core.caching import Cache, CacheImpl
|
10
11
|
from numba.core.compiler_lock import global_compiler_lock
|
11
12
|
from numba.core.dispatcher import Dispatcher
|
12
13
|
from numba.core.errors import NumbaPerformanceWarning
|
13
14
|
from numba.core.typing.typeof import Purpose, typeof
|
14
|
-
|
15
|
+
from numba.core.types.functions import Function
|
15
16
|
from numba.cuda.api import get_current_device
|
16
17
|
from numba.cuda.args import wrap_arg
|
17
|
-
from numba.cuda.compiler import compile_cuda, CUDACompiler, kernel_fixup
|
18
|
+
from numba.cuda.compiler import (compile_cuda, CUDACompiler, kernel_fixup,
|
19
|
+
ExternFunction)
|
18
20
|
from numba.cuda.cudadrv import driver
|
19
21
|
from numba.cuda.cudadrv.devices import get_context
|
20
22
|
from numba.cuda.descriptor import cuda_target
|
@@ -41,6 +43,55 @@ cuda_fp16_math_funcs = ['hsin', 'hcos',
|
|
41
43
|
reshape_funcs = ['nocopy_empty_reshape', 'numba_attempt_nocopy_reshape']
|
42
44
|
|
43
45
|
|
46
|
+
def get_cres_link_objects(cres):
    """Collect every linkable code object a compile result depends on.

    The result is the union of:

    * the ``link`` entries of every ``ExternFunction`` referenced in the
      compile result's type map, and
    * the link objects of every CUDA dispatcher overload it calls, gathered
      recursively.

    :param cres: The compile result to inspect.
    :return: A set of linkable code objects.
    """
    typemap = cres.fndesc.typemap

    # Callees that are themselves CUDA dispatchers, keyed by SSA name.
    dispatcher_vars = []
    for var_name, var_type in typemap.items():
        if isinstance(var_type, cuda_types.CUDADispatcher):
            dispatcher_vars.append((var_name, var_type))

    # Group the signatures of all call expressions by the SSA name of the
    # callee.
    signatures_by_callee = {}
    for expr, signature in cres.fndesc.calltypes.items():
        if isinstance(expr, ir.Expr) and expr.op == 'call':
            signatures_by_callee.setdefault(expr.func.name,
                                            []).append(signature)

    required = set()

    # Pull in whatever each called overload needs.
    for var_name, var_type in dispatcher_vars:
        for signature in signatures_by_callee.get(var_name, ()):
            callee_cres = var_type.dispatcher.overloads[signature.args]
            required |= get_cres_link_objects(callee_cres)

    # Finally, the ExternFunction declarations called directly from this
    # function contribute their own link objects.
    for var_type in typemap.values():
        if isinstance(var_type, Function) and isinstance(
            var_type.typing_key, ExternFunction
        ):
            required.update(var_type.typing_key.link)

    return required
|
93
|
+
|
94
|
+
|
44
95
|
class _Kernel(serialize.ReduceMixin):
|
45
96
|
'''
|
46
97
|
CUDA Kernel specialized for a given set of argument types. When called, this
|
@@ -158,6 +209,9 @@ class _Kernel(serialize.ReduceMixin):
|
|
158
209
|
|
159
210
|
self.maybe_link_nrt(link, tgt_ctx, asm)
|
160
211
|
|
212
|
+
for obj in get_cres_link_objects(cres):
|
213
|
+
lib.add_linking_file(obj)
|
214
|
+
|
161
215
|
for filepath in link:
|
162
216
|
lib.add_linking_file(filepath)
|
163
217
|
|
@@ -256,7 +310,11 @@ class _Kernel(serialize.ReduceMixin):
|
|
256
310
|
"""
|
257
311
|
cufunc = self._codelibrary.get_cufunc()
|
258
312
|
|
259
|
-
if
|
313
|
+
if (
|
314
|
+
hasattr(self, "target_context")
|
315
|
+
and self.target_context.enable_nrt
|
316
|
+
and config.CUDA_NRT_STATS
|
317
|
+
):
|
260
318
|
rtsys.ensure_initialized()
|
261
319
|
rtsys.set_memsys_to_module(cufunc.module)
|
262
320
|
# We don't know which stream the kernel will be launched on, so
|
numba_cuda/numba/cuda/target.py
CHANGED
@@ -3,8 +3,7 @@ from functools import cached_property
|
|
3
3
|
import llvmlite.binding as ll
|
4
4
|
from llvmlite import ir
|
5
5
|
|
6
|
-
from numba.core import
|
7
|
-
typing, utils)
|
6
|
+
from numba.core import cgutils, config, itanium_mangler, types, typing
|
8
7
|
from numba.core.dispatcher import Dispatcher
|
9
8
|
from numba.core.base import BaseContext
|
10
9
|
from numba.core.callconv import BaseCallConv, MinimalCallConv
|
@@ -12,7 +11,8 @@ from numba.core.typing import cmathdecl
|
|
12
11
|
from numba.core import datamodel
|
13
12
|
|
14
13
|
from .cudadrv import nvvm
|
15
|
-
from numba.cuda import codegen,
|
14
|
+
from numba.cuda import codegen, ufuncs
|
15
|
+
from numba.cuda.debuginfo import CUDADIBuilder
|
16
16
|
from numba.cuda.models import cuda_data_manager
|
17
17
|
|
18
18
|
# -----------------------------------------------------------------------------
|
@@ -80,7 +80,7 @@ class CUDATargetContext(BaseContext):
|
|
80
80
|
|
81
81
|
@property
|
82
82
|
def DIBuilder(self):
|
83
|
-
return
|
83
|
+
return CUDADIBuilder
|
84
84
|
|
85
85
|
@property
|
86
86
|
def enable_boundscheck(self):
|
@@ -150,136 +150,6 @@ class CUDATargetContext(BaseContext):
|
|
150
150
|
return itanium_mangler.mangle(name, argtypes, abi_tags=abi_tags,
|
151
151
|
uid=uid)
|
152
152
|
|
153
|
-
def prepare_cuda_kernel(self, codelib, fndesc, debug, lineinfo,
|
154
|
-
nvvm_options, filename, linenum,
|
155
|
-
max_registers=None, lto=False):
|
156
|
-
"""
|
157
|
-
Adapt a code library ``codelib`` with the numba compiled CUDA kernel
|
158
|
-
with name ``fname`` and arguments ``argtypes`` for NVVM.
|
159
|
-
A new library is created with a wrapper function that can be used as
|
160
|
-
the kernel entry point for the given kernel.
|
161
|
-
|
162
|
-
Returns the new code library and the wrapper function.
|
163
|
-
|
164
|
-
Parameters:
|
165
|
-
|
166
|
-
codelib: The CodeLibrary containing the device function to wrap
|
167
|
-
in a kernel call.
|
168
|
-
fndesc: The FunctionDescriptor of the source function.
|
169
|
-
debug: Whether to compile with debug.
|
170
|
-
lineinfo: Whether to emit line info.
|
171
|
-
nvvm_options: Dict of NVVM options used when compiling the new library.
|
172
|
-
filename: The source filename that the function is contained in.
|
173
|
-
linenum: The source line that the function is on.
|
174
|
-
max_registers: The max_registers argument for the code library.
|
175
|
-
"""
|
176
|
-
kernel_name = itanium_mangler.prepend_namespace(
|
177
|
-
fndesc.llvm_func_name, ns='cudapy',
|
178
|
-
)
|
179
|
-
library = self.codegen().create_library(f'{codelib.name}_kernel_',
|
180
|
-
entry_name=kernel_name,
|
181
|
-
nvvm_options=nvvm_options,
|
182
|
-
max_registers=max_registers,
|
183
|
-
lto=lto
|
184
|
-
)
|
185
|
-
library.add_linking_library(codelib)
|
186
|
-
wrapper = self.generate_kernel_wrapper(library, fndesc, kernel_name,
|
187
|
-
debug, lineinfo, filename,
|
188
|
-
linenum)
|
189
|
-
return library, wrapper
|
190
|
-
|
191
|
-
def generate_kernel_wrapper(self, library, fndesc, kernel_name, debug,
|
192
|
-
lineinfo, filename, linenum):
|
193
|
-
"""
|
194
|
-
Generate the kernel wrapper in the given ``library``.
|
195
|
-
The function being wrapped is described by ``fndesc``.
|
196
|
-
The wrapper function is returned.
|
197
|
-
"""
|
198
|
-
|
199
|
-
argtypes = fndesc.argtypes
|
200
|
-
arginfo = self.get_arg_packer(argtypes)
|
201
|
-
argtys = list(arginfo.argument_types)
|
202
|
-
wrapfnty = ir.FunctionType(ir.VoidType(), argtys)
|
203
|
-
wrapper_module = self.create_module("cuda.kernel.wrapper")
|
204
|
-
fnty = ir.FunctionType(ir.IntType(32),
|
205
|
-
[self.call_conv.get_return_type(types.pyobject)]
|
206
|
-
+ argtys)
|
207
|
-
func = ir.Function(wrapper_module, fnty, fndesc.llvm_func_name)
|
208
|
-
|
209
|
-
prefixed = itanium_mangler.prepend_namespace(func.name, ns='cudapy')
|
210
|
-
wrapfn = ir.Function(wrapper_module, wrapfnty, prefixed)
|
211
|
-
builder = ir.IRBuilder(wrapfn.append_basic_block(''))
|
212
|
-
|
213
|
-
if debug or lineinfo:
|
214
|
-
directives_only = lineinfo and not debug
|
215
|
-
debuginfo = self.DIBuilder(module=wrapper_module,
|
216
|
-
filepath=filename,
|
217
|
-
cgctx=self,
|
218
|
-
directives_only=directives_only)
|
219
|
-
debuginfo.mark_subprogram(
|
220
|
-
wrapfn, kernel_name, fndesc.args, argtypes, linenum,
|
221
|
-
)
|
222
|
-
debuginfo.mark_location(builder, linenum)
|
223
|
-
|
224
|
-
# Define error handling variable
|
225
|
-
def define_error_gv(postfix):
|
226
|
-
name = wrapfn.name + postfix
|
227
|
-
gv = cgutils.add_global_variable(wrapper_module, ir.IntType(32),
|
228
|
-
name)
|
229
|
-
gv.initializer = ir.Constant(gv.type.pointee, None)
|
230
|
-
return gv
|
231
|
-
|
232
|
-
gv_exc = define_error_gv("__errcode__")
|
233
|
-
gv_tid = []
|
234
|
-
gv_ctaid = []
|
235
|
-
for i in 'xyz':
|
236
|
-
gv_tid.append(define_error_gv("__tid%s__" % i))
|
237
|
-
gv_ctaid.append(define_error_gv("__ctaid%s__" % i))
|
238
|
-
|
239
|
-
callargs = arginfo.from_arguments(builder, wrapfn.args)
|
240
|
-
status, _ = self.call_conv.call_function(
|
241
|
-
builder, func, types.void, argtypes, callargs)
|
242
|
-
|
243
|
-
if debug:
|
244
|
-
# Check error status
|
245
|
-
with cgutils.if_likely(builder, status.is_ok):
|
246
|
-
builder.ret_void()
|
247
|
-
|
248
|
-
with builder.if_then(builder.not_(status.is_python_exc)):
|
249
|
-
# User exception raised
|
250
|
-
old = ir.Constant(gv_exc.type.pointee, None)
|
251
|
-
|
252
|
-
# Use atomic cmpxchg to prevent rewriting the error status
|
253
|
-
# Only the first error is recorded
|
254
|
-
|
255
|
-
xchg = builder.cmpxchg(gv_exc, old, status.code,
|
256
|
-
'monotonic', 'monotonic')
|
257
|
-
changed = builder.extract_value(xchg, 1)
|
258
|
-
|
259
|
-
# If the xchange is successful, save the thread ID.
|
260
|
-
sreg = nvvmutils.SRegBuilder(builder)
|
261
|
-
with builder.if_then(changed):
|
262
|
-
for dim, ptr, in zip("xyz", gv_tid):
|
263
|
-
val = sreg.tid(dim)
|
264
|
-
builder.store(val, ptr)
|
265
|
-
|
266
|
-
for dim, ptr, in zip("xyz", gv_ctaid):
|
267
|
-
val = sreg.ctaid(dim)
|
268
|
-
builder.store(val, ptr)
|
269
|
-
|
270
|
-
builder.ret_void()
|
271
|
-
|
272
|
-
nvvm.set_cuda_kernel(wrapfn)
|
273
|
-
library.add_ir_module(wrapper_module)
|
274
|
-
if debug or lineinfo:
|
275
|
-
debuginfo.finalize()
|
276
|
-
library.finalize()
|
277
|
-
|
278
|
-
if config.DUMP_LLVM:
|
279
|
-
utils.dump_llvm(fndesc, wrapper_module)
|
280
|
-
|
281
|
-
return library.get_function(wrapfn.name)
|
282
|
-
|
283
153
|
def make_constant_array(self, builder, aryty, arr):
|
284
154
|
"""
|
285
155
|
Unlike the parent version, this returns a pointer in the constant
|
numba_cuda/numba/cuda/testing.py
CHANGED
@@ -115,12 +115,22 @@ def skip_on_arm(reason):
|
|
115
115
|
def skip_if_cuda_includes_missing(fn):
|
116
116
|
# Skip when cuda.h is not available - generally this should indicate
|
117
117
|
# whether the CUDA includes are available or not
|
118
|
-
|
118
|
+
cuda_include_path = libs.get_cuda_include_dir()
|
119
|
+
cuda_h = os.path.join(cuda_include_path, 'cuda.h')
|
119
120
|
cuda_h_file = (os.path.exists(cuda_h) and os.path.isfile(cuda_h))
|
120
121
|
reason = 'CUDA include dir not available on this system'
|
121
122
|
return unittest.skipUnless(cuda_h_file, reason)(fn)
|
122
123
|
|
123
124
|
|
125
|
+
def skip_if_curand_kernel_missing(fn):
    """Skip ``fn`` unless ``curand_kernel.h`` exists as a regular file in
    the CUDA include directory reported by ``libs``."""
    header = os.path.join(libs.get_cuda_include_dir(), 'curand_kernel.h')
    header_present = os.path.exists(header) and os.path.isfile(header)
    skip_unless_present = unittest.skipUnless(
        header_present, 'curand_kernel.h not available on this system'
    )
    return skip_unless_present(fn)
|
132
|
+
|
133
|
+
|
124
134
|
def skip_if_mvc_enabled(reason):
|
125
135
|
"""Skip a test if Minor Version Compatibility is enabled"""
|
126
136
|
return unittest.skipIf(config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY,
|
@@ -72,6 +72,57 @@ class TestCudaDebugInfo(CUDATestCase):
|
|
72
72
|
def f(x):
|
73
73
|
x[0] = 0
|
74
74
|
|
75
|
+
def test_issue_9888(self):
    """No ``DILocalVariable`` may have a name starting with "bool"."""
    # Compiler created symbol should not be emitted in DILocalVariable
    # See Numba Issue #9888 https://github.com/numba/numba/pull/9888
    sig = (types.boolean,)

    @cuda.jit(sig, debug=True, opt=False)
    def f(cond):
        if cond:
            x = 1  # noqa: F841
        else:
            x = 0  # noqa: F841

    llvm_ir = f.inspect_llvm(sig)

    # A variable name starting with "bool" in the debug metadata
    bool_var_pat = r'!DILocalVariable\(.*name:\s+\"bool'
    self.assertIsNone(re.search(bool_var_pat, llvm_ir), msg=llvm_ir)
|
92
|
+
|
93
|
+
def test_bool_type(self):
    """The local ``z = x == y`` must be described as ``DW_ATE_boolean``."""
    sig = (types.int32, types.int32)

    @cuda.jit("void(int32, int32)", debug=True, opt=False)
    def f(x, y):
        z = x == y  # noqa: F841

    llvm_ir = f.inspect_llvm(sig)

    # extract the metadata node id from `type` field of DILocalVariable
    var_pat = r'!DILocalVariable\(.*name:\s+"z".*type:\s+!(\d+)'
    var_match = re.search(var_pat, llvm_ir)
    self.assertIsNotNone(var_match, msg=llvm_ir)
    type_node_id = var_match.group(1)

    # verify the DIBasicType has correct encoding attribute DW_ATE_boolean
    type_pat = rf'!{type_node_id}\s+=\s+!DIBasicType\(.*DW_ATE_boolean'
    self.assertIsNotNone(re.search(type_pat, llvm_ir), msg=llvm_ir)
|
112
|
+
|
113
|
+
def test_grid_group_type(self):
    """A ``GridGroup`` local must appear as a 64-bit unsigned
    ``DIBasicType`` named "GridGroup"."""
    sig = (types.int32,)

    @cuda.jit(sig, debug=True, opt=False)
    def f(x):
        grid = cuda.cg.this_grid()  # noqa: F841

    llvm_ir = f.inspect_llvm(sig)

    grid_group_pat = (
        r'!DIBasicType\(.*DW_ATE_unsigned, name: "GridGroup", size: 64'
    )
    self.assertIsNotNone(re.search(grid_group_pat, llvm_ir), msg=llvm_ir)
|
125
|
+
|
75
126
|
@unittest.skip("Wrappers no longer exist")
|
76
127
|
def test_wrapper_has_debuginfo(self):
|
77
128
|
sig = (types.int32[::1],)
|
@@ -217,6 +268,36 @@ class TestCudaDebugInfo(CUDATestCase):
|
|
217
268
|
three_device_fns(kernel_debug=False, leaf_debug=True)
|
218
269
|
three_device_fns(kernel_debug=False, leaf_debug=False)
|
219
270
|
|
271
|
+
def test_kernel_args_types(self):
    """The kernel's ``DISubroutineType`` must list exactly two entries,
    both ``int32`` basic types."""
    sig = (types.int32, types.int32)

    @cuda.jit("void(int32, int32)", debug=True, opt=False)
    def f(x, y):
        z = x + y  # noqa: F841

    llvm_ir = f.inspect_llvm(sig)

    # extract the metadata node id from `types` field of DISubroutineType
    subroutine_pat = r'!DISubroutineType\(types:\s+!(\d+)\)'
    subroutine_match = re.search(subroutine_pat, llvm_ir)
    self.assertIsNotNone(subroutine_match, msg=llvm_ir)
    types_node_id = subroutine_match.group(1)

    # extract the metadata node ids from the flexible node of types
    members_pat = rf'!{types_node_id}\s+=\s+!{{\s+!(\d+),\s+!(\d+)\s+}}'
    members_match = re.search(members_pat, llvm_ir)
    self.assertIsNotNone(members_match, msg=llvm_ir)

    # verify each of the two metadata nodes match expected type
    for arg_node_id in members_match.groups():
        arg_pat = (
            rf'!{arg_node_id}\s+=\s+!DIBasicType\('
            r'.*DW_ATE_signed,\s+name:\s+"int32"'
        )
        self.assertIsNotNone(re.search(arg_pat, llvm_ir), msg=llvm_ir)
|
300
|
+
|
220
301
|
|
221
302
|
if __name__ == '__main__':
|
222
303
|
unittest.main()
|
@@ -1,11 +1,14 @@
|
|
1
1
|
import re
|
2
|
-
import
|
2
|
+
import cffi
|
3
3
|
|
4
4
|
import numpy as np
|
5
5
|
|
6
|
-
from numba.cuda.testing import
|
7
|
-
|
6
|
+
from numba.cuda.testing import (skip_if_curand_kernel_missing, skip_on_cudasim,
|
7
|
+
test_data_dir, unittest, CUDATestCase)
|
8
|
+
from numba import cuda, jit, float32, int32, types
|
8
9
|
from numba.core.errors import TypingError
|
10
|
+
from numba.tests.support import skip_unless_cffi
|
11
|
+
from types import ModuleType
|
9
12
|
|
10
13
|
|
11
14
|
class TestDeviceFunc(CUDATestCase):
|
@@ -92,7 +95,7 @@ class TestDeviceFunc(CUDATestCase):
|
|
92
95
|
def add(a, b):
|
93
96
|
return a + b
|
94
97
|
|
95
|
-
mymod =
|
98
|
+
mymod = ModuleType(name='mymod')
|
96
99
|
mymod.add = add
|
97
100
|
del add
|
98
101
|
|
@@ -192,31 +195,287 @@ class TestDeviceFunc(CUDATestCase):
|
|
192
195
|
|
193
196
|
self.assertEqual(0x04010203, x[0])
|
194
197
|
|
195
|
-
|
198
|
+
|
199
|
+
times2_cu = cuda.CUSource("""
|
200
|
+
extern "C" __device__
|
201
|
+
int times2(int *out, int a)
|
202
|
+
{
|
203
|
+
*out = a * 2;
|
204
|
+
return 0;
|
205
|
+
}
|
206
|
+
""")
|
207
|
+
|
208
|
+
times3_cu = cuda.CUSource("""
|
209
|
+
extern "C" __device__
|
210
|
+
int times3(int *out, int a)
|
211
|
+
{
|
212
|
+
*out = a * 3;
|
213
|
+
return 0;
|
214
|
+
}
|
215
|
+
""")
|
216
|
+
|
217
|
+
times4_cu = cuda.CUSource("""
|
218
|
+
extern "C" __device__
|
219
|
+
int times2(int *out, int a);
|
220
|
+
|
221
|
+
extern "C" __device__
|
222
|
+
int times4(int *out, int a)
|
223
|
+
{
|
224
|
+
int tmp;
|
225
|
+
times2(&tmp, a);
|
226
|
+
*out = tmp * 2;
|
227
|
+
return 0;
|
228
|
+
}
|
229
|
+
""")
|
230
|
+
|
231
|
+
jitlink_user_cu = cuda.CUSource("""
|
232
|
+
extern "C" __device__
|
233
|
+
int array_mutator(void *out, int *a);
|
234
|
+
|
235
|
+
extern "C" __device__
|
236
|
+
int use_array_mutator(void *out, int *a) {
|
237
|
+
array_mutator(out, a);
|
238
|
+
return 0;
|
239
|
+
}
|
240
|
+
""")
|
241
|
+
|
242
|
+
rng_cu = cuda.CUSource("""
|
243
|
+
#include <curand_kernel.h>
|
244
|
+
|
245
|
+
extern "C" __device__
|
246
|
+
int random_number(unsigned int *out, unsigned long long seed)
|
247
|
+
{
|
248
|
+
// Initialize state
|
249
|
+
curandStateXORWOW_t state;
|
250
|
+
unsigned long long sequence = 1;
|
251
|
+
unsigned long long offset = 0;
|
252
|
+
curand_init(seed, sequence, offset, &state);
|
253
|
+
|
254
|
+
// Generate one random number
|
255
|
+
*out = curand(&state);
|
256
|
+
|
257
|
+
// Report no exception
|
258
|
+
return 0;
|
259
|
+
}""")
|
260
|
+
|
261
|
+
|
262
|
+
@skip_on_cudasim('External functions unsupported in the simulator')
|
263
|
+
class TestDeclareDevice(CUDATestCase):
|
264
|
+
|
265
|
+
def check_api(self, decl):
|
196
266
|
self.assertEqual(decl.name, 'f1')
|
197
267
|
self.assertEqual(decl.sig.args, (float32[:],))
|
198
268
|
self.assertEqual(decl.sig.return_type, int32)
|
199
269
|
|
200
|
-
@skip_on_cudasim('cudasim does not check signatures')
|
201
270
|
def test_declare_device_signature(self):
|
202
271
|
f1 = cuda.declare_device('f1', int32(float32[:]))
|
203
|
-
self.
|
272
|
+
self.check_api(f1)
|
204
273
|
|
205
|
-
@skip_on_cudasim('cudasim does not check signatures')
|
206
274
|
def test_declare_device_string(self):
|
207
275
|
f1 = cuda.declare_device('f1', 'int32(float32[:])')
|
208
|
-
self.
|
276
|
+
self.check_api(f1)
|
209
277
|
|
210
|
-
@skip_on_cudasim('cudasim does not check signatures')
|
211
278
|
def test_bad_declare_device_tuple(self):
|
212
279
|
with self.assertRaisesRegex(TypeError, 'Return type'):
|
213
280
|
cuda.declare_device('f1', (float32[:],))
|
214
281
|
|
215
|
-
@skip_on_cudasim('cudasim does not check signatures')
|
216
282
|
def test_bad_declare_device_string(self):
|
217
283
|
with self.assertRaisesRegex(TypeError, 'Return type'):
|
218
284
|
cuda.declare_device('f1', '(float32[:],)')
|
219
285
|
|
286
|
+
def test_link_cu_source(self):
|
287
|
+
times2 = cuda.declare_device('times2', 'int32(int32)', link=times2_cu)
|
288
|
+
|
289
|
+
@cuda.jit
|
290
|
+
def kernel(r, x):
|
291
|
+
i = cuda.grid(1)
|
292
|
+
if i < len(r):
|
293
|
+
r[i] = times2(x[i])
|
294
|
+
|
295
|
+
x = np.arange(10, dtype=np.int32)
|
296
|
+
r = np.empty_like(x)
|
297
|
+
|
298
|
+
kernel[1, 32](r, x)
|
299
|
+
|
300
|
+
np.testing.assert_equal(r, x * 2)
|
301
|
+
|
302
|
+
def _test_link_multiple_sources(self, link_type):
|
303
|
+
link = link_type([times2_cu, times4_cu])
|
304
|
+
times4 = cuda.declare_device('times4', 'int32(int32)', link=link)
|
305
|
+
|
306
|
+
@cuda.jit
|
307
|
+
def kernel(r, x):
|
308
|
+
i = cuda.grid(1)
|
309
|
+
if i < len(r):
|
310
|
+
r[i] = times4(x[i])
|
311
|
+
|
312
|
+
x = np.arange(10, dtype=np.int32)
|
313
|
+
r = np.empty_like(x)
|
314
|
+
|
315
|
+
kernel[1, 32](r, x)
|
316
|
+
|
317
|
+
np.testing.assert_equal(r, x * 4)
|
318
|
+
|
319
|
+
def test_link_multiple_sources_set(self):
|
320
|
+
self._test_link_multiple_sources(set)
|
321
|
+
|
322
|
+
def test_link_multiple_sources_tuple(self):
|
323
|
+
self._test_link_multiple_sources(tuple)
|
324
|
+
|
325
|
+
def test_link_multiple_sources_list(self):
|
326
|
+
self._test_link_multiple_sources(list)
|
327
|
+
|
328
|
+
@skip_unless_cffi
|
329
|
+
def test_link_sources_in_memory_and_on_disk(self):
|
330
|
+
jitlink_cu = str(test_data_dir / "jitlink.cu")
|
331
|
+
link = [jitlink_cu, jitlink_user_cu]
|
332
|
+
sig = types.void(types.CPointer(types.int32))
|
333
|
+
ext_fn = cuda.declare_device("use_array_mutator", sig, link=link)
|
334
|
+
|
335
|
+
ffi = cffi.FFI()
|
336
|
+
|
337
|
+
@cuda.jit
|
338
|
+
def kernel(x):
|
339
|
+
ptr = ffi.from_buffer(x)
|
340
|
+
ext_fn(ptr)
|
341
|
+
|
342
|
+
x = np.arange(2, dtype=np.int32)
|
343
|
+
kernel[1, 1](x)
|
344
|
+
|
345
|
+
expected = np.ones(2, dtype=np.int32)
|
346
|
+
np.testing.assert_equal(x, expected)
|
347
|
+
|
348
|
+
@skip_if_curand_kernel_missing
|
349
|
+
def test_include_cuda_header(self):
|
350
|
+
sig = types.int32(types.uint64)
|
351
|
+
link = [rng_cu]
|
352
|
+
random_number = cuda.declare_device("random_number", sig, link=link)
|
353
|
+
|
354
|
+
@cuda.jit
|
355
|
+
def kernel(x, seed):
|
356
|
+
x[0] = random_number(seed)
|
357
|
+
|
358
|
+
x = np.zeros(1, dtype=np.uint32)
|
359
|
+
kernel[1, 1](x, 1)
|
360
|
+
np.testing.assert_equal(x[0], 323845807)
|
361
|
+
|
362
|
+
def test_declared_in_called_function(self):
|
363
|
+
times2 = cuda.declare_device('times2', 'int32(int32)', link=times2_cu)
|
364
|
+
|
365
|
+
@cuda.jit
|
366
|
+
def device_func(x):
|
367
|
+
return times2(x)
|
368
|
+
|
369
|
+
@cuda.jit
|
370
|
+
def kernel(r, x):
|
371
|
+
i = cuda.grid(1)
|
372
|
+
if i < len(r):
|
373
|
+
r[i] = device_func(x[i])
|
374
|
+
|
375
|
+
x = np.arange(10, dtype=np.int32)
|
376
|
+
r = np.empty_like(x)
|
377
|
+
|
378
|
+
kernel[1, 32](r, x)
|
379
|
+
|
380
|
+
np.testing.assert_equal(r, x * 2)
|
381
|
+
|
382
|
+
def test_declared_in_called_function_twice(self):
|
383
|
+
times2 = cuda.declare_device('times2', 'int32(int32)', link=times2_cu)
|
384
|
+
|
385
|
+
@cuda.jit
|
386
|
+
def device_func_1(x):
|
387
|
+
return times2(x)
|
388
|
+
|
389
|
+
@cuda.jit
|
390
|
+
def device_func_2(x):
|
391
|
+
return device_func_1(x)
|
392
|
+
|
393
|
+
@cuda.jit
|
394
|
+
def kernel(r, x):
|
395
|
+
i = cuda.grid(1)
|
396
|
+
if i < len(r):
|
397
|
+
r[i] = device_func_2(x[i])
|
398
|
+
|
399
|
+
x = np.arange(10, dtype=np.int32)
|
400
|
+
r = np.empty_like(x)
|
401
|
+
|
402
|
+
kernel[1, 32](r, x)
|
403
|
+
|
404
|
+
np.testing.assert_equal(r, x * 2)
|
405
|
+
|
406
|
+
def test_declared_in_called_function_two_calls(self):
|
407
|
+
times2 = cuda.declare_device('times2', 'int32(int32)', link=times2_cu)
|
408
|
+
|
409
|
+
@cuda.jit
|
410
|
+
def device_func(x):
|
411
|
+
return times2(x)
|
412
|
+
|
413
|
+
@cuda.jit
|
414
|
+
def kernel(r, x):
|
415
|
+
i = cuda.grid(1)
|
416
|
+
if i < len(r):
|
417
|
+
r[i] = device_func(x[i]) + device_func(x[i] + i)
|
418
|
+
|
419
|
+
x = np.arange(10, dtype=np.int32)
|
420
|
+
r = np.empty_like(x)
|
421
|
+
|
422
|
+
kernel[1, 32](r, x)
|
423
|
+
|
424
|
+
np.testing.assert_equal(r, x * 6)
|
425
|
+
|
426
|
+
def test_call_declared_function_twice(self):
|
427
|
+
times2 = cuda.declare_device('times2', 'int32(int32)', link=times2_cu)
|
428
|
+
|
429
|
+
@cuda.jit
|
430
|
+
def kernel(r, x):
|
431
|
+
i = cuda.grid(1)
|
432
|
+
if i < len(r):
|
433
|
+
r[i] = times2(x[i]) + times2(x[i] + i)
|
434
|
+
|
435
|
+
x = np.arange(10, dtype=np.int32)
|
436
|
+
r = np.empty_like(x)
|
437
|
+
|
438
|
+
kernel[1, 32](r, x)
|
439
|
+
|
440
|
+
np.testing.assert_equal(r, x * 6)
|
441
|
+
|
442
|
+
def test_declared_in_called_function_and_parent(self):
|
443
|
+
times2 = cuda.declare_device('times2', 'int32(int32)', link=times2_cu)
|
444
|
+
|
445
|
+
@cuda.jit
|
446
|
+
def device_func(x):
|
447
|
+
return times2(x)
|
448
|
+
|
449
|
+
@cuda.jit
|
450
|
+
def kernel(r, x):
|
451
|
+
i = cuda.grid(1)
|
452
|
+
if i < len(r):
|
453
|
+
r[i] = device_func(x[i]) + times2(x[i])
|
454
|
+
|
455
|
+
x = np.arange(10, dtype=np.int32)
|
456
|
+
r = np.empty_like(x)
|
457
|
+
|
458
|
+
kernel[1, 32](r, x)
|
459
|
+
|
460
|
+
np.testing.assert_equal(r, x * 4)
|
461
|
+
|
462
|
+
def test_call_two_different_declared_functions(self):
|
463
|
+
times2 = cuda.declare_device('times2', 'int32(int32)', link=times2_cu)
|
464
|
+
times3 = cuda.declare_device('times3', 'int32(int32)', link=times3_cu)
|
465
|
+
|
466
|
+
@cuda.jit
|
467
|
+
def kernel(r, x):
|
468
|
+
i = cuda.grid(1)
|
469
|
+
if i < len(r):
|
470
|
+
r[i] = times2(x[i]) + times3(x[i])
|
471
|
+
|
472
|
+
x = np.arange(10, dtype=np.int32)
|
473
|
+
r = np.empty_like(x)
|
474
|
+
|
475
|
+
kernel[1, 32](r, x)
|
476
|
+
|
477
|
+
np.testing.assert_equal(r, x * 5)
|
478
|
+
|
220
479
|
|
221
480
|
if __name__ == '__main__':
|
222
481
|
unittest.main()
|
@@ -15,16 +15,18 @@ class TestFFI(CUDATestCase):
|
|
15
15
|
import numpy as np
|
16
16
|
import os
|
17
17
|
|
18
|
-
# Declaration of the foreign function
|
19
|
-
mul = cuda.declare_device('mul_f32_f32', 'float32(float32, float32)')
|
20
|
-
|
21
18
|
# Path to the source containing the foreign function
|
22
19
|
# (here assumed to be in a subdirectory called "ffi")
|
23
20
|
basedir = os.path.dirname(os.path.abspath(__file__))
|
24
21
|
functions_cu = os.path.join(basedir, 'ffi', 'functions.cu')
|
25
22
|
|
26
|
-
#
|
27
|
-
|
23
|
+
# Declaration of the foreign function
|
24
|
+
mul = cuda.declare_device('mul_f32_f32', 'float32(float32, float32)',
|
25
|
+
link=functions_cu)
|
26
|
+
|
27
|
+
# A kernel that calls mul; functions.cu is linked automatically due to
|
28
|
+
# the call to mul.
|
29
|
+
@cuda.jit
|
28
30
|
def multiply_vectors(r, x, y):
|
29
31
|
i = cuda.grid(1)
|
30
32
|
|
@@ -54,14 +56,15 @@ class TestFFI(CUDATestCase):
|
|
54
56
|
|
55
57
|
# magictoken.ex_from_buffer_decl.begin
|
56
58
|
signature = 'float32(CPointer(float32), int32)'
|
57
|
-
sum_reduce = cuda.declare_device('sum_reduce', signature
|
59
|
+
sum_reduce = cuda.declare_device('sum_reduce', signature,
|
60
|
+
link=functions_cu)
|
58
61
|
# magictoken.ex_from_buffer_decl.end
|
59
62
|
|
60
63
|
# magictoken.ex_from_buffer_kernel.begin
|
61
64
|
import cffi
|
62
65
|
ffi = cffi.FFI()
|
63
66
|
|
64
|
-
@cuda.jit
|
67
|
+
@cuda.jit
|
65
68
|
def reduction_caller(result, array):
|
66
69
|
array_ptr = ffi.from_buffer(array)
|
67
70
|
result[()] = sum_reduce(array_ptr, len(array))
|
@@ -171,7 +171,10 @@ class TestNrtStatistics(CUDATestCase):
|
|
171
171
|
arr = cuda_arange(5 * tmp[0]) # noqa: F841
|
172
172
|
return None
|
173
173
|
|
174
|
-
with
|
174
|
+
with (
|
175
|
+
override_config('CUDA_ENABLE_NRT', True),
|
176
|
+
override_config('CUDA_NRT_STATS', True)
|
177
|
+
):
|
175
178
|
# Switch on stats
|
176
179
|
rtsys.memsys_enable_stats()
|
177
180
|
# check the stats are on
|
@@ -18,7 +18,10 @@ class TestNrtRefCt(EnableNRTStatsMixin, CUDATestCase):
|
|
18
18
|
super(TestNrtRefCt, self).tearDown()
|
19
19
|
|
20
20
|
def run(self, result=None):
|
21
|
-
with
|
21
|
+
with (
|
22
|
+
override_config("CUDA_ENABLE_NRT", True),
|
23
|
+
override_config('CUDA_NRT_STATS', True)
|
24
|
+
):
|
22
25
|
super(TestNrtRefCt, self).run(result)
|
23
26
|
|
24
27
|
def test_no_return(self):
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: numba-cuda
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.6.0
|
4
4
|
Summary: CUDA target for Numba
|
5
5
|
Author: Anaconda Inc., NVIDIA Corporation
|
6
6
|
License: BSD 2-clause
|
@@ -27,7 +27,19 @@ tracker](https://github.com/NVIDIA/numba-cuda/issues).
|
|
27
27
|
To raise questions or initiate discussions, please use the [Numba Discourse
|
28
28
|
forum](https://numba.discourse.group).
|
29
29
|
|
30
|
-
##
|
30
|
+
## Installation with pip
|
31
|
+
|
32
|
+
```shell
|
33
|
+
pip install numba-cuda
|
34
|
+
```
|
35
|
+
|
36
|
+
## Installation with Conda
|
37
|
+
|
38
|
+
```shell
|
39
|
+
conda install -c conda-forge numba-cuda
|
40
|
+
```
|
41
|
+
|
42
|
+
## Installation from source
|
31
43
|
|
32
44
|
Install as an editable install:
|
33
45
|
|
@@ -53,3 +65,9 @@ which will show a path like:
|
|
53
65
|
```
|
54
66
|
<path to numba-cuda repo>/numba_cuda/numba/cuda/__init__.py
|
55
67
|
```
|
68
|
+
|
69
|
+
## Contributing Guide
|
70
|
+
|
71
|
+
Review the
|
72
|
+
[CONTRIBUTING.md](https://github.com/NVIDIA/numba-cuda/blob/main/CONTRIBUTING.md)
|
73
|
+
file for information on how to contribute code and issues to the project.
|
@@ -1,6 +1,6 @@
|
|
1
1
|
_numba_cuda_redirector.pth,sha256=cmfMMmV0JPh3yEpl4bGeM9AuXiVVMSo6Z_b7RaQL3XE,30
|
2
2
|
_numba_cuda_redirector.py,sha256=QKJmYICSQvjvph0Zw9OW015MsuKxIF28GPFjR35AXLM,2681
|
3
|
-
numba_cuda/VERSION,sha256=
|
3
|
+
numba_cuda/VERSION,sha256=l6XW5UCmEg0Jw53bZn4Ojiusf8wv_vgTuC4I_WA2W84,6
|
4
4
|
numba_cuda/__init__.py,sha256=atXeUvJKR3JHcAiCFbXCVOJQUHgB1TulmsqSL_9RT3Q,114
|
5
5
|
numba_cuda/_version.py,sha256=jbdUsbR7sVllw0KxQNB0-FMd929CGg3kH2fhHdrlkuc,719
|
6
6
|
numba_cuda/numba/cuda/__init__.py,sha256=idyVHOObC9lTYnp62v7rVprSacRM4d5F6vhXfG5ElTI,621
|
@@ -9,19 +9,20 @@ numba_cuda/numba/cuda/api_util.py,sha256=aQfUV2-4RM_oGVvckMjbMr5e3effOQNX04v1T0O
|
|
9
9
|
numba_cuda/numba/cuda/args.py,sha256=HloHkw_PQal2DT-I70Xf_XbnGObS1jiUgcRrQ85Gq28,1978
|
10
10
|
numba_cuda/numba/cuda/cg.py,sha256=9V1uZqyGOJX1aFd9c6GAPbLSqq83lE8LoP-vxxrKENY,1490
|
11
11
|
numba_cuda/numba/cuda/codegen.py,sha256=ghdYBKZ3Mzk2UlLE64HkrAjb60PN9fibSNkWFRQuj4M,13184
|
12
|
-
numba_cuda/numba/cuda/compiler.py,sha256=
|
12
|
+
numba_cuda/numba/cuda/compiler.py,sha256=aWP_aunOOw8RZsTKf-S3YdH5MDkY6kLN5Xr5B2XgOfk,24214
|
13
13
|
numba_cuda/numba/cuda/cpp_function_wrappers.cu,sha256=iv84_F6Q9kFjV_kclrQz1msh6Dud8mI3qNkswTid7Qc,953
|
14
14
|
numba_cuda/numba/cuda/cuda_fp16.h,sha256=1IC0mdNdkvKbvAe0-f4uYVS7WFrVqOyI1nRUbBiqr6A,126844
|
15
15
|
numba_cuda/numba/cuda/cuda_fp16.hpp,sha256=vJ7NUr2X2tKhAP7ojydAiCoOjVO6n4QGoXD6m9Srrlw,89130
|
16
16
|
numba_cuda/numba/cuda/cuda_paths.py,sha256=C0gA72QLWUMfvXkFpw1WqqaFqfsQ7HM72hQVXG0A7RU,10023
|
17
|
-
numba_cuda/numba/cuda/cudadecl.py,sha256=
|
17
|
+
numba_cuda/numba/cuda/cudadecl.py,sha256=6h_Je6cXmfr4VjBowkr-OOGlsXei-QqGlcjU4Yv-m-4,23438
|
18
18
|
numba_cuda/numba/cuda/cudaimpl.py,sha256=0oHjDwBC4JmfpwS1Fsn1bm5YWVru5vZvvnO414P4TS0,38840
|
19
19
|
numba_cuda/numba/cuda/cudamath.py,sha256=EFNtdzEytAZuwijdRoFGzVKCeal76UzzaNy7wUFQx8I,3978
|
20
|
-
numba_cuda/numba/cuda/
|
20
|
+
numba_cuda/numba/cuda/debuginfo.py,sha256=lMIs7UAOfkqUvD9sx-nNEY8qP9DhWF9X38xnW3yo_Qc,1433
|
21
|
+
numba_cuda/numba/cuda/decorators.py,sha256=MqmbEXVVgIV1G_feYtccKBRTDL0VALWf0LjbrVfJo4s,8041
|
21
22
|
numba_cuda/numba/cuda/descriptor.py,sha256=rNMaurJkjNjIBmHPozDoLC35DMURE0fn_LtnXRmaG_w,985
|
22
23
|
numba_cuda/numba/cuda/device_init.py,sha256=lP79tCsQ0Np9xcbjv_lXcH4JOiVZvV8nwg3INdETxsc,3586
|
23
24
|
numba_cuda/numba/cuda/deviceufunc.py,sha256=yxAH71dpgJWK8okmCJm0FUV6z2AqdThCYOTZspT7z0M,30775
|
24
|
-
numba_cuda/numba/cuda/dispatcher.py,sha256=
|
25
|
+
numba_cuda/numba/cuda/dispatcher.py,sha256=j2nAjlqNAIAoQVCQ4ZQD--hQDsnFLXedlvaXdCMNKEc,44354
|
25
26
|
numba_cuda/numba/cuda/errors.py,sha256=XwWHzCllx0DXU6BQdoRH0m3pznGxnTFOBTVYXMmCfqg,1724
|
26
27
|
numba_cuda/numba/cuda/extending.py,sha256=URsyBYls2te-mgE0yvDY6akvawYCA0blBFfD7Lf9DO4,142
|
27
28
|
numba_cuda/numba/cuda/initialize.py,sha256=TQGHGLQoq4ch4J6CLDcJdGsZzXM-g2kDgdyO1u-Rbhg,546
|
@@ -39,8 +40,8 @@ numba_cuda/numba/cuda/random.py,sha256=khX8iDdde_RTUPWhAqrxZacHRQAorFr7BokPuxRWz
|
|
39
40
|
numba_cuda/numba/cuda/reshape_funcs.cu,sha256=H5UAa-VAvoxW9SQwJO88ZrDXC64nWALW3Ch4cHAAqO4,4325
|
40
41
|
numba_cuda/numba/cuda/simulator_init.py,sha256=W_bPRtmPGOQVuiprbgt7ENnnnELv_LPCeLDIsfsvFZ8,460
|
41
42
|
numba_cuda/numba/cuda/stubs.py,sha256=W3tozv4ganMnfbdFqyPjgQXYeX8GQhwx_xXgv8jk6iM,22270
|
42
|
-
numba_cuda/numba/cuda/target.py,sha256=
|
43
|
-
numba_cuda/numba/cuda/testing.py,sha256=
|
43
|
+
numba_cuda/numba/cuda/target.py,sha256=MWpdHs2K17Lus4e318FNbR533q24MhovGS6Q1ob9x_4,11354
|
44
|
+
numba_cuda/numba/cuda/testing.py,sha256=tG1FBm_gqW4esDxCaecMvCRKvwYEg7Yu2Q60ARNnes0,6873
|
44
45
|
numba_cuda/numba/cuda/types.py,sha256=WVfjcly_VUpG9FfKueiEPzZm2NV8Hg0XAFg3bNzPdVc,1314
|
45
46
|
numba_cuda/numba/cuda/ufuncs.py,sha256=txw27IxG80W1Yo7e-XwL2AMcQo0fMnxMjBIMy-n5pCo,23317
|
46
47
|
numba_cuda/numba/cuda/utils.py,sha256=JId22EI3KkQosW6Dafdaw43qU0xXXO_4JOENLap8klU,630
|
@@ -55,11 +56,11 @@ numba_cuda/numba/cuda/cudadrv/dummyarray.py,sha256=nXRngdr-k3h_BNGQuJUxmp89yGNWx
|
|
55
56
|
numba_cuda/numba/cuda/cudadrv/enums.py,sha256=Wy5dzukTk4TnWCowg_PLceET_v2xEyiWLu9TyH8pXr8,23742
|
56
57
|
numba_cuda/numba/cuda/cudadrv/error.py,sha256=zEIryW6aIy8GG4ypmTliB6RgY4Gy2n8ckz7I6W99LUM,524
|
57
58
|
numba_cuda/numba/cuda/cudadrv/libs.py,sha256=Gk9zQ1CKcsZsWl-_9QneXeP9VH5q5R1I3Cx043UOytk,7240
|
58
|
-
numba_cuda/numba/cuda/cudadrv/linkable_code.py,sha256=
|
59
|
+
numba_cuda/numba/cuda/cudadrv/linkable_code.py,sha256=bWBvnndrzWu24SXm7cilCwNFXShJgNmbMfj1Wzemito,1456
|
59
60
|
numba_cuda/numba/cuda/cudadrv/mappings.py,sha256=-dTPHvAkDjdH6vS5OjgrB71AFuqKO6CRgf7hpOk2wiw,802
|
60
61
|
numba_cuda/numba/cuda/cudadrv/ndarray.py,sha256=HtULWWFyDlgqvrH5459yyPTvU4UbUo2DSdtcNfvbH00,473
|
61
62
|
numba_cuda/numba/cuda/cudadrv/nvrtc.py,sha256=XM9_Vllv7HzH5wZIR2lwFictyX68XDtNbyLkXlL6NTI,11003
|
62
|
-
numba_cuda/numba/cuda/cudadrv/nvvm.py,sha256=
|
63
|
+
numba_cuda/numba/cuda/cudadrv/nvvm.py,sha256=cAoQmZ0bO8i3wPTQq5D0UeMtfnXdGebqYpU4W0kUIEY,24237
|
63
64
|
numba_cuda/numba/cuda/cudadrv/rtapi.py,sha256=WdeUoWzsYNYodx8kMRLVIjnNs0QzwpCihd2Q0AaqItE,226
|
64
65
|
numba_cuda/numba/cuda/cudadrv/runtime.py,sha256=Tj9ACrzQqNmDSO6xfpzw12EsQknSywQ-ZGuWMbDdHnQ,4255
|
65
66
|
numba_cuda/numba/cuda/kernels/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -145,8 +146,8 @@ numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py,sha256=73FCQbNaA
|
|
145
146
|
numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py,sha256=y7cNQZOZJo5Sv16ql3E5QaRksw-U3RkXss9YDcNeiTk,2137
|
146
147
|
numba_cuda/numba/cuda/tests/cudapy/test_datetime.py,sha256=2in1Cq8y9zAFoka7H72wF1D0awEd3n7bv56sUPgoNAQ,3508
|
147
148
|
numba_cuda/numba/cuda/tests/cudapy/test_debug.py,sha256=3MYNiMe75rgBF1T0vsJ7r-nkW5jPvov_tDms9KXo2UU,3449
|
148
|
-
numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py,sha256=
|
149
|
-
numba_cuda/numba/cuda/tests/cudapy/test_device_func.py,sha256=
|
149
|
+
numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py,sha256=jI43jMbPS9Rbr3YI2mZBrDwH9MGjmyVlczv7QxxPoAs,10993
|
150
|
+
numba_cuda/numba/cuda/tests/cudapy/test_device_func.py,sha256=eDVymTQXTzW0WeAgTMDKYtOi1YAM310IUxGp3Y1ICjs,13162
|
150
151
|
numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py,sha256=oX-l_L4H8rME1IolwhAyordSGJ152nnuqGAFdWjfgas,26587
|
151
152
|
numba_cuda/numba/cuda/tests/cudapy/test_enums.py,sha256=0GWiwvZ1FTzSl1FfMxttkWaWrowASfXrSDT8XAR4ZHw,3560
|
152
153
|
numba_cuda/numba/cuda/tests/cudapy/test_errors.py,sha256=jwHbNb2Ro5pbGOPFetmUhI-vG4s36OKCqMJ-lgWxHMY,2620
|
@@ -219,7 +220,7 @@ numba_cuda/numba/cuda/tests/data/warn.cu,sha256=6L-qsXJIxAr_n3hVMAz_EZ5j0skcJAfg
|
|
219
220
|
numba_cuda/numba/cuda/tests/doc_examples/__init__.py,sha256=GdfSq6pRVSOQwmgNi7ZFQ5l0yg4-2gNar_0Rz0buUpM,157
|
220
221
|
numba_cuda/numba/cuda/tests/doc_examples/test_cg.py,sha256=9UQAez1jp3vQ0BIfoRCnGJGP17nznNcon-XFR4grqzQ,2905
|
221
222
|
numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py,sha256=DRzvoE2iCaISJb2lkshBkJyYBEfdpqZLRXG_N9XRaFk,2305
|
222
|
-
numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py,sha256=
|
223
|
+
numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py,sha256=PoHbrTMFk8rewm7XH_8Vv1733sI-YHOzxoBI4nFhuBA,2773
|
223
224
|
numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py,sha256=UH15R0DbMA4iHLmoZ0GtcttGCNctOUif-u2448JMmRo,5177
|
224
225
|
numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py,sha256=hS-X_T7x3-BcBanazmnmGxJE_o1A9b9f_VGk0YlJP4o,6135
|
225
226
|
numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py,sha256=_0snszis_UE7LxU5lw9ReNF19Dh5iV0yRy18mUWNd1c,3491
|
@@ -238,14 +239,14 @@ numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py,sha256=7kJOPHEcrjy_kTA
|
|
238
239
|
numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py,sha256=n0_-xFaw6QqiZbhe55oy7lnEeOwqTvA55p5EUFiTpNw,2006
|
239
240
|
numba_cuda/numba/cuda/tests/nrt/__init__.py,sha256=43EXdiXXRBd6yIcVGMrU9F_EJCD9Uw3mzOP3SB53AEE,260
|
240
241
|
numba_cuda/numba/cuda/tests/nrt/mock_numpy.py,sha256=Cx2DGhm2bJheShP2Ja1w9YLlRTeAMM7u1UYHsPnTzA8,4552
|
241
|
-
numba_cuda/numba/cuda/tests/nrt/test_nrt.py,sha256=
|
242
|
-
numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py,sha256=
|
242
|
+
numba_cuda/numba/cuda/tests/nrt/test_nrt.py,sha256=wByXeagVoxsAu_pmfuYQ7vmeJt82h4VXwCBsDYQfsps,7727
|
243
|
+
numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py,sha256=SnVvTis8YyaqsElRaGQ-34dnWgGavvc2Ovm2xZ_PD3Q,3240
|
243
244
|
numba_cuda/numba/cuda/tests/test_binary_generation/Makefile,sha256=P2WzCc5d64JGq6pJwHEwmKVmJOJxPBtsMTbnuzqYkik,2679
|
244
245
|
numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py,sha256=V0raLZLGSiWbE_K-JluI0CnmNkXbhlMVj-TH7P1OV8E,5014
|
245
246
|
numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu,sha256=cUf-t6ZM9MK_x7X_aKwsrKW1LdR97XcpR-qnYr5faOE,453
|
246
247
|
numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu,sha256=q3oxZziT8KDodeNcEBiWULH6vMrHCWucmJmtrg8C0d0,128
|
247
|
-
numba_cuda-0.
|
248
|
-
numba_cuda-0.
|
249
|
-
numba_cuda-0.
|
250
|
-
numba_cuda-0.
|
251
|
-
numba_cuda-0.
|
248
|
+
numba_cuda-0.6.0.dist-info/LICENSE,sha256=eHeYE-XjASmwbxfsP5AImgfzRwZurZGqH1f6OFwJ4io,1326
|
249
|
+
numba_cuda-0.6.0.dist-info/METADATA,sha256=iNU56EXHsnAcAcwgNXglPh6H47Quz31_-6r9RevpJ_Q,1836
|
250
|
+
numba_cuda-0.6.0.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
|
251
|
+
numba_cuda-0.6.0.dist-info/top_level.txt,sha256=C50SsH-8tXDmt7I0Y3nlJYhS5s6pqWflCPdobe9vx2M,11
|
252
|
+
numba_cuda-0.6.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|