numba-cuda 0.11.0__py3-none-any.whl → 0.12.1__py3-none-any.whl
- numba_cuda/VERSION +1 -1
- numba_cuda/numba/cuda/{cuda_bf16.py → _internal/cuda_bf16.py} +1 -1
- numba_cuda/numba/cuda/api.py +13 -0
- numba_cuda/numba/cuda/bf16.py +112 -0
- numba_cuda/numba/cuda/cg.py +2 -0
- numba_cuda/numba/cuda/codegen.py +8 -0
- numba_cuda/numba/cuda/compiler.py +2 -1
- numba_cuda/numba/cuda/cudadecl.py +6 -1
- numba_cuda/numba/cuda/cudadrv/driver.py +4 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +23 -1
- numba_cuda/numba/cuda/debuginfo.py +27 -0
- numba_cuda/numba/cuda/decorators.py +5 -2
- numba_cuda/numba/cuda/dispatcher.py +2 -2
- numba_cuda/numba/cuda/target.py +10 -1
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +0 -12
- numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +33 -0
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +55 -0
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +49 -23
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +34 -51
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +34 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +17 -0
- numba_cuda/numba/cuda/tests/data/cta_barrier.cu +23 -0
- numba_cuda/numba/cuda/tests/data/include/add.cuh +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +3 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +9 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +48 -1
- {numba_cuda-0.11.0.dist-info → numba_cuda-0.12.1.dist-info}/METADATA +1 -1
- {numba_cuda-0.11.0.dist-info → numba_cuda-0.12.1.dist-info}/RECORD +31 -24
- {numba_cuda-0.11.0.dist-info → numba_cuda-0.12.1.dist-info}/WHEEL +1 -1
- {numba_cuda-0.11.0.dist-info → numba_cuda-0.12.1.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.11.0.dist-info → numba_cuda-0.12.1.dist-info}/top_level.txt +0 -0
numba_cuda/VERSION
CHANGED
@@ -1 +1 @@
-0.11.0
+0.12.1
numba_cuda/numba/cuda/{cuda_bf16.py → _internal/cuda_bf16.py}
RENAMED
@@ -2,7 +2,7 @@
 # Generator Information:
 # Ast_canopy version: 0.3.0
 # Numbast version: 0.3.0
-# Generation command: /home/wangm/numbast/numbast/src/numbast/__main__.py --cfg-path configs/cuda_bf16.yml --output-dir numba_cuda/numba/cuda/
+# Generation command: /home/wangm/numbast/numbast/src/numbast/__main__.py --cfg-path configs/cuda_bf16.yml --output-dir numba_cuda/numba/cuda/_internal
 # Static binding generator parameters: {'cfg_path': 'configs/cuda_bf16.yml', 'output_dir': 'numba_cuda/numba/cuda/', 'entry_point': None, 'retain': None, 'types': None, 'datamodels': None, 'compute_capability': None, 'run_ruff_format': True}
 # Config file path (relative to the path of the generated binding): ../../../../configs/cuda_bf16.yml
 # Cudatoolkit version: (12, 8)
numba_cuda/numba/cuda/api.py
CHANGED
@@ -10,6 +10,7 @@ import numpy as np
 from .cudadrv import devicearray, devices, driver
 from numba.core import config
 from numba.cuda.api_util import prepare_shape_strides_dtype
+from numba.cuda.cudadrv.runtime import get_version

 # NDarray device helper

@@ -95,6 +96,18 @@ def is_float16_supported():
     return True


+def is_bfloat16_supported():
+    """Whether bfloat16 are supported.
+
+    bfloat16 are only supported on devices with compute capability >= 8.0 and cuda version >= 12.0
+    """
+    cuda_version = get_version()
+    return current_context().device.supports_bfloat16 and cuda_version >= (
+        12,
+        0,
+    )
+
+
 @require_context
 def to_device(obj, stream=0, copy=True, to=None):
     """to_device(obj, stream=0, copy=True, to=None)
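The new is_bfloat16_supported() API can gate bfloat16 use from Python. A
minimal sketch, mirroring the added tests (assumes a CUDA-capable runtime):

    from numba import cuda, float32
    from numba.cuda.bf16 import bfloat16

    # True only on compute capability >= 8.0 with CUDA >= 12.0 -- exactly
    # the check implemented above.
    if cuda.is_bfloat16_supported():

        @cuda.jit
        def kernel(out):
            out[0] = float32(bfloat16(1.5))  # construct, then cast back

        out = cuda.device_array((1,), dtype="float32")
        kernel[1, 1](out)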
numba_cuda/numba/cuda/bf16.py
ADDED
@@ -0,0 +1,112 @@
+from numba.cuda._internal.cuda_bf16 import (
+    _type_class___nv_bfloat16,
+    nv_bfloat16 as bfloat16,
+    htrunc,
+    hceil,
+    hfloor,
+    hrint,
+    hsqrt,
+    hrsqrt,
+    hrcp,
+    hlog,
+    hlog2,
+    hlog10,
+    hcos,
+    hsin,
+    hexp,
+    hexp2,
+    hexp10,
+    htanh,
+    htanh_approx,
+)
+from numba.extending import overload
+
+import math
+
+
+def _make_unary(a, func):
+    if isinstance(a, _type_class___nv_bfloat16):
+        return lambda a: func(a)
+
+
+# Bind low-level bindings to math APIs
+@overload(math.trunc, target="cuda")
+def trunc_ol(a):
+    return _make_unary(a, htrunc)
+
+
+@overload(math.ceil, target="cuda")
+def ceil_ol(a):
+    return _make_unary(a, hceil)
+
+
+@overload(math.floor, target="cuda")
+def floor_ol(a):
+    return _make_unary(a, hfloor)
+
+
+@overload(math.sqrt, target="cuda")
+def sqrt_ol(a):
+    return _make_unary(a, hsqrt)
+
+
+@overload(math.log, target="cuda")
+def log_ol(a):
+    return _make_unary(a, hlog)
+
+
+@overload(math.log10, target="cuda")
+def log10_ol(a):
+    return _make_unary(a, hlog10)
+
+
+@overload(math.cos, target="cuda")
+def cos_ol(a):
+    return _make_unary(a, hcos)
+
+
+@overload(math.sin, target="cuda")
+def sin_ol(a):
+    return _make_unary(a, hsin)
+
+
+@overload(math.tanh, target="cuda")
+def tanh_ol(a):
+    return _make_unary(a, htanh)
+
+
+@overload(math.exp, target="cuda")
+def exp_ol(a):
+    return _make_unary(a, hexp)
+
+
+try:
+    from math import exp2
+
+    @overload(exp2, target="cuda")
+    def exp2_ol(a):
+        return _make_unary(a, hexp2)
+except ImportError:
+    pass
+
+
+__all__ = [
+    "bfloat16",
+    "htrunc",
+    "hceil",
+    "hfloor",
+    "hrint",
+    "hsqrt",
+    "hrsqrt",
+    "hrcp",
+    "hlog",
+    "hlog2",
+    "hlog10",
+    "hcos",
+    "hsin",
+    "htanh",
+    "htanh_approx",
+    "hexp",
+    "hexp2",
+    "hexp10",
+]
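These overloads route Python math.* calls on bfloat16 values to the
corresponding h* intrinsics inside CUDA kernels. A sketch of the resulting
usage, adapted from the new test_bfloat16.py below (assumes bfloat16
support):

    import math

    from numba import cuda, float32
    from numba.cuda.bf16 import bfloat16

    @cuda.jit
    def kernel(out):
        x = bfloat16(2.0)
        # Dispatches to hsqrt via @overload(math.sqrt, target="cuda").
        out[0] = float32(math.sqrt(x))

    out = cuda.device_array((1,), dtype="float32")
    kernel[1, 1](out)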
numba_cuda/numba/cuda/cg.py
CHANGED
@@ -23,6 +23,7 @@ def _this_grid(typingctx):
     sig = signature(grid_group)

     def codegen(context, builder, sig, args):
+        context.active_code_library.use_cooperative = True
         one = context.get_constant(types.int32, 1)
         mod = builder.module
         return builder.call(
@@ -45,6 +46,7 @@ def _grid_group_sync(typingctx, group):
     sig = signature(types.int32, group)

     def codegen(context, builder, sig, args):
+        context.active_code_library.use_cooperative = True
         flags = context.get_constant(types.int32, 0)
         mod = builder.module
         return builder.call(
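Lowering this_grid() or a grid sync now marks the active code library as
cooperative, and the flag propagates to the compiled kernel (see the
dispatcher.py change below). A sketch, assuming compute capability 6.0+ and
cudadevrt available:

    from numba import cuda

    @cuda.jit
    def kernel():
        grid = cuda.cg.this_grid()
        grid.sync()

    kernel[2, 64]()
    # The codegen hook set use_cooperative on the library, so the compiled
    # overload reports that it needs a cooperative launch:
    assert kernel.overloads[()].cooperative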
numba_cuda/numba/cuda/codegen.py
CHANGED
@@ -70,6 +70,8 @@ class ExternalCodeLibrary(CodeLibrary):
         self._setup_functions = []
         self._teardown_functions = []

+        self.use_cooperative = False
+
     @property
     def modules(self):
         # There are no LLVM IR modules in an ExternalCodeLibrary
@@ -181,6 +183,8 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
         self._nvvm_options = nvvm_options
         self._entry_name = entry_name

+        self.use_cooperative = False
+
     @property
     def llvm_strs(self):
         if self._llvm_strs is None:
@@ -352,6 +356,7 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
         self._linking_files.update(library._linking_files)
         self._setup_functions.extend(library._setup_functions)
         self._teardown_functions.extend(library._teardown_functions)
+        self.use_cooperative |= library.use_cooperative

     def add_linking_file(self, path_or_obj):
         if isinstance(path_or_obj, LinkableCode):
@@ -442,6 +447,7 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
             nvvm_options=self._nvvm_options,
             needs_cudadevrt=self.needs_cudadevrt,
             nrt=nrt,
+            use_cooperative=self.use_cooperative,
         )

     @classmethod
@@ -458,6 +464,7 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
         nvvm_options,
         needs_cudadevrt,
         nrt,
+        use_cooperative,
     ):
         """
         Rebuild an instance.
@@ -472,6 +479,7 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
         instance._max_registers = max_registers
         instance._nvvm_options = nvvm_options
         instance.needs_cudadevrt = needs_cudadevrt
+        instance.use_cooperative = use_cooperative

         instance._finalized = True
         if nrt:
numba_cuda/numba/cuda/compiler.py
CHANGED
@@ -797,7 +797,7 @@ def compile_ptx_for_current_device(
     )


-def declare_device_function(name, restype, argtypes, link):
+def declare_device_function(name, restype, argtypes, link, use_cooperative):
     from .descriptor import cuda_target

     typingctx = cuda_target.typing_context
@@ -816,6 +816,7 @@ def declare_device_function(name, restype, argtypes, link):
     lib = ExternalCodeLibrary(f"{name}_externals", targetctx.codegen())
     for file in link:
         lib.add_linking_file(file)
+    lib.use_cooperative = use_cooperative

     # ExternalFunctionDescriptor provides a lowering implementation for calling
     # external functions
numba_cuda/numba/cuda/cudadecl.py
CHANGED
@@ -423,7 +423,11 @@ _genfp16_binary_operator(operator.itruediv)
 def _resolve_wrapped_unary(fname):
     link = tuple()
     decl = declare_device_function(
-        f"__numba_wrapper_{fname}",
+        f"__numba_wrapper_{fname}",
+        types.float16,
+        (types.float16,),
+        link,
+        use_cooperative=False,
     )
     return types.Function(decl)

@@ -438,6 +442,7 @@ def _resolve_wrapped_binary(fname):
             types.float16,
         ),
         link,
+        use_cooperative=False,
     )
     return types.Function(decl)

numba_cuda/numba/cuda/cudadrv/driver.py
CHANGED
@@ -714,6 +714,10 @@ class Device(object):
     def supports_float16(self):
         return self.compute_capability >= (5, 3)

+    @property
+    def supports_bfloat16(self):
+        return self.compute_capability >= (8, 0)
+

 def met_requirement_for_device(device):
     if device.compute_capability < MIN_REQUIRED_CC:
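The device-side half of the bfloat16 check is now a property; a quick sketch
of inspecting it (cuda.is_bfloat16_supported() additionally requires
CUDA >= 12.0):

    from numba import cuda

    dev = cuda.get_current_device()
    print(dev.compute_capability, dev.supports_bfloat16)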
numba_cuda/numba/cuda/cudadrv/nvrtc.py
CHANGED
@@ -6,13 +6,21 @@ from numba.cuda.cudadrv.error import (
     NvrtcCompilationError,
     NvrtcSupportError,
 )
+from numba import config
 from numba.cuda.cuda_paths import get_cuda_paths
+from numba.cuda.utils import _readenv

 import functools
 import os
 import threading
 import warnings

+NVRTC_EXTRA_SEARCH_PATHS = _readenv(
+    "NUMBA_CUDA_NVRTC_EXTRA_SEARCH_PATHS", str, ""
+) or getattr(config, "NUMBA_CUDA_NVRTC_EXTRA_SEARCH_PATHS", "")
+if not hasattr(config, "NUMBA_CUDA_NVRTC_EXTRA_SEARCH_PATHS"):
+    config.CUDA_NVRTC_EXTRA_SEARCH_PATHS = NVRTC_EXTRA_SEARCH_PATHS
+
 # Opaque handle for compilation unit
 nvrtc_program = c_void_p

@@ -383,10 +391,24 @@ def compile(src, name, cc, ltoir=False):
     else:
         numba_include = f"-I{os.path.join(numba_cuda_path, 'include', '12')}"

+    if config.CUDA_NVRTC_EXTRA_SEARCH_PATHS:
+        extra_search_paths = config.CUDA_NVRTC_EXTRA_SEARCH_PATHS.split(":")
+        extra_includes = [f"-I{p}" for p in extra_search_paths]
+    else:
+        extra_includes = []
+
     nrt_path = os.path.join(numba_cuda_path, "runtime")
     nrt_include = f"-I{nrt_path}"

-    options = [
+    options = [
+        arch,
+        numba_include,
+        *cuda_include,
+        nrt_include,
+        *extra_includes,
+        "-rdc",
+        "true",
+    ]

     if ltoir:
         options.append("-dlto")
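The extra search paths can come from the environment or be set on the config
object at runtime; the directories below are placeholders (colon-separated,
matching the split(":") above):

    # Before importing numba:
    #   export NUMBA_CUDA_NVRTC_EXTRA_SEARCH_PATHS="/opt/myheaders:/usr/local/include"
    # or at runtime:
    from numba import config

    config.CUDA_NVRTC_EXTRA_SEARCH_PATHS = "/opt/myheaders:/usr/local/include"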
numba_cuda/numba/cuda/debuginfo.py
CHANGED
@@ -59,6 +59,33 @@ class CUDADIBuilder(DIBuilder):
         # For other cases, use upstream Numba implementation
         return super()._var_type(lltype, size, datamodel=datamodel)

+    def _di_subroutine_type(self, line, function, argmap):
+        # The function call conv needs encoding.
+        llfunc = function
+        md = []
+
+        # Create metadata type for return value
+        if len(llfunc.args) > 0:
+            lltype = llfunc.args[0].type
+            size = self.cgctx.get_abi_sizeof(lltype)
+            mdtype = self._var_type(lltype, size, datamodel=None)
+            md.append(mdtype)
+
+        # Create metadata type for arguments
+        for idx, (name, nbtype) in enumerate(argmap.items()):
+            datamodel = self.cgctx.data_model_manager[nbtype]
+            lltype = self.cgctx.get_value_type(nbtype)
+            size = self.cgctx.get_abi_sizeof(lltype)
+            mdtype = self._var_type(lltype, size, datamodel=datamodel)
+            md.append(mdtype)
+
+        return self.module.add_debug_info(
+            "DISubroutineType",
+            {
+                "types": self.module.add_metadata(md),
+            },
+        )
+
     def mark_variable(
         self,
         builder,
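Together with the target.py change below, kernel arguments keep their plain
names and the subprogram gains a DISubroutineType describing the return
value and argument types. A sketch for inspecting the effect in the emitted
IR (debug=True, opt=False, as in the new test):

    from numba import cuda, types

    @cuda.jit("void(int32)", debug=True, opt=False)
    def f(x):
        z = x  # noqa: F841

    # The IR should name the argument %"x" (no "arg." prefix) and carry
    # the new debug metadata.
    print(f.inspect_llvm((types.int32,)))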
numba_cuda/numba/cuda/decorators.py
CHANGED
@@ -229,7 +229,7 @@ def jit(
     return disp


-def declare_device(name, sig, link=None):
+def declare_device(name, sig, link=None, use_cooperative=False):
    """
     Declare the signature of a foreign function. Returns a descriptor that can
     be used to call the function from a Python kernel.
@@ -238,6 +238,7 @@ def declare_device(name, sig, link=None):
     :type name: str
     :param sig: The Numba signature of the function.
     :param link: External code to link when calling the function.
+    :param use_cooperative: External code requires cooperative launch.
     """
     if link is None:
         link = tuple()
@@ -250,6 +251,8 @@ def declare_device(name, sig, link=None):
         msg = "Return type must be provided for device declarations"
         raise TypeError(msg)

-    template = declare_device_function(
+    template = declare_device_function(
+        name, restype, argtypes, link, use_cooperative
+    )

     return template.key
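declare_device() can now flag external code that performs grid-wide
synchronization, so kernels linking it are launched cooperatively. A sketch
modelled on the new test in test_cooperative_groups.py (the .cu path is
illustrative):

    from numba import cuda, int32
    from numba.core.typing import signature
    from numba.types import CPointer

    sig = signature(CPointer(int32))
    cta_barrier = cuda.declare_device(
        "cta_barrier", sig=sig, link=["cta_barrier.cu"], use_cooperative=True
    )

    @cuda.jit
    def kernel():
        cta_barrier()

    kernel[1024, 32]()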
numba_cuda/numba/cuda/dispatcher.py
CHANGED
@@ -151,8 +151,8 @@ class _Kernel(serialize.ReduceMixin):

         asm = lib.get_asm_str()

-        #
-        self.cooperative =
+        # The code library contains functions that require cooperative launch.
+        self.cooperative = lib.use_cooperative
         # We need to link against cudadevrt if grid sync is being used.
         if self.cooperative:
             lib.needs_cudadevrt = True
numba_cuda/numba/cuda/target.py
CHANGED
@@ -290,7 +290,16 @@ class CUDATargetContext(BaseContext):


 class CUDACallConv(MinimalCallConv):
-
+    def decorate_function(self, fn, args, fe_argtypes, noalias=False):
+        """
+        Set names and attributes of function arguments.
+        """
+        assert not noalias
+        arginfo = self._get_arg_packer(fe_argtypes)
+        # Do not prefix "arg." on argument name, so that nvvm compiler
+        # can track debug info of argument more accurately
+        arginfo.assign_names(self.get_arguments(fn), args)
+        fn.args[0].name = ".ret"


 class CUDACABICallConv(BaseCallConv):
numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py
CHANGED
@@ -203,18 +203,6 @@ def simple_usecase_kernel(r, x):
 simple_usecase_caller = CUDAUseCase(simple_usecase_kernel)


-# Usecase with cooperative groups
-
-
-@cuda.jit(cache=True)
-def cg_usecase_kernel(r, x):
-    grid = cuda.cg.this_grid()
-    grid.sync()
-
-
-cg_usecase = CUDAUseCase(cg_usecase_kernel)
-
-
 class _TestModule(CUDATestCase):
     """
     Tests for functionality of this module's functions.
numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py
ADDED
@@ -0,0 +1,33 @@
+from numba import cuda
+from numba.cuda.testing import CUDATestCase
+import sys
+
+from numba.cuda.tests.cudapy.cache_usecases import CUDAUseCase
+
+
+# Usecase with cooperative groups
+
+
+@cuda.jit(cache=True)
+def cg_usecase_kernel(r, x):
+    grid = cuda.cg.this_grid()
+    grid.sync()
+
+
+cg_usecase = CUDAUseCase(cg_usecase_kernel)
+
+
+class _TestModule(CUDATestCase):
+    """
+    Tests for functionality of this module's functions.
+    Note this does not define any "test_*" method, instead check_module()
+    should be called by hand.
+    """
+
+    def check_module(self, mod):
+        mod.cg_usecase(0)
+
+
+def self_test():
+    mod = sys.modules[__name__]
+    _TestModule().check_module(mod)
numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py
ADDED
@@ -0,0 +1,55 @@
+from numba import cuda, float32
+from numba.cuda.bf16 import bfloat16
+from numba.cuda.testing import CUDATestCase
+
+import math
+
+
+class TestBfloat16HighLevelBindings(CUDATestCase):
+    def skip_unsupported(self):
+        if not cuda.is_bfloat16_supported():
+            self.skipTest(
+                "bfloat16 requires compute capability 8.0+ and CUDA version>= 12.0"
+            )
+
+    def test_use_type_in_kernel(self):
+        self.skip_unsupported()
+
+        @cuda.jit
+        def kernel():
+            bfloat16(3.14)
+
+        kernel[1, 1]()
+
+    def test_math_bindings(self):
+        self.skip_unsupported()
+        functions = [
+            math.trunc,
+            math.ceil,
+            math.floor,
+            math.sqrt,
+            math.log,
+            math.log10,
+            math.cos,
+            math.sin,
+            math.tanh,
+            math.exp,
+            math.exp2,
+        ]
+
+        for f in functions:
+            with self.subTest(func=f):
+
+                @cuda.jit
+                def kernel(arr):
+                    x = bfloat16(3.14)
+                    y = f(x)
+                    arr[0] = float32(y)
+
+                arr = cuda.device_array((1,), dtype="float32")
+                kernel[1, 1](arr)
+
+                if f in (math.exp, math.exp2):
+                    self.assertAlmostEqual(arr[0], f(3.14), delta=1e-1)
+                else:
+                    self.assertAlmostEqual(arr[0], f(3.14), delta=1e-2)
numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py
CHANGED
@@ -5,7 +5,7 @@ import numpy as np
 from numba import int16, int32, int64, uint16, uint32, uint64, float32, float64
 from numba.types import float16

-from numba.cuda.cuda_bf16 import (
+from numba.cuda._internal.cuda_bf16 import (
     nv_bfloat16,
     htrunc,
     hceil,
@@ -22,21 +22,23 @@ from numba.cuda.cuda_bf16 import (
     hexp,
     hexp2,
     hexp10,
+    htanh,
+    htanh_approx,
 )

-from numba.cuda.cudadrv.runtime import get_version
-
-cuda_version = get_version()
-
 dtypes = [int16, int32, int64, uint16, uint32, uint64, float32]


-@unittest.skipIf(
-    (cuda.get_current_device().compute_capability < (8, 0)),
-    "bfloat16 requires compute capability 8.0+",
-)
 class Bfloat16Test(CUDATestCase):
+    def skip_unsupported(self):
+        if not cuda.is_bfloat16_supported():
+            self.skipTest(
+                "bfloat16 requires compute capability 8.0+ and CUDA version>= 12.0"
+            )
+
     def test_ctor(self):
+        self.skip_unsupported()
+
         @cuda.jit
         def simple_kernel():
             a = nv_bfloat16(float64(1.0))  # noqa: F841
@@ -47,18 +49,13 @@ class Bfloat16Test(CUDATestCase):
             f = nv_bfloat16(uint16(6))  # noqa: F841
             g = nv_bfloat16(uint32(7))  # noqa: F841
             h = nv_bfloat16(uint64(8))  # noqa: F841
+            i = nv_bfloat16(float16(9))  # noqa: F841

         simple_kernel[1, 1]()

-        if cuda_version >= (12, 0):
-
-            @cuda.jit
-            def simple_kernel_fp16():
-                i = nv_bfloat16(float16(9))  # noqa: F841
-
-            simple_kernel_fp16[1, 1]()
-
     def test_casts(self):
+        self.skip_unsupported()
+
         @cuda.jit
         def simple_kernel(b, c, d, e, f, g, h):
             a = nv_bfloat16(3.14)
@@ -90,6 +87,7 @@ class Bfloat16Test(CUDATestCase):
         assert h[0] == 3

     def test_ctor_cast_loop(self):
+        self.skip_unsupported()
         for dtype in dtypes:
             with self.subTest(dtype=dtype):

@@ -106,6 +104,8 @@ class Bfloat16Test(CUDATestCase):
         assert a[0] == 3

     def test_arithmetic(self):
+        self.skip_unsupported()
+
         @cuda.jit
         def simple_kernel(arith, logic):
             # Binary Arithmetic Operators
@@ -175,6 +175,8 @@ class Bfloat16Test(CUDATestCase):
         )

     def test_math_func(self):
+        self.skip_unsupported()
+
         @cuda.jit
         def simple_kernel(a):
             x = nv_bfloat16(3.14)
@@ -191,16 +193,18 @@ class Bfloat16Test(CUDATestCase):
             a[9] = float32(hlog10(x))
             a[10] = float32(hcos(x))
             a[11] = float32(hsin(x))
-            a[12] = float32(
-            a[13] = float32(
-            a[14] = float32(
+            a[12] = float32(htanh(x))
+            a[13] = float32(htanh_approx(x))
+            a[14] = float32(hexp(x))
+            a[15] = float32(hexp2(x))
+            a[16] = float32(hexp10(x))

-        a = np.zeros(
+        a = np.zeros(17, dtype=np.float32)
         simple_kernel[1, 1](a)

         x = 3.14
         np.testing.assert_allclose(
-            a[:
+            a[:14],
             [
                 np.trunc(x),
                 np.ceil(x),
@@ -214,15 +218,19 @@ class Bfloat16Test(CUDATestCase):
                 np.log10(x),
                 np.cos(x),
                 np.sin(x),
+                np.tanh(x),
+                np.tanh(x),
             ],
             atol=1e-2,
         )

         np.testing.assert_allclose(
-            a[
+            a[14:], [np.exp(x), np.exp2(x), np.power(10, x)], atol=1e2
         )

     def test_check_bfloat16_type(self):
+        self.skip_unsupported()
+
         @cuda.jit
         def kernel(arr):
             x = nv_bfloat16(3.14)
@@ -237,6 +245,8 @@ class Bfloat16Test(CUDATestCase):
         np.testing.assert_allclose(arr, [3.14], atol=1e-2)

     def test_use_within_device_func(self):
+        self.skip_unsupported()
+
         @cuda.jit(device=True)
         def add_bf16(a, b):
             return a + b
@@ -252,6 +262,22 @@ class Bfloat16Test(CUDATestCase):

         np.testing.assert_allclose(arr, [8], atol=1e-2)

+    def test_use_binding_inside_dfunc(self):
+        @cuda.jit(device=True)
+        def f(arr):
+            pi = nv_bfloat16(3.14)
+            three = htrunc(pi)
+            arr[0] = float32(three)
+
+        @cuda.jit
+        def kernel(arr):
+            f(arr)
+
+        arr = np.zeros(1, np.float32)
+        kernel[1, 1](arr)
+
+        np.testing.assert_allclose(arr, [3], atol=1e-2)
+

 if __name__ == "__main__":
     unittest.main()
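The low-level intrinsics remain directly callable (re-exported from
numba.cuda.bf16), without going through the math overloads; a sketch taken
almost verbatim from the tests above:

    import numpy as np

    from numba import cuda, float32
    from numba.cuda.bf16 import bfloat16, htrunc

    @cuda.jit
    def kernel(arr):
        pi = bfloat16(3.14)
        arr[0] = float32(htrunc(pi))

    arr = np.zeros(1, np.float32)
    kernel[1, 1](arr)
    np.testing.assert_allclose(arr, [3], atol=1e-2)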
numba_cuda/numba/cuda/tests/cudapy/test_caching.py
CHANGED
@@ -1,8 +1,6 @@
 import multiprocessing
 import os
 import shutil
-import subprocess
-import sys
 import unittest
 import warnings

@@ -163,55 +161,6 @@ class CUDACachingTest(SerialMixin, DispatcherCacheUsecasesTest):
         f = mod.renamed_function2
         self.assertPreciseEqual(f(2), 8)

-    @skip_unless_cc_60
-    @skip_if_cudadevrt_missing
-    @skip_if_mvc_enabled("CG not supported with MVC")
-    def test_cache_cg(self):
-        # Functions using cooperative groups should be cacheable. See Issue
-        # #8888: https://github.com/numba/numba/issues/8888
-        self.check_pycache(0)
-        mod = self.import_module()
-        self.check_pycache(0)
-
-        mod.cg_usecase(0)
-        self.check_pycache(2)  # 1 index, 1 data
-
-        # Check the code runs ok from another process
-        self.run_in_separate_process()
-
-    @skip_unless_cc_60
-    @skip_if_cudadevrt_missing
-    @skip_if_mvc_enabled("CG not supported with MVC")
-    def test_cache_cg_clean_run(self):
-        # See Issue #9432: https://github.com/numba/numba/issues/9432
-        # If a cached function using CG sync was the first thing to compile,
-        # the compile would fail.
-        self.check_pycache(0)
-
-        # This logic is modelled on run_in_separate_process(), but executes the
-        # CG usecase directly in the subprocess.
-        code = """if 1:
-            import sys
-
-            sys.path.insert(0, %(tempdir)r)
-            mod = __import__(%(modname)r)
-            mod.cg_usecase(0)
-            """ % dict(tempdir=self.tempdir, modname=self.modname)
-
-        popen = subprocess.Popen(
-            [sys.executable, "-c", code],
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
-        )
-        out, err = popen.communicate(timeout=60)
-        if popen.returncode != 0:
-            raise AssertionError(
-                "process failed with code %s: \n"
-                "stdout follows\n%s\n"
-                "stderr follows\n%s\n"
-                % (popen.returncode, out.decode(), err.decode()),
-            )
-
     def _test_pycache_fallback(self):
         """
         With a disabled __pycache__, test there is a working fallback
@@ -275,6 +224,40 @@ class CUDACachingTest(SerialMixin, DispatcherCacheUsecasesTest):
             pass


+@skip_on_cudasim("Simulator does not implement caching")
+class CUDACooperativeGroupTest(SerialMixin, DispatcherCacheUsecasesTest):
+    # See Issue #9432: https://github.com/numba/numba/issues/9432
+    # If a cached function using CG sync was the first thing to compile,
+    # the compile would fail.
+    here = os.path.dirname(__file__)
+    usecases_file = os.path.join(here, "cg_cache_usecases.py")
+    modname = "cuda_cooperative_caching_test_fodder"
+
+    def setUp(self):
+        DispatcherCacheUsecasesTest.setUp(self)
+        CUDATestCase.setUp(self)
+
+    def tearDown(self):
+        CUDATestCase.tearDown(self)
+        DispatcherCacheUsecasesTest.tearDown(self)
+
+    @skip_unless_cc_60
+    @skip_if_cudadevrt_missing
+    @skip_if_mvc_enabled("CG not supported with MVC")
+    def test_cache_cg(self):
+        # Functions using cooperative groups should be cacheable. See Issue
+        # #8888: https://github.com/numba/numba/issues/8888
+        self.check_pycache(0)
+        mod = self.import_module()
+        self.check_pycache(0)
+
+        mod.cg_usecase(0)
+        self.check_pycache(2)  # 1 index, 1 data
+
+        # Check the code runs ok from another process
+        self.run_in_separate_process()
+
+
 @skip_on_cudasim("Simulator does not implement caching")
 class CUDAAndCPUCachingTest(SerialMixin, DispatcherCacheUsecasesTest):
     here = os.path.dirname(__file__)
numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py
CHANGED
@@ -1,8 +1,13 @@
 from __future__ import print_function

+import os
+
+import cffi
+
 import numpy as np

 from numba import config, cuda, int32
+from numba.types import CPointer
 from numba.cuda.testing import (
     unittest,
     CUDATestCase,
@@ -11,6 +16,9 @@ from numba.cuda.testing import (
     skip_if_cudadevrt_missing,
     skip_if_mvc_enabled,
 )
+from numba.core.typing import signature
+
+ffi = cffi.FFI()


 @cuda.jit
@@ -149,6 +157,32 @@ class TestCudaCooperativeGroups(CUDATestCase):
         self.assertEqual(blocks1d, blocks2d)
         self.assertEqual(blocks1d, blocks3d)

+    @skip_unless_cc_60
+    def test_external_cooperative_func(self):
+        cudapy_test_path = os.path.dirname(__file__)
+        tests_path = os.path.dirname(cudapy_test_path)
+        data_path = os.path.join(tests_path, "data")
+        src = os.path.join(data_path, "cta_barrier.cu")
+
+        sig = signature(
+            CPointer(int32),
+        )
+        cta_barrier = cuda.declare_device(
+            "cta_barrier", sig=sig, link=[src], use_cooperative=True
+        )
+
+        @cuda.jit
+        def kernel():
+            cta_barrier()
+
+        block_size = 32
+        grid_size = 1024
+
+        kernel[grid_size, block_size]()
+
+        overload = kernel.overloads[()]
+        self.assertTrue(overload.cooperative)
+

 if __name__ == "__main__":
     unittest.main()
numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py
CHANGED
@@ -310,6 +310,23 @@ class TestCudaDebugInfo(CUDATestCase):
         with captured_stdout():
             self._test_kernel_args_types()

+    def test_kernel_args_names(self):
+        sig = (types.int32,)
+
+        @cuda.jit("void(int32)", debug=True, opt=False)
+        def f(x):
+            z = x  # noqa: F841
+
+        llvm_ir = f.inspect_llvm(sig)
+
+        # Verify argument name is not prefixed with "arg."
+        pat = r"define void @.*\(i32 %\"x\"\)"
+        match = re.compile(pat).search(llvm_ir)
+        self.assertIsNotNone(match, msg=llvm_ir)
+        pat = r"define void @.*\(i32 %\"arg\.x\"\)"
+        match = re.compile(pat).search(llvm_ir)
+        self.assertIsNone(match, msg=llvm_ir)
+
     def test_llvm_dbg_value(self):
         sig = (types.int32, types.int32)

numba_cuda/numba/cuda/tests/data/cta_barrier.cu
ADDED
@@ -0,0 +1,23 @@
+#include <cooperative_groups.h>
+#include <cuda/barrier>
+
+namespace cg = cooperative_groups;
+
+__device__ void _wait_on_tile(cuda::barrier<cuda::thread_scope_block> &tile)
+{
+    auto token = tile.arrive();
+    tile.wait(std::move(token));
+}
+
+extern "C"
+__device__ int cta_barrier(int *ret) {
+    auto cta = cg::this_thread_block();
+    cg::thread_block_tile<32> tile = cg::tiled_partition<32>(cta);
+    __shared__ cuda::barrier<cuda::thread_scope_block> barrier;
+    if (threadIdx.x == 0) {
+        init(&barrier, blockDim.x);
+    }
+
+    _wait_on_tile(barrier);
+    return 0;
+}
numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py
CHANGED
@@ -3,7 +3,7 @@

 import unittest
 from numba.cuda.testing import CUDATestCase, skip_on_cudasim
-from numba.tests.support import skip_unless_cffi
+from numba.tests.support import skip_unless_cffi, override_config


 @skip_unless_cffi
@@ -85,6 +85,53 @@ class TestFFI(CUDATestCase):
         actual = r[()]
         np.testing.assert_allclose(expected, actual)

+    def test_ex_extra_includes(self):
+        import numpy as np
+        from numba import cuda, config
+        import os
+
+        basedir = os.path.dirname(os.path.abspath(__file__))
+        mul_dir = os.path.join(basedir, "ffi", "include")
+        saxpy_cu = os.path.join(basedir, "ffi", "saxpy.cu")
+
+        testdir = os.path.dirname(basedir)
+        add_dir = os.path.join(testdir, "data", "include")
+
+        includedir = ":".join([mul_dir, add_dir])
+        with override_config("CUDA_NVRTC_EXTRA_SEARCH_PATHS", includedir):
+            # magictoken.ex_extra_search_paths.begin
+            from numba import config
+
+            includedir = ":".join([mul_dir, add_dir])
+            config.CUDA_NVRTC_EXTRA_SEARCH_PATHS = includedir
+            # magictoken.ex_extra_search_paths.end
+
+            # magictoken.ex_extra_search_paths_kernel.begin
+            sig = "float32(float32, float32, float32)"
+            saxpy = cuda.declare_device("saxpy", sig=sig, link=saxpy_cu)
+
+            @cuda.jit
+            def vector_saxpy(a, x, y, res):
+                i = cuda.grid(1)
+                if i < len(res):
+                    res[i] = saxpy(a, x[i], y[i])
+
+            # magictoken.ex_extra_search_paths_kernel.end
+
+            size = 10_000
+            a = 3.0
+            X = np.ones((size,), dtype="float32")
+            Y = np.ones((size,), dtype="float32")
+            R = np.zeros((size,), dtype="float32")
+
+            block_size = 32
+            num_blocks = (size // block_size) + 1
+
+            vector_saxpy[num_blocks, block_size](a, X, Y, R)
+
+            expected = a * X + Y
+            np.testing.assert_equal(R, expected)
+

 if __name__ == "__main__":
     unittest.main()
{numba_cuda-0.11.0.dist-info → numba_cuda-0.12.1.dist-info}/RECORD
RENAMED
@@ -1,27 +1,27 @@
 _numba_cuda_redirector.pth,sha256=cmfMMmV0JPh3yEpl4bGeM9AuXiVVMSo6Z_b7RaQL3XE,30
 _numba_cuda_redirector.py,sha256=n_r8MYbu5-vcXMnLJW147k8DnFXXvgb7nPIXnlXwTyQ,2659
-numba_cuda/VERSION,sha256=
+numba_cuda/VERSION,sha256=9u5pvxxLJ6JCJmzLWutKqMgwY0W56-T_czW4yUBFK4E,7
 numba_cuda/__init__.py,sha256=atXeUvJKR3JHcAiCFbXCVOJQUHgB1TulmsqSL_9RT3Q,114
 numba_cuda/_version.py,sha256=nzrrJXi85d18m6SPdsPsetJNClDETkmF1MrEhGLYDBs,734
 numba_cuda/numba/cuda/__init__.py,sha256=3siqMXEKqa9ezQ8RxPC3KMdebUjgJt-EKxxV4CX9818,607
-numba_cuda/numba/cuda/api.py,sha256=
+numba_cuda/numba/cuda/api.py,sha256=mkbZBcBfm819kCywQbH8jAvUex2m4pYTcFD-LE-tXsQ,17638
 numba_cuda/numba/cuda/api_util.py,sha256=jK8oUD3zf_D5IX7vbjc3uY_5kmOxwgEqO2m_lDHdWfM,861
 numba_cuda/numba/cuda/args.py,sha256=UlTHTJpwPeCtnW0Bb-Wetm5UO9TPR-PCgIt5ys8b8tQ,1894
-numba_cuda/numba/cuda/
-numba_cuda/numba/cuda/
-numba_cuda/numba/cuda/
+numba_cuda/numba/cuda/bf16.py,sha256=PXuitxHhPMjnti3g9IOSoL90ofGgVRcDfqFg7AqCXpU,1778
+numba_cuda/numba/cuda/cg.py,sha256=n-sBj05ut6U_GgFIq-PTCjPad4nXWAc0GVg_J9xD_Pc,1602
+numba_cuda/numba/cuda/codegen.py,sha256=vZtLahHSLYzRpQ3GSbmMm5qYp4FS5mAlzGgSgJbaoz0,16709
+numba_cuda/numba/cuda/compiler.py,sha256=aZwEVP8KXCIyccSw4vJyG6Qaai9oXsFuBAo_Ghwwai4,25607
 numba_cuda/numba/cuda/cpp_function_wrappers.cu,sha256=8lUPmU6FURxphzEqkPLZRPYBCEK_wmDtHq2voPkckfs,950
-numba_cuda/numba/cuda/cuda_bf16.py,sha256=RfnWMV2_zSAW9FLN4JqfW6GfmWR8ZVO16e9Bw3jZnto,152203
 numba_cuda/numba/cuda/cuda_paths.py,sha256=kMIJ_1yV2qtcKEM5rCgSDJ3Gz7bgxbfAWh54E5cDndg,15872
-numba_cuda/numba/cuda/cudadecl.py,sha256=
+numba_cuda/numba/cuda/cudadecl.py,sha256=_TXMu8SIT2hIhsPI0n05wuShtzp8NcPX88NH5y7xauU,22909
 numba_cuda/numba/cuda/cudaimpl.py,sha256=q6CPqD8ZtJvY8JlpMEN--d6003_FIHoHLBqNP2McNyM,39274
 numba_cuda/numba/cuda/cudamath.py,sha256=wbGjlyGVwcUAoQjgXIaAaasLdVuDSKHkf6KyID5IYBw,3979
-numba_cuda/numba/cuda/debuginfo.py,sha256=
-numba_cuda/numba/cuda/decorators.py,sha256=
+numba_cuda/numba/cuda/debuginfo.py,sha256=5tCw_IEeZfoD6CtFpA_yUGdrq25Q9mFjfxxrudH_VFg,5476
+numba_cuda/numba/cuda/decorators.py,sha256=bR8yOAIC68lhm8mSMU-DUt1qFrEogbmSAtzAI4MoToc,9608
 numba_cuda/numba/cuda/descriptor.py,sha256=t1rSVJSCAlVACC5_Un3FQ7iubdTTBe-euqz88cvs2tI,985
 numba_cuda/numba/cuda/device_init.py,sha256=Rtwd6hQMHMLMkj6MXtndbWYFJfkIaRe0MwOIJF2nzhU,3449
 numba_cuda/numba/cuda/deviceufunc.py,sha256=zj9BbLiZD-dPttHew4olw8ANgR2nXnXEE9qjCeGLrQI,30731
-numba_cuda/numba/cuda/dispatcher.py,sha256=
+numba_cuda/numba/cuda/dispatcher.py,sha256=cLXD2pnsU7k-bN5clfjuWqifFCr7LfECKtK7YeeHwis,43162
 numba_cuda/numba/cuda/errors.py,sha256=WRso1Q_jCoWP5yrDBMhihRhhVtVo1-7KdN8QVE9j46o,1712
 numba_cuda/numba/cuda/extending.py,sha256=VwuU5F0AQFlJsqaiwoWk-6Itihew1FsjVT_BVjhY8Us,2278
 numba_cuda/numba/cuda/initialize.py,sha256=0SnpjccQEYiWITIyfAJx833H1yhYFFDY42EpnwYyMn8,487
@@ -41,17 +41,18 @@ numba_cuda/numba/cuda/random.py,sha256=V30KaFdkuDyjxoP14awz-KkY3lRIXqIZuuH27UotI
 numba_cuda/numba/cuda/reshape_funcs.cu,sha256=frw1uoeMSYlkPC38LiKE8Tz2P70X2e4UZGyLKkaPzho,4326
 numba_cuda/numba/cuda/simulator_init.py,sha256=Hvzty6NJp1SeKspyb-b887xpeNLMMI0x9aPmV--X77E,450
 numba_cuda/numba/cuda/stubs.py,sha256=JMs4Xg8IHlAq5L6SBYWcYNzXfJGM6v0lZCQaOb5x9CQ,23014
-numba_cuda/numba/cuda/target.py,sha256=
+numba_cuda/numba/cuda/target.py,sha256=ymYBdkt7iNK_PJCfyqupKpcSj7j-UQzkWIq3KjoLBD8,12963
 numba_cuda/numba/cuda/testing.py,sha256=OR37AuDdzg7vLG4G_4s2uRAkNTScZc-BzHmTMJYuxhQ,6827
 numba_cuda/numba/cuda/types.py,sha256=hC1MUvgUwy-SLgbzFzXwssJzPR8BxQwqUcjwGJFzVac,1317
 numba_cuda/numba/cuda/ufuncs.py,sha256=AJifQgapyv62fdJeMm939R1I5TvIRmaA8dJ83Jy8DCw,23559
 numba_cuda/numba/cuda/utils.py,sha256=VRphC0PLr8Klq3D1FMONu4aRdVO23HOCBg4bxnsqmfc,785
 numba_cuda/numba/cuda/vector_types.py,sha256=FlzOKufhvBnZ-VC-liA7y9is8BV-uj0fD-En_vP6zl0,6783
 numba_cuda/numba/cuda/vectorizers.py,sha256=nEfQxjSA4oCX8ZzvoqjDRygDfwzxFVDXtnjx-K1aPqA,8387
+numba_cuda/numba/cuda/_internal/cuda_bf16.py,sha256=QYck6s_D85HBEsc__SAl_UZxf7SptqAk31mLv_1gzuE,152212
 numba_cuda/numba/cuda/cudadrv/__init__.py,sha256=inat2K8K1OVrgDe64FK7CyRmyFyNKcNO4p2_L79yRZ0,201
 numba_cuda/numba/cuda/cudadrv/devicearray.py,sha256=6tF2TYnmjMbKk2fho1ONoD_QsRD9QVTT2kHP7x1u1J0,31556
 numba_cuda/numba/cuda/cudadrv/devices.py,sha256=k87EDIRhj1ncM9PxJCjZGPFfEks99vzmHlTc55GK5X0,8062
-numba_cuda/numba/cuda/cudadrv/driver.py,sha256=
+numba_cuda/numba/cuda/cudadrv/driver.py,sha256=63NDga5RLrk6JEiHW1aJDubqCbbHA5uumK3mSYy7SEY,119091
 numba_cuda/numba/cuda/cudadrv/drvapi.py,sha256=OnjYWnmy8ZlSfYouhzyYIpW-AJ3x1YHj32YcBY2xet4,16790
 numba_cuda/numba/cuda/cudadrv/dummyarray.py,sha256=2jycZhniMy3ncoVWQG9D8dBehTEeocBZTW43gKHL5Tc,14291
 numba_cuda/numba/cuda/cudadrv/enums.py,sha256=raWKryxamWQZ5A8ivMpyYVhhwbSpaD9lu7l1_wl2W9M,23742
@@ -60,7 +61,7 @@ numba_cuda/numba/cuda/cudadrv/libs.py,sha256=qjknQxYXd2ucwDLQqzhWC_srNg6FnwvcVHI
 numba_cuda/numba/cuda/cudadrv/linkable_code.py,sha256=IZ13laEG_altDQyi9HkdMcwW-YYEIn2erqz6AnYsqHg,2808
 numba_cuda/numba/cuda/cudadrv/mappings.py,sha256=9uEs1KepeVGRbEpVhLjtxSsvZpZsbrHnPywmx--y88A,804
 numba_cuda/numba/cuda/cudadrv/ndarray.py,sha256=HtULWWFyDlgqvrH5459yyPTvU4UbUo2DSdtcNfvbH00,473
-numba_cuda/numba/cuda/cudadrv/nvrtc.py,sha256=
+numba_cuda/numba/cuda/cudadrv/nvrtc.py,sha256=pDc5YsxOMdMbLnUKm1st2FVmFPRU-Mhlpd9mau9KZ-0,14976
 numba_cuda/numba/cuda/cudadrv/nvvm.py,sha256=7tTy6-VEbMBpDUmuSMnUwqPFfBndTh3aPq_n7nxhEA0,26344
 numba_cuda/numba/cuda/cudadrv/rtapi.py,sha256=J6PRGGK07XSLRzgCw5xs8VU5xVoqavvhojk1mxiQsi4,226
 numba_cuda/numba/cuda/cudadrv/runtime.py,sha256=CFumwg4iblWap_E7l7GM_hMYz1PsbH81-N0tZwFFooA,4372
@@ -132,8 +133,9 @@ numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py,sha256=4CcxftJN4S3whgnngOgrZ
 numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py,sha256=saAWvGuAYJ4ToT9qQjvB254EeBfduVqy7VQVRqeVo0Y,987
 numba_cuda/numba/cuda/tests/cudadrv/test_streams.py,sha256=rrQEA8iawR6UyKnK2MdI5X9GnuCWPUNpoMOEVXEd_u0,4196
 numba_cuda/numba/cuda/tests/cudapy/__init__.py,sha256=43EXdiXXRBd6yIcVGMrU9F_EJCD9Uw3mzOP3SB53AEE,260
-numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py,sha256=
+numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py,sha256=3mYDpLS1FUBt7rerACFGR7HxsCJtHSLh_AYqxFEqRd0,5658
 numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py,sha256=9CbjosLNPN5IzrD-15sD_4B0BMmjo02Y7faZiS82cyk,1143
+numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py,sha256=w9c0OXN6Mxb0Un0GxF-ndcq39dn5nMC8xaGzESZB40I,682
 numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py,sha256=2i_xq4B1t1tctr6ZrWA29ZHkmQlD_vCSewhr-AT9tMc,1651
 numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx,sha256=PKVafUhDH1SKRWXkt4N3v8SDMh4RyDFiJM-CMksa5uc,519
 numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py,sha256=wrWx8AeRhBHM74iYPKKrZqiyWrYCtQU3J-g3Zv7JmoY,1782
@@ -143,10 +145,11 @@ numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py,sha256=JDKbbRieNE0C3w
 numba_cuda/numba/cuda/tests/cudapy/test_array_args.py,sha256=iiFrt5Yn7gfheAGOYG2VBeWeuW3JlBhRLXNfSz4cHAA,4982
 numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py,sha256=SWa1MvpwG07yBkrFIUeM9pm3BIwUbhttMNBdUW-CpSM,969
 numba_cuda/numba/cuda/tests/cudapy/test_atomics.py,sha256=agsfUN3WOoh6ICAECtuMuxZNcKq5ivK30Ew3h_m76m0,57689
-numba_cuda/numba/cuda/tests/cudapy/
+numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py,sha256=DdP7WlHev8R5DdY6DEEgOF45ljh8LwKeqmkvGLjNC7E,1444
+numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py,sha256=wNP0NNtqVgaekY9fXp_H4LpPNLX-rDu9gp-_-e965Lg,7420
 numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py,sha256=0_wr6MSeHh0QVzPeH8SB7j0Nv_RrPAK01hNoQ_dGT5I,4417
 numba_cuda/numba/cuda/tests/cudapy/test_boolean.py,sha256=j4mIOv4rJTLjJzpKk1O9UFLT41_iOQRtwsmteXdKZ-M,547
-numba_cuda/numba/cuda/tests/cudapy/test_caching.py,sha256=
+numba_cuda/numba/cuda/tests/cudapy/test_caching.py,sha256=obUSTJSP2Lh-YNElq8PZpVnRJOeq-uqV_VyLHtsXwAw,18427
 numba_cuda/numba/cuda/tests/cudapy/test_casting.py,sha256=3LaN3ZsSuOZXAZXCV85wYyhh0ih7JqABnjGTa7Y2YBE,8748
 numba_cuda/numba/cuda/tests/cudapy/test_cffi.py,sha256=tC7ZCA4dkzehS33iz2l35rX6OxE3BTQd9ivV4r74YXs,926
 numba_cuda/numba/cuda/tests/cudapy/test_compiler.py,sha256=OkCavTZAAcdffdUBYGEmlP_BN7zAH-rWlhr-LqSUUs8,10997
@@ -154,12 +157,12 @@ numba_cuda/numba/cuda/tests/cudapy/test_complex.py,sha256=hmAcyZim46yueXZDqDSJYq
 numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py,sha256=KIuXQ0ihgQQXM-eH7s3xAxhKe35YL1qDTHCVTWA4ut8,497
 numba_cuda/numba/cuda/tests/cudapy/test_const_string.py,sha256=li1UsV5vc2M01cJ7k6_526VPtuAOAKr8e7kb1CDUXi4,4323
 numba_cuda/numba/cuda/tests/cudapy/test_constmem.py,sha256=ZWmyKvFokRMjqyXjVpZVOnR6LR694GWcbUn2jVEQV14,5170
-numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py,sha256=
+numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py,sha256=kkrK5Mo9E8nNH3PYfQAEel0hY7CXZNsn88BAo7heX9g,5818
 numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py,sha256=RXCNHAZM35sbUf3Gi-x2E8-a6BmhFb2rhQkBOeiS_fo,15757
 numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py,sha256=8prL2FTiaajW-UHSL9al-nBniygOfpdAOT_Dkej4PWI,2138
 numba_cuda/numba/cuda/tests/cudapy/test_datetime.py,sha256=MnOeDWMz-rL3-07FsswM06Laxmm0KjTmTwhrP3rmchQ,3526
 numba_cuda/numba/cuda/tests/cudapy/test_debug.py,sha256=1P369s02AvGu7fSIEe_YxSgh3c6S72Aw1gRgmepDbQY,3383
-numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py,sha256=
+numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py,sha256=AE8D4U4dAv4nYP9oatDwROW6knpJ0-iggP4BaHymo6g,13170
 numba_cuda/numba/cuda/tests/cudapy/test_device_func.py,sha256=LNGBZfqFGUtVVQeC6FcHo8T3DbG-j6AjeBwJmwp9HH4,13157
 numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py,sha256=Oc6CdI1j9Ad_wklHdIYSMytrzUpzK6oXD0BGe45sTwg,26636
 numba_cuda/numba/cuda/tests/cudapy/test_enums.py,sha256=Yxac6S5P6C8GN0kMwieL3dQb1uogOVZQEx969B0AMpM,4533
@@ -226,15 +229,17 @@ numba_cuda/numba/cuda/tests/cudasim/__init__.py,sha256=GdfSq6pRVSOQwmgNi7ZFQ5l0y
 numba_cuda/numba/cuda/tests/cudasim/support.py,sha256=JjRrfrrLKS0V5p6GX6ibs6QTuFb1NanKfBQSgbLeiHs,114
 numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py,sha256=-GJCl2c063Ig6EUB8w5L_0GcmXzTLatGe_ddEzdnbgc,3177
 numba_cuda/numba/cuda/tests/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+numba_cuda/numba/cuda/tests/data/cta_barrier.cu,sha256=jJ3lzhbGr6WOHb56_fPaFg8j851ZwCpz8V4du-eyWbA,576
 numba_cuda/numba/cuda/tests/data/cuda_include.cu,sha256=1wj5Of86-kP0hxK5Gr6AhapuyTiiWWJAoFbCuCpyKfA,294
 numba_cuda/numba/cuda/tests/data/error.cu,sha256=5m65RDHgh39d0bIW6Dvj0xh9ffhKH1iILeCCR4p2ReI,138
 numba_cuda/numba/cuda/tests/data/jitlink.cu,sha256=A41S_002h_s4hEghJusT368JXX6H3bSMp3mC_6DX9Us,539
 numba_cuda/numba/cuda/tests/data/jitlink.ptx,sha256=KJZkTuc1u5xUAC7j5BrmrHkgRWr_ncZwN3ayVKa69dw,894
 numba_cuda/numba/cuda/tests/data/warn.cu,sha256=6L-qsXJIxAr_n3hVMAz_EZ5j0skcJAfgzuJfDEISG_I,172
+numba_cuda/numba/cuda/tests/data/include/add.cuh,sha256=yv61Ilqge_kjj-_BPO5YWAx3sqJD73gEh66gxYwE8wc,107
 numba_cuda/numba/cuda/tests/doc_examples/__init__.py,sha256=GdfSq6pRVSOQwmgNi7ZFQ5l0yg4-2gNar_0Rz0buUpM,157
 numba_cuda/numba/cuda/tests/doc_examples/test_cg.py,sha256=VLWd5_v744Z5QKa4i3JVDLUwA1sxJFQzV5cRG6EkyOI,2888
 numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py,sha256=I4hWDF4DzTTtt3-XmQsP5RzPAO_pWUGsKjVO0hhPOCM,2251
-numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py,sha256=
+numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py,sha256=AtjAzFgZWm1nwOokQyO7D8NVMYGd1QDD3EaUT_RQruQ,4403
 numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py,sha256=4C_drWYNZq_qGIt-N0fJ9r8DZBaJdO_5h7mxRZ6RcO8,5133
 numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py,sha256=cLIN3ejI-3cbW0xxgWjm7EsSlmluGB8stDKOqZN8EUo,6138
 numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py,sha256=IoS2pbEby3YxLKpnS6_IGlHaPgvOEL8lJtKOf2eaGLM,3493
@@ -245,6 +250,8 @@ numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py,sha256=UXwXjL9ybg0OuYOFKn
 numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py,sha256=CvExzNABd2Qk5EJqDq1TjxMNz4zw_QIjynzh1O52HU0,2032
 numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu,sha256=mRZEyCfZbq4ACTN3sj1236XmTpj1d0IxZ4QTMbI3g_E,877
+numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu,sha256=xJ6D3RkxlU75Txp1_xsJKBuspDnqvr7-1L8Pb_BdMcU,246
+numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh,sha256=LfYU4QwoAlAXKysg_pV9k0DSHW8oVg21DTmGK8BuZO8,113
 numba_cuda/numba/cuda/tests/nocuda/__init__.py,sha256=43EXdiXXRBd6yIcVGMrU9F_EJCD9Uw3mzOP3SB53AEE,260
 numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py,sha256=4WbuBaowiv4_3hE8lRuxgAQwnR2r3WGVNWx85M3fRUI,13399
 numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py,sha256=bnv8HbWQR0f9x8z9XdBykDCu89KaFWP0LU4OohSwHv4,1496
@@ -259,8 +266,8 @@ numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py,sha256=
 numba_cuda/numba/cuda/tests/test_binary_generation/nrt_extern.cu,sha256=T9ubst3fFUK7EXyXXMi73wAban3VFFQ986cY5OcKfvI,157
 numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu,sha256=IB5t-dVhrKVoue3AbUx3yVMxPG0hBF_yZbzb4642sf0,538
 numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu,sha256=q3oxZziT8KDodeNcEBiWULH6vMrHCWucmJmtrg8C0d0,128
-numba_cuda-0.
-numba_cuda-0.
-numba_cuda-0.
-numba_cuda-0.
-numba_cuda-0.
+numba_cuda-0.12.1.dist-info/licenses/LICENSE,sha256=eHeYE-XjASmwbxfsP5AImgfzRwZurZGqH1f6OFwJ4io,1326
+numba_cuda-0.12.1.dist-info/METADATA,sha256=H6JW6cSrhykHqICS50fIbGkrZ6SRgh_cTC3hTC2-XvQ,1859
+numba_cuda-0.12.1.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
+numba_cuda-0.12.1.dist-info/top_level.txt,sha256=C50SsH-8tXDmt7I0Y3nlJYhS5s6pqWflCPdobe9vx2M,11
+numba_cuda-0.12.1.dist-info/RECORD,,

{numba_cuda-0.11.0.dist-info → numba_cuda-0.12.1.dist-info}/licenses/LICENSE
File without changes

{numba_cuda-0.11.0.dist-info → numba_cuda-0.12.1.dist-info}/top_level.txt
File without changes