numba-cuda 0.0.18__py3-none-any.whl → 0.0.19__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to their public registry; it is provided for informational purposes only.
- numba_cuda/VERSION +1 -1
- numba_cuda/numba/cuda/cuda_paths.py +68 -0
- numba_cuda/numba/cuda/cudadrv/devicearray.py +4 -1
- numba_cuda/numba/cuda/cudadrv/libs.py +38 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +9 -4
- numba_cuda/numba/cuda/dispatcher.py +46 -6
- numba_cuda/numba/cuda/runtime/nrt.cu +190 -0
- numba_cuda/numba/cuda/simulator/api.py +14 -0
- numba_cuda/numba/cuda/target.py +4 -0
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +2 -2
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +48 -0
- numba_cuda/numba/cuda/tests/nrt/__init__.py +8 -0
- numba_cuda/numba/cuda/tests/nrt/mock_numpy.py +42 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +110 -0
- numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +8 -1
- {numba_cuda-0.0.18.dist-info → numba_cuda-0.0.19.dist-info}/METADATA +1 -1
- {numba_cuda-0.0.18.dist-info → numba_cuda-0.0.19.dist-info}/RECORD +20 -15
- {numba_cuda-0.0.18.dist-info → numba_cuda-0.0.19.dist-info}/WHEEL +1 -1
- {numba_cuda-0.0.18.dist-info → numba_cuda-0.0.19.dist-info}/LICENSE +0 -0
- {numba_cuda-0.0.18.dist-info → numba_cuda-0.0.19.dist-info}/top_level.txt +0 -0
numba_cuda/VERSION
CHANGED
@@ -1 +1 @@
-0.0.18
+0.0.19

numba_cuda/numba/cuda/cuda_paths.py
CHANGED
@@ -2,9 +2,11 @@ import sys
 import re
 import os
 from collections import namedtuple
+import platform
 
 from numba.core.config import IS_WIN32
 from numba.misc.findlib import find_lib, find_file
+from numba import config
 
 
 _env_path_tuple = namedtuple('_env_path_tuple', ['by', 'info'])
@@ -241,6 +243,7 @@ def get_cuda_paths():
         'libdevice': _get_libdevice_paths(),
         'cudalib_dir': _get_cudalib_dir(),
         'static_cudalib_dir': _get_static_cudalib_dir(),
+        'include_dir': _get_include_dir(),
     }
     # Cache result
     get_cuda_paths._cached_result = d
@@ -256,3 +259,68 @@ def get_debian_pkg_libdevice():
     if not os.path.exists(pkg_libdevice_location):
         return None
     return pkg_libdevice_location
+
+
+def get_current_cuda_target_name():
+    """Determine conda's CTK target folder based on system and machine arch.
+
+    CTK's conda package delivers headers based on its architecture type. For example,
+    an `x86_64` machine places headers under `$CONDA_PREFIX/targets/x86_64-linux`, and
+    `aarch64` under `$CONDA_PREFIX/targets/sbsa-linux`. Read more about the
+    nuances at cudart's conda feedstock:
+    https://github.com/conda-forge/cuda-cudart-feedstock/blob/main/recipe/meta.yaml#L8-L11  # noqa: E501
+    """
+    system = platform.system()
+    machine = platform.machine()
+
+    if system == "Linux":
+        arch_to_targets = {
+            'x86_64': 'x86_64-linux',
+            'aarch64': 'sbsa-linux'
+        }
+    elif system == "Windows":
+        arch_to_targets = {
+            'AMD64': 'x64',
+        }
+    else:
+        arch_to_targets = {}
+
+    return arch_to_targets.get(machine, None)
+
+
+def get_conda_include_dir():
+    """
+    Return the include directory in the current conda environment, if one
+    is active and it exists.
+    """
+    is_conda_env = os.path.exists(os.path.join(sys.prefix, 'conda-meta'))
+    if not is_conda_env:
+        return
+
+    if platform.system() == "Windows":
+        include_dir = os.path.join(
+            sys.prefix, 'Library', 'include'
+        )
+    elif target_name := get_current_cuda_target_name():
+        include_dir = os.path.join(
+            sys.prefix, 'targets', target_name, 'include'
+        )
+    else:
+        # A fallback when the target cannot be determined,
+        # though usually it should be.
+        include_dir = os.path.join(sys.prefix, 'include')
+
+    if os.path.exists(include_dir):
+        return include_dir
+    return
+
+
+def _get_include_dir():
+    """Find the root include directory."""
+    options = [
+        ('Conda environment (NVIDIA package)', get_conda_include_dir()),
+        ('CUDA_INCLUDE_PATH Config Entry', config.CUDA_INCLUDE_PATH),
+        # TODO: add others
+    ]
+    by, include_dir = _find_valid_path(options)
+    return _env_path_tuple(by, include_dir)

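For illustration, the new lookup can be exercised directly; this is a minimal sketch, assuming a CUDA-enabled environment (printed values vary by machine):

    from numba.cuda.cuda_paths import get_cuda_paths

    entry = get_cuda_paths()['include_dir']
    # 'by' names the probe that found the directory ('Conda environment
    # (NVIDIA package)' or 'CUDA_INCLUDE_PATH Config Entry'); 'info' is the path.
    print(entry.by, entry.info)
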
numba_cuda/numba/cuda/cudadrv/devicearray.py
CHANGED
@@ -876,7 +876,10 @@ def auto_device(obj, stream=0, copy=True, user_explicit=False):
     sentry_contiguous(obj)
     devobj = from_array_like(obj, stream=stream)
     if copy:
-        if config.CUDA_WARN_ON_IMPLICIT_COPY:
+        if (
+            config.CUDA_WARN_ON_IMPLICIT_COPY
+            and not config.DISABLE_PERFORMANCE_WARNINGS
+        ):
             if (
                 not user_explicit and
                 (not isinstance(obj, DeviceNDArray)

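The effect of the new guard, sketched with the real numba config entries (both are controlled by environment variables, NUMBA_CUDA_WARN_ON_IMPLICIT_COPY and NUMBA_DISABLE_PERFORMANCE_WARNINGS):

    from numba.core import config

    # The implicit-copy warning now fires only when it is enabled AND
    # performance warnings have not been globally disabled.
    warn = (config.CUDA_WARN_ON_IMPLICIT_COPY
            and not config.DISABLE_PERFORMANCE_WARNINGS)
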
numba_cuda/numba/cuda/cudadrv/libs.py
CHANGED
@@ -18,6 +18,7 @@ from numba.misc.findlib import find_lib
 from numba.cuda.cuda_paths import get_cuda_paths
 from numba.cuda.cudadrv.driver import locate_driver_and_loader, load_driver
 from numba.cuda.cudadrv.error import CudaSupportError
+from numba.core import config
 
 
 if sys.platform == 'win32':
@@ -60,6 +61,24 @@ def get_cudalib(lib, static=False):
     return max(candidates) if candidates else namepattern % lib
 
 
+def get_cuda_include_dir():
+    """
+    Find the path to the CUDA include dir based on a list of default locations.
+    Note that this does not list the `CUDA_INCLUDE_PATH` entry in user
+    configuration.
+    """
+
+    return get_cuda_paths()['include_dir'].info
+
+
+def check_cuda_include_dir(path):
+    if path is None or not os.path.exists(path):
+        raise FileNotFoundError(f"{path} not found")
+
+    if not os.path.exists(os.path.join(path, "cuda_runtime.h")):
+        raise FileNotFoundError(f"Unable to find cuda_runtime.h from {path}")
+
+
 def open_cudalib(lib):
     path = get_cudalib(lib)
     return ctypes.CDLL(path)
@@ -75,6 +94,8 @@ def _get_source_variable(lib, static=False):
         return get_cuda_paths()['nvvm'].by
     elif lib == 'libdevice':
         return get_cuda_paths()['libdevice'].by
+    elif lib == 'include_dir':
+        return get_cuda_paths()['include_dir'].by
     else:
         dir_type = 'static_cudalib_dir' if static else 'cudalib_dir'
         return get_cuda_paths()[dir_type].by
@@ -173,4 +194,21 @@ def test():
             print('\tERROR: failed to find %s:\n%s' % (lib, e))
             failed = True
 
+    # Check CUDA include paths
+
+    print("Include directory configuration variable:")
+    print(f"\tCUDA_INCLUDE_PATH={config.CUDA_INCLUDE_PATH}")
+
+    where = _get_source_variable('include_dir')
+    print(f'Finding include directory from {where}')
+    include = get_cuda_include_dir()
+    print('\tLocated at', include)
+    try:
+        print('\tChecking include directory', end='...')
+        check_cuda_include_dir(include)
+        print('\tok')
+    except FileNotFoundError as e:
+        print('\tERROR: failed to find cuda include directory:\n%s' % e)
+        failed = True
+
     return not failed

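The extended diagnostic can be run as before; a minimal sketch (the include checks are appended to the existing library checks and flip the return value on failure):

    from numba.cuda.cudadrv.libs import test

    ok = test()   # now also reports CUDA_INCLUDE_PATH and checks cuda_runtime.h
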
numba_cuda/numba/cuda/cudadrv/nvrtc.py
CHANGED
@@ -1,9 +1,8 @@
 from ctypes import byref, c_char, c_char_p, c_int, c_size_t, c_void_p, POINTER
 from enum import IntEnum
-from numba.core import config
 from numba.cuda.cudadrv.error import (NvrtcError, NvrtcCompilationError,
                                       NvrtcSupportError)
-
+from numba.cuda.cuda_paths import get_cuda_paths
 import functools
 import os
 import threading
@@ -233,12 +232,18 @@ def compile(src, name, cc):
     # being optimized away.
     major, minor = cc
     arch = f'--gpu-architecture=compute_{major}{minor}'
-
+
+    cuda_include = [
+        f"-I{get_cuda_paths()['include_dir'].info}",
+    ]
 
     cudadrv_path = os.path.dirname(os.path.abspath(__file__))
     numba_cuda_path = os.path.dirname(cudadrv_path)
     numba_include = f'-I{numba_cuda_path}'
-    options = [arch, numba_include, '-rdc', 'true']
+    options = [arch, *cuda_include, numba_include, '-rdc', 'true']
+
+    if nvrtc.get_version() < (12, 0):
+        options += ["-std=c++17"]
 
     # Compile the program
     compile_error = nvrtc.compile_program(program, options)

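Assuming cc == (8, 0), an include directory of /usr/local/cuda/include, and an NVRTC older than 12.0, the options now passed to nvrtc.compile_program would be (paths illustrative):

    options = [
        '--gpu-architecture=compute_80',
        '-I/usr/local/cuda/include',     # new: CUDA include directory
        '-I/.../numba_cuda/numba/cuda',  # numba_include, as before
        '-rdc', 'true',
        '-std=c++17',                    # new: only added for NVRTC < 12.0
    ]
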
numba_cuda/numba/cuda/dispatcher.py
CHANGED
@@ -1,5 +1,6 @@
 import numpy as np
 import os
+import re
 import sys
 import ctypes
 import functools
@@ -43,6 +44,21 @@ class _Kernel(serialize.ReduceMixin):
     object launches the kernel on the device.
     '''
 
+    NRT_functions = [
+        "NRT_Allocate",
+        "NRT_MemInfo_init",
+        "NRT_MemInfo_new",
+        "NRT_Free",
+        "NRT_dealloc",
+        "NRT_MemInfo_destroy",
+        "NRT_MemInfo_call_dtor",
+        "NRT_MemInfo_data_fast",
+        "NRT_MemInfo_alloc_aligned",
+        "NRT_Allocate_External",
+        "NRT_decref",
+        "NRT_incref"
+    ]
+
     @global_compiler_lock
     def __init__(self, py_func, argtypes, link=None, debug=False,
                  lineinfo=False, inline=False, fastmath=False, extensions=None,
@@ -105,16 +121,20 @@ class _Kernel(serialize.ReduceMixin):
         if self.cooperative:
             lib.needs_cudadevrt = True
 
+        basedir = os.path.dirname(os.path.abspath(__file__))
+        asm = lib.get_asm_str()
+
         res = [fn for fn in cuda_fp16_math_funcs
-               if (f'__numba_wrapper_{fn}' in lib.get_asm_str())]
+               if (f'__numba_wrapper_{fn}' in asm)]
 
         if res:
             # Path to the source containing the foreign function
-            basedir = os.path.dirname(os.path.abspath(__file__))
             functions_cu_path = os.path.join(basedir,
                                              'cpp_function_wrappers.cu')
             link.append(functions_cu_path)
 
+        link = self.maybe_link_nrt(link, tgt_ctx, asm)
+
         for filepath in link:
             lib.add_linking_file(filepath)
 
@@ -136,6 +156,25 @@ class _Kernel(serialize.ReduceMixin):
         self.lifted = []
         self.reload_init = []
 
+    def maybe_link_nrt(self, link, tgt_ctx, asm):
+        if not tgt_ctx.enable_nrt:
+            return link
+
+        all_nrt = "|".join(self.NRT_functions)
+        pattern = (
+            r'\.extern\s+\.func\s+(?:\s*\(.+\)\s*)?('
+            + all_nrt + r')\s*\([^)]*\)\s*;'
+        )
+
+        nrt_in_asm = re.findall(pattern, asm)
+
+        basedir = os.path.dirname(os.path.abspath(__file__))
+        if nrt_in_asm:
+            nrt_path = os.path.join(basedir, 'runtime', 'nrt.cu')
+            link.append(nrt_path)
+
+        return link
+
     @property
     def library(self):
         return self._codelibrary
@@ -385,7 +424,6 @@ class _Kernel(serialize.ReduceMixin):
 
         if isinstance(ty, types.Array):
             devary = wrap_arg(val).to_device(retr, stream)
-
             c_intp = ctypes.c_ssize_t
 
             meminfo = ctypes.c_void_p(0)
@@ -519,7 +557,10 @@ class _LaunchConfiguration:
         self.stream = stream
         self.sharedmem = sharedmem
 
-        if config.CUDA_LOW_OCCUPANCY_WARNINGS:
+        if (
+            config.CUDA_LOW_OCCUPANCY_WARNINGS
+            and not config.DISABLE_PERFORMANCE_WARNINGS
+        ):
             # Warn when the grid has fewer than 128 blocks. This number is
             # chosen somewhat heuristically - ideally the minimum is 2 times
             # the number of SMs, but the number of SMs varies between devices -
@@ -708,8 +749,7 @@ class CUDADispatcher(Dispatcher, serialize.ReduceMixin):
         *args*.
         '''
         cc = get_current_device().compute_capability
-        argtypes = tuple(
-            [self.typingctx.resolve_argument_type(a) for a in args])
+        argtypes = tuple(self.typeof_pyval(a) for a in args)
         if self.specialized:
             raise RuntimeError('Dispatcher already specialized')
 

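A self-contained check of the PTX scan in maybe_link_nrt; the PTX snippet is hand-written for the test (real NVVM output formats the declaration similarly, but not necessarily identically):

    import re

    NRT_functions = ["NRT_Allocate", "NRT_decref", "NRT_incref"]
    pattern = (r'\.extern\s+\.func\s+(?:\s*\(.+\)\s*)?('
               + "|".join(NRT_functions) + r')\s*\([^)]*\)\s*;')

    asm = """
    .extern .func (.param .b64 func_retval0) NRT_Allocate
    (
        .param .b64 NRT_Allocate_param_0
    )
    ;
    """
    assert re.findall(pattern, asm) == ['NRT_Allocate']  # nrt.cu gets linked
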
numba_cuda/numba/cuda/runtime/nrt.cu
ADDED
@@ -0,0 +1,190 @@
+#ifndef _NRT_H
+#define _NRT_H
+
+#include <cuda/atomic>
+
+typedef void (*NRT_dtor_function)(void* ptr, size_t size, void* info);
+typedef void (*NRT_dealloc_func)(void* ptr, void* dealloc_info);
+
+typedef struct MemInfo NRT_MemInfo;
+
+extern "C" {
+struct MemInfo {
+  cuda::atomic<size_t, cuda::thread_scope_device> refct;
+  NRT_dtor_function dtor;
+  void* dtor_info;
+  void* data;
+  size_t size;
+};
+}
+
+// Globally needed variables
+struct NRT_MemSys {
+  struct {
+    bool enabled;
+    cuda::atomic<size_t, cuda::thread_scope_device> alloc;
+    cuda::atomic<size_t, cuda::thread_scope_device> free;
+    cuda::atomic<size_t, cuda::thread_scope_device> mi_alloc;
+    cuda::atomic<size_t, cuda::thread_scope_device> mi_free;
+  } stats;
+};
+
+static __device__ void *nrt_allocate_meminfo_and_data_align(size_t size, unsigned align, NRT_MemInfo **mi);
+static __device__ void *nrt_allocate_meminfo_and_data(size_t size, NRT_MemInfo **mi_out);
+extern "C" __device__ void* NRT_Allocate_External(size_t size);
+
+/* The Memory System object */
+__device__ NRT_MemSys* TheMSys;
+
+extern "C" __device__ void* NRT_Allocate(size_t size)
+{
+  void* ptr = NULL;
+  ptr = malloc(size);
+  // if (TheMSys->stats.enabled) { TheMSys->stats.alloc++; }
+  return ptr;
+}
+
+extern "C" __device__ void NRT_MemInfo_init(NRT_MemInfo* mi,
+                                            void* data,
+                                            size_t size,
+                                            NRT_dtor_function dtor,
+                                            void* dtor_info)
+// NRT_MemSys* TheMSys)
+{
+  mi->refct = 1; /* starts with 1 refct */
+  mi->dtor = dtor;
+  mi->dtor_info = dtor_info;
+  mi->data = data;
+  mi->size = size;
+  // if (TheMSys->stats.enabled) { TheMSys->stats.mi_alloc++; }
+}
+
+extern "C"
+__device__ NRT_MemInfo* NRT_MemInfo_new(
+  void* data, size_t size, NRT_dtor_function dtor, void* dtor_info)
+{
+  NRT_MemInfo* mi = (NRT_MemInfo*)NRT_Allocate(sizeof(NRT_MemInfo));
+  if (mi != NULL) { NRT_MemInfo_init(mi, data, size, dtor, dtor_info); }
+  return mi;
+}
+
+extern "C" __device__ void NRT_Free(void* ptr)
+{
+  free(ptr);
+  //if (TheMSys->stats.enabled) { TheMSys->stats.free++; }
+}
+
+extern "C" __device__ void NRT_dealloc(NRT_MemInfo* mi)
+{
+  NRT_Free(mi);
+}
+
+extern "C" __device__ void NRT_MemInfo_destroy(NRT_MemInfo* mi)
+{
+  NRT_dealloc(mi);
+  //if (TheMSys->stats.enabled) { TheMSys->stats.mi_free++; }
+}
+extern "C" __device__ void NRT_MemInfo_call_dtor(NRT_MemInfo* mi)
+{
+  if (mi->dtor) /* We have a destructor */
+    mi->dtor(mi->data, mi->size, NULL);
+  /* Clear and release MemInfo */
+  NRT_MemInfo_destroy(mi);
+}
+
+extern "C" __device__ void* NRT_MemInfo_data_fast(NRT_MemInfo *mi)
+{
+  return mi->data;
+}
+
+extern "C" __device__ NRT_MemInfo *NRT_MemInfo_alloc_aligned(size_t size, unsigned align) {
+  NRT_MemInfo *mi = NULL;
+  void *data = nrt_allocate_meminfo_and_data_align(size, align, &mi);
+  if (data == NULL) {
+    return NULL; /* return early as allocation failed */
+  }
+  //NRT_Debug(nrt_debug_print("NRT_MemInfo_alloc_aligned %p\n", data));
+  NRT_MemInfo_init(mi, data, size, NULL, NULL);
+  return mi;
+}
+
+static
+__device__ void *nrt_allocate_meminfo_and_data_align(size_t size, unsigned align,
+                                                     NRT_MemInfo **mi)
+{
+  size_t offset = 0, intptr = 0, remainder = 0;
+  //NRT_Debug(nrt_debug_print("nrt_allocate_meminfo_and_data_align %p\n", allocator));
+  char *base = (char *)nrt_allocate_meminfo_and_data(size + 2 * align, mi);
+  if (base == NULL) {
+    return NULL; /* return early as allocation failed */
+  }
+  intptr = (size_t) base;
+  /*
+   * See if the allocation is aligned already...
+   * Check if align is a power of 2, if so the modulo can be avoided.
+   */
+  if((align & (align - 1)) == 0)
+  {
+    remainder = intptr & (align - 1);
+  }
+  else
+  {
+    remainder = intptr % align;
+  }
+  if (remainder == 0){ /* Yes */
+    offset = 0;
+  } else { /* No, move forward `offset` bytes */
+    offset = align - remainder;
+  }
+  return (void*)((char *)base + offset);
+}
+
+static
+__device__ void *nrt_allocate_meminfo_and_data(size_t size, NRT_MemInfo **mi_out) {
+  NRT_MemInfo *mi = NULL;
+  //NRT_Debug(nrt_debug_print("nrt_allocate_meminfo_and_data %p\n", allocator));
+  char *base = (char *)NRT_Allocate_External(sizeof(NRT_MemInfo) + size);
+  if (base == NULL) {
+    *mi_out = NULL; /* set meminfo to NULL as allocation failed */
+    return NULL; /* return early as allocation failed */
+  }
+  mi = (NRT_MemInfo *) base;
+  *mi_out = mi;
+  return (void*)((char *)base + sizeof(NRT_MemInfo));
+}
+
+extern "C" __device__ void* NRT_Allocate_External(size_t size) {
+  void *ptr = NULL;
+  ptr = malloc(size);
+  //NRT_Debug(nrt_debug_print("NRT_Allocate_External bytes=%zu ptr=%p\n", size, ptr));
+
+  //if (TheMSys.stats.enabled)
+  //{
+  //  TheMSys.stats.alloc++;
+  //}
+  return ptr;
+}
+
+
+/*
+  c++ version of the NRT_decref function that usually is added to
+  the final kernel link in PTX form by numba. This version may be
+  used by c++ APIs that accept ownership of live objects and must
+  manage them going forward.
+*/
+extern "C" __device__ void NRT_decref(NRT_MemInfo* mi)
+{
+  if (mi != NULL) {
+    mi->refct--;
+    if (mi->refct == 0) { NRT_MemInfo_call_dtor(mi); }
+  }
+}
+
+#endif
+
+extern "C" __device__ void NRT_incref(NRT_MemInfo* mi)
+{
+  if (mi != NULL) {
+    mi->refct++;
+  }
+}

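The alignment helper over-allocates by 2 * align bytes and rounds the data pointer up. The arithmetic, checked in Python with a hypothetical address and a power-of-two alignment:

    align = 16
    base = 0x7f0000000008                 # pretend allocator return address
    remainder = base & (align - 1)        # 8: power-of-two branch
    offset = 0 if remainder == 0 else align - remainder
    assert (base + offset) % align == 0   # data lands on a 16-byte boundary
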
numba_cuda/numba/cuda/simulator/api.py
CHANGED
@@ -35,6 +35,20 @@ class stream(object):
         pass
 
 
+# Default stream APIs. Since execution from the perspective of the host is
+# synchronous in the simulator, these can be the same as the stream class.
+default_stream = stream
+legacy_default_stream = stream
+per_thread_default_stream = stream
+
+
+# There is no way to use external streams with the simulator. Since the
+# implementation is not really using streams, we can't meaningfully interact
+# with external ones.
+def external_stream(ptr):
+    raise RuntimeError("External streams are unsupported in the simulator")
+
+
 def synchronize():
     pass
 

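With these aliases, stream-based code runs unchanged under the simulator (NUMBA_ENABLE_CUDASIM=1); a minimal sketch:

    from numba import cuda

    s = cuda.default_stream()   # the simulator's no-op stream class
    s.synchronize()             # host execution is already synchronous
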
numba_cuda/numba/cuda/tests/cudapy/test_print.py
CHANGED
@@ -126,8 +126,8 @@ class TestPrint(CUDATestCase):
 
     def test_bool(self):
         output, _ = self.run_code(printbool_usecase)
-        expected = "True\nFalse\nTrue\nTrue\nFalse\nFalse"
-        self.assertEqual(output.strip(), expected)
+        expected = "True\r?\nFalse\r?\nTrue\r?\nTrue\r?\nFalse\r?\nFalse"
+        self.assertRegex(output.strip(), expected)
 
     def test_printempty(self):
         output, _ = self.run_code(printempty_usecase)

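The looser pattern exists because output captured on Windows contains CRLF line endings; a quick standalone check of the regex:

    import re

    pattern = "True\r?\nFalse"
    assert re.search(pattern, "True\nFalse")     # Linux-style output
    assert re.search(pattern, "True\r\nFalse")   # Windows-style output
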
numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py
ADDED
@@ -0,0 +1,48 @@
+from numba.cuda.testing import (skip_on_cudasim, skip_unless_cudasim, unittest,
+                                CUDATestCase)
+from numba import cuda
+
+# Basic tests that stream APIs execute on the hardware and in the simulator.
+#
+# Correctness of semantics is exercised elsewhere in the test suite (though we
+# could improve the comprehensiveness of testing by adding more correctness
+# tests here in future).
+
+
+class TestStreamAPI(CUDATestCase):
+    def test_stream_create_and_sync(self):
+        s = cuda.stream()
+        s.synchronize()
+
+    def test_default_stream_create_and_sync(self):
+        s = cuda.default_stream()
+        s.synchronize()
+
+    def test_legacy_default_stream_create_and_sync(self):
+        s = cuda.legacy_default_stream()
+        s.synchronize()
+
+    def test_ptd_stream_create_and_sync(self):
+        s = cuda.per_thread_default_stream()
+        s.synchronize()
+
+    @skip_on_cudasim("External streams are unsupported on the simulator")
+    def test_external_stream_create(self):
+        # A dummy pointer value
+        ptr = 0x12345678
+        s = cuda.external_stream(ptr)
+        # We don't test synchronization on the stream because it's not a real
+        # stream - we used a dummy pointer for testing the API, so we just
+        # ensure that the stream handle matches the external stream pointer.
+        self.assertEqual(ptr, s.handle.value)
+
+    @skip_unless_cudasim("External streams are usable with hardware")
+    def test_external_stream_simulator_unavailable(self):
+        ptr = 0x12345678
+        msg = "External streams are unsupported in the simulator"
+        with self.assertRaisesRegex(RuntimeError, msg):
+            cuda.external_stream(ptr)
+
+
+if __name__ == '__main__':
+    unittest.main()

numba_cuda/numba/cuda/tests/nrt/mock_numpy.py
ADDED
@@ -0,0 +1,42 @@
+
+from numba.core import errors, types
+from numba.core.extending import overload
+from numba.np.arrayobj import (_check_const_str_dtype, is_nonelike,
+                               ty_parse_dtype, ty_parse_shape, numpy_empty_nd)
+
+
+# Typical tests for allocation use array construction (e.g. np.zeros, np.empty,
+# etc.) to induce allocations. These don't work in the CUDA target because they
+# need keyword arguments, which are presently not supported properly in the
+# CUDA target.
+#
+# To work around this, we can define our own function, that works like
+# the desired one, except that it uses only positional arguments.
+#
+# Once the CUDA target supports keyword arguments, this workaround will no
+# longer be necessary and the tests in this module should be switched to use
+# the relevant NumPy functions instead.
+def cuda_empty(shape, dtype):
+    pass
+
+
+@overload(cuda_empty)
+def ol_cuda_empty(shape, dtype):
+    _check_const_str_dtype("empty", dtype)
+    if (dtype is float or
+            (isinstance(dtype, types.Function) and dtype.typing_key is float) or
+            is_nonelike(dtype)):  # default
+        nb_dtype = types.double
+    else:
+        nb_dtype = ty_parse_dtype(dtype)
+
+    ndim = ty_parse_shape(shape)
+    if nb_dtype is not None and ndim is not None:
+        retty = types.Array(dtype=nb_dtype, ndim=ndim, layout='C')
+
+        def impl(shape, dtype):
+            return numpy_empty_nd(shape, dtype, retty)
+        return impl
+    else:
+        msg = f"Cannot parse input types to function np.empty({shape}, {dtype})"
+        raise errors.TypingError(msg)

numba_cuda/numba/cuda/tests/nrt/test_nrt.py
ADDED
@@ -0,0 +1,110 @@
+import re
+import gc
+import numpy as np
+import unittest
+from unittest.mock import patch
+from numba.core.runtime import rtsys
+from numba.tests.support import EnableNRTStatsMixin
+from numba.cuda.testing import CUDATestCase
+
+from .mock_numpy import cuda_empty
+
+from numba import cuda
+
+
+class TestNrtRefCt(EnableNRTStatsMixin, CUDATestCase):
+
+    def setUp(self):
+        # Clean up any NRT-backed objects hanging in a dead reference cycle
+        gc.collect()
+        super(TestNrtRefCt, self).setUp()
+
+    @unittest.expectedFailure
+    def test_no_return(self):
+        """
+        Test issue #1291
+        """
+        n = 10
+
+        @cuda.jit
+        def kernel():
+            for i in range(n):
+                temp = cuda_empty(2, np.float64)  # noqa: F841
+            return None
+
+        init_stats = rtsys.get_allocation_stats()
+
+        with patch('numba.config.CUDA_ENABLE_NRT', True, create=True):
+            kernel[1,1]()
+        cur_stats = rtsys.get_allocation_stats()
+        self.assertEqual(cur_stats.alloc - init_stats.alloc, n)
+        self.assertEqual(cur_stats.free - init_stats.free, n)
+
+
+class TestNrtBasic(CUDATestCase):
+    def test_nrt_launches(self):
+        @cuda.jit
+        def f(x):
+            return x[:5]
+
+        @cuda.jit
+        def g():
+            x = cuda_empty(10, np.int64)
+            f(x)
+
+        with patch('numba.config.CUDA_ENABLE_NRT', True, create=True):
+            g[1,1]()
+        cuda.synchronize()
+
+    def test_nrt_ptx_contains_refcount(self):
+        @cuda.jit
+        def f(x):
+            return x[:5]
+
+        @cuda.jit
+        def g():
+            x = cuda_empty(10, np.int64)
+            f(x)
+
+        with patch('numba.config.CUDA_ENABLE_NRT', True, create=True):
+            g[1,1]()
+
+        ptx = next(iter(g.inspect_asm().values()))
+
+        # The following checks that a `call` PTX instruction is
+        # emitted for NRT_MemInfo_alloc_aligned, NRT_incref and
+        # NRT_decref
+        p1 = r"call\.uni(.|\n)*NRT_MemInfo_alloc_aligned"
+        match = re.search(p1, ptx)
+        assert match is not None
+
+        p2 = r"call\.uni.*\n.*NRT_incref"
+        match = re.search(p2, ptx)
+        assert match is not None
+
+        p3 = r"call\.uni.*\n.*NRT_decref"
+        match = re.search(p3, ptx)
+        assert match is not None
+
+    def test_nrt_returns_correct(self):
+        @cuda.jit
+        def f(x):
+            return x[5:]
+
+        @cuda.jit
+        def g(out_ary):
+            x = cuda_empty(10, np.int64)
+            x[5] = 1
+            y = f(x)
+            out_ary[0] = y[0]
+
+        out_ary = np.zeros(1, dtype=np.int64)
+
+        with patch('numba.config.CUDA_ENABLE_NRT', True, create=True):
+            g[1,1](out_ary)
+
+        self.assertEqual(out_ary[0], 1)
+
+
+if __name__ == '__main__':
+    unittest.main()

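The tests patch numba.config with create=True because CUDA_ENABLE_NRT is not a config attribute that exists by default; the pattern in isolation:

    from unittest.mock import patch
    import numba

    with patch('numba.config.CUDA_ENABLE_NRT', True, create=True):
        assert numba.config.CUDA_ENABLE_NRT   # attribute exists only here
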
numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py
CHANGED
@@ -2,6 +2,7 @@
 
 import argparse
 import pathlib
+import platform
 import subprocess
 import sys
 
@@ -56,7 +57,13 @@ def determine_include_flags():
        print(f"Unexpected return code ({rc}) from `nvcc -v`. Expected 1.")
        return None
 
-    output = cp.stderr.decode()
+    # NVCC writes to stdout on Windows and stderr on Linux
+    if platform.system() == 'Windows':
+        stream = cp.stdout
+    else:
+        stream = cp.stderr
+
+    output = stream.decode()
     lines = output.splitlines()
 
     includes_lines = [line for line in lines if line.startswith("#$ INCLUDES=")]

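For reference, the nvcc dry-run line being parsed looks like the following (path illustrative; the exact formatting is an assumption based on the "#$ INCLUDES=" prefix used in the filter):

    #$ INCLUDES="-I/usr/local/cuda/targets/x86_64-linux/include"

and the selection logic reduces to (with cp from subprocess.run(['nvcc', '-v', ...], capture_output=True)):

    import platform
    stream = cp.stdout if platform.system() == 'Windows' else cp.stderr
    includes_lines = [line for line in stream.decode().splitlines()
                      if line.startswith("#$ INCLUDES=")]
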
{numba_cuda-0.0.18.dist-info → numba_cuda-0.0.19.dist-info}/RECORD
CHANGED
@@ -1,6 +1,6 @@
 _numba_cuda_redirector.pth,sha256=cmfMMmV0JPh3yEpl4bGeM9AuXiVVMSo6Z_b7RaQL3XE,30
 _numba_cuda_redirector.py,sha256=rc56rnb40w3AtrqnhS66JSgYTSTsi3iTn8yP3NuoQV8,2401
-numba_cuda/VERSION,sha256=
+numba_cuda/VERSION,sha256=K2Wn4BRtrXcEkuPZYGGM_h_Orgai6flc272777m5MYQ,7
 numba_cuda/__init__.py,sha256=atXeUvJKR3JHcAiCFbXCVOJQUHgB1TulmsqSL_9RT3Q,114
 numba_cuda/_version.py,sha256=jbdUsbR7sVllw0KxQNB0-FMd929CGg3kH2fhHdrlkuc,719
 numba_cuda/numba/cuda/__init__.py,sha256=idyVHOObC9lTYnp62v7rVprSacRM4d5F6vhXfG5ElTI,621
@@ -13,7 +13,7 @@ numba_cuda/numba/cuda/compiler.py,sha256=47SjuI5p4yWCujAglIq0Cb0ARO8QxRp4fOZropk
 numba_cuda/numba/cuda/cpp_function_wrappers.cu,sha256=iv84_F6Q9kFjV_kclrQz1msh6Dud8mI3qNkswTid7Qc,953
 numba_cuda/numba/cuda/cuda_fp16.h,sha256=1IC0mdNdkvKbvAe0-f4uYVS7WFrVqOyI1nRUbBiqr6A,126844
 numba_cuda/numba/cuda/cuda_fp16.hpp,sha256=vJ7NUr2X2tKhAP7ojydAiCoOjVO6n4QGoXD6m9Srrlw,89130
-numba_cuda/numba/cuda/cuda_paths.py,sha256=
+numba_cuda/numba/cuda/cuda_paths.py,sha256=wwZKOUS0FyZloRUgDVDPPCwtm3t6Js7U369_YgMpEC0,9859
 numba_cuda/numba/cuda/cudadecl.py,sha256=ynUidit8oPGjedc6p1miMGtS20DOji3DiQHzwmx6m0s,23192
 numba_cuda/numba/cuda/cudaimpl.py,sha256=3YMxQSCv2KClBrpuXGchrTNICV1F6NIjjL2rie5fDZ4,38628
 numba_cuda/numba/cuda/cudamath.py,sha256=EFNtdzEytAZuwijdRoFGzVKCeal76UzzaNy7wUFQx8I,3978
@@ -21,7 +21,7 @@ numba_cuda/numba/cuda/decorators.py,sha256=qSpir16-jPYSe2YuRZ6g9INeobmsMNg6ab9IZ
 numba_cuda/numba/cuda/descriptor.py,sha256=rNMaurJkjNjIBmHPozDoLC35DMURE0fn_LtnXRmaG_w,985
 numba_cuda/numba/cuda/device_init.py,sha256=lP79tCsQ0Np9xcbjv_lXcH4JOiVZvV8nwg3INdETxsc,3586
 numba_cuda/numba/cuda/deviceufunc.py,sha256=yxAH71dpgJWK8okmCJm0FUV6z2AqdThCYOTZspT7z0M,30775
-numba_cuda/numba/cuda/dispatcher.py,sha256=
+numba_cuda/numba/cuda/dispatcher.py,sha256=1ND28o_YeP_0YS2iFYwCH9Byc87qTvCVKjT7PHu2Fsg,41233
 numba_cuda/numba/cuda/errors.py,sha256=XwWHzCllx0DXU6BQdoRH0m3pznGxnTFOBTVYXMmCfqg,1724
 numba_cuda/numba/cuda/extending.py,sha256=URsyBYls2te-mgE0yvDY6akvawYCA0blBFfD7Lf9DO4,142
 numba_cuda/numba/cuda/initialize.py,sha256=TQGHGLQoq4ch4J6CLDcJdGsZzXM-g2kDgdyO1u-Rbhg,546
@@ -38,33 +38,34 @@ numba_cuda/numba/cuda/printimpl.py,sha256=Y1BCQ7EgO2wQ7O6LibNVYBG3tmjVTvmURATW40
 numba_cuda/numba/cuda/random.py,sha256=khX8iDdde_RTUPWhAqrxZacHRQAorFr7BokPuxRWzrg,10456
 numba_cuda/numba/cuda/simulator_init.py,sha256=W_bPRtmPGOQVuiprbgt7ENnnnELv_LPCeLDIsfsvFZ8,460
 numba_cuda/numba/cuda/stubs.py,sha256=W3tozv4ganMnfbdFqyPjgQXYeX8GQhwx_xXgv8jk6iM,22270
-numba_cuda/numba/cuda/target.py,sha256=
+numba_cuda/numba/cuda/target.py,sha256=hBflzmxCGlmTugWT1sYhZj9f4HkQAMK2RQ9lO85pMW4,17052
 numba_cuda/numba/cuda/testing.py,sha256=E0wP2vfno1yWsl0v1zg31kpbU8FrKxTF-5y9Iv4WjA4,6412
 numba_cuda/numba/cuda/types.py,sha256=WVfjcly_VUpG9FfKueiEPzZm2NV8Hg0XAFg3bNzPdVc,1314
 numba_cuda/numba/cuda/ufuncs.py,sha256=txw27IxG80W1Yo7e-XwL2AMcQo0fMnxMjBIMy-n5pCo,23317
 numba_cuda/numba/cuda/vector_types.py,sha256=s18dY0IUpT-RcaBvQsa_zEbYuuL2IT0Vh6afCeccwmQ,6750
 numba_cuda/numba/cuda/vectorizers.py,sha256=u_0EzaD5tqVH8uOz4Gmqn3FgPC1rckwDAQuROm0BXm8,8915
 numba_cuda/numba/cuda/cudadrv/__init__.py,sha256=0TL4MZcJXUoo9qA7uu0vLv7eHrXRerVmyfi7O149ITw,199
-numba_cuda/numba/cuda/cudadrv/devicearray.py,sha256=
+numba_cuda/numba/cuda/cudadrv/devicearray.py,sha256=06kM7iFcx1TYiFhs1o9r1kyoA3k5yS7mFAdZDf6nrxA,31215
 numba_cuda/numba/cuda/cudadrv/devices.py,sha256=6SneNmoq83gue0txFWWx4A65vViAa8xA06FzkApoqAk,7992
 numba_cuda/numba/cuda/cudadrv/driver.py,sha256=uPjKugdtSJfIwVSAo3KgkvQhctbABkQphHAfcq6Q7ec,110892
 numba_cuda/numba/cuda/cudadrv/drvapi.py,sha256=52ms3X6hfPaQB8E1jb6g7QKqRvHzBMlDQ-V2DM1rXxQ,17178
 numba_cuda/numba/cuda/cudadrv/dummyarray.py,sha256=nXRngdr-k3h_BNGQuJUxmp89yGNWxqEDJedpwDPEZ44,14209
 numba_cuda/numba/cuda/cudadrv/enums.py,sha256=37zZmyrLvT-7R8wWtwKJkQhN8siLMxsDGiA3_NQ-yx8,23740
 numba_cuda/numba/cuda/cudadrv/error.py,sha256=zEIryW6aIy8GG4ypmTliB6RgY4Gy2n8ckz7I6W99LUM,524
-numba_cuda/numba/cuda/cudadrv/libs.py,sha256=
+numba_cuda/numba/cuda/cudadrv/libs.py,sha256=Gk9zQ1CKcsZsWl-_9QneXeP9VH5q5R1I3Cx043UOytk,7240
 numba_cuda/numba/cuda/cudadrv/linkable_code.py,sha256=Q_YTv0apBo9t8pkMlKrthPPSVeLd376ZTmVDF5NtVVo,1328
 numba_cuda/numba/cuda/cudadrv/mappings.py,sha256=-dTPHvAkDjdH6vS5OjgrB71AFuqKO6CRgf7hpOk2wiw,802
 numba_cuda/numba/cuda/cudadrv/ndarray.py,sha256=HtULWWFyDlgqvrH5459yyPTvU4UbUo2DSdtcNfvbH00,473
-numba_cuda/numba/cuda/cudadrv/nvrtc.py,sha256=
+numba_cuda/numba/cuda/cudadrv/nvrtc.py,sha256=rv-XQo0snJj4xyEbfeBqivziIxCwMOQzIIEOnvLQaJI,9825
 numba_cuda/numba/cuda/cudadrv/nvvm.py,sha256=v2hJJTAQeRmoG59-hnhgMEp5BSVA73QHtEoy636VKao,24107
 numba_cuda/numba/cuda/cudadrv/rtapi.py,sha256=WdeUoWzsYNYodx8kMRLVIjnNs0QzwpCihd2Q0AaqItE,226
 numba_cuda/numba/cuda/cudadrv/runtime.py,sha256=Tj9ACrzQqNmDSO6xfpzw12EsQknSywQ-ZGuWMbDdHnQ,4255
 numba_cuda/numba/cuda/kernels/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 numba_cuda/numba/cuda/kernels/reduction.py,sha256=fQnaWtoNB2yp143MNbE1DujqFIYy0KV_2moQVvbaROU,9362
 numba_cuda/numba/cuda/kernels/transpose.py,sha256=5FSu-nbTfhintxwfU-bjT2px2otQF5QkKH-JPDDWq_k,2061
+numba_cuda/numba/cuda/runtime/nrt.cu,sha256=i8Xcf-x84n3uNPzs_xak4c_sLHOH91ast2aE6DKKf9Q,5497
 numba_cuda/numba/cuda/simulator/__init__.py,sha256=crW0VQ_8e7DMRSHKoAIziZ37ea5mpbh_49tR9M3d5YY,1610
-numba_cuda/numba/cuda/simulator/api.py,sha256=
+numba_cuda/numba/cuda/simulator/api.py,sha256=K_fX-w9X4grGx2IAp0XlBW9rth5l7wibMwinQvkE7Jc,3237
 numba_cuda/numba/cuda/simulator/compiler.py,sha256=eXnvmzSKzIZZzBz6ZFJ-vMNyRAgqbCiB-AO5IJXuUyM,232
 numba_cuda/numba/cuda/simulator/kernel.py,sha256=GO4HuXBlEstJtgiuMRB_6hjNizBSINR9_hganvMjHH4,10593
 numba_cuda/numba/cuda/simulator/kernelapi.py,sha256=ZYC_XQqnA51TJCPlAjVHHkOjXeww0yUP6JZeibXw3T8,12397
@@ -175,7 +176,7 @@ numba_cuda/numba/cuda/tests/cudapy/test_operator.py,sha256=0nJej4D898_JU-jhlif44
 numba_cuda/numba/cuda/tests/cudapy/test_optimization.py,sha256=SvqRsSFgcGxkFDZS-kul5B-mi8GxINTS98uUzAy4dhw,2647
 numba_cuda/numba/cuda/tests/cudapy/test_overload.py,sha256=u4yUDVFcV9E3NWMlNjM81e3IW4KaIkcDtXig8JYevsw,8538
 numba_cuda/numba/cuda/tests/cudapy/test_powi.py,sha256=TI82rYRnkSnwv9VN6PMpBnr9JqMJ_F3HhH4cKY6O8tw,3276
-numba_cuda/numba/cuda/tests/cudapy/test_print.py,sha256=
+numba_cuda/numba/cuda/tests/cudapy/test_print.py,sha256=r2xmMNx80_ANi3uFB3CQt3AHAXG_JdhStY1S796hlK0,4466
 numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py,sha256=R88Vfgg3mSAZ0Jy6WT6dJNmkFTsxnVnEmO7XqpqyxuU,986
 numba_cuda/numba/cuda/tests/cudapy/test_random.py,sha256=rLw7_8a7BBhD_8GNqMal0l_AbWXzLs_Q0hC6_X8gdjA,3467
 numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py,sha256=grR64kdRlsLcR0K3IxSfI2VKsTrrqxsXuROOpvj-6nw,18769
@@ -186,6 +187,7 @@ numba_cuda/numba/cuda/tests/cudapy/test_serialize.py,sha256=alE5-lTwbjz3Tv6OvQPS
 numba_cuda/numba/cuda/tests/cudapy/test_slicing.py,sha256=bAh_sIk5V9_0_dOVGdzmyjwZkHMLjEbQuEI4e5zRMoU,903
 numba_cuda/numba/cuda/tests/cudapy/test_sm.py,sha256=kh1F0wwQ2_bd54Q4GUX99y2oiWHQwBpyC__ckk-jiTU,14575
 numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py,sha256=bTXDjU94ezo6Bz_lktlPyowTcJHBOWfy7-nJB9e-B_s,7231
+numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py,sha256=alwSPm2xLvuYEwzpuCE6UUkOp6xcEoVqZjyJk3VJjtY,1743
 numba_cuda/numba/cuda/tests/cudapy/test_sync.py,sha256=Y851UqNkT80U9q_C05SQfvPRCY7jjRARHOMk6g0lU4Y,7837
 numba_cuda/numba/cuda/tests/cudapy/test_transpose.py,sha256=JAQX2EUHwlpKCfJDGspaldmsIRbHxnXpsNUrvRrnIEE,3134
 numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py,sha256=-ehvkxelr45aT8sUNL9Hq8cn2GU_K4GL1yWeX-rHqEM,9680
@@ -227,12 +229,15 @@ numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py,sha256=o4DYocyHK7
 numba_cuda/numba/cuda/tests/nocuda/test_import.py,sha256=teiL8rpFGQOh41kyBSSNHHFYAJYgpdStXkTcpK4_fxo,1641
 numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py,sha256=7kJOPHEcrjy_kTA9Ym-iT_B972bgFRu3UkRtwIgWtuI,7948
 numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py,sha256=n0_-xFaw6QqiZbhe55oy7lnEeOwqTvA55p5EUFiTpNw,2006
+numba_cuda/numba/cuda/tests/nrt/__init__.py,sha256=43EXdiXXRBd6yIcVGMrU9F_EJCD9Uw3mzOP3SB53AEE,260
+numba_cuda/numba/cuda/tests/nrt/mock_numpy.py,sha256=Qtn52GoKZ_ydre3oqkLWVdImC37tuPClUy4uHSutaJo,1568
+numba_cuda/numba/cuda/tests/nrt/test_nrt.py,sha256=Ox6ei2DldvSSS-CndTXRxLnsvWdteOQNgn6GvKHB244,2789
 numba_cuda/numba/cuda/tests/test_binary_generation/Makefile,sha256=OFC_6irwscCNGAyJJKq7fTchzWosCUuiVWU02m0bcUQ,2248
-numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py,sha256=
+numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py,sha256=V0raLZLGSiWbE_K-JluI0CnmNkXbhlMVj-TH7P1OV8E,5014
 numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu,sha256=cUf-t6ZM9MK_x7X_aKwsrKW1LdR97XcpR-qnYr5faOE,453
 numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu,sha256=q3oxZziT8KDodeNcEBiWULH6vMrHCWucmJmtrg8C0d0,128
-numba_cuda-0.0.
-numba_cuda-0.0.
-numba_cuda-0.0.
-numba_cuda-0.0.
-numba_cuda-0.0.
+numba_cuda-0.0.19.dist-info/LICENSE,sha256=eHeYE-XjASmwbxfsP5AImgfzRwZurZGqH1f6OFwJ4io,1326
+numba_cuda-0.0.19.dist-info/METADATA,sha256=GAWms3JiCaxTzo4WMk-5h31_Oqo8YFPgekLKFR_YfqA,1393
+numba_cuda-0.0.19.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+numba_cuda-0.0.19.dist-info/top_level.txt,sha256=C50SsH-8tXDmt7I0Y3nlJYhS5s6pqWflCPdobe9vx2M,11
+numba_cuda-0.0.19.dist-info/RECORD,,

{numba_cuda-0.0.18.dist-info → numba_cuda-0.0.19.dist-info}/LICENSE
File without changes

{numba_cuda-0.0.18.dist-info → numba_cuda-0.0.19.dist-info}/top_level.txt
File without changes