numba-cuda 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- numba_cuda/VERSION +1 -1
- numba_cuda/numba/cuda/cudadrv/devicearray.py +4 -1
- numba_cuda/numba/cuda/cudadrv/driver.py +1 -20
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +5 -1
- numba_cuda/numba/cuda/dispatcher.py +9 -1
- numba_cuda/numba/cuda/runtime/__init__.py +1 -0
- numba_cuda/numba/cuda/runtime/memsys.cu +94 -0
- numba_cuda/numba/cuda/runtime/memsys.cuh +17 -0
- numba_cuda/numba/cuda/runtime/nrt.cu +19 -22
- numba_cuda/numba/cuda/runtime/nrt.py +318 -0
- numba_cuda/numba/cuda/tests/__init__.py +1 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +31 -0
- numba_cuda/numba/cuda/tests/nrt/mock_numpy.py +105 -1
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +162 -40
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +114 -0
- numba_cuda/numba/cuda/tests/support.py +11 -0
- numba_cuda/numba/cuda/utils.py +22 -0
- {numba_cuda-0.3.0.dist-info → numba_cuda-0.4.0.dist-info}/METADATA +2 -2
- {numba_cuda-0.3.0.dist-info → numba_cuda-0.4.0.dist-info}/RECORD +22 -15
- {numba_cuda-0.3.0.dist-info → numba_cuda-0.4.0.dist-info}/WHEEL +1 -1
- {numba_cuda-0.3.0.dist-info → numba_cuda-0.4.0.dist-info}/LICENSE +0 -0
- {numba_cuda-0.3.0.dist-info → numba_cuda-0.4.0.dist-info}/top_level.txt +0 -0
numba_cuda/VERSION
CHANGED
@@ -1 +1 @@
-0.3.0
+0.4.0

numba_cuda/numba/cuda/cudadrv/devicearray.py
CHANGED
@@ -570,10 +570,13 @@ class DeviceNDArray(DeviceNDArrayBase):
         '''
         return self._dummy.is_c_contig

-    def __array__(self, dtype=None):
+    def __array__(self, dtype=None, copy=None):
         """
         :return: an `numpy.ndarray`, so copies to the host.
         """
+        if copy is False:
+            msg = "`copy=False` is not supported. A copy is always created."
+            raise ValueError(msg)
         if dtype:
             return self.copy_to_host().__array__(dtype)
         else:

numba_cuda/numba/cuda/cudadrv/driver.py
CHANGED
@@ -18,7 +18,6 @@ import functools
 import warnings
 import logging
 import threading
-import traceback
 import asyncio
 import pathlib
 import subprocess
@@ -40,6 +39,7 @@ from .drvapi import API_PROTOTYPES
 from .drvapi import cu_occupancy_b2d_size, cu_stream_callback_pyobj, cu_uuid
 from .mappings import FILE_EXTENSION_MAP
 from .linkable_code import LinkableCode, LTOIR, Fatbin, Object
+from numba.cuda.utils import _readenv
 from numba.cuda.cudadrv import enums, drvapi, nvrtc

 try:
@@ -66,25 +66,6 @@ _py_decref.argtypes = [ctypes.py_object]
 _py_incref.argtypes = [ctypes.py_object]


-def _readenv(name, ctor, default):
-    value = os.environ.get(name)
-    if value is None:
-        return default() if callable(default) else default
-    try:
-        if ctor is bool:
-            return value.lower() in {'1', "true"}
-        return ctor(value)
-    except Exception:
-        warnings.warn(
-            f"Environment variable '{name}' is defined but its associated "
-            f"value '{value}' could not be parsed.\n"
-            "The parse failed with exception:\n"
-            f"{traceback.format_exc()}",
-            RuntimeWarning
-        )
-        return default
-
-
 _MVC_ERROR_MESSAGE = (
     "Minor version compatibility requires ptxcompiler and cubinlinker packages "
     "to be available"

numba_cuda/numba/cuda/cudadrv/nvrtc.py
CHANGED
@@ -266,7 +266,11 @@ def compile(src, name, cc, ltoir=False):
     cudadrv_path = os.path.dirname(os.path.abspath(__file__))
     numba_cuda_path = os.path.dirname(cudadrv_path)
     numba_include = f'-I{numba_cuda_path}'
-    options = [arch, *cuda_include, numba_include, '-rdc', 'true']
+
+    nrt_path = os.path.join(numba_cuda_path, "runtime")
+    nrt_include = f'-I{nrt_path}'
+
+    options = [arch, *cuda_include, numba_include, nrt_include, '-rdc', 'true']

     if ltoir:
         options.append("-dlto")

numba_cuda/numba/cuda/dispatcher.py
CHANGED
@@ -21,6 +21,7 @@ from numba.cuda.descriptor import cuda_target
 from numba.cuda.errors import (missing_launch_config_msg,
                                normalize_kernel_dimensions)
 from numba.cuda import types as cuda_types
+from numba.cuda.runtime.nrt import rtsys

 from numba import cuda
 from numba import _dispatcher
@@ -253,7 +254,14 @@ class _Kernel(serialize.ReduceMixin):
         """
         Force binding to current CUDA context
         """
-        self._codelibrary.get_cufunc()
+        cufunc = self._codelibrary.get_cufunc()
+
+        if hasattr(self, "target_context") and self.target_context.enable_nrt:
+            rtsys.ensure_initialized()
+            rtsys.set_memsys_to_module(cufunc.module)
+            # We don't know which stream the kernel will be launched on, so
+            # we force synchronize here.
+            cuda.synchronize()

     @property
     def regs_per_thread(self):

numba_cuda/numba/cuda/runtime/__init__.py
ADDED
@@ -0,0 +1 @@
+from numba.cuda.runtime.nrt import rtsys  # noqa: F401

numba_cuda/numba/cuda/runtime/memsys.cu
ADDED
@@ -0,0 +1,94 @@
+#include "memsys.cuh"
+
+__device__ size_t memsys_size = sizeof(NRT_MemSys);
+
+namespace detail
+{
+void __device__ check_memsys()
+{
+  if (TheMSys == nullptr)
+  {
+    assert(false && "TheMSys pointer is null. Please use NRT_MemSys_set to set pointer first.");
+  }
+}
+}
+
+extern "C" __global__ void NRT_MemSys_set(NRT_MemSys *memsys_ptr)
+{
+  TheMSys = memsys_ptr;
+}
+
+extern "C" __global__ void NRT_MemSys_read(uint64_t *managed_memsys)
+{
+  detail::check_memsys();
+  managed_memsys[0] = TheMSys->stats.alloc;
+  managed_memsys[1] = TheMSys->stats.free;
+  managed_memsys[2] = TheMSys->stats.mi_alloc;
+  managed_memsys[3] = TheMSys->stats.mi_free;
+}
+
+extern "C" __global__ void NRT_MemSys_read_alloc(uint64_t *managed_result)
+{
+  detail::check_memsys();
+  managed_result[0] = TheMSys->stats.alloc;
+}
+
+extern "C" __global__ void NRT_MemSys_read_free(uint64_t *managed_result)
+{
+  detail::check_memsys();
+  managed_result[0] = TheMSys->stats.free;
+}
+
+extern "C" __global__ void NRT_MemSys_read_mi_alloc(uint64_t *managed_result)
+{
+  detail::check_memsys();
+  managed_result[0] = TheMSys->stats.mi_alloc;
+}
+
+extern "C" __global__ void NRT_MemSys_read_mi_free(uint64_t *managed_result)
+{
+  detail::check_memsys();
+  managed_result[0] = TheMSys->stats.mi_free;
+}
+
+extern "C" __global__ void NRT_MemSys_init(void)
+{
+  detail::check_memsys();
+  TheMSys->stats.enabled = false;
+  TheMSys->stats.alloc = 0;
+  TheMSys->stats.free = 0;
+  TheMSys->stats.mi_alloc = 0;
+  TheMSys->stats.mi_free = 0;
+}
+
+extern "C" __global__ void NRT_MemSys_enable_stats(void)
+{
+  detail::check_memsys();
+  TheMSys->stats.enabled = true;
+}
+
+extern "C" __global__ void NRT_MemSys_disable_stats(void)
+{
+  detail::check_memsys();
+  TheMSys->stats.enabled = false;
+}
+
+extern "C" __global__ void NRT_MemSys_stats_enabled(uint8_t *enabled)
+{
+  detail::check_memsys();
+  *enabled = static_cast<uint8_t>(TheMSys->stats.enabled);
+}
+
+extern "C" __global__ void NRT_MemSys_print(void)
+{
+  if (TheMSys != nullptr)
+  {
+    printf("TheMSys->stats.enabled %d\n", TheMSys->stats.enabled);
+    printf("TheMSys->stats.alloc %lu\n", TheMSys->stats.alloc.load());
+    printf("TheMSys->stats.free %lu\n", TheMSys->stats.free.load());
+    printf("TheMSys->stats.mi_alloc %lu\n", TheMSys->stats.mi_alloc.load());
+    printf("TheMSys->stats.mi_free %lu\n", TheMSys->stats.mi_free.load());
+  } else {
+    printf("TheMsys is null.\n");
+  }
+}

numba_cuda/numba/cuda/runtime/memsys.cuh
ADDED
@@ -0,0 +1,17 @@
+#include <cuda/atomic>
+
+// Globally needed variables
+struct NRT_MemSys {
+  struct {
+    bool enabled;
+    cuda::atomic<size_t, cuda::thread_scope_device> alloc;
+    cuda::atomic<size_t, cuda::thread_scope_device> free;
+    cuda::atomic<size_t, cuda::thread_scope_device> mi_alloc;
+    cuda::atomic<size_t, cuda::thread_scope_device> mi_free;
+  } stats;
+};
+
+/* The Memory System object */
+__device__ NRT_MemSys* TheMSys;
+
+extern "C" __global__ void NRT_MemSys_set(NRT_MemSys *memsys_ptr);

numba_cuda/numba/cuda/runtime/nrt.cu
CHANGED
@@ -3,6 +3,8 @@

 #include <cuda/atomic>

+#include "memsys.cuh"
+
 typedef void (*NRT_dtor_function)(void* ptr, size_t size, void* info);
 typedef void (*NRT_dealloc_func)(void* ptr, void* dealloc_info);

@@ -18,29 +20,21 @@ struct MemInfo {
 };
 }

-// Globally needed variables
-struct NRT_MemSys {
-  struct {
-    bool enabled;
-    cuda::atomic<size_t, cuda::thread_scope_device> alloc;
-    cuda::atomic<size_t, cuda::thread_scope_device> free;
-    cuda::atomic<size_t, cuda::thread_scope_device> mi_alloc;
-    cuda::atomic<size_t, cuda::thread_scope_device> mi_free;
-  } stats;
-};
+extern "C" __global__ void NRT_MemSys_set(NRT_MemSys *memsys_ptr)
+{
+  TheMSys = memsys_ptr;
+}

 static __device__ void *nrt_allocate_meminfo_and_data_align(size_t size, unsigned align, NRT_MemInfo **mi);
 static __device__ void *nrt_allocate_meminfo_and_data(size_t size, NRT_MemInfo **mi_out);
 extern "C" __device__ void* NRT_Allocate_External(size_t size);

-/* The Memory System object */
-__device__ NRT_MemSys* TheMSys;
-
 extern "C" __device__ void* NRT_Allocate(size_t size)
 {
   void* ptr = NULL;
   ptr = malloc(size);
-
+  if (TheMSys && TheMSys->stats.enabled) {
+    TheMSys->stats.alloc.fetch_add(1, cuda::memory_order_relaxed); }
   return ptr;
 }

@@ -49,14 +43,14 @@ extern "C" __device__ void NRT_MemInfo_init(NRT_MemInfo* mi,
                                             size_t size,
                                             NRT_dtor_function dtor,
                                             void* dtor_info)
-                                            // NRT_MemSys* TheMSys)
 {
   mi->refct = 1; /* starts with 1 refct */
   mi->dtor = dtor;
   mi->dtor_info = dtor_info;
   mi->data = data;
   mi->size = size;
-
+  if (TheMSys && TheMSys->stats.enabled) {
+    TheMSys->stats.mi_alloc.fetch_add(1, cuda::memory_order_relaxed); }
 }

 extern "C"
@@ -71,7 +65,8 @@ __device__ NRT_MemInfo* NRT_MemInfo_new(
 extern "C" __device__ void NRT_Free(void* ptr)
 {
   free(ptr);
-
+  if (TheMSys && TheMSys->stats.enabled) {
+    TheMSys->stats.free.fetch_add(1, cuda::memory_order_relaxed); }
 }

 extern "C" __device__ void NRT_dealloc(NRT_MemInfo* mi)
@@ -82,8 +77,10 @@ extern "C" __device__ void NRT_dealloc(NRT_MemInfo* mi)
 extern "C" __device__ void NRT_MemInfo_destroy(NRT_MemInfo* mi)
 {
   NRT_dealloc(mi);
-
+  if (TheMSys && TheMSys->stats.enabled) {
+    TheMSys->stats.mi_free.fetch_add(1, cuda::memory_order_relaxed); }
 }
+
 extern "C" __device__ void NRT_MemInfo_call_dtor(NRT_MemInfo* mi)
 {
   if (mi->dtor) /* We have a destructor */
@@ -158,10 +155,10 @@ extern "C" __device__ void* NRT_Allocate_External(size_t size) {
   ptr = malloc(size);
   //NRT_Debug(nrt_debug_print("NRT_Allocate_External bytes=%zu ptr=%p\n", size, ptr));

-
-
-
-
+  if (TheMSys && TheMSys->stats.enabled)
+  {
+    TheMSys->stats.alloc.fetch_add(1, cuda::memory_order_relaxed);
+  }
   return ptr;
 }


numba_cuda/numba/cuda/runtime/nrt.py
ADDED
@@ -0,0 +1,318 @@
+import ctypes
+import os
+from functools import wraps
+import numpy as np
+
+from numba import cuda, config
+from numba.core.runtime.nrt import _nrt_mstats
+from numba.cuda.cudadrv.driver import Linker, driver, launch_kernel
+from numba.cuda.cudadrv import devices
+from numba.cuda.api import get_current_device
+from numba.cuda.utils import _readenv
+
+
+# Check environment variable or config for NRT statistics enablement
+NRT_STATS = (
+    _readenv("NUMBA_CUDA_NRT_STATS", bool, False) or
+    getattr(config, "NUMBA_CUDA_NRT_STATS", False)
+)
+if not hasattr(config, "NUMBA_CUDA_NRT_STATS"):
+    config.CUDA_NRT_STATS = NRT_STATS
+
+
+# Check environment variable or config for NRT enablement
+ENABLE_NRT = (
+    _readenv("NUMBA_CUDA_ENABLE_NRT", bool, False) or
+    getattr(config, "NUMBA_CUDA_ENABLE_NRT", False)
+)
+if not hasattr(config, "NUMBA_CUDA_ENABLE_NRT"):
+    config.CUDA_ENABLE_NRT = ENABLE_NRT
+
+
+# Protect method to ensure NRT memory allocation and initialization
+def _alloc_init_guard(method):
+    """
+    Ensure NRT memory allocation and initialization before running the method
+    """
+    @wraps(method)
+    def wrapper(self, *args, **kwargs):
+        self.ensure_allocated()
+        self.ensure_initialized()
+        return method(self, *args, **kwargs)
+    return wrapper
+
+
+class _Runtime:
+    """Singleton class for Numba CUDA runtime"""
+    _instance = None
+
+    def __new__(cls, *args, **kwargs):
+        if cls._instance is None:
+            cls._instance = super(_Runtime, cls).__new__(cls, *args, **kwargs)
+        return cls._instance
+
+    def __init__(self):
+        """Initialize memsys module and variable"""
+        self._memsys_module = None
+        self._memsys = None
+        self._initialized = False
+
+    def _compile_memsys_module(self):
+        """
+        Compile memsys.cu and create a module from it in the current context
+        """
+        # Define the path for memsys.cu
+        memsys_mod = os.path.join(
+            os.path.dirname(os.path.abspath(__file__)),
+            "memsys.cu"
+        )
+        cc = get_current_device().compute_capability
+
+        # Create a new linker instance and add the cu file
+        linker = Linker.new(cc=cc)
+        linker.add_cu_file(memsys_mod)
+
+        # Complete the linker and create a module from it
+        cubin = linker.complete()
+        ctx = devices.get_context()
+        module = ctx.create_module_image(cubin)
+
+        # Set the memsys module
+        self._memsys_module = module
+
+    def ensure_allocated(self, stream=None):
+        """
+        If memsys is not allocated, allocate it; otherwise, perform a no-op
+        """
+        if self._memsys is not None:
+            return
+
+        # Allocate the memsys
+        self.allocate(stream)
+
+    def allocate(self, stream=None):
+        """
+        Allocate memsys on global memory
+        """
+        from numba.cuda import device_array
+
+        # Check if memsys module is defined
+        if self._memsys_module is None:
+            # Compile the memsys module if not defined
+            self._compile_memsys_module()
+
+        # Allocate space for NRT_MemSys
+        ptr, nbytes = self._memsys_module.get_global_symbol("memsys_size")
+        memsys_size = ctypes.c_uint64()
+        driver.cuMemcpyDtoH(ctypes.addressof(memsys_size),
+                            ptr.device_ctypes_pointer, nbytes)
+        self._memsys = device_array(
+            (memsys_size.value,), dtype="i1", stream=stream)
+        self.set_memsys_to_module(self._memsys_module, stream=stream)
+
+    def _single_thread_launch(self, module, stream, name, params=()):
+        """
+        Launch the specified kernel with only 1 thread
+        """
+        if stream is None:
+            stream = cuda.default_stream()
+
+        func = module.get_function(name)
+        launch_kernel(
+            func.handle,
+            1, 1, 1,
+            1, 1, 1,
+            0,
+            stream.handle,
+            params,
+            cooperative=False
+        )
+
+    def ensure_initialized(self, stream=None):
+        """
+        If memsys is not initialized, initialize memsys
+        """
+        if self._initialized:
+            return
+
+        # Initialize the memsys
+        self.initialize(stream)
+
+    def initialize(self, stream=None):
+        """
+        Launch memsys initialization kernel
+        """
+        self.ensure_allocated()
+
+        self._single_thread_launch(
+            self._memsys_module, stream, "NRT_MemSys_init")
+        self._initialized = True
+
+        if config.CUDA_NRT_STATS:
+            self.memsys_enable_stats()
+
+    @_alloc_init_guard
+    def memsys_enable_stats(self, stream=None):
+        """
+        Enable memsys statistics
+        """
+        self._single_thread_launch(
+            self._memsys_module, stream, "NRT_MemSys_enable_stats")
+
+    @_alloc_init_guard
+    def memsys_disable_stats(self, stream=None):
+        """
+        Disable memsys statistics
+        """
+        self._single_thread_launch(
+            self._memsys_module, stream, "NRT_MemSys_disable_stats")
+
+    @_alloc_init_guard
+    def memsys_stats_enabled(self, stream=None):
+        """
+        Return a boolean indicating whether memsys is enabled. Synchronizes
+        context
+        """
+        enabled_ar = cuda.managed_array(1, np.uint8)
+
+        self._single_thread_launch(
+            self._memsys_module,
+            stream,
+            "NRT_MemSys_stats_enabled",
+            (enabled_ar.device_ctypes_pointer,)
+        )
+
+        cuda.synchronize()
+        return bool(enabled_ar[0])
+
+    @_alloc_init_guard
+    def _copy_memsys_to_host(self, stream):
+        """
+        Copy all statistics of memsys to the host
+        """
+        dt = np.dtype([
+            ('alloc', np.uint64),
+            ('free', np.uint64),
+            ('mi_alloc', np.uint64),
+            ('mi_free', np.uint64)
+        ])
+
+        stats_for_read = cuda.managed_array(1, dt)
+
+        self._single_thread_launch(
+            self._memsys_module,
+            stream,
+            "NRT_MemSys_read",
+            [stats_for_read.device_ctypes_pointer]
+        )
+        cuda.synchronize()
+
+        return stats_for_read[0]
+
+    @_alloc_init_guard
+    def get_allocation_stats(self, stream=None):
+        """
+        Get the allocation statistics
+        """
+        enabled = self.memsys_stats_enabled(stream)
+        if not enabled:
+            raise RuntimeError("NRT stats are disabled.")
+        memsys = self._copy_memsys_to_host(stream)
+        return _nrt_mstats(
+            alloc=memsys["alloc"],
+            free=memsys["free"],
+            mi_alloc=memsys["mi_alloc"],
+            mi_free=memsys["mi_free"]
+        )
+
+    @_alloc_init_guard
+    def _get_single_stat(self, stat, stream=None):
+        """
+        Get a single stat from the memsys
+        """
+        got = cuda.managed_array(1, np.uint64)
+        self._single_thread_launch(
+            self._memsys_module,
+            stream,
+            f"NRT_MemSys_read_{stat}",
+            [got.device_ctypes_pointer]
+        )
+
+        cuda.synchronize()
+        return got[0]
+
+    @_alloc_init_guard
+    def memsys_get_stats_alloc(self, stream=None):
+        """
+        Get the allocation statistic
+        """
+        enabled = self.memsys_stats_enabled(stream)
+        if not enabled:
+            raise RuntimeError("NRT stats are disabled.")
+
+        return self._get_single_stat("alloc")
+
+    @_alloc_init_guard
+    def memsys_get_stats_free(self, stream=None):
+        """
+        Get the free statistic
+        """
+        enabled = self.memsys_stats_enabled(stream)
+        if not enabled:
+            raise RuntimeError("NRT stats are disabled.")
+
+        return self._get_single_stat("free")
+
+    @_alloc_init_guard
+    def memsys_get_stats_mi_alloc(self, stream=None):
+        """
+        Get the mi alloc statistic
+        """
+        enabled = self.memsys_stats_enabled(stream)
+        if not enabled:
+            raise RuntimeError("NRT stats are disabled.")
+
+        return self._get_single_stat("mi_alloc")
+
+    @_alloc_init_guard
+    def memsys_get_stats_mi_free(self, stream=None):
+        """
+        Get the mi free statistic
+        """
+        enabled = self.memsys_stats_enabled(stream)
+        if not enabled:
+            raise RuntimeError("NRT stats are disabled.")
+
+        return self._get_single_stat("mi_free")
+
+    def set_memsys_to_module(self, module, stream=None):
+        """
+        Set the memsys module. The module must contain `NRT_MemSys_set` kernel,
+        and declare a pointer to NRT_MemSys structure.
+        """
+        if self._memsys is None:
+            raise RuntimeError(
+                "Please allocate NRT Memsys first before setting to module.")
+
+        self._single_thread_launch(
+            module,
+            stream,
+            "NRT_MemSys_set",
+            [self._memsys.device_ctypes_pointer,]
+        )
+
+    @_alloc_init_guard
+    def print_memsys(self, stream=None):
+        """
+        Print the current statistics of memsys, for debugging purposes
+        """
+        cuda.synchronize()
+        self._single_thread_launch(
+            self._memsys_module,
+            stream,
+            "NRT_MemSys_print"
+        )
+
+
+# Create an instance of the runtime
+rtsys = _Runtime()

numba_cuda/numba/cuda/tests/__init__.py
CHANGED
@@ -49,6 +49,7 @@ def load_tests(loader, tests, pattern):
     if gpus and gpus[0].compute_capability >= (2, 0):
         suite.addTests(load_testsuite(loader, join(this_dir, 'cudadrv')))
         suite.addTests(load_testsuite(loader, join(this_dir, 'cudapy')))
+        suite.addTests(load_testsuite(loader, join(this_dir, 'nrt')))
         suite.addTests(load_testsuite(loader, join(this_dir,
                                                    'doc_examples')))
     else:

numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py
CHANGED
@@ -4,6 +4,7 @@ from numba.cuda.cudadrv import devicearray
 from numba import cuda
 from numba.cuda.testing import unittest, CUDATestCase
 from numba.cuda.testing import skip_on_cudasim
+from numba.tests.support import IS_NUMPY_2


 class TestCudaNDArray(CUDATestCase):
@@ -456,6 +457,36 @@ class TestCudaNDArray(CUDATestCase):
         dev_array_from_host.copy_to_device(dev_array)


+class TestArrayMethod(CUDATestCase):
+    """Tests of the __array__() method via np.array"""
+
+    def test_np_array(self):
+        dev_array = cuda.to_device(np.asarray([1.0, 2.0, 3.0]))
+        host_array = np.array(dev_array)
+        np.testing.assert_equal(dev_array.copy_to_host(), host_array)
+
+    def test_np_array_dtype(self):
+        dtype = np.int32
+        dev_array = cuda.to_device(np.asarray([1.0, 2.0, 3.0]))
+        host_array = np.array(dev_array, dtype=dtype)
+        np.testing.assert_equal(
+            host_array,
+            dev_array.copy_to_host().astype(dtype)
+        )
+
+    @unittest.skipUnless(IS_NUMPY_2, "NumPy 1.x does not pass copy kwarg")
+    def test_np_array_copy_false(self):
+        dev_array = cuda.to_device(np.asarray([1.0, 2.0, 3.0]))
+        with self.assertRaisesRegex(ValueError, "`copy=False` is not"):
+            np.array(dev_array, copy=False)
+
+    @unittest.skipUnless(IS_NUMPY_2, "NumPy 1.x does not pass copy kwarg")
+    def test_np_array_copy_true(self):
+        dev_array = cuda.to_device(np.asarray([1.0, 2.0, 3.0]))
+        host_array = np.array(dev_array)
+        np.testing.assert_equal(dev_array.copy_to_host(), host_array)
+
+
 class TestRecarray(CUDATestCase):
     def test_recarray(self):
         # From issue #4111

numba_cuda/numba/cuda/tests/nrt/mock_numpy.py
CHANGED
@@ -1,8 +1,12 @@
+import math
+
+import numpy as np

 from numba.core import errors, types
 from numba.core.extending import overload
 from numba.np.arrayobj import (_check_const_str_dtype, is_nonelike,
-                               ty_parse_dtype, ty_parse_shape, numpy_empty_nd)
+                               ty_parse_dtype, ty_parse_shape, numpy_empty_nd,
+                               numpy_empty_like_nd)


 # Typical tests for allocation use array construction (e.g. np.zeros, np.empty,
@@ -20,6 +24,18 @@ def cuda_empty(shape, dtype):
     pass


+def cuda_empty_like(arr):
+    pass
+
+
+def cuda_arange(start):
+    pass
+
+
+def cuda_ones(shape):
+    pass
+
+
 @overload(cuda_empty)
 def ol_cuda_empty(shape, dtype):
     _check_const_str_dtype("empty", dtype)
@@ -40,3 +56,91 @@ def ol_cuda_empty(shape, dtype):
     else:
         msg = f"Cannot parse input types to function np.empty({shape}, {dtype})"
         raise errors.TypingError(msg)
+
+
+@overload(cuda_empty_like)
+def ol_cuda_empty_like(arr):
+
+    if isinstance(arr, types.Array):
+        nb_dtype = arr.dtype
+    else:
+        nb_dtype = arr
+
+    if isinstance(arr, types.Array):
+        layout = arr.layout if arr.layout != 'A' else 'C'
+        retty = arr.copy(dtype=nb_dtype, layout=layout, readonly=False)
+    else:
+        retty = types.Array(nb_dtype, 0, 'C')
+
+    def impl(arr):
+        dtype = None
+        return numpy_empty_like_nd(arr, dtype, retty)
+    return impl
+
+
+def _arange_dtype(*args):
+    bounds = [a for a in args if not isinstance(a, types.NoneType)]
+
+    if any(isinstance(a, types.Complex) for a in bounds):
+        dtype = types.complex128
+    elif any(isinstance(a, types.Float) for a in bounds):
+        dtype = types.float64
+    else:
+        # `np.arange(10).dtype` is always `np.dtype(int)`, aka `np.int_`, which
+        # in all released versions of numpy corresponds to the C `long` type.
+        # Windows 64 is broken by default here because Numba (as of 0.47) does
+        # not differentiate between Python and NumPy integers, so a `typeof(1)`
+        # on w64 is `int64`, i.e. `intp`. This means an arange(<some int>) will
+        # be typed as arange(int64) and the following will yield int64 opposed
+        # to int32. Example: without a load of analysis to work out of the args
+        # were wrapped in NumPy int*() calls it's not possible to detect the
+        # difference between `np.arange(10)` and `np.arange(np.int64(10)`.
+        NPY_TY = getattr(types, "int%s" % (8 * np.dtype(int).itemsize))
+
+        # unliteral these types such that `max` works.
+        unliteral_bounds = [types.unliteral(x) for x in bounds]
+        dtype = max(unliteral_bounds + [NPY_TY,])
+
+    return dtype
+
+
+@overload(cuda_arange)
+def ol_cuda_arange(start):
+    """Simplified arange with just 1 argument."""
+    if (not isinstance(start, types.Number)):
+        return
+
+    start_value = getattr(start, "literal_value", None)
+
+    def impl(start):
+        # Allow for improved performance if given literal arguments.
+        lit_start = start_value if start_value is not None else start
+
+        _step = 1
+        _start, _stop = 0, lit_start
+
+        nitems_c = (_stop - _start) / _step
+        nitems_r = int(math.ceil(nitems_c.real))
+
+        # Binary operator needed for compiler branch pruning.
+        nitems = max(nitems_r, 0)
+
+        arr = cuda_empty(nitems, np.int64)
+        val = _start
+        for i in range(nitems):
+            arr[i] = val + (i * _step)
+        return arr
+
+    return impl
+
+
+@overload(cuda_ones)
+def ol_cuda_ones(shape):
+
+    def impl(shape):
+        arr = cuda_empty(shape, np.float64)
+        arr_flat = arr.flat
+        for idx in range(len(arr_flat)):
+            arr_flat[idx] = 1
+        return arr
+    return impl

numba_cuda/numba/cuda/tests/nrt/test_nrt.py
CHANGED
@@ -1,47 +1,22 @@
 import re
-import gc
+import os
+
 import numpy as np
 import unittest
-from unittest.mock import patch
-from numba.core.runtime import rtsys
-from numba.tests.support import EnableNRTStatsMixin
 from numba.cuda.testing import CUDATestCase

-from .mock_numpy import cuda_empty
+from numba.cuda.tests.nrt.mock_numpy import cuda_empty, cuda_ones, cuda_arange
+from numba.tests.support import run_in_subprocess, override_config

 from numba import cuda
-
-
-class TestNrtRefCt(EnableNRTStatsMixin, CUDATestCase):
-
-    def setUp(self):
-        # Clean up any NRT-backed objects hanging in a dead reference cycle
-        gc.collect()
-        super(TestNrtRefCt, self).setUp()
-
-    @unittest.expectedFailure
-    def test_no_return(self):
-        """
-        Test issue #1291
-        """
-        n = 10
-
-        @cuda.jit
-        def kernel():
-            for i in range(n):
-                temp = cuda_empty(2, np.float64)  # noqa: F841
-            return None
-
-        init_stats = rtsys.get_allocation_stats()
-
-        with patch('numba.config.CUDA_ENABLE_NRT', True, create=True):
-            kernel[1,1]()
-        cur_stats = rtsys.get_allocation_stats()
-        self.assertEqual(cur_stats.alloc - init_stats.alloc, n)
-        self.assertEqual(cur_stats.free - init_stats.free, n)
+from numba.cuda.runtime.nrt import rtsys


 class TestNrtBasic(CUDATestCase):
+    def run(self, result=None):
+        with override_config("CUDA_ENABLE_NRT", True):
+            super(TestNrtBasic, self).run(result)
+
     def test_nrt_launches(self):
         @cuda.jit
         def f(x):
@@ -52,8 +27,7 @@ class TestNrtBasic(CUDATestCase):
             x = cuda_empty(10, np.int64)
             f(x)

-
-        g[1,1]()
+        g[1,1]()
         cuda.synchronize()

     def test_nrt_ptx_contains_refcount(self):
@@ -66,8 +40,7 @@ class TestNrtBasic(CUDATestCase):
             x = cuda_empty(10, np.int64)
             f(x)

-
-        g[1,1]()
+        g[1,1]()

         ptx = next(iter(g.inspect_asm().values()))

@@ -100,11 +73,160 @@ class TestNrtBasic(CUDATestCase):

         out_ary = np.zeros(1, dtype=np.int64)

-
-        g[1,1](out_ary)
+        g[1,1](out_ary)

         self.assertEqual(out_ary[0], 1)


+class TestNrtStatistics(CUDATestCase):
+
+    def setUp(self):
+        self._stream = cuda.default_stream()
+        # Store the current stats state
+        self.__stats_state = rtsys.memsys_stats_enabled(self._stream)
+
+    def tearDown(self):
+        # Set stats state back to whatever it was before the test ran
+        if self.__stats_state:
+            rtsys.memsys_enable_stats(self._stream)
+        else:
+            rtsys.memsys_disable_stats(self._stream)
+
+    def test_stats_env_var_explicit_on(self):
+        # Checks that explicitly turning the stats on via the env var works.
+        src = """if 1:
+        from numba import cuda
+        from numba.cuda.runtime import rtsys
+        from numba.cuda.tests.nrt.mock_numpy import cuda_arange
+
+        @cuda.jit
+        def foo():
+            x = cuda_arange(10)[0]
+
+        # initialize the NRT before use
+        rtsys.initialize()
+        assert rtsys.memsys_stats_enabled(), "Stats not enabled"
+        orig_stats = rtsys.get_allocation_stats()
+        foo[1, 1]()
+        new_stats = rtsys.get_allocation_stats()
+        total_alloc = new_stats.alloc - orig_stats.alloc
+        total_free = new_stats.free - orig_stats.free
+        total_mi_alloc = new_stats.mi_alloc - orig_stats.mi_alloc
+        total_mi_free = new_stats.mi_free - orig_stats.mi_free
+
+        expected = 1
+        assert total_alloc == expected, \\
+            f"total_alloc != expected, {total_alloc} != {expected}"
+        assert total_free == expected, \\
+            f"total_free != expected, {total_free} != {expected}"
+        assert total_mi_alloc == expected, \\
+            f"total_mi_alloc != expected, {total_mi_alloc} != {expected}"
+        assert total_mi_free == expected, \\
+            f"total_mi_free != expected, {total_mi_free} != {expected}"
+        """
+
+        # Check env var explicitly being set works
+        env = os.environ.copy()
+        env['NUMBA_CUDA_NRT_STATS'] = "1"
+        env['NUMBA_CUDA_ENABLE_NRT'] = "1"
+        run_in_subprocess(src, env=env)
+
+    def check_env_var_off(self, env):
+
+        src = """if 1:
+        from numba import cuda
+        import numpy as np
+        from numba.cuda.runtime import rtsys
+
+        @cuda.jit
+        def foo():
+            arr = np.arange(10)[0]
+
+        assert rtsys.memsys_stats_enabled() == False
+        try:
+            rtsys.get_allocation_stats()
+        except RuntimeError as e:
+            assert "NRT stats are disabled." in str(e)
+        """
+        run_in_subprocess(src, env=env)
+
+    def test_stats_env_var_explicit_off(self):
+        # Checks that explicitly turning the stats off via the env var works.
+        env = os.environ.copy()
+        env['NUMBA_CUDA_NRT_STATS'] = "0"
+        self.check_env_var_off(env)
+
+    def test_stats_env_var_default_off(self):
+        # Checks that the env var not being set is the same as "off", i.e.
+        # default for Numba is off.
+        env = os.environ.copy()
+        env.pop('NUMBA_CUDA_NRT_STATS', None)
+        self.check_env_var_off(env)
+
+    def test_stats_status_toggle(self):
+
+        @cuda.jit
+        def foo():
+            tmp = cuda_ones(3)
+            arr = cuda_arange(5 * tmp[0])  # noqa: F841
+            return None
+
+        with override_config('CUDA_ENABLE_NRT', True):
+            # Switch on stats
+            rtsys.memsys_enable_stats()
+            # check the stats are on
+            self.assertTrue(rtsys.memsys_stats_enabled())
+
+            for i in range(2):
+                # capture the stats state
+                stats_1 = rtsys.get_allocation_stats()
+                # Switch off stats
+                rtsys.memsys_disable_stats()
+                # check the stats are off
+                self.assertFalse(rtsys.memsys_stats_enabled())
+                # run something that would move the counters were they enabled
+                foo[1, 1]()
+                # Switch on stats
+                rtsys.memsys_enable_stats()
+                # check the stats are on
+                self.assertTrue(rtsys.memsys_stats_enabled())
+                # capture the stats state (should not have changed)
+                stats_2 = rtsys.get_allocation_stats()
+                # run something that will move the counters
+                foo[1, 1]()
+                # capture the stats state (should have changed)
+                stats_3 = rtsys.get_allocation_stats()
+                # check stats_1 == stats_2
+                self.assertEqual(stats_1, stats_2)
+                # check stats_2 < stats_3
+                self.assertLess(stats_2, stats_3)
+
+    def test_rtsys_stats_query_raises_exception_when_disabled(self):
+        # Checks that the standard rtsys.get_allocation_stats() query raises
+        # when stats counters are turned off.
+
+        rtsys.memsys_disable_stats()
+        self.assertFalse(rtsys.memsys_stats_enabled())
+
+        with self.assertRaises(RuntimeError) as raises:
+            rtsys.get_allocation_stats()
+
+        self.assertIn("NRT stats are disabled.", str(raises.exception))
+
+    def test_nrt_explicit_stats_query_raises_exception_when_disabled(self):
+        # Checks the various memsys_get_stats functions raise if queried when
+        # the stats counters are disabled.
+        method_variations = ('alloc', 'free', 'mi_alloc', 'mi_free')
+        for meth in method_variations:
+            stats_func = getattr(rtsys, f'memsys_get_stats_{meth}')
+            with self.subTest(stats_func=stats_func):
+                # Turn stats off
+                rtsys.memsys_disable_stats()
+                self.assertFalse(rtsys.memsys_stats_enabled())
+                with self.assertRaises(RuntimeError) as raises:
+                    stats_func()
+                self.assertIn("NRT stats are disabled.", str(raises.exception))
+
+
 if __name__ == '__main__':
     unittest.main()

numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py
ADDED
@@ -0,0 +1,114 @@
+import numpy as np
+import unittest
+from numba.tests.support import override_config
+from numba.cuda.runtime import rtsys
+from numba.cuda.tests.support import EnableNRTStatsMixin
+from numba.cuda.testing import CUDATestCase
+from numba.cuda.tests.nrt.mock_numpy import cuda_empty, cuda_empty_like
+
+from numba import cuda
+
+
+class TestNrtRefCt(EnableNRTStatsMixin, CUDATestCase):
+
+    def setUp(self):
+        super(TestNrtRefCt, self).setUp()
+
+    def tearDown(self):
+        super(TestNrtRefCt, self).tearDown()
+
+    def run(self, result=None):
+        with override_config("CUDA_ENABLE_NRT", True):
+            super(TestNrtRefCt, self).run(result)
+
+    def test_no_return(self):
+        """
+        Test issue #1291
+        """
+
+        n = 10
+
+        @cuda.jit
+        def kernel():
+            for i in range(n):
+                temp = cuda_empty(2, np.float64)  # noqa: F841
+            return None
+
+        init_stats = rtsys.get_allocation_stats()
+        kernel[1, 1]()
+        cur_stats = rtsys.get_allocation_stats()
+        self.assertEqual(cur_stats.alloc - init_stats.alloc, n)
+        self.assertEqual(cur_stats.free - init_stats.free, n)
+
+    def test_escaping_var_init_in_loop(self):
+        """
+        Test issue #1297
+        """
+
+        @cuda.jit
+        def g(n):
+
+            x = cuda_empty((n, 2), np.float64)
+
+            for i in range(n):
+                y = x[i]
+
+            for i in range(n):
+                y = x[i]  # noqa: F841
+
+            return None
+
+        init_stats = rtsys.get_allocation_stats()
+        g[1, 1](10)
+        cur_stats = rtsys.get_allocation_stats()
+        self.assertEqual(cur_stats.alloc - init_stats.alloc, 1)
+        self.assertEqual(cur_stats.free - init_stats.free, 1)
+
+    def test_invalid_computation_of_lifetime(self):
+        """
+        Test issue #1573
+        """
+        @cuda.jit
+        def if_with_allocation_and_initialization(arr1, test1):
+            tmp_arr = cuda_empty_like(arr1)
+
+            for i in range(tmp_arr.shape[0]):
+                pass
+
+            if test1:
+                cuda_empty_like(arr1)
+
+        arr = np.random.random((5, 5))  # the values are not consumed
+
+        init_stats = rtsys.get_allocation_stats()
+        if_with_allocation_and_initialization[1, 1](arr, False)
+        cur_stats = rtsys.get_allocation_stats()
+        self.assertEqual(cur_stats.alloc - init_stats.alloc,
+                         cur_stats.free - init_stats.free)
+
+    def test_del_at_beginning_of_loop(self):
+        """
+        Test issue #1734
+        """
+        @cuda.jit
+        def f(arr):
+            res = 0
+
+            for i in (0, 1):
+                # `del t` is issued here before defining t. It must be
+                # correctly handled by the lowering phase.
+                t = arr[i]
+                if t[i] > 1:
+                    res += t[i]
+
+        arr = np.ones((2, 2))
+
+        init_stats = rtsys.get_allocation_stats()
+        f[1, 1](arr)
+        cur_stats = rtsys.get_allocation_stats()
+        self.assertEqual(cur_stats.alloc - init_stats.alloc,
+                         cur_stats.free - init_stats.free)
+
+
+if __name__ == '__main__':
+    unittest.main()

numba_cuda/numba/cuda/utils.py
ADDED
@@ -0,0 +1,22 @@
+import os
+import warnings
+import traceback
+
+
+def _readenv(name, ctor, default):
+    value = os.environ.get(name)
+    if value is None:
+        return default() if callable(default) else default
+    try:
+        if ctor is bool:
+            return value.lower() in {'1', "true"}
+        return ctor(value)
+    except Exception:
+        warnings.warn(
+            f"Environment variable '{name}' is defined but its associated "
+            f"value '{value}' could not be parsed.\n"
+            "The parse failed with exception:\n"
+            f"{traceback.format_exc()}",
+            RuntimeWarning
+        )
+        return default

{numba_cuda-0.3.0.dist-info → numba_cuda-0.4.0.dist-info}/RECORD
CHANGED
@@ -1,6 +1,6 @@
 _numba_cuda_redirector.pth,sha256=cmfMMmV0JPh3yEpl4bGeM9AuXiVVMSo6Z_b7RaQL3XE,30
 _numba_cuda_redirector.py,sha256=QKJmYICSQvjvph0Zw9OW015MsuKxIF28GPFjR35AXLM,2681
-numba_cuda/VERSION,sha256=
+numba_cuda/VERSION,sha256=QLjrQACpE6d5EJBTXykdPTaYdBYqie88nj1OiHobnnk,6
 numba_cuda/__init__.py,sha256=atXeUvJKR3JHcAiCFbXCVOJQUHgB1TulmsqSL_9RT3Q,114
 numba_cuda/_version.py,sha256=jbdUsbR7sVllw0KxQNB0-FMd929CGg3kH2fhHdrlkuc,719
 numba_cuda/numba/cuda/__init__.py,sha256=idyVHOObC9lTYnp62v7rVprSacRM4d5F6vhXfG5ElTI,621
@@ -21,7 +21,7 @@ numba_cuda/numba/cuda/decorators.py,sha256=qSpir16-jPYSe2YuRZ6g9INeobmsMNg6ab9IZ
 numba_cuda/numba/cuda/descriptor.py,sha256=rNMaurJkjNjIBmHPozDoLC35DMURE0fn_LtnXRmaG_w,985
 numba_cuda/numba/cuda/device_init.py,sha256=lP79tCsQ0Np9xcbjv_lXcH4JOiVZvV8nwg3INdETxsc,3586
 numba_cuda/numba/cuda/deviceufunc.py,sha256=yxAH71dpgJWK8okmCJm0FUV6z2AqdThCYOTZspT7z0M,30775
-numba_cuda/numba/cuda/dispatcher.py,sha256=
+numba_cuda/numba/cuda/dispatcher.py,sha256=cJH7Jm-U26PyU-M2Igevar_Q_c_k9R-A99InnRGPzX0,42444
 numba_cuda/numba/cuda/errors.py,sha256=XwWHzCllx0DXU6BQdoRH0m3pznGxnTFOBTVYXMmCfqg,1724
 numba_cuda/numba/cuda/extending.py,sha256=URsyBYls2te-mgE0yvDY6akvawYCA0blBFfD7Lf9DO4,142
 numba_cuda/numba/cuda/initialize.py,sha256=TQGHGLQoq4ch4J6CLDcJdGsZzXM-g2kDgdyO1u-Rbhg,546
@@ -43,12 +43,13 @@ numba_cuda/numba/cuda/target.py,sha256=hBflzmxCGlmTugWT1sYhZj9f4HkQAMK2RQ9lO85pM
 numba_cuda/numba/cuda/testing.py,sha256=E0wP2vfno1yWsl0v1zg31kpbU8FrKxTF-5y9Iv4WjA4,6412
 numba_cuda/numba/cuda/types.py,sha256=WVfjcly_VUpG9FfKueiEPzZm2NV8Hg0XAFg3bNzPdVc,1314
 numba_cuda/numba/cuda/ufuncs.py,sha256=txw27IxG80W1Yo7e-XwL2AMcQo0fMnxMjBIMy-n5pCo,23317
+numba_cuda/numba/cuda/utils.py,sha256=JId22EI3KkQosW6Dafdaw43qU0xXXO_4JOENLap8klU,630
 numba_cuda/numba/cuda/vector_types.py,sha256=s18dY0IUpT-RcaBvQsa_zEbYuuL2IT0Vh6afCeccwmQ,6750
 numba_cuda/numba/cuda/vectorizers.py,sha256=u_0EzaD5tqVH8uOz4Gmqn3FgPC1rckwDAQuROm0BXm8,8915
 numba_cuda/numba/cuda/cudadrv/__init__.py,sha256=0TL4MZcJXUoo9qA7uu0vLv7eHrXRerVmyfi7O149ITw,199
-numba_cuda/numba/cuda/cudadrv/devicearray.py,sha256=
+numba_cuda/numba/cuda/cudadrv/devicearray.py,sha256=jsfr4LL12HWJzU3HUgzXpkk38Z-pyFyzLuGArg2G-nU,31363
 numba_cuda/numba/cuda/cudadrv/devices.py,sha256=6SneNmoq83gue0txFWWx4A65vViAa8xA06FzkApoqAk,7992
-numba_cuda/numba/cuda/cudadrv/driver.py,sha256=
+numba_cuda/numba/cuda/cudadrv/driver.py,sha256=1F-Ugsf1bdZgK-So_q_TkJckdoczlzhBrCEJn8KYxG0,114321
 numba_cuda/numba/cuda/cudadrv/drvapi.py,sha256=52ms3X6hfPaQB8E1jb6g7QKqRvHzBMlDQ-V2DM1rXxQ,17178
 numba_cuda/numba/cuda/cudadrv/dummyarray.py,sha256=nXRngdr-k3h_BNGQuJUxmp89yGNWxqEDJedpwDPEZ44,14209
 numba_cuda/numba/cuda/cudadrv/enums.py,sha256=Wy5dzukTk4TnWCowg_PLceET_v2xEyiWLu9TyH8pXr8,23742
@@ -57,14 +58,18 @@ numba_cuda/numba/cuda/cudadrv/libs.py,sha256=Gk9zQ1CKcsZsWl-_9QneXeP9VH5q5R1I3Cx
 numba_cuda/numba/cuda/cudadrv/linkable_code.py,sha256=Q_YTv0apBo9t8pkMlKrthPPSVeLd376ZTmVDF5NtVVo,1328
 numba_cuda/numba/cuda/cudadrv/mappings.py,sha256=-dTPHvAkDjdH6vS5OjgrB71AFuqKO6CRgf7hpOk2wiw,802
 numba_cuda/numba/cuda/cudadrv/ndarray.py,sha256=HtULWWFyDlgqvrH5459yyPTvU4UbUo2DSdtcNfvbH00,473
-numba_cuda/numba/cuda/cudadrv/nvrtc.py,sha256=
+numba_cuda/numba/cuda/cudadrv/nvrtc.py,sha256=XM9_Vllv7HzH5wZIR2lwFictyX68XDtNbyLkXlL6NTI,11003
 numba_cuda/numba/cuda/cudadrv/nvvm.py,sha256=v2hJJTAQeRmoG59-hnhgMEp5BSVA73QHtEoy636VKao,24107
 numba_cuda/numba/cuda/cudadrv/rtapi.py,sha256=WdeUoWzsYNYodx8kMRLVIjnNs0QzwpCihd2Q0AaqItE,226
 numba_cuda/numba/cuda/cudadrv/runtime.py,sha256=Tj9ACrzQqNmDSO6xfpzw12EsQknSywQ-ZGuWMbDdHnQ,4255
 numba_cuda/numba/cuda/kernels/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 numba_cuda/numba/cuda/kernels/reduction.py,sha256=fQnaWtoNB2yp143MNbE1DujqFIYy0KV_2moQVvbaROU,9362
 numba_cuda/numba/cuda/kernels/transpose.py,sha256=5FSu-nbTfhintxwfU-bjT2px2otQF5QkKH-JPDDWq_k,2061
-numba_cuda/numba/cuda/runtime/
+numba_cuda/numba/cuda/runtime/__init__.py,sha256=rDi_pA5HnwpuwT8wwy0hparfO7HWgfjLVj9htbk_tCg,54
+numba_cuda/numba/cuda/runtime/memsys.cu,sha256=5nTXrstrUBVLeLvnDUReyhRGvVILK--VdM1u3oUCa2o,2386
+numba_cuda/numba/cuda/runtime/memsys.cuh,sha256=4oDvs7LvcMmdkN58b8e0nBqPka_sdagoULSKRut74DY,503
+numba_cuda/numba/cuda/runtime/nrt.cu,sha256=WB7jQxT1bLdkY6Tm7-_ytVLjJxK4iU0OFifbPIpLwvw,5403
+numba_cuda/numba/cuda/runtime/nrt.py,sha256=pmacryGZn25IHjdRMwT2vZipdtu0xsjpPDic_hlRxkA,9195
 numba_cuda/numba/cuda/simulator/__init__.py,sha256=crW0VQ_8e7DMRSHKoAIziZ37ea5mpbh_49tR9M3d5YY,1610
 numba_cuda/numba/cuda/simulator/api.py,sha256=K_fX-w9X4grGx2IAp0XlBW9rth5l7wibMwinQvkE7Jc,3237
 numba_cuda/numba/cuda/simulator/compiler.py,sha256=eXnvmzSKzIZZzBz6ZFJ-vMNyRAgqbCiB-AO5IJXuUyM,232
@@ -82,7 +87,8 @@ numba_cuda/numba/cuda/simulator/cudadrv/error.py,sha256=ACSQ7ZvhuCHnvV4GmvRuKWZ5
 numba_cuda/numba/cuda/simulator/cudadrv/libs.py,sha256=ry5rerpZrnAy70LU_YBa1KNaqKBGLHE9cMxljdSzaik,101
 numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py,sha256=vIFQi4ewYXyzUYssfw78QLfoZmoVgloFCLTk55Gg1tw,474
 numba_cuda/numba/cuda/simulator/cudadrv/runtime.py,sha256=K63p7puZJZD3BQ6ZT0qoII_Z3xJiUckp2dhozFjrnEs,358
-numba_cuda/numba/cuda/tests/__init__.py,sha256=
+numba_cuda/numba/cuda/tests/__init__.py,sha256=4U2RJuURN6SazAUSEtVofVEtahN3dDfUYNyDCmu64zo,2421
+numba_cuda/numba/cuda/tests/support.py,sha256=1og4VLrK2x2LF5m5ARrrHVe-JhYx9Gv9ODKt6-8r6Aw,253
 numba_cuda/numba/cuda/tests/cudadrv/__init__.py,sha256=43EXdiXXRBd6yIcVGMrU9F_EJCD9Uw3mzOP3SB53AEE,260
 numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py,sha256=cjHQ0J6F8APrLm23ZCFr0S7dtQmLqwq9vxMoI5lyn68,5300
 numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py,sha256=lSEuEM7x-x95m_lS_wSIBKnBxOhzn-AJ3WjYw8bW0y4,4492
@@ -92,7 +98,7 @@ numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py,sha256=rikIJQ266l_
 numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py,sha256=y--0AZFVpp2nmbeI1jbgZsWbBP-iVEmG8WKgR9XrxKE,7663
 numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py,sha256=sqNbo8pk4Zl5ptuGXrXFndia4IyttbuGnqjVTOtGuuw,801
 numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py,sha256=MDJMIWm1jCsBOcuwdshzqwaE__uqX0562uSjxFhud3M,6627
-numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py,sha256=
+numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py,sha256=eWczfXXIHS0p9eNhVagzXa4XWPwmrCb_yIBuDtjgq8c,21628
 numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py,sha256=BR1ccEj_TCVToHoHS8KwwCfKLMUl6KGb92Cx6nX-XPg,8404
 numba_cuda/numba/cuda/tests/cudadrv/test_detect.py,sha256=lCt2E8gxnd8O-fRobDEwgX4jBZ15W7cImQcZc8_u2Sg,2774
 numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py,sha256=ah82yaWFvBfUTTSfbkZBKLsUf2tTSSJNvlSxrk1RI1E,7094
@@ -231,14 +237,15 @@ numba_cuda/numba/cuda/tests/nocuda/test_import.py,sha256=teiL8rpFGQOh41kyBSSNHHF
 numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py,sha256=7kJOPHEcrjy_kTA9Ym-iT_B972bgFRu3UkRtwIgWtuI,7948
 numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py,sha256=n0_-xFaw6QqiZbhe55oy7lnEeOwqTvA55p5EUFiTpNw,2006
 numba_cuda/numba/cuda/tests/nrt/__init__.py,sha256=43EXdiXXRBd6yIcVGMrU9F_EJCD9Uw3mzOP3SB53AEE,260
-numba_cuda/numba/cuda/tests/nrt/mock_numpy.py,sha256=
-numba_cuda/numba/cuda/tests/nrt/test_nrt.py,sha256=
+numba_cuda/numba/cuda/tests/nrt/mock_numpy.py,sha256=Cx2DGhm2bJheShP2Ja1w9YLlRTeAMM7u1UYHsPnTzA8,4552
+numba_cuda/numba/cuda/tests/nrt/test_nrt.py,sha256=b3rtK018qslhUU5UsAAa3s-mjlnlfxAwTJmARTVD2j4,7650
+numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py,sha256=Wq46oICum9IXnbQ97vV8V7g-3U01PLQEQbaGSNdRuMg,3163
 numba_cuda/numba/cuda/tests/test_binary_generation/Makefile,sha256=P2WzCc5d64JGq6pJwHEwmKVmJOJxPBtsMTbnuzqYkik,2679
 numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py,sha256=V0raLZLGSiWbE_K-JluI0CnmNkXbhlMVj-TH7P1OV8E,5014
 numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu,sha256=cUf-t6ZM9MK_x7X_aKwsrKW1LdR97XcpR-qnYr5faOE,453
 numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu,sha256=q3oxZziT8KDodeNcEBiWULH6vMrHCWucmJmtrg8C0d0,128
-numba_cuda-0.
-numba_cuda-0.
-numba_cuda-0.
-numba_cuda-0.
-numba_cuda-0.
+numba_cuda-0.4.0.dist-info/LICENSE,sha256=eHeYE-XjASmwbxfsP5AImgfzRwZurZGqH1f6OFwJ4io,1326
+numba_cuda-0.4.0.dist-info/METADATA,sha256=BWlfqEMCG0dlSXORk9sKzY7nT_YdQzk9eQ7fBX4rvlY,1496
+numba_cuda-0.4.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+numba_cuda-0.4.0.dist-info/top_level.txt,sha256=C50SsH-8tXDmt7I0Y3nlJYhS5s6pqWflCPdobe9vx2M,11
+numba_cuda-0.4.0.dist-info/RECORD,,

{numba_cuda-0.3.0.dist-info → numba_cuda-0.4.0.dist-info}/LICENSE
File without changes

{numba_cuda-0.3.0.dist-info → numba_cuda-0.4.0.dist-info}/top_level.txt
File without changes