numba-cuda 0.12.1__py3-none-any.whl → 0.13.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- numba_cuda/VERSION +1 -1
- numba_cuda/numba/cuda/codegen.py +1 -1
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +1 -1
- numba_cuda/numba/cuda/dispatcher.py +1 -1
- numba_cuda/numba/cuda/memory_management/__init__.py +1 -0
- numba_cuda/numba/cuda/simulator/__init__.py +10 -1
- numba_cuda/numba/cuda/simulator/_internal/__init__.py +1 -0
- numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +0 -0
- numba_cuda/numba/cuda/simulator/api.py +17 -0
- numba_cuda/numba/cuda/simulator/bf16.py +1 -0
- numba_cuda/numba/cuda/simulator/compiler.py +1 -0
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +7 -0
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +57 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +8 -0
- numba_cuda/numba/cuda/simulator/kernel.py +1 -1
- numba_cuda/numba/cuda/simulator/kernelapi.py +8 -2
- numba_cuda/numba/cuda/simulator/memory_management/__init__.py +1 -0
- numba_cuda/numba/cuda/simulator/memory_management/nrt.py +6 -0
- numba_cuda/numba/cuda/testing.py +10 -4
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +2 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +15 -6
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +0 -3
- numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +25 -1
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +11 -4
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +34 -21
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +4 -2
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +2 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +60 -58
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +3 -2
- numba_cuda/numba/cuda/tests/support.py +1 -1
- numba_cuda/numba/cuda/tests/test_binary_generation/Makefile +1 -1
- numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +1 -1
- {numba_cuda-0.12.1.dist-info → numba_cuda-0.13.0.dist-info}/METADATA +1 -1
- {numba_cuda-0.12.1.dist-info → numba_cuda-0.13.0.dist-info}/RECORD +45 -38
- {numba_cuda-0.12.1.dist-info → numba_cuda-0.13.0.dist-info}/WHEEL +1 -1
- numba_cuda/numba/cuda/runtime/__init__.py +0 -1
- /numba_cuda/numba/cuda/{runtime → memory_management}/memsys.cu +0 -0
- /numba_cuda/numba/cuda/{runtime → memory_management}/memsys.cuh +0 -0
- /numba_cuda/numba/cuda/{runtime → memory_management}/nrt.cu +0 -0
- /numba_cuda/numba/cuda/{runtime → memory_management}/nrt.cuh +0 -0
- /numba_cuda/numba/cuda/{runtime → memory_management}/nrt.py +0 -0
- {numba_cuda-0.12.1.dist-info → numba_cuda-0.13.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.12.1.dist-info → numba_cuda-0.13.0.dist-info}/top_level.txt +0 -0
numba_cuda/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.12.1
|
1
|
+
0.13.0
|
numba_cuda/numba/cuda/codegen.py
CHANGED
@@ -5,7 +5,7 @@ from numba.core.codegen import Codegen, CodeLibrary
|
|
5
5
|
from .cudadrv import devices, driver, nvvm, runtime
|
6
6
|
from numba.cuda.cudadrv.libs import get_cudalib
|
7
7
|
from numba.cuda.cudadrv.linkable_code import LinkableCode
|
8
|
-
from numba.cuda.runtime.nrt import NRT_LIBRARY
|
8
|
+
from numba.cuda.memory_management.nrt import NRT_LIBRARY
|
9
9
|
|
10
10
|
import os
|
11
11
|
import subprocess
|
@@ -397,7 +397,7 @@ def compile(src, name, cc, ltoir=False):
|
|
397
397
|
else:
|
398
398
|
extra_includes = []
|
399
399
|
|
400
|
-
nrt_path = os.path.join(numba_cuda_path, "runtime")
|
400
|
+
nrt_path = os.path.join(numba_cuda_path, "memory_management")
|
401
401
|
nrt_include = f"-I{nrt_path}"
|
402
402
|
|
403
403
|
options = [
|
@@ -27,8 +27,8 @@ from numba.cuda.errors import (
|
|
27
27
|
normalize_kernel_dimensions,
|
28
28
|
)
|
29
29
|
from numba.cuda import types as cuda_types
|
30
|
-
from numba.cuda.runtime.nrt import rtsys, NRT_LIBRARY
|
31
30
|
from numba.cuda.locks import module_init_lock
|
31
|
+
from numba.cuda.memory_management.nrt import rtsys, NRT_LIBRARY
|
32
32
|
|
33
33
|
from numba import cuda
|
34
34
|
from numba import _dispatcher
|
@@ -0,0 +1 @@
|
|
1
|
+
from numba.cuda.memory_management.nrt import rtsys # noqa: F401
|
@@ -38,11 +38,20 @@ if config.ENABLE_CUDASIM:
|
|
38
38
|
sys.modules["numba.cuda.cudadrv.devicearray"] = cudadrv.devicearray
|
39
39
|
sys.modules["numba.cuda.cudadrv.devices"] = cudadrv.devices
|
40
40
|
sys.modules["numba.cuda.cudadrv.driver"] = cudadrv.driver
|
41
|
+
sys.modules["numba.cuda.cudadrv.linkable_code"] = cudadrv.linkable_code
|
41
42
|
sys.modules["numba.cuda.cudadrv.runtime"] = cudadrv.runtime
|
42
43
|
sys.modules["numba.cuda.cudadrv.drvapi"] = cudadrv.drvapi
|
43
44
|
sys.modules["numba.cuda.cudadrv.error"] = cudadrv.error
|
44
45
|
sys.modules["numba.cuda.cudadrv.nvvm"] = cudadrv.nvvm
|
45
46
|
|
46
|
-
from . import compiler
|
47
|
+
from . import bf16, compiler, _internal
|
47
48
|
|
49
|
+
sys.modules["numba.cuda.bf16"] = bf16
|
48
50
|
sys.modules["numba.cuda.compiler"] = compiler
|
51
|
+
sys.modules["numba.cuda._internal"] = _internal
|
52
|
+
sys.modules["numba.cuda._internal.cuda_bf16"] = _internal.cuda_bf16
|
53
|
+
|
54
|
+
from numba.cuda.simulator import memory_management
|
55
|
+
|
56
|
+
sys.modules["numba.cuda.memory_management"] = memory_management
|
57
|
+
sys.modules["numba.cuda.memory_management.nrt"] = memory_management.nrt
|
@@ -0,0 +1 @@
|
|
1
|
+
from numba.cuda.simulator._internal import cuda_bf16 # noqa: F401
|
File without changes
|
@@ -7,6 +7,15 @@ Contains CUDA API functions
|
|
7
7
|
from contextlib import contextmanager
|
8
8
|
|
9
9
|
from .cudadrv.devices import require_context, reset, gpus # noqa: F401
|
10
|
+
from .cudadrv.linkable_code import (
|
11
|
+
PTXSource, # noqa: F401
|
12
|
+
CUSource, # noqa: F401
|
13
|
+
Cubin, # noqa: F401
|
14
|
+
Fatbin, # noqa: F401
|
15
|
+
Archive, # noqa: F401
|
16
|
+
Object, # noqa: F401
|
17
|
+
LTOIR, # noqa: F401
|
18
|
+
) # noqa: F401
|
10
19
|
from .kernel import FakeCUDAKernel
|
11
20
|
from numba.core.sigutils import is_signature
|
12
21
|
from numba.core import config
|
@@ -22,6 +31,10 @@ def is_float16_supported():
|
|
22
31
|
return True
|
23
32
|
|
24
33
|
|
34
|
+
def is_bfloat16_supported():
|
35
|
+
return False
|
36
|
+
|
37
|
+
|
25
38
|
class stream(object):
|
26
39
|
"""
|
27
40
|
The stream API is supported in the simulator - however, all execution
|
@@ -72,6 +85,10 @@ def list_devices():
|
|
72
85
|
return gpus
|
73
86
|
|
74
87
|
|
88
|
+
def get_current_device():
|
89
|
+
return gpus[0].device
|
90
|
+
|
91
|
+
|
75
92
|
# Events
|
76
93
|
|
77
94
|
|
@@ -0,0 +1 @@
|
|
1
|
+
bfloat16 = None
|
@@ -3,6 +3,8 @@ Most of the driver API is unsupported in the simulator, but some stubs are
|
|
3
3
|
provided to allow tests to import correctly.
|
4
4
|
"""
|
5
5
|
|
6
|
+
from numba import config
|
7
|
+
|
6
8
|
|
7
9
|
def device_memset(dst, val, size, stream=0):
|
8
10
|
dst.view("u1")[:size].fill(bytes([val])[0])
|
@@ -60,3 +62,8 @@ def launch_kernel(*args, **kwargs):
|
|
60
62
|
|
61
63
|
|
62
64
|
USE_NV_BINDING = False
|
65
|
+
|
66
|
+
PyNvJitLinker = None
|
67
|
+
|
68
|
+
if config.ENABLE_CUDASIM:
|
69
|
+
config.CUDA_ENABLE_PYNVJITLINK = False
|
@@ -0,0 +1,57 @@
|
|
1
|
+
class LinkableCode:
|
2
|
+
"""An object that holds code to be linked from memory.
|
3
|
+
|
4
|
+
:param data: A buffer containing the data to link.
|
5
|
+
:param name: The name of the file to be referenced in any compilation or
|
6
|
+
linking errors that may be produced.
|
7
|
+
"""
|
8
|
+
|
9
|
+
def __init__(self, data, name=None):
|
10
|
+
self.data = data
|
11
|
+
self._name = name
|
12
|
+
|
13
|
+
@property
|
14
|
+
def name(self):
|
15
|
+
return self._name or self.default_name
|
16
|
+
|
17
|
+
|
18
|
+
class PTXSource(LinkableCode):
|
19
|
+
"""PTX source code in memory."""
|
20
|
+
|
21
|
+
default_name = "<unnamed-ptx>"
|
22
|
+
|
23
|
+
|
24
|
+
class CUSource(LinkableCode):
|
25
|
+
"""CUDA C/C++ source code in memory."""
|
26
|
+
|
27
|
+
default_name = "<unnamed-cu>"
|
28
|
+
|
29
|
+
|
30
|
+
class Fatbin(LinkableCode):
|
31
|
+
"""An ELF Fatbin in memory."""
|
32
|
+
|
33
|
+
default_name = "<unnamed-fatbin>"
|
34
|
+
|
35
|
+
|
36
|
+
class Cubin(LinkableCode):
|
37
|
+
"""An ELF Cubin in memory."""
|
38
|
+
|
39
|
+
default_name = "<unnamed-cubin>"
|
40
|
+
|
41
|
+
|
42
|
+
class Archive(LinkableCode):
|
43
|
+
"""An archive of objects in memory."""
|
44
|
+
|
45
|
+
default_name = "<unnamed-archive>"
|
46
|
+
|
47
|
+
|
48
|
+
class Object(LinkableCode):
|
49
|
+
"""An object file in memory."""
|
50
|
+
|
51
|
+
default_name = "<unnamed-object>"
|
52
|
+
|
53
|
+
|
54
|
+
class LTOIR(LinkableCode):
|
55
|
+
"""An LTOIR file in memory."""
|
56
|
+
|
57
|
+
default_name = "<unnamed-ltoir>"
|
@@ -63,7 +63,10 @@ class FakeCUDALocal(object):
|
|
63
63
|
CUDA Local arrays
|
64
64
|
"""
|
65
65
|
|
66
|
-
def array(self, shape, dtype):
|
66
|
+
def array(self, shape, dtype, alignment=None):
|
67
|
+
if alignment is not None:
|
68
|
+
raise RuntimeError("Array alignment is not supported in cudasim")
|
69
|
+
|
67
70
|
if isinstance(dtype, types.Type):
|
68
71
|
dtype = numpy_support.as_dtype(dtype)
|
69
72
|
return np.empty(shape, dtype)
|
@@ -102,7 +105,10 @@ class FakeCUDAShared(object):
|
|
102
105
|
self._dynshared_size = dynshared_size
|
103
106
|
self._dynshared = np.zeros(dynshared_size, dtype=np.byte)
|
104
107
|
|
105
|
-
def array(self, shape, dtype):
|
108
|
+
def array(self, shape, dtype, alignment=None):
|
109
|
+
if alignment is not None:
|
110
|
+
raise RuntimeError("Array alignment is not supported in cudasim")
|
111
|
+
|
106
112
|
if isinstance(dtype, types.Type):
|
107
113
|
dtype = numpy_support.as_dtype(dtype)
|
108
114
|
# Dynamic shared memory is requested with size 0 - this all shares the
|
@@ -0,0 +1 @@
|
|
1
|
+
from .nrt import rtsys # noqa: F401
|
numba_cuda/numba/cuda/testing.py
CHANGED
@@ -116,20 +116,26 @@ def skip_on_arm(reason):
|
|
116
116
|
def skip_if_cuda_includes_missing(fn):
|
117
117
|
# Skip when cuda.h is not available - generally this should indicate
|
118
118
|
# whether the CUDA includes are available or not
|
119
|
-
|
119
|
+
reason = "CUDA include dir not available on this system"
|
120
|
+
try:
|
121
|
+
cuda_include_path = libs.get_cuda_include_dir()
|
122
|
+
except FileNotFoundError:
|
123
|
+
return unittest.skip(reason)(fn)
|
120
124
|
cuda_h = os.path.join(cuda_include_path, "cuda.h")
|
121
125
|
cuda_h_file = os.path.exists(cuda_h) and os.path.isfile(cuda_h)
|
122
|
-
reason = "CUDA include dir not available on this system"
|
123
126
|
return unittest.skipUnless(cuda_h_file, reason)(fn)
|
124
127
|
|
125
128
|
|
126
129
|
def skip_if_curand_kernel_missing(fn):
|
127
|
-
|
130
|
+
reason = "curand_kernel.h not available on this system"
|
131
|
+
try:
|
132
|
+
cuda_include_path = libs.get_cuda_include_dir()
|
133
|
+
except FileNotFoundError:
|
134
|
+
return unittest.skip(reason)(fn)
|
128
135
|
curand_kernel_h = os.path.join(cuda_include_path, "curand_kernel.h")
|
129
136
|
curand_kernel_h_file = os.path.exists(curand_kernel_h) and os.path.isfile(
|
130
137
|
curand_kernel_h
|
131
138
|
)
|
132
|
-
reason = "curand_kernel.h not available on this system"
|
133
139
|
return unittest.skipUnless(curand_kernel_h_file, reason)(fn)
|
134
140
|
|
135
141
|
|
@@ -476,12 +476,14 @@ class TestArrayMethod(CUDATestCase):
|
|
476
476
|
host_array, dev_array.copy_to_host().astype(dtype)
|
477
477
|
)
|
478
478
|
|
479
|
+
@skip_on_cudasim("Simulator does not use __array__()")
|
479
480
|
@unittest.skipUnless(IS_NUMPY_2, "NumPy 1.x does not pass copy kwarg")
|
480
481
|
def test_np_array_copy_false(self):
|
481
482
|
dev_array = cuda.to_device(np.asarray([1.0, 2.0, 3.0]))
|
482
483
|
with self.assertRaisesRegex(ValueError, "`copy=False` is not"):
|
483
484
|
np.array(dev_array, copy=False)
|
484
485
|
|
486
|
+
@skip_on_cudasim("Simulator does not use __array__()")
|
485
487
|
@unittest.skipUnless(IS_NUMPY_2, "NumPy 1.x does not pass copy kwarg")
|
486
488
|
def test_np_array_copy_true(self):
|
487
489
|
dev_array = cuda.to_device(np.asarray([1.0, 2.0, 3.0]))
|
@@ -5,14 +5,19 @@ import numpy as np
|
|
5
5
|
|
6
6
|
from numba import cuda, config
|
7
7
|
from numba.cuda.cudadrv.linkable_code import CUSource
|
8
|
-
from numba.cuda.testing import
|
8
|
+
from numba.cuda.testing import (
|
9
|
+
CUDATestCase,
|
10
|
+
ContextResettingTestCase,
|
11
|
+
skip_on_cudasim,
|
12
|
+
)
|
9
13
|
|
10
|
-
|
14
|
+
if not config.ENABLE_CUDASIM:
|
15
|
+
from cuda.bindings.driver import cuModuleGetGlobal, cuMemcpyHtoD
|
11
16
|
|
12
|
-
if config.CUDA_USE_NVIDIA_BINDING:
|
13
|
-
|
14
|
-
else:
|
15
|
-
|
17
|
+
if config.CUDA_USE_NVIDIA_BINDING:
|
18
|
+
from cuda.cuda import CUmodule as cu_module_type
|
19
|
+
else:
|
20
|
+
from numba.cuda.cudadrv.drvapi import cu_module as cu_module_type
|
16
21
|
|
17
22
|
|
18
23
|
def wipe_all_modules_in_context():
|
@@ -32,6 +37,7 @@ def get_hashable_handle_value(handle):
|
|
32
37
|
return handle
|
33
38
|
|
34
39
|
|
40
|
+
@skip_on_cudasim("Module loading not implemented in the simulator")
|
35
41
|
class TestModuleCallbacksBasic(ContextResettingTestCase):
|
36
42
|
def test_basic(self):
|
37
43
|
counter = 0
|
@@ -136,6 +142,7 @@ class TestModuleCallbacksBasic(ContextResettingTestCase):
|
|
136
142
|
self.assertEqual(len(teardown_seen), 2)
|
137
143
|
|
138
144
|
|
145
|
+
@skip_on_cudasim("Module loading not implemented in the simulator")
|
139
146
|
class TestModuleCallbacksAPICompleteness(CUDATestCase):
|
140
147
|
def test_api(self):
|
141
148
|
def setup(handle):
|
@@ -164,6 +171,7 @@ class TestModuleCallbacksAPICompleteness(CUDATestCase):
|
|
164
171
|
kernel[1, 1]()
|
165
172
|
|
166
173
|
|
174
|
+
@skip_on_cudasim("Module loading not implemented in the simulator")
|
167
175
|
class TestModuleCallbacks(CUDATestCase):
|
168
176
|
def setUp(self):
|
169
177
|
super().setUp()
|
@@ -213,6 +221,7 @@ __device__ int get_num(int &retval) {
|
|
213
221
|
self.assertEqual(arr[0], 42)
|
214
222
|
|
215
223
|
|
224
|
+
@skip_on_cudasim("Module loading not implemented in the simulator")
|
216
225
|
class TestMultithreadedCallbacks(CUDATestCase):
|
217
226
|
def test_concurrent_initialization(self):
|
218
227
|
seen_mods = set()
|
@@ -267,6 +267,7 @@ class TestLinker(CUDATestCase):
|
|
267
267
|
not PYNVJITLINK_INSTALLED or not TEST_BIN_DIR,
|
268
268
|
reason="pynvjitlink not enabled",
|
269
269
|
)
|
270
|
+
@skip_on_cudasim("Linking unsupported in the simulator")
|
270
271
|
class TestLinkerUsage(CUDATestCase):
|
271
272
|
"""Test that whether pynvjitlink can be enabled by both environment variable
|
272
273
|
and modification of config at runtime.
|
@@ -310,9 +310,6 @@ class TestCudaArray(CUDATestCase):
|
|
310
310
|
check(array_reshape, array_reshape1d, arr, 0)
|
311
311
|
check(array_reshape, array_reshape1d, arr, (0,))
|
312
312
|
check(array_reshape, array_reshape3d, arr, (1, 0, 2))
|
313
|
-
check_only_shape(array_reshape2d, arr, (0, -1), (0, 0))
|
314
|
-
check_only_shape(array_reshape2d, arr, (4, -1), (4, 0))
|
315
|
-
check_only_shape(array_reshape3d, arr, (-1, 0, 4), (0, 0, 4))
|
316
313
|
|
317
314
|
# C-contiguous
|
318
315
|
arr = np.arange(24)
|
@@ -3,7 +3,11 @@ import itertools
|
|
3
3
|
import numpy as np
|
4
4
|
from numba import cuda
|
5
5
|
from numba.core.errors import TypingError
|
6
|
-
from numba.cuda.testing import
|
6
|
+
from numba.cuda.testing import (
|
7
|
+
CUDATestCase,
|
8
|
+
skip_on_cudasim,
|
9
|
+
skip_unless_cudasim,
|
10
|
+
)
|
7
11
|
import unittest
|
8
12
|
|
9
13
|
|
@@ -65,6 +69,7 @@ for align in (True, False):
|
|
65
69
|
# with the test_alignment.TestArrayAlignment class.
|
66
70
|
|
67
71
|
|
72
|
+
@skip_on_cudasim("Array alignment not supported on cudasim")
|
68
73
|
class TestArrayAddressAlignment(CUDATestCase):
|
69
74
|
"""
|
70
75
|
Test cuda.local.array and cuda.shared.array support for an alignment
|
@@ -232,5 +237,24 @@ class TestArrayAddressAlignment(CUDATestCase):
|
|
232
237
|
print(".", end="", flush=True)
|
233
238
|
|
234
239
|
|
240
|
+
@skip_unless_cudasim("Only check for alignment unsupported in the simulator")
|
241
|
+
class TestCudasimUnsupportedAlignment(CUDATestCase):
|
242
|
+
def test_local_unsupported(self):
|
243
|
+
@cuda.jit
|
244
|
+
def f():
|
245
|
+
cuda.local.array(1, dtype=np.uint8, alignment=16)
|
246
|
+
|
247
|
+
with self.assertRaisesRegex(RuntimeError, "not supported in cudasim"):
|
248
|
+
f[1, 1]()
|
249
|
+
|
250
|
+
def test_shared_unsupported(self):
|
251
|
+
@cuda.jit
|
252
|
+
def f():
|
253
|
+
cuda.shared.array(1, dtype=np.uint8, alignment=16)
|
254
|
+
|
255
|
+
with self.assertRaisesRegex(RuntimeError, "not supported in cudasim"):
|
256
|
+
f[1, 1]()
|
257
|
+
|
258
|
+
|
235
259
|
if __name__ == "__main__":
|
236
260
|
unittest.main()
|
@@ -23,6 +23,15 @@ class TestBfloat16HighLevelBindings(CUDATestCase):
|
|
23
23
|
|
24
24
|
def test_math_bindings(self):
|
25
25
|
self.skip_unsupported()
|
26
|
+
|
27
|
+
exp_functions = [math.exp]
|
28
|
+
try:
|
29
|
+
from math import exp2
|
30
|
+
|
31
|
+
exp_functions += [exp2]
|
32
|
+
except ImportError:
|
33
|
+
pass
|
34
|
+
|
26
35
|
functions = [
|
27
36
|
math.trunc,
|
28
37
|
math.ceil,
|
@@ -33,9 +42,7 @@ class TestBfloat16HighLevelBindings(CUDATestCase):
|
|
33
42
|
math.cos,
|
34
43
|
math.sin,
|
35
44
|
math.tanh,
|
36
|
-
|
37
|
-
math.exp2,
|
38
|
-
]
|
45
|
+
] + exp_functions
|
39
46
|
|
40
47
|
for f in functions:
|
41
48
|
with self.subTest(func=f):
|
@@ -49,7 +56,7 @@ class TestBfloat16HighLevelBindings(CUDATestCase):
|
|
49
56
|
arr = cuda.device_array((1,), dtype="float32")
|
50
57
|
kernel[1, 1](arr)
|
51
58
|
|
52
|
-
if f in
|
59
|
+
if f in exp_functions:
|
53
60
|
self.assertAlmostEqual(arr[0], f(3.14), delta=1e-1)
|
54
61
|
else:
|
55
62
|
self.assertAlmostEqual(arr[0], f(3.14), delta=1e-2)
|
@@ -2,29 +2,40 @@ import numba.cuda as cuda
|
|
2
2
|
from numba.cuda.testing import unittest, CUDATestCase
|
3
3
|
import numpy as np
|
4
4
|
|
5
|
-
from numba import
|
5
|
+
from numba import (
|
6
|
+
config,
|
7
|
+
int16,
|
8
|
+
int32,
|
9
|
+
int64,
|
10
|
+
uint16,
|
11
|
+
uint32,
|
12
|
+
uint64,
|
13
|
+
float32,
|
14
|
+
float64,
|
15
|
+
)
|
6
16
|
from numba.types import float16
|
7
17
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
18
|
+
if not config.ENABLE_CUDASIM:
|
19
|
+
from numba.cuda._internal.cuda_bf16 import (
|
20
|
+
nv_bfloat16,
|
21
|
+
htrunc,
|
22
|
+
hceil,
|
23
|
+
hfloor,
|
24
|
+
hrint,
|
25
|
+
hsqrt,
|
26
|
+
hrsqrt,
|
27
|
+
hrcp,
|
28
|
+
hlog,
|
29
|
+
hlog2,
|
30
|
+
hlog10,
|
31
|
+
hcos,
|
32
|
+
hsin,
|
33
|
+
hexp,
|
34
|
+
hexp2,
|
35
|
+
hexp10,
|
36
|
+
htanh,
|
37
|
+
htanh_approx,
|
38
|
+
)
|
28
39
|
|
29
40
|
dtypes = [int16, int32, int64, uint16, uint32, uint64, float32]
|
30
41
|
|
@@ -263,6 +274,8 @@ class Bfloat16Test(CUDATestCase):
|
|
263
274
|
np.testing.assert_allclose(arr, [8], atol=1e-2)
|
264
275
|
|
265
276
|
def test_use_binding_inside_dfunc(self):
|
277
|
+
self.skip_unsupported()
|
278
|
+
|
266
279
|
@cuda.jit(device=True)
|
267
280
|
def f(arr):
|
268
281
|
pi = nv_bfloat16(3.14)
|
@@ -157,6 +157,7 @@ class TestCudaCooperativeGroups(CUDATestCase):
|
|
157
157
|
self.assertEqual(blocks1d, blocks2d)
|
158
158
|
self.assertEqual(blocks1d, blocks3d)
|
159
159
|
|
160
|
+
@skip_on_cudasim("External code unsupported on cudasim")
|
160
161
|
@skip_unless_cc_60
|
161
162
|
def test_external_cooperative_func(self):
|
162
163
|
cudapy_test_path = os.path.dirname(__file__)
|
@@ -171,12 +172,13 @@ class TestCudaCooperativeGroups(CUDATestCase):
|
|
171
172
|
"cta_barrier", sig=sig, link=[src], use_cooperative=True
|
172
173
|
)
|
173
174
|
|
174
|
-
@cuda.jit
|
175
|
+
@cuda.jit("void()")
|
175
176
|
def kernel():
|
176
177
|
cta_barrier()
|
177
178
|
|
179
|
+
overload = kernel.overloads[()]
|
178
180
|
block_size = 32
|
179
|
-
grid_size =
|
181
|
+
grid_size = overload.max_cooperative_grid_blocks(block_size)
|
180
182
|
|
181
183
|
kernel[grid_size, block_size]()
|
182
184
|
|
@@ -116,6 +116,7 @@ class EnumTest(CUDATestCase):
|
|
116
116
|
got = cuda_func(arr)
|
117
117
|
self.assertPreciseEqual(expected, got)
|
118
118
|
|
119
|
+
@skip_on_cudasim("No typing context in CUDA simulator")
|
119
120
|
def test_int_enum_no_conversion(self):
|
120
121
|
# Ported from Numba PR #10047: "Fix IntEnumMember.can_convert_to() when
|
121
122
|
# no conversions found", https://github.com/numba/numba/pull/10047.
|
@@ -191,7 +191,9 @@ if TEST_BIN_DIR:
|
|
191
191
|
)
|
192
192
|
|
193
193
|
|
194
|
+
@skip_on_cudasim("Extensions not supported in the simulator")
|
194
195
|
class TestExtendingLinkage(CUDATestCase):
|
196
|
+
@unittest.skipUnless(TEST_BIN_DIR, "Necessary binaries are not available")
|
195
197
|
def test_extension_adds_linkable_code(self):
|
196
198
|
cuda_major_version = cuda.runtime.get_version()[0]
|
197
199
|
|
@@ -3,16 +3,13 @@ import os
|
|
3
3
|
|
4
4
|
import numpy as np
|
5
5
|
import unittest
|
6
|
-
from numba.cuda.testing import CUDATestCase
|
6
|
+
from numba.cuda.testing import CUDATestCase, skip_on_cudasim
|
7
7
|
from numba.tests.support import run_in_subprocess, override_config
|
8
8
|
from numba.cuda import get_current_device
|
9
9
|
from numba.cuda.cudadrv.nvrtc import compile
|
10
|
-
from numba import types
|
11
|
-
from numba.cuda.cudadecl import registry as cuda_decl_registry
|
10
|
+
from numba import config, types
|
12
11
|
from numba.core.typing import signature
|
13
|
-
from numba.cuda.cudaimpl import lower as cuda_lower
|
14
12
|
from numba import cuda
|
15
|
-
from numba.cuda.runtime.nrt import rtsys, get_include
|
16
13
|
from numba.core.typing.templates import AbstractTemplate
|
17
14
|
from numba.cuda.cudadrv.linkable_code import (
|
18
15
|
CUSource,
|
@@ -23,67 +20,68 @@ from numba.cuda.cudadrv.linkable_code import (
|
|
23
20
|
Object,
|
24
21
|
)
|
25
22
|
|
26
|
-
|
27
23
|
TEST_BIN_DIR = os.getenv("NUMBA_CUDA_TEST_BIN_DIR")
|
28
24
|
|
29
|
-
if
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
with open(path, mode) as f:
|
34
|
-
contents = f.read()
|
35
|
-
return kind(contents, nrt=True)
|
36
|
-
|
37
|
-
nrt_extern_a = make_linkable_code("nrt_extern.a", Archive, "rb")
|
38
|
-
nrt_extern_cubin = make_linkable_code("nrt_extern.cubin", Cubin, "rb")
|
39
|
-
nrt_extern_cu = make_linkable_code(
|
40
|
-
"nrt_extern.cu",
|
41
|
-
CUSource,
|
42
|
-
"rb",
|
43
|
-
)
|
44
|
-
nrt_extern_fatbin = make_linkable_code("nrt_extern.fatbin", Fatbin, "rb")
|
45
|
-
nrt_extern_fatbin_multi = make_linkable_code(
|
46
|
-
"nrt_extern_multi.fatbin", Fatbin, "rb"
|
47
|
-
)
|
48
|
-
nrt_extern_o = make_linkable_code("nrt_extern.o", Object, "rb")
|
49
|
-
nrt_extern_ptx = make_linkable_code("nrt_extern.ptx", PTXSource, "rb")
|
25
|
+
if not config.ENABLE_CUDASIM:
|
26
|
+
from numba.cuda.memory_management.nrt import rtsys, get_include
|
27
|
+
from numba.cuda.cudadecl import registry as cuda_decl_registry
|
28
|
+
from numba.cuda.cudaimpl import lower as cuda_lower
|
50
29
|
|
30
|
+
def allocate_deallocate_handle():
|
31
|
+
"""
|
32
|
+
Handle to call NRT_Allocate and NRT_Free
|
33
|
+
"""
|
34
|
+
pass
|
51
35
|
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
pass
|
57
|
-
|
58
|
-
|
59
|
-
@cuda_decl_registry.register_global(allocate_deallocate_handle)
|
60
|
-
class AllocateShimImpl(AbstractTemplate):
|
61
|
-
def generic(self, args, kws):
|
62
|
-
return signature(types.void)
|
63
|
-
|
36
|
+
@cuda_decl_registry.register_global(allocate_deallocate_handle)
|
37
|
+
class AllocateShimImpl(AbstractTemplate):
|
38
|
+
def generic(self, args, kws):
|
39
|
+
return signature(types.void)
|
64
40
|
|
65
|
-
device_fun_shim = cuda.declare_device(
|
66
|
-
|
67
|
-
)
|
41
|
+
device_fun_shim = cuda.declare_device(
|
42
|
+
"device_allocate_deallocate", types.int32()
|
43
|
+
)
|
68
44
|
|
45
|
+
# wrapper to turn the above into a python callable
|
46
|
+
def call_device_fun_shim():
|
47
|
+
return device_fun_shim()
|
48
|
+
|
49
|
+
@cuda_lower(allocate_deallocate_handle)
|
50
|
+
def allocate_deallocate_impl(context, builder, sig, args):
|
51
|
+
sig_ = types.int32()
|
52
|
+
# call the external function, passing the pointer
|
53
|
+
result = context.compile_internal(
|
54
|
+
builder,
|
55
|
+
call_device_fun_shim,
|
56
|
+
sig_,
|
57
|
+
(),
|
58
|
+
)
|
69
59
|
|
70
|
-
|
71
|
-
def call_device_fun_shim():
|
72
|
-
return device_fun_shim()
|
60
|
+
return result
|
73
61
|
|
62
|
+
if TEST_BIN_DIR:
|
74
63
|
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
builder,
|
81
|
-
call_device_fun_shim,
|
82
|
-
sig_,
|
83
|
-
(),
|
84
|
-
)
|
64
|
+
def make_linkable_code(name, kind, mode):
|
65
|
+
path = os.path.join(TEST_BIN_DIR, name)
|
66
|
+
with open(path, mode) as f:
|
67
|
+
contents = f.read()
|
68
|
+
return kind(contents, nrt=True)
|
85
69
|
|
86
|
-
|
70
|
+
nrt_extern_a = make_linkable_code("nrt_extern.a", Archive, "rb")
|
71
|
+
nrt_extern_cubin = make_linkable_code("nrt_extern.cubin", Cubin, "rb")
|
72
|
+
nrt_extern_cu = make_linkable_code(
|
73
|
+
"nrt_extern.cu",
|
74
|
+
CUSource,
|
75
|
+
"rb",
|
76
|
+
)
|
77
|
+
nrt_extern_fatbin = make_linkable_code(
|
78
|
+
"nrt_extern.fatbin", Fatbin, "rb"
|
79
|
+
)
|
80
|
+
nrt_extern_fatbin_multi = make_linkable_code(
|
81
|
+
"nrt_extern_multi.fatbin", Fatbin, "rb"
|
82
|
+
)
|
83
|
+
nrt_extern_o = make_linkable_code("nrt_extern.o", Object, "rb")
|
84
|
+
nrt_extern_ptx = make_linkable_code("nrt_extern.ptx", PTXSource, "rb")
|
87
85
|
|
88
86
|
|
89
87
|
class TestNrtBasic(CUDATestCase):
|
@@ -104,6 +102,7 @@ class TestNrtBasic(CUDATestCase):
|
|
104
102
|
g[1, 1]()
|
105
103
|
cuda.synchronize()
|
106
104
|
|
105
|
+
@skip_on_cudasim("CUDA Simulator does not produce PTX")
|
107
106
|
def test_nrt_ptx_contains_refcount(self):
|
108
107
|
@cuda.jit
|
109
108
|
def f(x):
|
@@ -157,6 +156,7 @@ class TestNrtLinking(CUDATestCase):
|
|
157
156
|
with override_config("CUDA_ENABLE_NRT", True):
|
158
157
|
super(TestNrtLinking, self).run(result)
|
159
158
|
|
159
|
+
@skip_on_cudasim("CUDA Simulator does not link PTX")
|
160
160
|
def test_nrt_detect_linked_ptx_file(self):
|
161
161
|
src = f"#include <{get_include()}/nrt.cuh>"
|
162
162
|
src += """
|
@@ -176,6 +176,7 @@ class TestNrtLinking(CUDATestCase):
|
|
176
176
|
kernel[1, 1]()
|
177
177
|
|
178
178
|
@unittest.skipIf(not TEST_BIN_DIR, "necessary binaries not generated.")
|
179
|
+
@skip_on_cudasim("CUDA Simulator does not link code")
|
179
180
|
def test_nrt_detect_linkable_code(self):
|
180
181
|
codes = (
|
181
182
|
nrt_extern_a,
|
@@ -196,6 +197,7 @@ class TestNrtLinking(CUDATestCase):
|
|
196
197
|
kernel[1, 1]()
|
197
198
|
|
198
199
|
|
200
|
+
@skip_on_cudasim("CUDASIM does not have NRT statistics")
|
199
201
|
class TestNrtStatistics(CUDATestCase):
|
200
202
|
def setUp(self):
|
201
203
|
self._stream = cuda.default_stream()
|
@@ -213,7 +215,7 @@ class TestNrtStatistics(CUDATestCase):
|
|
213
215
|
# Checks that explicitly turning the stats on via the env var works.
|
214
216
|
src = """if 1:
|
215
217
|
from numba import cuda
|
216
|
-
from numba.cuda.
|
218
|
+
from numba.cuda.memory_management import rtsys
|
217
219
|
import numpy as np
|
218
220
|
|
219
221
|
@cuda.jit
|
@@ -252,7 +254,7 @@ class TestNrtStatistics(CUDATestCase):
|
|
252
254
|
src = """if 1:
|
253
255
|
from numba import cuda
|
254
256
|
import numpy as np
|
255
|
-
from numba.cuda.
|
257
|
+
from numba.cuda.memory_management import rtsys
|
256
258
|
|
257
259
|
@cuda.jit
|
258
260
|
def foo():
|
@@ -1,13 +1,14 @@
|
|
1
1
|
import numpy as np
|
2
2
|
import unittest
|
3
3
|
from numba.tests.support import override_config
|
4
|
-
from numba.cuda.
|
4
|
+
from numba.cuda.memory_management import rtsys
|
5
5
|
from numba.cuda.tests.support import EnableNRTStatsMixin
|
6
|
-
from numba.cuda.testing import CUDATestCase
|
6
|
+
from numba.cuda.testing import CUDATestCase, skip_on_cudasim
|
7
7
|
|
8
8
|
from numba import cuda
|
9
9
|
|
10
10
|
|
11
|
+
@skip_on_cudasim("No refcounting in the simulator")
|
11
12
|
class TestNrtRefCt(EnableNRTStatsMixin, CUDATestCase):
|
12
13
|
def setUp(self):
|
13
14
|
super(TestNrtRefCt, self).setUp()
|
@@ -40,7 +40,7 @@ LTOIR_FLAGS := $(LTOIR_GENCODE) -dc
|
|
40
40
|
|
41
41
|
OUTPUT_DIR := ./
|
42
42
|
|
43
|
-
NRT_INCLUDE_DIR := $(shell python -c "from numba.cuda.runtime.nrt import get_include; print(get_include())")
|
43
|
+
NRT_INCLUDE_DIR := $(shell python -c "from numba.cuda.memory_management.nrt import get_include; print(get_include())")
|
44
44
|
|
45
45
|
all:
|
46
46
|
@echo "GPU CC: $(GPU_CC)"
|
@@ -1,6 +1,6 @@
|
|
1
1
|
_numba_cuda_redirector.pth,sha256=cmfMMmV0JPh3yEpl4bGeM9AuXiVVMSo6Z_b7RaQL3XE,30
|
2
2
|
_numba_cuda_redirector.py,sha256=n_r8MYbu5-vcXMnLJW147k8DnFXXvgb7nPIXnlXwTyQ,2659
|
3
|
-
numba_cuda/VERSION,sha256=
|
3
|
+
numba_cuda/VERSION,sha256=2EyeWWx9apTl90V5742JEqgHsNKFgkdJAK0137Pt_PQ,7
|
4
4
|
numba_cuda/__init__.py,sha256=atXeUvJKR3JHcAiCFbXCVOJQUHgB1TulmsqSL_9RT3Q,114
|
5
5
|
numba_cuda/_version.py,sha256=nzrrJXi85d18m6SPdsPsetJNClDETkmF1MrEhGLYDBs,734
|
6
6
|
numba_cuda/numba/cuda/__init__.py,sha256=3siqMXEKqa9ezQ8RxPC3KMdebUjgJt-EKxxV4CX9818,607
|
@@ -9,7 +9,7 @@ numba_cuda/numba/cuda/api_util.py,sha256=jK8oUD3zf_D5IX7vbjc3uY_5kmOxwgEqO2m_lDH
|
|
9
9
|
numba_cuda/numba/cuda/args.py,sha256=UlTHTJpwPeCtnW0Bb-Wetm5UO9TPR-PCgIt5ys8b8tQ,1894
|
10
10
|
numba_cuda/numba/cuda/bf16.py,sha256=PXuitxHhPMjnti3g9IOSoL90ofGgVRcDfqFg7AqCXpU,1778
|
11
11
|
numba_cuda/numba/cuda/cg.py,sha256=n-sBj05ut6U_GgFIq-PTCjPad4nXWAc0GVg_J9xD_Pc,1602
|
12
|
-
numba_cuda/numba/cuda/codegen.py,sha256=
|
12
|
+
numba_cuda/numba/cuda/codegen.py,sha256=u2J0mRRDBiPceB1G5WR4KQ0KUFGGawaDaaoUf9zLQzE,16719
|
13
13
|
numba_cuda/numba/cuda/compiler.py,sha256=aZwEVP8KXCIyccSw4vJyG6Qaai9oXsFuBAo_Ghwwai4,25607
|
14
14
|
numba_cuda/numba/cuda/cpp_function_wrappers.cu,sha256=8lUPmU6FURxphzEqkPLZRPYBCEK_wmDtHq2voPkckfs,950
|
15
15
|
numba_cuda/numba/cuda/cuda_paths.py,sha256=kMIJ_1yV2qtcKEM5rCgSDJ3Gz7bgxbfAWh54E5cDndg,15872
|
@@ -21,7 +21,7 @@ numba_cuda/numba/cuda/decorators.py,sha256=bR8yOAIC68lhm8mSMU-DUt1qFrEogbmSAtzAI
|
|
21
21
|
numba_cuda/numba/cuda/descriptor.py,sha256=t1rSVJSCAlVACC5_Un3FQ7iubdTTBe-euqz88cvs2tI,985
|
22
22
|
numba_cuda/numba/cuda/device_init.py,sha256=Rtwd6hQMHMLMkj6MXtndbWYFJfkIaRe0MwOIJF2nzhU,3449
|
23
23
|
numba_cuda/numba/cuda/deviceufunc.py,sha256=zj9BbLiZD-dPttHew4olw8ANgR2nXnXEE9qjCeGLrQI,30731
|
24
|
-
numba_cuda/numba/cuda/dispatcher.py,sha256=
|
24
|
+
numba_cuda/numba/cuda/dispatcher.py,sha256=m8kXKk08ldcW7Cl3KpFxsKMTxVgZeRJke9bKzO6_JjE,43172
|
25
25
|
numba_cuda/numba/cuda/errors.py,sha256=WRso1Q_jCoWP5yrDBMhihRhhVtVo1-7KdN8QVE9j46o,1712
|
26
26
|
numba_cuda/numba/cuda/extending.py,sha256=VwuU5F0AQFlJsqaiwoWk-6Itihew1FsjVT_BVjhY8Us,2278
|
27
27
|
numba_cuda/numba/cuda/initialize.py,sha256=0SnpjccQEYiWITIyfAJx833H1yhYFFDY42EpnwYyMn8,487
|
@@ -42,7 +42,7 @@ numba_cuda/numba/cuda/reshape_funcs.cu,sha256=frw1uoeMSYlkPC38LiKE8Tz2P70X2e4UZG
|
|
42
42
|
numba_cuda/numba/cuda/simulator_init.py,sha256=Hvzty6NJp1SeKspyb-b887xpeNLMMI0x9aPmV--X77E,450
|
43
43
|
numba_cuda/numba/cuda/stubs.py,sha256=JMs4Xg8IHlAq5L6SBYWcYNzXfJGM6v0lZCQaOb5x9CQ,23014
|
44
44
|
numba_cuda/numba/cuda/target.py,sha256=ymYBdkt7iNK_PJCfyqupKpcSj7j-UQzkWIq3KjoLBD8,12963
|
45
|
-
numba_cuda/numba/cuda/testing.py,sha256=
|
45
|
+
numba_cuda/numba/cuda/testing.py,sha256=a5Ay9z7x9vPiFfbfuwIfb3uyA_OSAchQZtye4u71UKE,6995
|
46
46
|
numba_cuda/numba/cuda/types.py,sha256=hC1MUvgUwy-SLgbzFzXwssJzPR8BxQwqUcjwGJFzVac,1317
|
47
47
|
numba_cuda/numba/cuda/ufuncs.py,sha256=AJifQgapyv62fdJeMm939R1I5TvIRmaA8dJ83Jy8DCw,23559
|
48
48
|
numba_cuda/numba/cuda/utils.py,sha256=VRphC0PLr8Klq3D1FMONu4aRdVO23HOCBg4bxnsqmfc,785
|
@@ -61,7 +61,7 @@ numba_cuda/numba/cuda/cudadrv/libs.py,sha256=qjknQxYXd2ucwDLQqzhWC_srNg6FnwvcVHI
|
|
61
61
|
numba_cuda/numba/cuda/cudadrv/linkable_code.py,sha256=IZ13laEG_altDQyi9HkdMcwW-YYEIn2erqz6AnYsqHg,2808
|
62
62
|
numba_cuda/numba/cuda/cudadrv/mappings.py,sha256=9uEs1KepeVGRbEpVhLjtxSsvZpZsbrHnPywmx--y88A,804
|
63
63
|
numba_cuda/numba/cuda/cudadrv/ndarray.py,sha256=HtULWWFyDlgqvrH5459yyPTvU4UbUo2DSdtcNfvbH00,473
|
64
|
-
numba_cuda/numba/cuda/cudadrv/nvrtc.py,sha256=
|
64
|
+
numba_cuda/numba/cuda/cudadrv/nvrtc.py,sha256=UD8kASyGUU896tNWAtVxmbzDTP5jDbiOAZjCsELOg6U,14986
|
65
65
|
numba_cuda/numba/cuda/cudadrv/nvvm.py,sha256=7tTy6-VEbMBpDUmuSMnUwqPFfBndTh3aPq_n7nxhEA0,26344
|
66
66
|
numba_cuda/numba/cuda/cudadrv/rtapi.py,sha256=J6PRGGK07XSLRzgCw5xs8VU5xVoqavvhojk1mxiQsi4,226
|
67
67
|
numba_cuda/numba/cuda/cudadrv/runtime.py,sha256=CFumwg4iblWap_E7l7GM_hMYz1PsbH81-N0tZwFFooA,4372
|
@@ -76,31 +76,38 @@ numba_cuda/numba/cuda/include/12/cuda_fp16.hpp,sha256=o1ITDmuN67N8YUGUcvTpV3IdpS
|
|
76
76
|
numba_cuda/numba/cuda/kernels/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
77
77
|
numba_cuda/numba/cuda/kernels/reduction.py,sha256=RsVubg8uNumxNxo9HBlFVCDicA-KZKsksKId0ktgQyY,9101
|
78
78
|
numba_cuda/numba/cuda/kernels/transpose.py,sha256=FbtFmOqaj_e7ARR_kkiTpSvj4BJyqBta5ci1CWtJ690,2033
|
79
|
-
numba_cuda/numba/cuda/
|
80
|
-
numba_cuda/numba/cuda/
|
81
|
-
numba_cuda/numba/cuda/
|
82
|
-
numba_cuda/numba/cuda/
|
83
|
-
numba_cuda/numba/cuda/
|
84
|
-
numba_cuda/numba/cuda/
|
85
|
-
numba_cuda/numba/cuda/simulator/__init__.py,sha256=
|
86
|
-
numba_cuda/numba/cuda/simulator/api.py,sha256=
|
87
|
-
numba_cuda/numba/cuda/simulator/
|
88
|
-
numba_cuda/numba/cuda/simulator/
|
89
|
-
numba_cuda/numba/cuda/simulator/
|
79
|
+
numba_cuda/numba/cuda/memory_management/__init__.py,sha256=r4RWOgWft81ChDdRaJdABHF0y_HxrTIv4mgXpo0imYA,65
|
80
|
+
numba_cuda/numba/cuda/memory_management/memsys.cu,sha256=gMBM9_Hnv3EO3Gw_GKvII8y2hGoNtwrlZ43AUjTcsVo,2387
|
81
|
+
numba_cuda/numba/cuda/memory_management/memsys.cuh,sha256=hPGBQgKyOfYY25ntoBXlhYyeXzxJyz0ByeTszkaKJUM,504
|
82
|
+
numba_cuda/numba/cuda/memory_management/nrt.cu,sha256=1hzbAKyqh9783UVdVT67ZxfvJyl_Ojt8e0AbHUC86ss,4818
|
83
|
+
numba_cuda/numba/cuda/memory_management/nrt.cuh,sha256=p2GQ-l-EfCoO0sBTyKXhIY3hxGWbPhEJcR-mLLT_V3M,2173
|
84
|
+
numba_cuda/numba/cuda/memory_management/nrt.py,sha256=6yXKBUvjIw_9BJ48iDIuckREaQVskzQAXm7uIRGFVuc,10039
|
85
|
+
numba_cuda/numba/cuda/simulator/__init__.py,sha256=ONoWJ3SwE53di0p-lFRH7NOZea2jEUWyn9sDpkOVjCw,2040
|
86
|
+
numba_cuda/numba/cuda/simulator/api.py,sha256=hFSFPIrg-aUd-MHg2GCSosFJiL8x2XRiQaqolfTGA3A,3551
|
87
|
+
numba_cuda/numba/cuda/simulator/bf16.py,sha256=1ZWkY4Adv8dY46YyorGKGQj3KEBqeet6rsyM8jwfAb4,16
|
88
|
+
numba_cuda/numba/cuda/simulator/compiler.py,sha256=bK3qZhZ5ZszjGEtR-a5q-0wqKk_mjZ8Z5yXFsRVu4kE,263
|
89
|
+
numba_cuda/numba/cuda/simulator/kernel.py,sha256=9SWZwD0kLQ3phRUIxZJdzxKiOgLQ6FCbyxVGC52niwM,10448
|
90
|
+
numba_cuda/numba/cuda/simulator/kernelapi.py,sha256=wAKQih_0KRVgtMgOW2SpfmKQhZxU-D2-9D6SZc78CXM,12662
|
90
91
|
numba_cuda/numba/cuda/simulator/reduction.py,sha256=_l5xzoKfoDoaSWW5uFOkXz0YKA6XBqL84wUFNLLNCgM,301
|
91
92
|
numba_cuda/numba/cuda/simulator/vector_types.py,sha256=bN347hH6w2khtYD42d_VT8n1lAmWCee1QUxtGvpdzBg,1692
|
93
|
+
numba_cuda/numba/cuda/simulator/_internal/__init__.py,sha256=arjWguQQhHaD0pAQe9XaC8gI6eye0lboHAxd6TgCXHQ,67
|
94
|
+
numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
92
95
|
numba_cuda/numba/cuda/simulator/cudadrv/__init__.py,sha256=P8VK4Q8w7H_-sagOYWNLeYjWsmoI6_KcCAceBmSyHx0,120
|
93
96
|
numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py,sha256=On6Vx4WQC79yWr8ayUOts00sOADsCITVxLMnuiG-4LI,13503
|
94
97
|
numba_cuda/numba/cuda/simulator/cudadrv/devices.py,sha256=7kq0XuzlgFeZPlYiFl1smEEmR0BCigalkt-CODsfPCo,2691
|
95
|
-
numba_cuda/numba/cuda/simulator/cudadrv/driver.py,sha256
|
98
|
+
numba_cuda/numba/cuda/simulator/cudadrv/driver.py,sha256=-o16cYfkuta7BgltIBcprvQL81yQwlzZ4xpy9DD6V9Q,1242
|
96
99
|
numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py,sha256=MfYr-UGYhMaysqxsEstyTvmV5Gee6mFZb3PCvcLywos,111
|
97
100
|
numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py,sha256=DYIpIehz3cZSane651UYdJP5fehDuJkxyCg_90A1heU,163
|
98
101
|
numba_cuda/numba/cuda/simulator/cudadrv/error.py,sha256=ACSQ7ZvhuCHnvV4GmvRuKWZ5bBLVzq7ncZ75oiWyLdM,87
|
99
|
-
numba_cuda/numba/cuda/simulator/cudadrv/libs.py,sha256=
|
102
|
+
numba_cuda/numba/cuda/simulator/cudadrv/libs.py,sha256=pmAZxP40Md1B8sEBlkQ0ckYraSiuxH7tt3TmjvXDmr0,201
|
103
|
+
numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py,sha256=u8I2keYsmAcWaSvQhTLiBxX_O4qJhkMrK15esx0m1PM,1192
|
104
|
+
numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py,sha256=OSE_gPUqqxdIMs01yacVT-eGwmirQAY4DIve15wODRI,216
|
100
105
|
numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py,sha256=AFJg67iVcXROuj2ncHcmpkBsooEbEK8CQVOvxA_RUrc,474
|
101
106
|
numba_cuda/numba/cuda/simulator/cudadrv/runtime.py,sha256=OtbGKMObt5eTPbVV8G5bKviT2sKiGLQ6z9crC1wfiCQ,360
|
107
|
+
numba_cuda/numba/cuda/simulator/memory_management/__init__.py,sha256=-_jZfXlheXqZDSGW-sHYykVl0dDvljpEg4Pp_f0EZC4,37
|
108
|
+
numba_cuda/numba/cuda/simulator/memory_management/nrt.py,sha256=1wcgPJtKUjqzP3KDDE1F_fwVR0S2emBeEPfHBdfnKkU,101
|
102
109
|
numba_cuda/numba/cuda/tests/__init__.py,sha256=qrIMTeP56g1SJlyekm4vQ3fcsWGR9qGTnMfE_uq0eeQ,2387
|
103
|
-
numba_cuda/numba/cuda/tests/support.py,sha256=
|
110
|
+
numba_cuda/numba/cuda/tests/support.py,sha256=IpWXM2pELCeoqdQIUsvy9Rsm460omp15HMMpJsxTt9U,263
|
104
111
|
numba_cuda/numba/cuda/tests/cudadrv/__init__.py,sha256=43EXdiXXRBd6yIcVGMrU9F_EJCD9Uw3mzOP3SB53AEE,260
|
105
112
|
numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py,sha256=np2UccpkNqLFLcbjJNHGkCJF6TZIyFljB6UXD7U71nM,5299
|
106
113
|
numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py,sha256=m7q1bEsH3rJD2gngB9RAjQuvjr0FB-GrOjLCWryI1lQ,4495
|
@@ -110,7 +117,7 @@ numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py,sha256=JkMbKFa3CBS
|
|
110
117
|
numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py,sha256=bn9OBNmNq5WTgv5LXQTyi-3V3auKbIBNoC-vNfzeX9I,7536
|
111
118
|
numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py,sha256=KWGON5OSb5Vp74QFDIiupK6ytMwwwDfbYqpENAB4lGE,801
|
112
119
|
numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py,sha256=nN1pk7CEm4j8A6XYlDpIWkpFOSO7IGz-7rwa0fFnerY,6485
|
113
|
-
numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py,sha256=
|
120
|
+
numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py,sha256=MzKXO2RLCHA_0XU29JfjaLrmKTwwu7PA3cspTfQgCPM,21699
|
114
121
|
numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py,sha256=VKYRuIOPdEWkI-6E6-pRCNC1U4-Qxi0d-jX_q_7x1dI,8420
|
115
122
|
numba_cuda/numba/cuda/tests/cudadrv/test_detect.py,sha256=DUYZeNlDgL1mQN1xHDYzTRfc-zetuikcULyULcRaC1A,2657
|
116
123
|
numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py,sha256=xom2V_KImeDX5AN9XKcWln_MFhIvPrwJDxJWvE6Nspw,7070
|
@@ -121,9 +128,9 @@ numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py,sha256=B_fYsBUpd9SxYSOmuW
|
|
121
128
|
numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py,sha256=0KPe4E9wOZsSV_0QI0LmjUeMTjWpYT8BXExUUsmUCDI,394
|
122
129
|
numba_cuda/numba/cuda/tests/cudadrv/test_linker.py,sha256=ymv2ujRLLIIURikNEdC0SshJFwXhIx9j462va_QvPTw,10133
|
123
130
|
numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py,sha256=2tkf766GjIta_wL5NGlMIqmrDMFN2rZmnP_c9A8cWA8,5084
|
124
|
-
numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py,sha256=
|
131
|
+
numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py,sha256=176Ma2ZVLnc4w4bfYwbF1eeRq3x3rbOvDieRJLSuNpI,8413
|
125
132
|
numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py,sha256=9MLFEXn7DnLkuuXK_qjilA1jxQwC-AeSBOcRYzZogRY,1513
|
126
|
-
numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py,sha256=
|
133
|
+
numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py,sha256=8SSSAotk8rhGClwxQCnwL_JhoD9NbvXxEa7KfjaZO3M,11551
|
127
134
|
numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py,sha256=1r817QeIrIEs8BcK0XKBR9g_mkO3e7WI-oW-sNsO_Ho,7353
|
128
135
|
numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py,sha256=PGuv4bt9qiIGlkLhyQCOXFIf1SK5Nj-RjcpWqeO1TMM,943
|
129
136
|
numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py,sha256=xbSFmvqOIcWY-TI9p1MDcGwE-24iaK4j-_UenMvTnR4,508
|
@@ -140,13 +147,13 @@ numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py,sha256=2i_xq4B1t1tctr6
|
|
140
147
|
numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx,sha256=PKVafUhDH1SKRWXkt4N3v8SDMh4RyDFiJM-CMksa5uc,519
|
141
148
|
numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py,sha256=wrWx8AeRhBHM74iYPKKrZqiyWrYCtQU3J-g3Zv7JmoY,1782
|
142
149
|
numba_cuda/numba/cuda/tests/cudapy/test_alignment.py,sha256=RkhAcVkGtze8JpZTlYYvqTesDYE7xfKQZd1izgxDQpU,1219
|
143
|
-
numba_cuda/numba/cuda/tests/cudapy/test_array.py,sha256=
|
144
|
-
numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py,sha256=
|
150
|
+
numba_cuda/numba/cuda/tests/cudapy/test_array.py,sha256=tqALZEr39aM8ZzbnEuOpKTOWb6Diz6Eti1i0a6WyI7k,13005
|
151
|
+
numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py,sha256=Bkz6z-GrSbwAjI9YQMcTKnFMNIQv5zyALYJZtaet9w0,9059
|
145
152
|
numba_cuda/numba/cuda/tests/cudapy/test_array_args.py,sha256=iiFrt5Yn7gfheAGOYG2VBeWeuW3JlBhRLXNfSz4cHAA,4982
|
146
153
|
numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py,sha256=SWa1MvpwG07yBkrFIUeM9pm3BIwUbhttMNBdUW-CpSM,969
|
147
154
|
numba_cuda/numba/cuda/tests/cudapy/test_atomics.py,sha256=agsfUN3WOoh6ICAECtuMuxZNcKq5ivK30Ew3h_m76m0,57689
|
148
|
-
numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py,sha256=
|
149
|
-
numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py,sha256=
|
155
|
+
numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py,sha256=TpHRxyIwvwv6KUKTS15ukYVDTcT5iYsNp8fCcocY7M0,1573
|
156
|
+
numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py,sha256=wvEbMdlmqAZvlZoUgFG-KC0w13CeMfMdR-gf2lN0Sj0,7612
|
150
157
|
numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py,sha256=0_wr6MSeHh0QVzPeH8SB7j0Nv_RrPAK01hNoQ_dGT5I,4417
|
151
158
|
numba_cuda/numba/cuda/tests/cudapy/test_boolean.py,sha256=j4mIOv4rJTLjJzpKk1O9UFLT41_iOQRtwsmteXdKZ-M,547
|
152
159
|
numba_cuda/numba/cuda/tests/cudapy/test_caching.py,sha256=obUSTJSP2Lh-YNElq8PZpVnRJOeq-uqV_VyLHtsXwAw,18427
|
@@ -157,7 +164,7 @@ numba_cuda/numba/cuda/tests/cudapy/test_complex.py,sha256=hmAcyZim46yueXZDqDSJYq
|
|
157
164
|
numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py,sha256=KIuXQ0ihgQQXM-eH7s3xAxhKe35YL1qDTHCVTWA4ut8,497
|
158
165
|
numba_cuda/numba/cuda/tests/cudapy/test_const_string.py,sha256=li1UsV5vc2M01cJ7k6_526VPtuAOAKr8e7kb1CDUXi4,4323
|
159
166
|
numba_cuda/numba/cuda/tests/cudapy/test_constmem.py,sha256=ZWmyKvFokRMjqyXjVpZVOnR6LR694GWcbUn2jVEQV14,5170
|
160
|
-
numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py,sha256=
|
167
|
+
numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py,sha256=3OkjhcjPp_P3Pnc1zbteGpAGpoN07cG8Xtdnunx5yWA,5973
|
161
168
|
numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py,sha256=RXCNHAZM35sbUf3Gi-x2E8-a6BmhFb2rhQkBOeiS_fo,15757
|
162
169
|
numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py,sha256=8prL2FTiaajW-UHSL9al-nBniygOfpdAOT_Dkej4PWI,2138
|
163
170
|
numba_cuda/numba/cuda/tests/cudapy/test_datetime.py,sha256=MnOeDWMz-rL3-07FsswM06Laxmm0KjTmTwhrP3rmchQ,3526
|
@@ -165,10 +172,10 @@ numba_cuda/numba/cuda/tests/cudapy/test_debug.py,sha256=1P369s02AvGu7fSIEe_YxSgh
|
|
165
172
|
numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py,sha256=AE8D4U4dAv4nYP9oatDwROW6knpJ0-iggP4BaHymo6g,13170
|
166
173
|
numba_cuda/numba/cuda/tests/cudapy/test_device_func.py,sha256=LNGBZfqFGUtVVQeC6FcHo8T3DbG-j6AjeBwJmwp9HH4,13157
|
167
174
|
numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py,sha256=Oc6CdI1j9Ad_wklHdIYSMytrzUpzK6oXD0BGe45sTwg,26636
|
168
|
-
numba_cuda/numba/cuda/tests/cudapy/test_enums.py,sha256=
|
175
|
+
numba_cuda/numba/cuda/tests/cudapy/test_enums.py,sha256=VQGPLcTbT1nhS1BE4VALK-TaQEsPec5zu-XVlWV0sHA,4593
|
169
176
|
numba_cuda/numba/cuda/tests/cudapy/test_errors.py,sha256=w6ipW9UIvUD_ZIt_6fQ-uJsHyKLyHVqv2bym-9vyGyY,2757
|
170
177
|
numba_cuda/numba/cuda/tests/cudapy/test_exception.py,sha256=W5NF022DOOTaEjFmhfr8BnfhRXvYyXHiGwznQrm_9T4,5507
|
171
|
-
numba_cuda/numba/cuda/tests/cudapy/test_extending.py,sha256=
|
178
|
+
numba_cuda/numba/cuda/tests/cudapy/test_extending.py,sha256=G6KcFAiJnDEfa5f7HW72Ocqxrv6xRvGMRTbwttTsuec,8678
|
172
179
|
numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py,sha256=fiUoOiwWjctZNFN-DGw1A8eGfHLqNulo2OQ7v1DFS9o,8552
|
173
180
|
numba_cuda/numba/cuda/tests/cudapy/test_forall.py,sha256=Ory5s-_9MauSCP2RuWUEmcGFvP0kS7ytV-3iYPFYR6o,1470
|
174
181
|
numba_cuda/numba/cuda/tests/cudapy/test_freevar.py,sha256=JvWn7Lw137HI61mouKnPvDxZIqLppiCF_351osxQQYE,753
|
@@ -259,15 +266,15 @@ numba_cuda/numba/cuda/tests/nocuda/test_import.py,sha256=avrMV0jlve3KmDOrYtCeQ4r
|
|
259
266
|
numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py,sha256=O_S_HG59Ak071b43BQ2s-xJDw9D8Iy_H1-CechHOZnc,7948
|
260
267
|
numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py,sha256=E_OdYlxgfRTFKONCMlgjvwvwHQkPS1ne1KTwzBDH9GE,1968
|
261
268
|
numba_cuda/numba/cuda/tests/nrt/__init__.py,sha256=43EXdiXXRBd6yIcVGMrU9F_EJCD9Uw3mzOP3SB53AEE,260
|
262
|
-
numba_cuda/numba/cuda/tests/nrt/test_nrt.py,sha256=
|
263
|
-
numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py,sha256=
|
264
|
-
numba_cuda/numba/cuda/tests/test_binary_generation/Makefile,sha256=
|
265
|
-
numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py,sha256=
|
269
|
+
numba_cuda/numba/cuda/tests/nrt/test_nrt.py,sha256=2yzKcfpaAYuxO2xG9BO2BySyMXkQFki3yEEZobuoqPA,12720
|
270
|
+
numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py,sha256=vishrSKwg0TRQ-FAEhRh7-Xbdg6Q8xVRy4C4-T1s5Pg,3212
|
271
|
+
numba_cuda/numba/cuda/tests/test_binary_generation/Makefile,sha256=Gf5Q06VWY2DhUS6hZrLundTfUzWHXhr8LsbAddzdTE8,3641
|
272
|
+
numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py,sha256=QRFDUQFsIk6zQ7U3sCK2a0F40pOmJdUzdP3deUjCOwk,5182
|
266
273
|
numba_cuda/numba/cuda/tests/test_binary_generation/nrt_extern.cu,sha256=T9ubst3fFUK7EXyXXMi73wAban3VFFQ986cY5OcKfvI,157
|
267
274
|
numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu,sha256=IB5t-dVhrKVoue3AbUx3yVMxPG0hBF_yZbzb4642sf0,538
|
268
275
|
numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu,sha256=q3oxZziT8KDodeNcEBiWULH6vMrHCWucmJmtrg8C0d0,128
|
269
|
-
numba_cuda-0.
|
270
|
-
numba_cuda-0.
|
271
|
-
numba_cuda-0.
|
272
|
-
numba_cuda-0.
|
273
|
-
numba_cuda-0.
|
276
|
+
numba_cuda-0.13.0.dist-info/licenses/LICENSE,sha256=eHeYE-XjASmwbxfsP5AImgfzRwZurZGqH1f6OFwJ4io,1326
|
277
|
+
numba_cuda-0.13.0.dist-info/METADATA,sha256=clEe3q5Jb4S4sixwT6RAgkGqLieoRYtWoyWEqBvSyZk,1859
|
278
|
+
numba_cuda-0.13.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
279
|
+
numba_cuda-0.13.0.dist-info/top_level.txt,sha256=C50SsH-8tXDmt7I0Y3nlJYhS5s6pqWflCPdobe9vx2M,11
|
280
|
+
numba_cuda-0.13.0.dist-info/RECORD,,
|
@@ -1 +0,0 @@
|
|
1
|
-
from numba.cuda.runtime.nrt import rtsys # noqa: F401
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|