numba-cuda 0.16.0__py3-none-any.whl → 0.17.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- numba_cuda/VERSION +1 -1
- numba_cuda/numba/cuda/cuda_paths.py +1 -1
- numba_cuda/numba/cuda/cudadrv/devices.py +4 -6
- numba_cuda/numba/cuda/cudadrv/driver.py +103 -49
- numba_cuda/numba/cuda/dispatcher.py +2 -3
- numba_cuda/numba/cuda/memory_management/nrt.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +16 -5
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +5 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +1 -5
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +2 -5
- {numba_cuda-0.16.0.dist-info → numba_cuda-0.17.0.dist-info}/METADATA +2 -2
- {numba_cuda-0.16.0.dist-info → numba_cuda-0.17.0.dist-info}/RECORD +16 -16
- {numba_cuda-0.16.0.dist-info → numba_cuda-0.17.0.dist-info}/WHEEL +0 -0
- {numba_cuda-0.16.0.dist-info → numba_cuda-0.17.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.16.0.dist-info → numba_cuda-0.17.0.dist-info}/top_level.txt +0 -0
numba_cuda/VERSION
CHANGED
@@ -1 +1 @@
-0.16.0
+0.17.0
numba_cuda/numba/cuda/cuda_paths.py
CHANGED
@@ -100,7 +100,7 @@ def _get_nvrtc_system_ctk():
 def _get_nvrtc_path_decision():
     options = _build_options(
         [
-            ("CUDA_HOME", lambda: get_cuda_home(
+            ("CUDA_HOME", lambda: get_cuda_home(_cudalib_path())),
             ("Conda environment", get_conda_ctk),
             ("Conda environment (NVIDIA package)", get_nvidia_cudalib_ctk),
             ("NVIDIA NVCC Wheel", _get_nvrtc_wheel),
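The hunk above only swaps the `CUDA_HOME` entry in an ordered candidate list; the surrounding machinery tries each source in priority order. A minimal sketch of that first-match pattern, using a hypothetical `first_available` helper rather than the package's actual `_build_options` internals:

```python
# Hypothetical helper sketching the ordered (label, getter) search used by
# NVRTC path resolution; not numba-cuda's actual implementation.
def first_available(options):
    for label, get_path in options:
        path = get_path()  # each getter returns a path string or None
        if path:
            return label, path
    return None, None

# Earlier entries win, so a CUDA_HOME hit shadows the conda candidates.
options = [
    ("CUDA_HOME", lambda: "/opt/cuda"),
    ("Conda environment", lambda: None),
]
print(first_available(options))  # -> ('CUDA_HOME', '/opt/cuda')
```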
numba_cuda/numba/cuda/cudadrv/devices.py
CHANGED
@@ -40,6 +40,8 @@ class _DeviceList(object):
         """
         Returns the context manager for device *devnum*.
         """
+        if not isinstance(devnum, (int, slice)) and USE_NV_BINDING:
+            devnum = int(devnum)
         return self.lst[devnum]

     def __str__(self):
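The added coercion lets `cuda.gpus` be indexed with the NVIDIA binding's `CUdevice` objects as well as plain integers. A short usage sketch (it mirrors `test_gpus_cudevice_indexing`, added further down in this diff; requires a CUDA device):

```python
from numba import cuda

# Device.id is an int under the ctypes binding and a CUdevice object under
# the NVIDIA binding; after this change either form works as an index.
for dev in cuda.list_devices():
    with cuda.gpus[dev.id]:
        print(cuda.gpus.current.id)
```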
@@ -159,12 +161,8 @@ class _Runtime(object):
         # Get primary context for the active device
         ctx = self.gpus[ac.devnum].get_primary_context()
         # Is active context the primary context?
-        if USE_NV_BINDING:
-            ctx_handle = int(ctx.handle)
-            ac_ctx_handle = int(ac.context_handle)
-        else:
-            ctx_handle = ctx.handle.value
-            ac_ctx_handle = ac.context_handle.value
+        ctx_handle = ctx.handle.value
+        ac_ctx_handle = ac.context_handle.value
         if ctx_handle != ac_ctx_handle:
             msg = (
                 "Numba cannot operate on non-primary"
numba_cuda/numba/cuda/cudadrv/driver.py
CHANGED
@@ -490,11 +490,11 @@ class Driver(object):
         with self.get_active_context() as ac:
             if ac.devnum is not None:
                 if USE_NV_BINDING:
-                    popped = driver.cuCtxPopCurrent()
+                    popped = drvapi.cu_context(int(driver.cuCtxPopCurrent()))
                 else:
                     popped = drvapi.cu_context()
                     driver.cuCtxPopCurrent(byref(popped))
-                return popped
+                return popped

     def get_active_context(self):
         """Returns an instance of ``_ActiveContext``."""
@@ -538,6 +538,8 @@ class _ActiveContext(object):
             hctx = driver.cuCtxGetCurrent()
             if int(hctx) == 0:
                 hctx = None
+            else:
+                hctx = drvapi.cu_context(int(hctx))
         else:
             hctx = drvapi.cu_context(0)
             driver.cuCtxGetCurrent(byref(hctx))
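This hunk shows the pattern repeated throughout the rest of the diff: handles returned by the NVIDIA binding are wrapped in the ctypes-based `drvapi` types, so every context handle exposes a `.value` attribute regardless of binding. A minimal sketch, assuming `drvapi.cu_context` remains a `ctypes.c_void_p`-style type:

```python
from numba.cuda.cudadrv import drvapi

# Wrapping an integer context address reproduces the interface the ctypes
# path has always provided; the address below is only an example value,
# not a real context.
hctx = drvapi.cu_context(0x7F0000001000)
print(hex(hctx.value))
```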
@@ -716,6 +718,7 @@ class Device(object):
         # create primary context
         if USE_NV_BINDING:
             hctx = driver.cuDevicePrimaryCtxRetain(self.id)
+            hctx = drvapi.cu_context(int(hctx))
         else:
             hctx = drvapi.cu_context()
             driver.cuDevicePrimaryCtxRetain(byref(hctx), self.id)
@@ -1254,6 +1257,7 @@ class _PendingDeallocs(object):
             [dtor, handle, size] = self._cons.popleft()
             _logger.info("dealloc: %s %s bytes", dtor.__name__, size)
             dtor(handle)
+
         self._size = 0

     @contextlib.contextmanager
@@ -1430,7 +1434,10 @@ class Context(object):
         """
         Pushes this context on the current CPU Thread.
         """
-        driver.cuCtxPushCurrent(self.handle)
+        if USE_NV_BINDING:
+            driver.cuCtxPushCurrent(self.handle.value)
+        else:
+            driver.cuCtxPushCurrent(self.handle)
         self.prepare_for_use()

     def pop(self):
@@ -1439,10 +1446,7 @@ class Context(object):
         must be at the top of the context stack, otherwise an error will occur.
         """
         popped = driver.pop_active_context()
-        if USE_NV_BINDING:
-            assert int(popped) == int(self.handle)
-        else:
-            assert popped.value == self.handle.value
+        assert popped.value == self.handle.value

     def memalloc(self, bytesize):
         return self.memory_manager.memalloc(bytesize)
@@ -1535,21 +1539,25 @@ class Context(object):

     def get_default_stream(self):
         if USE_NV_BINDING:
-            handle = binding.CUstream(CU_STREAM_DEFAULT)
+            handle = drvapi.cu_stream(int(binding.CUstream(CU_STREAM_DEFAULT)))
         else:
             handle = drvapi.cu_stream(drvapi.CU_STREAM_DEFAULT)
         return Stream(weakref.proxy(self), handle, None)

     def get_legacy_default_stream(self):
         if USE_NV_BINDING:
-            handle = binding.CUstream(binding.CU_STREAM_LEGACY)
+            handle = drvapi.cu_stream(
+                int(binding.CUstream(binding.CU_STREAM_LEGACY))
+            )
         else:
             handle = drvapi.cu_stream(drvapi.CU_STREAM_LEGACY)
         return Stream(weakref.proxy(self), handle, None)

     def get_per_thread_default_stream(self):
         if USE_NV_BINDING:
-            handle = binding.CUstream(binding.CU_STREAM_PER_THREAD)
+            handle = drvapi.cu_stream(
+                int(binding.CUstream(binding.CU_STREAM_PER_THREAD))
+            )
         else:
             handle = drvapi.cu_stream(drvapi.CU_STREAM_PER_THREAD)
         return Stream(weakref.proxy(self), handle, None)
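With the getters above returning wrapped handles, the three special default streams behave like any other stream object from Python. A usage sketch via the public API (requires a CUDA device; the printed values depend on the binding and platform):

```python
from numba import cuda

for s in (cuda.default_stream(),
          cuda.legacy_default_stream(),
          cuda.per_thread_default_stream()):
    # The handle is now always a drvapi.cu_stream, so .value is available
    # under both bindings.
    print(int(s), s.handle.value)
```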
@@ -1561,7 +1569,7 @@ class Context(object):
             # default stream, which we define also as CU_STREAM_DEFAULT when
             # the NV binding is in use).
             flags = binding.CUstream_flags.CU_STREAM_DEFAULT.value
-            handle = driver.cuStreamCreate(flags)
+            handle = drvapi.cu_stream(int(driver.cuStreamCreate(flags)))
         else:
             handle = drvapi.cu_stream()
             driver.cuStreamCreate(byref(handle), 0)
@@ -1575,7 +1583,7 @@ class Context(object):
         if not isinstance(ptr, int):
             raise TypeError("ptr for external stream must be an int")
         if USE_NV_BINDING:
-            handle = binding.CUstream(ptr)
+            handle = drvapi.cu_stream(int(binding.CUstream(ptr)))
         else:
             handle = drvapi.cu_stream(ptr)
         return Stream(weakref.proxy(self), handle, None, external=True)
@@ -1585,7 +1593,7 @@ class Context(object):
         if not timing:
             flags |= enums.CU_EVENT_DISABLE_TIMING
         if USE_NV_BINDING:
-            handle = driver.cuEventCreate(flags)
+            handle = drvapi.cu_event(int(driver.cuEventCreate(flags)))
         else:
             handle = drvapi.cu_event()
             driver.cuEventCreate(byref(handle), flags)
@@ -1776,14 +1784,14 @@ def _pin_finalizer(memory_manager, ptr, alloc_key, mapped):

 def _event_finalizer(deallocs, handle):
     def core():
-        deallocs.add_item(driver.cuEventDestroy, handle)
+        deallocs.add_item(driver.cuEventDestroy, handle.value)

     return core


 def _stream_finalizer(deallocs, handle):
     def core():
-        deallocs.add_item(driver.cuStreamDestroy, handle)
+        deallocs.add_item(driver.cuStreamDestroy, handle.value)

     return core
@@ -2054,6 +2062,9 @@ class MemoryPointer(object):
     __cuda_memory__ = True

     def __init__(self, context, pointer, size, owner=None, finalizer=None):
+        if USE_NV_BINDING and isinstance(pointer, ctypes.c_void_p):
+            pointer = binding.CUdeviceptr(pointer.value)
+
         self.context = context
         self.device_pointer = pointer
         self.size = size
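The new constructor guard normalizes `ctypes.c_void_p` device pointers, as an external memory manager (EMM) plugin might supply, into `CUdeviceptr` objects under the NVIDIA binding. A sketch of the coercion in isolation, guarded so it only runs when that binding is active:

```python
import ctypes
from numba.cuda.cudadrv import driver

if driver.USE_NV_BINDING:
    raw = ctypes.c_void_p(1024)  # stand-in for an EMM-provided pointer
    dptr = driver.binding.CUdeviceptr(raw.value)
    assert int(dptr) == raw.value  # same address, binding-native type
```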
@@ -2086,9 +2097,11 @@ class MemoryPointer(object):
     def memset(self, byte, count=None, stream=0):
         count = self.size if count is None else count
         if stream:
-            driver.cuMemsetD8Async(
-                self.device_pointer, byte, count, stream.handle
-            )
+            if USE_NV_BINDING:
+                handle = stream.handle.value
+            else:
+                handle = stream.handle
+            driver.cuMemsetD8Async(self.device_pointer, byte, count, handle)
         else:
             driver.cuMemsetD8(self.device_pointer, byte, count)
@@ -2326,27 +2339,16 @@ class Stream(object):
             weakref.finalize(self, finalizer)

     def __int__(self):
-        if USE_NV_BINDING:
-            return int(self.handle)
-        else:
-            # The default stream's handle.value is 0, which gives `None`
-            return self.handle.value or drvapi.CU_STREAM_DEFAULT
+        # The default stream's handle.value is 0, which gives `None`
+        return self.handle.value or drvapi.CU_STREAM_DEFAULT

     def __repr__(self):
-        if USE_NV_BINDING:
-            default_streams = {
-                CU_STREAM_DEFAULT: "<Default CUDA stream on %s>",
-                binding.CU_STREAM_LEGACY: "<Legacy default CUDA stream on %s>",
-                binding.CU_STREAM_PER_THREAD: "<Per-thread default CUDA stream on %s>",
-            }
-            ptr = int(self.handle) or 0
-        else:
-            default_streams = {
-                drvapi.CU_STREAM_DEFAULT: "<Default CUDA stream on %s>",
-                drvapi.CU_STREAM_LEGACY: "<Legacy default CUDA stream on %s>",
-                drvapi.CU_STREAM_PER_THREAD: "<Per-thread default CUDA stream on %s>",
-            }
-            ptr = self.handle.value or drvapi.CU_STREAM_DEFAULT
+        default_streams = {
+            drvapi.CU_STREAM_DEFAULT: "<Default CUDA stream on %s>",
+            drvapi.CU_STREAM_LEGACY: "<Legacy default CUDA stream on %s>",
+            drvapi.CU_STREAM_PER_THREAD: "<Per-thread default CUDA stream on %s>",
+        }
+        ptr = self.handle.value or drvapi.CU_STREAM_DEFAULT

         if ptr in default_streams:
             return default_streams[ptr] % self.context
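After this simplification `__int__` and `__repr__` no longer branch on the binding; they read the unified ctypes handle directly. The resulting reprs are observable through the public API:

```python
from numba import cuda

print(repr(cuda.default_stream()))             # <Default CUDA stream on ...>
print(repr(cuda.legacy_default_stream()))      # <Legacy default CUDA stream on ...>
print(repr(cuda.per_thread_default_stream()))  # <Per-thread default CUDA stream on ...>
```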
@@ -2360,7 +2362,11 @@ class Stream(object):
         Wait for all commands in this stream to execute. This will commit any
         pending memory transfers.
         """
-        driver.cuStreamSynchronize(self.handle)
+        if USE_NV_BINDING:
+            handle = self.handle.value
+        else:
+            handle = self.handle
+        driver.cuStreamSynchronize(handle)

     @contextlib.contextmanager
     def auto_synchronize(self):
@@ -2385,6 +2391,16 @@ class Stream(object):
         callback will block later work in the stream and may block other
         callbacks from being executed.

+        .. warning::
+            There is a potential for deadlock due to a lock ordering issue
+            between the GIL and the CUDA driver lock when using libraries
+            that call CUDA functions without releasing the GIL. This can
+            occur when the callback function, which holds the CUDA driver lock,
+            attempts to acquire the GIL while another thread that holds the GIL
+            is waiting for the CUDA driver lock. Consider using libraries that
+            properly release the GIL around CUDA operations or restructure
+            your code to avoid this situation.
+
         Note: The driver function underlying this method is marked for
         eventual deprecation and may be replaced in a future CUDA release.
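A hedged illustration of a callback style that narrows the window for the deadlock described in the new warning: do no CUDA work and take no other locks inside the callback; hand results to another thread through a queue instead (requires a CUDA device):

```python
import queue
from numba import cuda

results = queue.Queue()

def on_done(stream, status, arg):
    # Keep this trivial: the driver may invoke it while holding its own
    # lock, so avoid anything that could block on a GIL-holding thread.
    results.put((status, arg))

s = cuda.stream()
s.add_callback(on_done, "batch-0")  # callback receives (stream, status, arg)
s.synchronize()
print(results.get())
```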
@@ -2398,9 +2414,11 @@ class Stream(object):
             stream_callback = binding.CUstreamCallback(ptr)
             # The callback needs to receive a pointer to the data PyObject
             data = id(data)
+            handle = self.handle.value
         else:
             stream_callback = self._stream_callback
-        driver.cuStreamAddCallback(self.handle, stream_callback, data, 0)
+            handle = self.handle
+        driver.cuStreamAddCallback(handle, stream_callback, data, 0)

     @staticmethod
     @cu_stream_callback_pyobj
@@ -2417,6 +2435,16 @@ class Stream(object):
         """
         Return an awaitable that resolves once all preceding stream operations
         are complete. The result of the awaitable is the current stream.
+
+        .. warning::
+            There is a potential for deadlock due to a lock ordering issue
+            between the GIL and the CUDA driver lock when using libraries
+            that call CUDA functions without releasing the GIL. This can
+            occur when the callback function (internally used by this method),
+            which holds the CUDA driver lock, attempts to acquire the GIL
+            while another thread that holds the GIL is waiting for the CUDA driver lock.
+            Consider using libraries that properly release the GIL around
+            CUDA operations or restructure your code to avoid this situation.
         """
         loop = asyncio.get_running_loop()
         future = loop.create_future()
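For reference, a minimal `async_done` round trip using only public API (requires a CUDA device and a running event loop):

```python
import asyncio
import numpy as np
from numba import cuda

async def roundtrip():
    s = cuda.stream()
    d_arr = cuda.to_device(np.arange(4), stream=s)
    await s.async_done()         # resolves once queued work on s completes
    return d_arr.copy_to_host()  # synchronous copy back

print(asyncio.run(roundtrip()))
```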
@@ -2468,27 +2496,35 @@ class Event(object):
         completed.
         """
         if USE_NV_BINDING:
-            hstream = stream.handle if stream else binding.CUstream(0)
+            hstream = stream.handle.value if stream else binding.CUstream(0)
+            handle = self.handle.value
         else:
             hstream = stream.handle if stream else 0
-        driver.cuEventRecord(self.handle, hstream)
+            handle = self.handle
+        driver.cuEventRecord(handle, hstream)

     def synchronize(self):
         """
         Synchronize the host thread for the completion of the event.
         """
-        driver.cuEventSynchronize(self.handle)
+        if USE_NV_BINDING:
+            handle = self.handle.value
+        else:
+            handle = self.handle
+        driver.cuEventSynchronize(handle)

     def wait(self, stream=0):
         """
         All future works submitted to stream will wait util the event completes.
         """
         if USE_NV_BINDING:
-            hstream = stream.handle if stream else binding.CUstream(0)
+            hstream = stream.handle.value if stream else binding.CUstream(0)
+            handle = self.handle.value
         else:
             hstream = stream.handle if stream else 0
+            handle = self.handle
         flags = 0
-        driver.cuStreamWaitEvent(hstream, self.handle, flags)
+        driver.cuStreamWaitEvent(hstream, handle, flags)

     def elapsed_time(self, evtend):
         return event_elapsed_time(self, evtend)
@@ -2499,7 +2535,9 @@ def event_elapsed_time(evtstart, evtend):
     Compute the elapsed time between two events in milliseconds.
     """
     if USE_NV_BINDING:
-        return driver.cuEventElapsedTime(evtstart.handle, evtend.handle)
+        return driver.cuEventElapsedTime(
+            evtstart.handle.value, evtend.handle.value
+        )
     else:
         msec = c_float()
         driver.cuEventElapsedTime(byref(msec), evtstart.handle, evtend.handle)
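At the public-API level the timing idiom built on these Event methods is unchanged; a sketch (requires a CUDA device):

```python
from numba import cuda

s = cuda.stream()
start = cuda.event(timing=True)
end = cuda.event(timing=True)

start.record(stream=s)
# ... enqueue kernels or copies on s here ...
end.record(stream=s)
end.synchronize()
print(cuda.event_elapsed_time(start, end), "ms")
```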
@@ -3477,7 +3515,11 @@ def host_to_device(dst, src, size, stream=0):
     if stream:
         assert isinstance(stream, Stream)
         fn = driver.cuMemcpyHtoDAsync
-        varargs.append(stream.handle)
+        if USE_NV_BINDING:
+            handle = stream.handle.value
+        else:
+            handle = stream.handle
+        varargs.append(handle)
     else:
         fn = driver.cuMemcpyHtoD
@@ -3495,7 +3537,11 @@ def device_to_host(dst, src, size, stream=0):
     if stream:
         assert isinstance(stream, Stream)
         fn = driver.cuMemcpyDtoHAsync
-        varargs.append(stream.handle)
+        if USE_NV_BINDING:
+            handle = stream.handle.value
+        else:
+            handle = stream.handle
+        varargs.append(handle)
     else:
         fn = driver.cuMemcpyDtoH
@@ -3513,7 +3559,11 @@ def device_to_device(dst, src, size, stream=0):
     if stream:
         assert isinstance(stream, Stream)
         fn = driver.cuMemcpyDtoDAsync
-        varargs.append(stream.handle)
+        if USE_NV_BINDING:
+            handle = stream.handle.value
+        else:
+            handle = stream.handle
+        varargs.append(handle)
     else:
         fn = driver.cuMemcpyDtoD
@@ -3534,7 +3584,11 @@ def device_memset(dst, val, size, stream=0):
     if stream:
         assert isinstance(stream, Stream)
         fn = driver.cuMemsetD8Async
-        varargs.append(stream.handle)
+        if USE_NV_BINDING:
+            handle = stream.handle.value
+        else:
+            handle = stream.handle
+        varargs.append(handle)
     else:
         fn = driver.cuMemsetD8
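The four helper hunks above repeat the same unwrap step before appending the stream argument. A sketch of the shared idea as a standalone function (hypothetical name; the diff inlines it at each call site):

```python
def raw_stream_arg(stream, use_nv_binding):
    """Hypothetical helper: return what the driver wrapper expects.

    Under the NVIDIA binding the wrapped drvapi.cu_stream is unwrapped to
    its integer .value; the ctypes path passes the handle object through.
    """
    return stream.handle.value if use_nv_binding else stream.handle
```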
numba_cuda/numba/cuda/dispatcher.py
CHANGED
@@ -458,11 +458,10 @@ class _Kernel(serialize.ReduceMixin):
             self._prepare_args(t, v, stream, retr, kernelargs)

         if driver.USE_NV_BINDING:
-            zero_stream = driver.binding.CUstream(0)
+            stream_handle = stream and stream.handle.value or 0
         else:
             zero_stream = None
-
-        stream_handle = stream and stream.handle or zero_stream
+            stream_handle = stream and stream.handle or zero_stream

         # Invoke kernel
         driver.launch_kernel(
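The NV-binding branch now reduces the stream argument with a truthiness expression; a sketch of what it evaluates to (assuming the 0.17.0 behavior above):

```python
# `stream and stream.handle.value or 0` maps:
#   None or 0 (no stream given)     -> 0
#   a stream whose raw handle is 0  -> 0 (falls through the `or`)
#   any other stream                -> its raw integer handle
def stream_handle_for_launch(stream):
    return stream and stream.handle.value or 0
```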
numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py
CHANGED
@@ -25,6 +25,19 @@ class TestContextStack(CUDATestCase):
         gpulist = list(cuda.gpus)
         self.assertGreater(len(gpulist), 0)

+    def test_gpus_cudevice_indexing(self):
+        """Test that CUdevice objects can be used to index into cuda.gpus"""
+        # When using the CUDA Python bindings, the device ids are CUdevice
+        # objects, otherwise they are integers. We test that the device id is
+        # usable as an index into cuda.gpus.
+        device_ids = [device.id for device in cuda.list_devices()]
+        for device_id in device_ids:
+            with cuda.gpus[device_id]:
+                # Check that the device is an integer if not using the CUDA
+                # Python bindings, otherwise it's a CUdevice object
+                assert isinstance(device_id, int) != driver.USE_NV_BINDING
+                self.assertEqual(cuda.gpus.current.id, device_id)
+

 class TestContextAPI(CUDATestCase):
     def tearDown(self):
@@ -82,7 +95,8 @@ class Test3rdPartyContext(CUDATestCase):
             the_driver = driver.driver
             if driver.USE_NV_BINDING:
                 dev = driver.binding.CUdevice(0)
-                hctx = the_driver.cuDevicePrimaryCtxRetain(dev)
+                binding_hctx = the_driver.cuDevicePrimaryCtxRetain(dev)
+                hctx = driver.drvapi.cu_context(int(binding_hctx))
             else:
                 dev = 0
                 hctx = driver.drvapi.cu_context()
@@ -93,10 +107,7 @@ class Test3rdPartyContext(CUDATestCase):
                 # Check that the context from numba matches the created primary
                 # context.
                 my_ctx = cuda.current_context()
-                if driver.USE_NV_BINDING:
-                    self.assertEqual(int(my_ctx.handle), int(ctx.handle))
-                else:
-                    self.assertEqual(my_ctx.handle.value, ctx.handle.value)
+                self.assertEqual(my_ctx.handle.value, ctx.handle.value)

                 extra_work()
             finally:
numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py
CHANGED
@@ -129,6 +129,10 @@ class TestCudaDriver(CUDATestCase):

         ptr = memory.device_ctypes_pointer

+        stream_handle = stream.handle
+        if _driver.USE_NV_BINDING:
+            stream_handle = stream_handle.value
+
         launch_kernel(
             function.handle,  # Kernel
             1,
@@ -138,7 +142,7 @@ class TestCudaDriver(CUDATestCase):
             1,
             1,  # bx, by, bz
             0,  # dynamic shared mem
-            stream.handle,  # stream
+            stream_handle,  # stream
             [ptr],
         )  # arguments
numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py
CHANGED
@@ -84,7 +84,7 @@ if not config.ENABLE_CUDASIM:
             # the tests don't try to do too much with it (e.g. open / close
             # it).
             self.get_ipc_handle_called = True
-            return "Dummy IPC handle for alloc %s" % memory.
+            return "Dummy IPC handle for alloc %s" % memory.device_pointer_value

         @property
         def interface_version(self):
numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py
CHANGED
@@ -2,7 +2,6 @@ import numpy as np

 from numba import vectorize, guvectorize
 from numba import cuda
-from numba.cuda.cudadrv import driver
 from numba.cuda.testing import unittest, ContextResettingTestCase, ForeignArray
 from numba.cuda.testing import skip_on_cudasim, skip_if_external_memmgr
 from numba.tests.support import linux_only, override_config
@@ -32,10 +31,7 @@ class TestCudaArrayInterface(ContextResettingTestCase):
         self.assertPointersEqual(wrapped, d_arr)

     def get_stream_value(self, stream):
-        if driver.USE_NV_BINDING:
-            return int(stream.handle)
-        else:
-            return stream.handle.value
+        return stream.handle.value

     @skip_if_external_memmgr("Ownership not relevant with external memmgr")
     def test_ownership(self):
numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py
CHANGED
@@ -4,7 +4,7 @@ from numba.cuda.testing import (
     unittest,
     CUDATestCase,
 )
-from numba import 
+from numba import cuda

 # Basic tests that stream APIs execute on the hardware and in the simulator.
 #
@@ -38,10 +38,7 @@ class TestStreamAPI(CUDATestCase):
         # We don't test synchronization on the stream because it's not a real
         # stream - we used a dummy pointer for testing the API, so we just
         # ensure that the stream handle matches the external stream pointer.
-        if driver.USE_NV_BINDING:
-            value = int(s.handle)
-        else:
-            value = s.handle.value
+        value = s.handle.value
         self.assertEqual(ptr, value)

     @skip_unless_cudasim("External streams are usable with hardware")
{numba_cuda-0.16.0.dist-info → numba_cuda-0.17.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: numba-cuda
-Version: 0.16.0
+Version: 0.17.0
 Summary: CUDA target for Numba
 Author: Anaconda Inc., NVIDIA Corporation
 License: BSD 2-clause
@@ -75,7 +75,7 @@ If you want to manage all run-time dependencies yourself, also pass the `--no-de
 python -m numba.runtests numba.cuda.tests
 ```

-This should discover the`numba.cuda` module from the `numba_cuda` package. You
+This should discover the `numba.cuda` module from the `numba_cuda` package. You
 can check where `numba.cuda` files are being located by running

 ```
{numba_cuda-0.16.0.dist-info → numba_cuda-0.17.0.dist-info}/RECORD
CHANGED
@@ -1,6 +1,6 @@
 _numba_cuda_redirector.pth,sha256=cmfMMmV0JPh3yEpl4bGeM9AuXiVVMSo6Z_b7RaQL3XE,30
 _numba_cuda_redirector.py,sha256=n_r8MYbu5-vcXMnLJW147k8DnFXXvgb7nPIXnlXwTyQ,2659
-numba_cuda/VERSION,sha256=
+numba_cuda/VERSION,sha256=AtOOmOiPSo3C6kDbt2kzhgEGb8lJILUogqFPIa2X9-4,7
 numba_cuda/__init__.py,sha256=atXeUvJKR3JHcAiCFbXCVOJQUHgB1TulmsqSL_9RT3Q,114
 numba_cuda/_version.py,sha256=nzrrJXi85d18m6SPdsPsetJNClDETkmF1MrEhGLYDBs,734
 numba_cuda/numba/cuda/__init__.py,sha256=xdN14yT_i5jlikiGJy7K3E1AvUTQQ4U-t4zOgYYAwBQ,3526
@@ -12,7 +12,7 @@ numba_cuda/numba/cuda/cg.py,sha256=n-sBj05ut6U_GgFIq-PTCjPad4nXWAc0GVg_J9xD_Pc,1
 numba_cuda/numba/cuda/codegen.py,sha256=NyHz0jlH7_vRapHqfA4P-RWpMTWsQNiW9dUbVO0N5w8,16855
 numba_cuda/numba/cuda/compiler.py,sha256=_RkTTc6oSZiZVlB8w-jleBGnqxTkmzaIo7VsE3h0-18,26016
 numba_cuda/numba/cuda/cpp_function_wrappers.cu,sha256=8lUPmU6FURxphzEqkPLZRPYBCEK_wmDtHq2voPkckfs,950
-numba_cuda/numba/cuda/cuda_paths.py,sha256=
+numba_cuda/numba/cuda/cuda_paths.py,sha256=2r2uTfoSb1Lg5RhxmJ0rt40_wZcBcmibo16uWKUlWLs,15792
 numba_cuda/numba/cuda/cudadecl.py,sha256=_TXMu8SIT2hIhsPI0n05wuShtzp8NcPX88NH5y7xauU,22909
 numba_cuda/numba/cuda/cudaimpl.py,sha256=q6CPqD8ZtJvY8JlpMEN--d6003_FIHoHLBqNP2McNyM,39274
 numba_cuda/numba/cuda/cudamath.py,sha256=wbGjlyGVwcUAoQjgXIaAaasLdVuDSKHkf6KyID5IYBw,3979
@@ -21,7 +21,7 @@ numba_cuda/numba/cuda/decorators.py,sha256=wCLjm2FUWjznmKV_QK1egO03msOrJEnZTR1du
 numba_cuda/numba/cuda/descriptor.py,sha256=t1rSVJSCAlVACC5_Un3FQ7iubdTTBe-euqz88cvs2tI,985
 numba_cuda/numba/cuda/device_init.py,sha256=Rtwd6hQMHMLMkj6MXtndbWYFJfkIaRe0MwOIJF2nzhU,3449
 numba_cuda/numba/cuda/deviceufunc.py,sha256=zj9BbLiZD-dPttHew4olw8ANgR2nXnXEE9qjCeGLrQI,30731
-numba_cuda/numba/cuda/dispatcher.py,sha256=
+numba_cuda/numba/cuda/dispatcher.py,sha256=Lc09syEjVLEWbvl2FRP6BgO4FYlDBsp06K64_OAEtsg,43386
 numba_cuda/numba/cuda/errors.py,sha256=WRso1Q_jCoWP5yrDBMhihRhhVtVo1-7KdN8QVE9j46o,1712
 numba_cuda/numba/cuda/extending.py,sha256=VwuU5F0AQFlJsqaiwoWk-6Itihew1FsjVT_BVjhY8Us,2278
 numba_cuda/numba/cuda/flags.py,sha256=4p12gufxChLfF1fCK_B9HJRdB0Eg8k_UFU__cItqWps,743
@@ -52,8 +52,8 @@ numba_cuda/numba/cuda/vectorizers.py,sha256=nEfQxjSA4oCX8ZzvoqjDRygDfwzxFVDXtnjx
 numba_cuda/numba/cuda/_internal/cuda_bf16.py,sha256=QYck6s_D85HBEsc__SAl_UZxf7SptqAk31mLv_1gzuE,152212
 numba_cuda/numba/cuda/cudadrv/__init__.py,sha256=inat2K8K1OVrgDe64FK7CyRmyFyNKcNO4p2_L79yRZ0,201
 numba_cuda/numba/cuda/cudadrv/devicearray.py,sha256=xJFZwbfi7o9bzPDLxSPDTLdH6iFYi8W1AbOMmikbpgY,31225
-numba_cuda/numba/cuda/cudadrv/devices.py,sha256=
-numba_cuda/numba/cuda/cudadrv/driver.py,sha256=
+numba_cuda/numba/cuda/cudadrv/devices.py,sha256=UB0xFDe-yyaXwe-rlkNmh2RvulU6bACktDvP41yxo8c,7974
+numba_cuda/numba/cuda/cudadrv/driver.py,sha256=T_YKjr-uh-I0c7IaJJtvtPW64C_7e5G6ySDK3HdmxhQ,117173
 numba_cuda/numba/cuda/cudadrv/drvapi.py,sha256=OnjYWnmy8ZlSfYouhzyYIpW-AJ3x1YHj32YcBY2xet4,16790
 numba_cuda/numba/cuda/cudadrv/dummyarray.py,sha256=2jycZhniMy3ncoVWQG9D8dBehTEeocBZTW43gKHL5Tc,14291
 numba_cuda/numba/cuda/cudadrv/enums.py,sha256=raWKryxamWQZ5A8ivMpyYVhhwbSpaD9lu7l1_wl2W9M,23742
@@ -82,7 +82,7 @@ numba_cuda/numba/cuda/memory_management/memsys.cu,sha256=gMBM9_Hnv3EO3Gw_GKvII8y
 numba_cuda/numba/cuda/memory_management/memsys.cuh,sha256=hPGBQgKyOfYY25ntoBXlhYyeXzxJyz0ByeTszkaKJUM,504
 numba_cuda/numba/cuda/memory_management/nrt.cu,sha256=1hzbAKyqh9783UVdVT67ZxfvJyl_Ojt8e0AbHUC86ss,4818
 numba_cuda/numba/cuda/memory_management/nrt.cuh,sha256=p2GQ-l-EfCoO0sBTyKXhIY3hxGWbPhEJcR-mLLT_V3M,2173
-numba_cuda/numba/cuda/memory_management/nrt.py,sha256=
+numba_cuda/numba/cuda/memory_management/nrt.py,sha256=vrC5lo6tIfbWUjBTxBr_ZjdS_WAcR5h7ZZyMylomyS0,9872
 numba_cuda/numba/cuda/simulator/__init__.py,sha256=ONoWJ3SwE53di0p-lFRH7NOZea2jEUWyn9sDpkOVjCw,2040
 numba_cuda/numba/cuda/simulator/api.py,sha256=hFSFPIrg-aUd-MHg2GCSosFJiL8x2XRiQaqolfTGA3A,3551
 numba_cuda/numba/cuda/simulator/bf16.py,sha256=1ZWkY4Adv8dY46YyorGKGQj3KEBqeet6rsyM8jwfAb4,16
@@ -111,17 +111,17 @@ numba_cuda/numba/cuda/tests/__init__.py,sha256=T5bnw3yl7Xa5ZJPI6OUYBDBDOgcaGqMeS
 numba_cuda/numba/cuda/tests/support.py,sha256=IpWXM2pELCeoqdQIUsvy9Rsm460omp15HMMpJsxTt9U,263
 numba_cuda/numba/cuda/tests/cudadrv/__init__.py,sha256=GdfSq6pRVSOQwmgNi7ZFQ5l0yg4-2gNar_0Rz0buUpM,157
 numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py,sha256=np2UccpkNqLFLcbjJNHGkCJF6TZIyFljB6UXD7U71nM,5299
-numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py,sha256=
+numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py,sha256=8hRDvMyl1ZQaGzPuX0PJV4UXGjFO13vh3PB0jzx_QNM,5623
 numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py,sha256=QhBQr3ZzrBMT-r132RR99UCKwrR-RwZk98RxVv5os0w,13933
 numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py,sha256=xprxASXl0g6QrOujoj07YDw3lIwu0SQbk1lGQPJHlRc,564
 numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py,sha256=JkMbKFa3CBSMSQaSWzOqJU7DE5YlwJLux6OLAmvnSJo,5654
-numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py,sha256=
+numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py,sha256=CIir5l1l8EAIn3sQZwdeDWWuQ7Ksu39NQzpYb93uHBM,7543
 numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py,sha256=KWGON5OSb5Vp74QFDIiupK6ytMwwwDfbYqpENAB4lGE,801
 numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py,sha256=_ysNhgEnvr18miDhzFWbFoQdrPRl6wHPjXAH4sGTOPo,6377
 numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py,sha256=MzKXO2RLCHA_0XU29JfjaLrmKTwwu7PA3cspTfQgCPM,21699
 numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py,sha256=VKYRuIOPdEWkI-6E6-pRCNC1U4-Qxi0d-jX_q_7x1dI,8420
 numba_cuda/numba/cuda/tests/cudadrv/test_detect.py,sha256=DUYZeNlDgL1mQN1xHDYzTRfc-zetuikcULyULcRaC1A,2657
-numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py,sha256=
+numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py,sha256=76p-pr8qPHIhf8RjmPgzOVHSpIdy6x8wzfWCH1VX-T4,7070
 numba_cuda/numba/cuda/tests/cudadrv/test_events.py,sha256=TenHde9Dieie1TmwXOoDtFYWS9ZBVUHaNTVaJ0bHgw4,1075
 numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py,sha256=ciy4dAK6-qrf1f8X_xJVsrIEF_a2iLc16m2kJc6nj78,2169
 numba_cuda/numba/cuda/tests/cudadrv/test_init.py,sha256=mRcGOJWTUpZ533EWq4Tbp3D_aHFFcVS6c_iZqhId7I0,4494
@@ -167,7 +167,7 @@ numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py,sha256=KIuXQ0ihgQQXM-e
 numba_cuda/numba/cuda/tests/cudapy/test_const_string.py,sha256=li1UsV5vc2M01cJ7k6_526VPtuAOAKr8e7kb1CDUXi4,4323
 numba_cuda/numba/cuda/tests/cudapy/test_constmem.py,sha256=ZWmyKvFokRMjqyXjVpZVOnR6LR694GWcbUn2jVEQV14,5170
 numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py,sha256=3OkjhcjPp_P3Pnc1zbteGpAGpoN07cG8Xtdnunx5yWA,5973
-numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py,sha256=
+numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py,sha256=GEIKRMBQX2L72nAmoET_LJjkFHwnHM6gvwsZmWaGkuc,15585
 numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py,sha256=8prL2FTiaajW-UHSL9al-nBniygOfpdAOT_Dkej4PWI,2138
 numba_cuda/numba/cuda/tests/cudapy/test_datetime.py,sha256=MnOeDWMz-rL3-07FsswM06Laxmm0KjTmTwhrP3rmchQ,3526
 numba_cuda/numba/cuda/tests/cudapy/test_debug.py,sha256=1P369s02AvGu7fSIEe_YxSgh3c6S72Aw1gRgmepDbQY,3383
@@ -221,7 +221,7 @@ numba_cuda/numba/cuda/tests/cudapy/test_serialize.py,sha256=k0YBvZYE9CfSIPvQm5M9
 numba_cuda/numba/cuda/tests/cudapy/test_slicing.py,sha256=9QRqT29X9CUivLGQy9tFb3CWcflCOWX1zL_qpK8ixQE,903
 numba_cuda/numba/cuda/tests/cudapy/test_sm.py,sha256=xIsD9kfvZwy0S5zvV0VHTTrwPv_SnlJ1aPnziWnC4nM,14537
 numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py,sha256=9W04zGf8V-RgA4703ey4JUMNNjijTc_us62UZ37_KuM,7133
-numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py,sha256=
+numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py,sha256=zyWmwlsKvIswD7lHvFl12KvvPTo7NQFZHC1_b_En-vs,1752
 numba_cuda/numba/cuda/tests/cudapy/test_sync.py,sha256=mKBGmFoqA1AkK1EYVcwxcdTXtwSegVKLpyB01XnebbU,7815
 numba_cuda/numba/cuda/tests/cudapy/test_transpose.py,sha256=qHxBXLUUyJSy69BGshzus06lBKoppZ3N-5hLqLrg8NE,3149
 numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py,sha256=V-gZsXP_OT9ldhVqREakU-qsLB2nMK_4nUzBCV8HZpY,10289
@@ -276,8 +276,8 @@ numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py,sha256=
 numba_cuda/numba/cuda/tests/test_binary_generation/nrt_extern.cu,sha256=T9ubst3fFUK7EXyXXMi73wAban3VFFQ986cY5OcKfvI,157
 numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu,sha256=IB5t-dVhrKVoue3AbUx3yVMxPG0hBF_yZbzb4642sf0,538
 numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu,sha256=q3oxZziT8KDodeNcEBiWULH6vMrHCWucmJmtrg8C0d0,128
-numba_cuda-0.16.0.dist-info/licenses/LICENSE,sha256=
-numba_cuda-0.16.0.dist-info/METADATA,sha256=
-numba_cuda-0.16.0.dist-info/WHEEL,sha256=
-numba_cuda-0.16.0.dist-info/top_level.txt,sha256=
-numba_cuda-0.16.0.dist-info/RECORD,,
+numba_cuda-0.17.0.dist-info/licenses/LICENSE,sha256=eHeYE-XjASmwbxfsP5AImgfzRwZurZGqH1f6OFwJ4io,1326
+numba_cuda-0.17.0.dist-info/METADATA,sha256=NnZAJFI68kEFW7PuhL0bIU3HxgQ1N1_70a0bi-2SuZg,3196
+numba_cuda-0.17.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+numba_cuda-0.17.0.dist-info/top_level.txt,sha256=C50SsH-8tXDmt7I0Y3nlJYhS5s6pqWflCPdobe9vx2M,11
+numba_cuda-0.17.0.dist-info/RECORD,,
{numba_cuda-0.16.0.dist-info → numba_cuda-0.17.0.dist-info}/WHEEL
File without changes
{numba_cuda-0.16.0.dist-info → numba_cuda-0.17.0.dist-info}/licenses/LICENSE
File without changes
{numba_cuda-0.16.0.dist-info → numba_cuda-0.17.0.dist-info}/top_level.txt
File without changes