numba-cuda 0.23.0__cp313-cp313-win_amd64.whl → 0.24.0__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- numba_cuda/VERSION +1 -1
- numba_cuda/numba/cuda/__init__.py +4 -1
- numba_cuda/numba/cuda/_compat.py +47 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cpp +8 -2
- numba_cuda/numba/cuda/cext/_hashtable.cpp +5 -0
- numba_cuda/numba/cuda/cext/_helperlib.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_pymodule.h +1 -1
- numba_cuda/numba/cuda/cext/_typeconv.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_typeof.cpp +56 -8
- numba_cuda/numba/cuda/cext/mviewbuf.c +7 -1
- numba_cuda/numba/cuda/cext/mviewbuf.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +4 -5
- numba_cuda/numba/cuda/codegen.py +4 -2
- numba_cuda/numba/cuda/compiler.py +5 -5
- numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +1 -1
- numba_cuda/numba/cuda/core/base.py +6 -10
- numba_cuda/numba/cuda/core/bytecode.py +21 -13
- numba_cuda/numba/cuda/core/byteflow.py +336 -90
- numba_cuda/numba/cuda/core/compiler.py +3 -4
- numba_cuda/numba/cuda/core/compiler_machinery.py +3 -3
- numba_cuda/numba/cuda/core/config.py +5 -7
- numba_cuda/numba/cuda/core/controlflow.py +17 -9
- numba_cuda/numba/cuda/core/inline_closurecall.py +11 -10
- numba_cuda/numba/cuda/core/interpreter.py +255 -96
- numba_cuda/numba/cuda/core/ir_utils.py +8 -17
- numba_cuda/numba/cuda/core/pythonapi.py +3 -0
- numba_cuda/numba/cuda/core/rewrites/static_binop.py +1 -1
- numba_cuda/numba/cuda/core/ssa.py +2 -2
- numba_cuda/numba/cuda/core/transforms.py +4 -6
- numba_cuda/numba/cuda/core/typed_passes.py +1 -1
- numba_cuda/numba/cuda/core/typeinfer.py +3 -3
- numba_cuda/numba/cuda/core/untyped_passes.py +11 -10
- numba_cuda/numba/cuda/cpython/unicode.py +2 -2
- numba_cuda/numba/cuda/cpython/unicode_support.py +1 -3
- numba_cuda/numba/cuda/cudadrv/devicearray.py +4 -4
- numba_cuda/numba/cuda/cudadrv/driver.py +13 -11
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +71 -32
- numba_cuda/numba/cuda/debuginfo.py +10 -79
- numba_cuda/numba/cuda/deviceufunc.py +3 -6
- numba_cuda/numba/cuda/dispatcher.py +5 -19
- numba_cuda/numba/cuda/libdeviceimpl.py +1 -2
- numba_cuda/numba/cuda/lowering.py +0 -28
- numba_cuda/numba/cuda/memory_management/nrt.py +1 -1
- numba_cuda/numba/cuda/np/arrayobj.py +7 -9
- numba_cuda/numba/cuda/np/numpy_support.py +7 -10
- numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +4 -3
- numba_cuda/numba/cuda/testing.py +4 -8
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +66 -4
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +2 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +26 -4
- numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +61 -9
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +6 -0
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +12 -1
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +13 -0
- numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +12 -7
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +37 -35
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +8 -7
- numba_cuda/numba/cuda/tests/support.py +11 -0
- numba_cuda/numba/cuda/types/cuda_functions.py +1 -1
- numba_cuda/numba/cuda/typing/asnumbatype.py +37 -2
- numba_cuda/numba/cuda/typing/typeof.py +9 -16
- {numba_cuda-0.23.0.dist-info → numba_cuda-0.24.0.dist-info}/METADATA +4 -13
- {numba_cuda-0.23.0.dist-info → numba_cuda-0.24.0.dist-info}/RECORD +74 -73
- {numba_cuda-0.23.0.dist-info → numba_cuda-0.24.0.dist-info}/WHEEL +0 -0
- {numba_cuda-0.23.0.dist-info → numba_cuda-0.24.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.23.0.dist-info → numba_cuda-0.24.0.dist-info}/licenses/LICENSE.numba +0 -0
- {numba_cuda-0.23.0.dist-info → numba_cuda-0.24.0.dist-info}/top_level.txt +0 -0
|
@@ -187,41 +187,43 @@ def make_fancy_creation_kernel(vtype):
|
|
|
187
187
|
|
|
188
188
|
f4_34 = v4(f4_1) # 1 2 3 4
|
|
189
189
|
|
|
190
|
-
for v in (
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
190
|
+
for v in tuple(
|
|
191
|
+
(
|
|
192
|
+
f4_1,
|
|
193
|
+
f4_2,
|
|
194
|
+
f4_3,
|
|
195
|
+
f4_4,
|
|
196
|
+
f4_5,
|
|
197
|
+
f4_6,
|
|
198
|
+
f4_7,
|
|
199
|
+
f4_8,
|
|
200
|
+
f4_9,
|
|
201
|
+
f4_10,
|
|
202
|
+
f4_11,
|
|
203
|
+
f4_12,
|
|
204
|
+
f4_13,
|
|
205
|
+
f4_14,
|
|
206
|
+
f4_15,
|
|
207
|
+
f4_16,
|
|
208
|
+
f4_17,
|
|
209
|
+
f4_18,
|
|
210
|
+
f4_19,
|
|
211
|
+
f4_20,
|
|
212
|
+
f4_21,
|
|
213
|
+
f4_22,
|
|
214
|
+
f4_23,
|
|
215
|
+
f4_24,
|
|
216
|
+
f4_25,
|
|
217
|
+
f4_26,
|
|
218
|
+
f4_27,
|
|
219
|
+
f4_28,
|
|
220
|
+
f4_29,
|
|
221
|
+
f4_30,
|
|
222
|
+
f4_31,
|
|
223
|
+
f4_32,
|
|
224
|
+
f4_33,
|
|
225
|
+
f4_34,
|
|
226
|
+
)
|
|
225
227
|
):
|
|
226
228
|
res[j] = v.x
|
|
227
229
|
res[j + 1] = v.y
|
|
@@ -287,16 +287,17 @@ class TestCudaWarpOperations(CUDATestCase):
|
|
|
287
287
|
valid_cases = [
|
|
288
288
|
# mask: unsigned/signed integer
|
|
289
289
|
# predicate: unsigned/signed integer, boolean
|
|
290
|
-
("void(uint32[:], uint32[:], int32[:])", np.uint32, np.uint32
|
|
291
|
-
("void(int64[:], int64[:], int32[:])", np.int64, np.int64
|
|
292
|
-
("void(uint64[:], uint64[:], int32[:])", np.uint64, np.uint64
|
|
293
|
-
("void(int32[:], int32[:], int32[:])", np.int32, np.int32
|
|
294
|
-
("void(uint32[:], boolean[:], int32[:])", np.uint32, np.bool_
|
|
295
|
-
("void(uint64[:], boolean[:], int32[:])", np.uint64, np.bool_
|
|
290
|
+
("void(uint32[:], uint32[:], int32[:])", np.uint32, np.uint32),
|
|
291
|
+
("void(int64[:], int64[:], int32[:])", np.int64, np.int64),
|
|
292
|
+
("void(uint64[:], uint64[:], int32[:])", np.uint64, np.uint64),
|
|
293
|
+
("void(int32[:], int32[:], int32[:])", np.int32, np.int32),
|
|
294
|
+
("void(uint32[:], boolean[:], int32[:])", np.uint32, np.bool_),
|
|
295
|
+
("void(uint64[:], boolean[:], int32[:])", np.uint64, np.bool_),
|
|
296
296
|
]
|
|
297
297
|
|
|
298
|
-
for sig, mask_dtype, pred_dtype
|
|
298
|
+
for sig, mask_dtype, pred_dtype in valid_cases:
|
|
299
299
|
with self.subTest(sig=sig):
|
|
300
|
+
mask_val = (~np.array(0, dtype=mask_dtype)).item()
|
|
300
301
|
compiled = cuda.jit(sig)(use_vote_sync_all_with_mask)
|
|
301
302
|
ary_mask = np.full(nelem, mask_val, dtype=mask_dtype)
|
|
302
303
|
ary_pred = np.ones(nelem, dtype=pred_dtype)
|
|
@@ -38,6 +38,7 @@ from numba.cuda.datamodel.models import OpaqueModel
|
|
|
38
38
|
from numba.cuda.np import numpy_support
|
|
39
39
|
|
|
40
40
|
from numba.cuda import HAS_NUMBA
|
|
41
|
+
from numba.cuda.utils import PYVERSION
|
|
41
42
|
|
|
42
43
|
if HAS_NUMBA:
|
|
43
44
|
from numba.core.extending import (
|
|
@@ -56,6 +57,16 @@ class EnableNRTStatsMixin(object):
|
|
|
56
57
|
rtsys.memsys_disable_stats()
|
|
57
58
|
|
|
58
59
|
|
|
60
|
+
skip_if_py314 = unittest.skipIf(PYVERSION == (3, 14), "Test unstable on 3.14")
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def expected_failure_py314(fn):
|
|
64
|
+
if PYVERSION == (3, 14):
|
|
65
|
+
return unittest.expectedFailure(fn)
|
|
66
|
+
else:
|
|
67
|
+
return fn
|
|
68
|
+
|
|
69
|
+
|
|
59
70
|
skip_unless_cffi = unittest.skipUnless(cffi_utils.SUPPORTED, "requires cffi")
|
|
60
71
|
|
|
61
72
|
_lnx_reason = "linux only test"
|
|
@@ -334,7 +334,7 @@ class BaseFunction(Callable):
|
|
|
334
334
|
k: _unlit_non_poison(v) for k, v in kws.items()
|
|
335
335
|
}
|
|
336
336
|
sig = temp.apply(nolitargs, nolitkws)
|
|
337
|
-
except Exception as e:
|
|
337
|
+
except Exception as e: # noqa: PERF203
|
|
338
338
|
if not isinstance(e, errors.NumbaError):
|
|
339
339
|
raise e
|
|
340
340
|
sig = None
|
|
@@ -7,6 +7,7 @@ import typing as py_typing
|
|
|
7
7
|
from numba.cuda.typing.typeof import typeof
|
|
8
8
|
from numba.cuda.core import errors
|
|
9
9
|
from numba.cuda import types
|
|
10
|
+
from numba.cuda.utils import PYVERSION
|
|
10
11
|
|
|
11
12
|
|
|
12
13
|
class AsNumbaTypeRegistry:
|
|
@@ -40,8 +41,42 @@ class AsNumbaTypeRegistry:
|
|
|
40
41
|
return py_type
|
|
41
42
|
|
|
42
43
|
def _builtin_infer(self, py_type):
|
|
43
|
-
if
|
|
44
|
-
return
|
|
44
|
+
if PYVERSION in ((3, 14),):
|
|
45
|
+
# As of 3.14 the typing module has been updated to return a
|
|
46
|
+
# different type when calling: `typing.Optional[X]`.
|
|
47
|
+
#
|
|
48
|
+
# On 3.14:
|
|
49
|
+
#
|
|
50
|
+
# >>> type(typing.Optional[float])
|
|
51
|
+
# <class 'typing.Union'>
|
|
52
|
+
#
|
|
53
|
+
#
|
|
54
|
+
# On 3.13 (and presumably below):
|
|
55
|
+
#
|
|
56
|
+
# >>> type(typing.Optional[float])
|
|
57
|
+
# <class 'typing._UnionGenericAlias'>
|
|
58
|
+
#
|
|
59
|
+
#
|
|
60
|
+
# The previous implementation of this predicate used
|
|
61
|
+
# `_GenericAlias`, which was possible because `_UnionGenericAlias`
|
|
62
|
+
# is a subclass of `_GenericAlias`...
|
|
63
|
+
#
|
|
64
|
+
# >>> issubclass(typing._UnionGenericAlias, typing._GenericAlias)
|
|
65
|
+
# True
|
|
66
|
+
#
|
|
67
|
+
# However, other types, such as `typing.List[float]` remain as
|
|
68
|
+
# `typing._GenericAlias`, so that must be kept.
|
|
69
|
+
#
|
|
70
|
+
if not isinstance(
|
|
71
|
+
py_type, (py_typing.Union, py_typing._GenericAlias)
|
|
72
|
+
):
|
|
73
|
+
return
|
|
74
|
+
elif PYVERSION in ((3, 10), (3, 11), (3, 12), (3, 13)):
|
|
75
|
+
# Use of underscore type `_GenericAlias`.
|
|
76
|
+
if not isinstance(py_type, py_typing._GenericAlias):
|
|
77
|
+
return
|
|
78
|
+
else:
|
|
79
|
+
raise NotImplementedError(PYVERSION)
|
|
45
80
|
|
|
46
81
|
if getattr(py_type, "__origin__", None) is py_typing.Union:
|
|
47
82
|
if len(py_type.__args__) != 2:
|
|
@@ -5,6 +5,7 @@ from collections import namedtuple
|
|
|
5
5
|
from functools import singledispatch
|
|
6
6
|
import ctypes
|
|
7
7
|
import enum
|
|
8
|
+
import operator
|
|
8
9
|
|
|
9
10
|
import numpy as np
|
|
10
11
|
from numpy.random.bit_generator import BitGenerator
|
|
@@ -47,10 +48,6 @@ def typeof_impl(val, c):
|
|
|
47
48
|
"""
|
|
48
49
|
Generic typeof() implementation.
|
|
49
50
|
"""
|
|
50
|
-
tp = _typeof_buffer(val, c)
|
|
51
|
-
if tp is not None:
|
|
52
|
-
return tp
|
|
53
|
-
|
|
54
51
|
tp = getattr(val, "_numba_type_", None)
|
|
55
52
|
if tp is not None:
|
|
56
53
|
return tp
|
|
@@ -64,6 +61,10 @@ def typeof_impl(val, c):
|
|
|
64
61
|
if tp is not None:
|
|
65
62
|
return tp
|
|
66
63
|
|
|
64
|
+
tp = _typeof_buffer(val, c)
|
|
65
|
+
if tp is not None:
|
|
66
|
+
return tp
|
|
67
|
+
|
|
67
68
|
# cffi is handled here as it does not expose a public base class
|
|
68
69
|
# for exported functions or CompiledFFI instances.
|
|
69
70
|
from numba.cuda.typing import cffi_utils
|
|
@@ -318,17 +319,13 @@ def _typeof_cuda_array_interface(val, c):
|
|
|
318
319
|
Array Interface. These are typed as regular Array types, with lowering
|
|
319
320
|
handled in numba.cuda.np.arrayobj.
|
|
320
321
|
"""
|
|
321
|
-
# Only handle constants, not arguments (arguments use regular array typing)
|
|
322
|
-
if c.purpose == Purpose.argument:
|
|
323
|
-
return None
|
|
324
|
-
|
|
325
322
|
dtype = numpy_support.from_dtype(np.dtype(val["typestr"]))
|
|
326
323
|
shape = val["shape"]
|
|
327
324
|
ndim = len(shape)
|
|
328
325
|
strides = val.get("strides")
|
|
329
326
|
|
|
330
327
|
# Determine layout
|
|
331
|
-
if ndim
|
|
328
|
+
if not ndim:
|
|
332
329
|
layout = "C"
|
|
333
330
|
elif strides is None:
|
|
334
331
|
layout = "C"
|
|
@@ -340,18 +337,14 @@ def _typeof_cuda_array_interface(val, c):
|
|
|
340
337
|
c_strides = numpy_support.strides_from_shape(
|
|
341
338
|
shape, itemsize, order="C"
|
|
342
339
|
)
|
|
343
|
-
layout = (
|
|
344
|
-
"C" if all(x == y for x, y in zip(strides, c_strides)) else "A"
|
|
345
|
-
)
|
|
340
|
+
layout = "C" if all(map(operator.eq, strides, c_strides)) else "A"
|
|
346
341
|
elif strides[0] == itemsize:
|
|
347
342
|
f_strides = numpy_support.strides_from_shape(
|
|
348
343
|
shape, itemsize, order="F"
|
|
349
344
|
)
|
|
350
|
-
layout = (
|
|
351
|
-
"F" if all(x == y for x, y in zip(strides, f_strides)) else "A"
|
|
352
|
-
)
|
|
345
|
+
layout = "F" if all(map(operator.eq, strides, f_strides)) else "A"
|
|
353
346
|
else:
|
|
354
347
|
layout = "A"
|
|
355
348
|
|
|
356
|
-
readonly = val["data"]
|
|
349
|
+
_, readonly = val["data"]
|
|
357
350
|
return types.Array(dtype, ndim, layout, readonly=readonly)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: numba-cuda
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.24.0
|
|
4
4
|
Summary: CUDA target for Numba
|
|
5
5
|
Author: Anaconda Inc., NVIDIA Corporation
|
|
6
6
|
License-Expression: BSD-2-Clause
|
|
@@ -16,24 +16,15 @@ License-File: LICENSE.numba
|
|
|
16
16
|
Requires-Dist: numba>=0.60.0
|
|
17
17
|
Requires-Dist: cuda-bindings<14.0.0,>=12.9.1
|
|
18
18
|
Requires-Dist: cuda-core<1.0.0,>=0.3.2
|
|
19
|
+
Requires-Dist: packaging
|
|
19
20
|
Provides-Extra: cu12
|
|
20
21
|
Requires-Dist: cuda-bindings<13.0.0,>=12.9.1; extra == "cu12"
|
|
21
22
|
Requires-Dist: cuda-core<1.0.0,>=0.3.0; extra == "cu12"
|
|
22
|
-
Requires-Dist: cuda-
|
|
23
|
-
Requires-Dist: nvidia-cuda-nvcc-cu12; extra == "cu12"
|
|
24
|
-
Requires-Dist: nvidia-cuda-runtime-cu12; extra == "cu12"
|
|
25
|
-
Requires-Dist: nvidia-cuda-nvrtc-cu12; extra == "cu12"
|
|
26
|
-
Requires-Dist: nvidia-nvjitlink-cu12; extra == "cu12"
|
|
27
|
-
Requires-Dist: nvidia-cuda-cccl-cu12; extra == "cu12"
|
|
23
|
+
Requires-Dist: cuda-toolkit[cccl,cudart,nvcc,nvjitlink,nvrtc]==12.*; extra == "cu12"
|
|
28
24
|
Provides-Extra: cu13
|
|
29
25
|
Requires-Dist: cuda-bindings==13.*; extra == "cu13"
|
|
30
26
|
Requires-Dist: cuda-core<1.0.0,>=0.3.2; extra == "cu13"
|
|
31
|
-
Requires-Dist: cuda-
|
|
32
|
-
Requires-Dist: nvidia-nvvm==13.*; extra == "cu13"
|
|
33
|
-
Requires-Dist: nvidia-cuda-runtime==13.*; extra == "cu13"
|
|
34
|
-
Requires-Dist: nvidia-cuda-nvrtc==13.*; extra == "cu13"
|
|
35
|
-
Requires-Dist: nvidia-nvjitlink==13.*; extra == "cu13"
|
|
36
|
-
Requires-Dist: nvidia-cuda-cccl==13.*; extra == "cu13"
|
|
27
|
+
Requires-Dist: cuda-toolkit[cccl,cudart,nvjitlink,nvrtc,nvvm]==13.*; extra == "cu13"
|
|
37
28
|
Dynamic: license-file
|
|
38
29
|
|
|
39
30
|
<div align="center"><img src="docs/source/_static/numba-green-icon-rgb.svg" width="200"/></div>
|