numba-cuda 0.21.1__cp313-cp313-win_amd64.whl → 0.24.0__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- numba_cuda/VERSION +1 -1
- numba_cuda/numba/cuda/__init__.py +4 -1
- numba_cuda/numba/cuda/_compat.py +47 -0
- numba_cuda/numba/cuda/api.py +4 -1
- numba_cuda/numba/cuda/cext/_dispatcher.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cpp +8 -40
- numba_cuda/numba/cuda/cext/_hashtable.cpp +5 -0
- numba_cuda/numba/cuda/cext/_helperlib.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_pymodule.h +1 -1
- numba_cuda/numba/cuda/cext/_typeconv.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_typeof.cpp +56 -119
- numba_cuda/numba/cuda/cext/mviewbuf.c +7 -1
- numba_cuda/numba/cuda/cext/mviewbuf.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +4 -5
- numba_cuda/numba/cuda/codegen.py +46 -12
- numba_cuda/numba/cuda/compiler.py +15 -9
- numba_cuda/numba/cuda/core/analysis.py +29 -21
- numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +1 -1
- numba_cuda/numba/cuda/core/annotations/type_annotations.py +4 -4
- numba_cuda/numba/cuda/core/base.py +12 -11
- numba_cuda/numba/cuda/core/bytecode.py +21 -13
- numba_cuda/numba/cuda/core/byteflow.py +336 -90
- numba_cuda/numba/cuda/core/compiler.py +3 -4
- numba_cuda/numba/cuda/core/compiler_machinery.py +3 -3
- numba_cuda/numba/cuda/core/config.py +5 -7
- numba_cuda/numba/cuda/core/consts.py +1 -1
- numba_cuda/numba/cuda/core/controlflow.py +17 -9
- numba_cuda/numba/cuda/core/cuda_errors.py +917 -0
- numba_cuda/numba/cuda/core/errors.py +4 -912
- numba_cuda/numba/cuda/core/inline_closurecall.py +82 -67
- numba_cuda/numba/cuda/core/interpreter.py +334 -160
- numba_cuda/numba/cuda/core/ir.py +191 -119
- numba_cuda/numba/cuda/core/ir_utils.py +149 -128
- numba_cuda/numba/cuda/core/postproc.py +8 -8
- numba_cuda/numba/cuda/core/pythonapi.py +3 -0
- numba_cuda/numba/cuda/core/rewrites/ir_print.py +6 -3
- numba_cuda/numba/cuda/core/rewrites/static_binop.py +1 -1
- numba_cuda/numba/cuda/core/rewrites/static_getitem.py +5 -5
- numba_cuda/numba/cuda/core/rewrites/static_raise.py +3 -3
- numba_cuda/numba/cuda/core/ssa.py +5 -5
- numba_cuda/numba/cuda/core/transforms.py +29 -16
- numba_cuda/numba/cuda/core/typed_passes.py +10 -10
- numba_cuda/numba/cuda/core/typeinfer.py +42 -27
- numba_cuda/numba/cuda/core/untyped_passes.py +82 -65
- numba_cuda/numba/cuda/cpython/unicode.py +2 -2
- numba_cuda/numba/cuda/cpython/unicode_support.py +1 -3
- numba_cuda/numba/cuda/cudadecl.py +0 -13
- numba_cuda/numba/cuda/cudadrv/devicearray.py +10 -9
- numba_cuda/numba/cuda/cudadrv/driver.py +142 -519
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +4 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +87 -32
- numba_cuda/numba/cuda/cudaimpl.py +0 -12
- numba_cuda/numba/cuda/debuginfo.py +25 -0
- numba_cuda/numba/cuda/descriptor.py +1 -1
- numba_cuda/numba/cuda/device_init.py +4 -7
- numba_cuda/numba/cuda/deviceufunc.py +3 -6
- numba_cuda/numba/cuda/dispatcher.py +39 -49
- numba_cuda/numba/cuda/intrinsics.py +150 -1
- numba_cuda/numba/cuda/libdeviceimpl.py +1 -2
- numba_cuda/numba/cuda/lowering.py +36 -29
- numba_cuda/numba/cuda/memory_management/nrt.py +10 -14
- numba_cuda/numba/cuda/np/arrayobj.py +61 -9
- numba_cuda/numba/cuda/np/numpy_support.py +32 -9
- numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +4 -3
- numba_cuda/numba/cuda/printimpl.py +20 -0
- numba_cuda/numba/cuda/serialize.py +10 -0
- numba_cuda/numba/cuda/stubs.py +0 -11
- numba_cuda/numba/cuda/testing.py +4 -8
- numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +21 -4
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +1 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +195 -51
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +6 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +3 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +6 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +11 -12
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +53 -23
- numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +61 -9
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +6 -0
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +22 -1
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +13 -0
- numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_device_array_capture.py +243 -0
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +3 -3
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_numba_interop.py +35 -0
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +51 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +37 -35
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +117 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_globals.py +111 -0
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +61 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +31 -0
- numba_cuda/numba/cuda/tests/support.py +11 -0
- numba_cuda/numba/cuda/types/cuda_functions.py +1 -1
- numba_cuda/numba/cuda/typing/asnumbatype.py +37 -2
- numba_cuda/numba/cuda/typing/context.py +3 -1
- numba_cuda/numba/cuda/typing/typeof.py +51 -2
- {numba_cuda-0.21.1.dist-info → numba_cuda-0.24.0.dist-info}/METADATA +4 -13
- {numba_cuda-0.21.1.dist-info → numba_cuda-0.24.0.dist-info}/RECORD +106 -105
- numba_cuda/numba/cuda/cext/_devicearray.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_devicearray.cpp +0 -159
- numba_cuda/numba/cuda/cext/_devicearray.h +0 -29
- numba_cuda/numba/cuda/intrinsic_wrapper.py +0 -41
- {numba_cuda-0.21.1.dist-info → numba_cuda-0.24.0.dist-info}/WHEEL +0 -0
- {numba_cuda-0.21.1.dist-info → numba_cuda-0.24.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.21.1.dist-info → numba_cuda-0.24.0.dist-info}/licenses/LICENSE.numba +0 -0
- {numba_cuda-0.21.1.dist-info → numba_cuda-0.24.0.dist-info}/top_level.txt +0 -0
|
@@ -1,159 +0,0 @@
|
|
|
1
|
-
// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
-
// SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
-
|
|
4
|
-
/* This file contains the base class implementation for all device arrays. The
|
|
5
|
-
* base class is implemented in C so that computing typecodes for device arrays
|
|
6
|
-
* can be implemented efficiently. */
|
|
7
|
-
|
|
8
|
-
#include "_pymodule.h"
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
/* Include _devicearray., but make sure we don't get the definitions intended
|
|
12
|
-
* for consumers of the Device Array API.
|
|
13
|
-
*/
|
|
14
|
-
#define NUMBA_IN_DEVICEARRAY_CPP_
|
|
15
|
-
#include "_devicearray.h"
|
|
16
|
-
|
|
17
|
-
/* DeviceArray PyObject implementation. Note that adding more members here is
|
|
18
|
-
* presently prohibited because mapped and managed arrays derive from both
|
|
19
|
-
* DeviceArray and NumPy's ndarray, which is also a C extension class - the
|
|
20
|
-
* layout of the object cannot be resolved if this class also has members beyond
|
|
21
|
-
* PyObject_HEAD. */
|
|
22
|
-
class DeviceArray {
|
|
23
|
-
PyObject_HEAD
|
|
24
|
-
};
|
|
25
|
-
|
|
26
|
-
/* Trivial traversal - DeviceArray instances own nothing. */
|
|
27
|
-
static int
|
|
28
|
-
DeviceArray_traverse(DeviceArray *self, visitproc visit, void *arg)
|
|
29
|
-
{
|
|
30
|
-
return 0;
|
|
31
|
-
}
|
|
32
|
-
|
|
33
|
-
/* Trivial clear of all references - DeviceArray instances own nothing. */
|
|
34
|
-
static int
|
|
35
|
-
DeviceArray_clear(DeviceArray *self)
|
|
36
|
-
{
|
|
37
|
-
return 0;
|
|
38
|
-
}
|
|
39
|
-
|
|
40
|
-
/* The _devicearray.DeviceArray type */
|
|
41
|
-
PyTypeObject DeviceArrayType = {
|
|
42
|
-
PyVarObject_HEAD_INIT(NULL, 0)
|
|
43
|
-
"_devicearray.DeviceArray", /* tp_name */
|
|
44
|
-
sizeof(DeviceArray), /* tp_basicsize */
|
|
45
|
-
0, /* tp_itemsize */
|
|
46
|
-
0, /* tp_dealloc */
|
|
47
|
-
0, /* tp_vectorcall_offset */
|
|
48
|
-
0, /* tp_getattr */
|
|
49
|
-
0, /* tp_setattr */
|
|
50
|
-
0, /* tp_as_async */
|
|
51
|
-
0, /* tp_repr */
|
|
52
|
-
0, /* tp_as_number */
|
|
53
|
-
0, /* tp_as_sequence */
|
|
54
|
-
0, /* tp_as_mapping */
|
|
55
|
-
0, /* tp_hash */
|
|
56
|
-
0, /* tp_call*/
|
|
57
|
-
0, /* tp_str*/
|
|
58
|
-
0, /* tp_getattro*/
|
|
59
|
-
0, /* tp_setattro*/
|
|
60
|
-
0, /* tp_as_buffer*/
|
|
61
|
-
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
|
|
62
|
-
/* tp_flags*/
|
|
63
|
-
"DeviceArray object", /* tp_doc */
|
|
64
|
-
(traverseproc) DeviceArray_traverse, /* tp_traverse */
|
|
65
|
-
(inquiry) DeviceArray_clear, /* tp_clear */
|
|
66
|
-
0, /* tp_richcompare */
|
|
67
|
-
0, /* tp_weaklistoffset */
|
|
68
|
-
0, /* tp_iter */
|
|
69
|
-
0, /* tp_iternext */
|
|
70
|
-
0, /* tp_methods */
|
|
71
|
-
0, /* tp_members */
|
|
72
|
-
0, /* tp_getset */
|
|
73
|
-
0, /* tp_base */
|
|
74
|
-
0, /* tp_dict */
|
|
75
|
-
0, /* tp_descr_get */
|
|
76
|
-
0, /* tp_descr_set */
|
|
77
|
-
0, /* tp_dictoffset */
|
|
78
|
-
0, /* tp_init */
|
|
79
|
-
0, /* tp_alloc */
|
|
80
|
-
0, /* tp_new */
|
|
81
|
-
0, /* tp_free */
|
|
82
|
-
0, /* tp_is_gc */
|
|
83
|
-
0, /* tp_bases */
|
|
84
|
-
0, /* tp_mro */
|
|
85
|
-
0, /* tp_cache */
|
|
86
|
-
0, /* tp_subclasses */
|
|
87
|
-
0, /* tp_weaklist */
|
|
88
|
-
0, /* tp_del */
|
|
89
|
-
0, /* tp_version_tag */
|
|
90
|
-
0, /* tp_finalize */
|
|
91
|
-
0, /* tp_vectorcall */
|
|
92
|
-
#if (PY_MAJOR_VERSION == 3) && (PY_MINOR_VERSION == 12)
|
|
93
|
-
/* This was introduced first in 3.12
|
|
94
|
-
* https://github.com/python/cpython/issues/91051
|
|
95
|
-
*/
|
|
96
|
-
0, /* tp_watched */
|
|
97
|
-
#endif
|
|
98
|
-
|
|
99
|
-
/* WARNING: Do not remove this, only modify it! It is a version guard to
|
|
100
|
-
* act as a reminder to update this struct on Python version update! */
|
|
101
|
-
#if (PY_MAJOR_VERSION == 3)
|
|
102
|
-
#if ! (NB_SUPPORTED_PYTHON_MINOR)
|
|
103
|
-
#error "Python minor version is not supported."
|
|
104
|
-
#endif
|
|
105
|
-
#else
|
|
106
|
-
#error "Python major version is not supported."
|
|
107
|
-
#endif
|
|
108
|
-
/* END WARNING*/
|
|
109
|
-
};
|
|
110
|
-
|
|
111
|
-
/* CUDA device array C API */
|
|
112
|
-
static void *_DeviceArray_API[1] = {
|
|
113
|
-
(void*)&DeviceArrayType
|
|
114
|
-
};
|
|
115
|
-
|
|
116
|
-
MOD_INIT(_devicearray) {
|
|
117
|
-
PyObject *m = nullptr;
|
|
118
|
-
PyObject *d = nullptr;
|
|
119
|
-
PyObject *c_api = nullptr;
|
|
120
|
-
int error = 0;
|
|
121
|
-
|
|
122
|
-
MOD_DEF(m, "_devicearray", "No docs", NULL)
|
|
123
|
-
if (m == NULL)
|
|
124
|
-
goto error_occurred;
|
|
125
|
-
|
|
126
|
-
c_api = PyCapsule_New((void *)_DeviceArray_API, NUMBA_DEVICEARRAY_IMPORT_NAME "._DEVICEARRAY_API", NULL);
|
|
127
|
-
if (c_api == NULL)
|
|
128
|
-
goto error_occurred;
|
|
129
|
-
|
|
130
|
-
DeviceArrayType.tp_new = PyType_GenericNew;
|
|
131
|
-
if (PyType_Ready(&DeviceArrayType) < 0)
|
|
132
|
-
goto error_occurred;
|
|
133
|
-
|
|
134
|
-
Py_INCREF(&DeviceArrayType);
|
|
135
|
-
error = PyModule_AddObject(m, "DeviceArray", (PyObject*)(&DeviceArrayType));
|
|
136
|
-
if (error)
|
|
137
|
-
goto error_occurred;
|
|
138
|
-
|
|
139
|
-
d = PyModule_GetDict(m);
|
|
140
|
-
if (d == NULL)
|
|
141
|
-
goto error_occurred;
|
|
142
|
-
|
|
143
|
-
error = PyDict_SetItemString(d, "_DEVICEARRAY_API", c_api);
|
|
144
|
-
/* Decref and set c_api to NULL, Py_XDECREF in error_occurred will have no
|
|
145
|
-
* effect. */
|
|
146
|
-
Py_CLEAR(c_api);
|
|
147
|
-
|
|
148
|
-
if (error)
|
|
149
|
-
goto error_occurred;
|
|
150
|
-
|
|
151
|
-
return MOD_SUCCESS_VAL(m);
|
|
152
|
-
|
|
153
|
-
error_occurred:
|
|
154
|
-
Py_XDECREF(m);
|
|
155
|
-
Py_XDECREF(c_api);
|
|
156
|
-
Py_XDECREF((PyObject*)&DeviceArrayType);
|
|
157
|
-
|
|
158
|
-
return MOD_ERROR_VAL;
|
|
159
|
-
}
|
|
@@ -1,29 +0,0 @@
|
|
|
1
|
-
// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
-
// SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
-
|
|
4
|
-
#ifndef NUMBA_DEVICEARRAY_H_
|
|
5
|
-
#define NUMBA_DEVICEARRAY_H_
|
|
6
|
-
|
|
7
|
-
#ifdef __cplusplus
|
|
8
|
-
extern "C" {
|
|
9
|
-
#endif
|
|
10
|
-
|
|
11
|
-
#define NUMBA_DEVICEARRAY_IMPORT_NAME "numba.cuda.cext._devicearray"
|
|
12
|
-
/* These definitions should only be used by consumers of the Device Array API.
|
|
13
|
-
* Consumers access the API through the opaque pointer stored in
|
|
14
|
-
* _devicearray._DEVICEARRAY_API. We don't want these definitions in
|
|
15
|
-
* _devicearray.cpp itself because they would conflict with the actual
|
|
16
|
-
* implementations there.
|
|
17
|
-
*/
|
|
18
|
-
#ifndef NUMBA_IN_DEVICEARRAY_CPP_
|
|
19
|
-
|
|
20
|
-
extern void **DeviceArray_API;
|
|
21
|
-
#define DeviceArrayType (*(PyTypeObject*)DeviceArray_API[0])
|
|
22
|
-
|
|
23
|
-
#endif /* ndef NUMBA_IN_DEVICEARRAY_CPP */
|
|
24
|
-
|
|
25
|
-
#ifdef __cplusplus
|
|
26
|
-
}
|
|
27
|
-
#endif
|
|
28
|
-
|
|
29
|
-
#endif /* NUMBA_DEVICEARRAY_H_ */
|
|
@@ -1,41 +0,0 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
-
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
-
|
|
4
|
-
from .decorators import jit
|
|
5
|
-
import numba
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
@jit(device=True)
|
|
9
|
-
def all_sync(mask, predicate):
|
|
10
|
-
"""
|
|
11
|
-
If for all threads in the masked warp the predicate is true, then
|
|
12
|
-
a non-zero value is returned, otherwise 0 is returned.
|
|
13
|
-
"""
|
|
14
|
-
return numba.cuda.vote_sync_intrinsic(mask, 0, predicate)[1]
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
@jit(device=True)
|
|
18
|
-
def any_sync(mask, predicate):
|
|
19
|
-
"""
|
|
20
|
-
If for any thread in the masked warp the predicate is true, then
|
|
21
|
-
a non-zero value is returned, otherwise 0 is returned.
|
|
22
|
-
"""
|
|
23
|
-
return numba.cuda.vote_sync_intrinsic(mask, 1, predicate)[1]
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
@jit(device=True)
|
|
27
|
-
def eq_sync(mask, predicate):
|
|
28
|
-
"""
|
|
29
|
-
If for all threads in the masked warp the boolean predicate is the same,
|
|
30
|
-
then a non-zero value is returned, otherwise 0 is returned.
|
|
31
|
-
"""
|
|
32
|
-
return numba.cuda.vote_sync_intrinsic(mask, 2, predicate)[1]
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
@jit(device=True)
|
|
36
|
-
def ballot_sync(mask, predicate):
|
|
37
|
-
"""
|
|
38
|
-
Returns a mask of all threads in the warp whose predicate is true,
|
|
39
|
-
and are within the given mask.
|
|
40
|
-
"""
|
|
41
|
-
return numba.cuda.vote_sync_intrinsic(mask, 3, predicate)[0]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|