numba-cuda 0.21.1__cp313-cp313-win_amd64.whl → 0.23.0__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. numba_cuda/VERSION +1 -1
  2. numba_cuda/numba/cuda/api.py +4 -1
  3. numba_cuda/numba/cuda/cext/_dispatcher.cp313-win_amd64.pyd +0 -0
  4. numba_cuda/numba/cuda/cext/_dispatcher.cpp +0 -38
  5. numba_cuda/numba/cuda/cext/_helperlib.cp313-win_amd64.pyd +0 -0
  6. numba_cuda/numba/cuda/cext/_typeconv.cp313-win_amd64.pyd +0 -0
  7. numba_cuda/numba/cuda/cext/_typeof.cpp +0 -111
  8. numba_cuda/numba/cuda/cext/mviewbuf.cp313-win_amd64.pyd +0 -0
  9. numba_cuda/numba/cuda/codegen.py +42 -10
  10. numba_cuda/numba/cuda/compiler.py +10 -4
  11. numba_cuda/numba/cuda/core/analysis.py +29 -21
  12. numba_cuda/numba/cuda/core/annotations/type_annotations.py +4 -4
  13. numba_cuda/numba/cuda/core/base.py +6 -1
  14. numba_cuda/numba/cuda/core/consts.py +1 -1
  15. numba_cuda/numba/cuda/core/cuda_errors.py +917 -0
  16. numba_cuda/numba/cuda/core/errors.py +4 -912
  17. numba_cuda/numba/cuda/core/inline_closurecall.py +71 -57
  18. numba_cuda/numba/cuda/core/interpreter.py +79 -64
  19. numba_cuda/numba/cuda/core/ir.py +191 -119
  20. numba_cuda/numba/cuda/core/ir_utils.py +142 -112
  21. numba_cuda/numba/cuda/core/postproc.py +8 -8
  22. numba_cuda/numba/cuda/core/rewrites/ir_print.py +6 -3
  23. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +5 -5
  24. numba_cuda/numba/cuda/core/rewrites/static_raise.py +3 -3
  25. numba_cuda/numba/cuda/core/ssa.py +3 -3
  26. numba_cuda/numba/cuda/core/transforms.py +25 -10
  27. numba_cuda/numba/cuda/core/typed_passes.py +9 -9
  28. numba_cuda/numba/cuda/core/typeinfer.py +39 -24
  29. numba_cuda/numba/cuda/core/untyped_passes.py +71 -55
  30. numba_cuda/numba/cuda/cudadecl.py +0 -13
  31. numba_cuda/numba/cuda/cudadrv/devicearray.py +6 -5
  32. numba_cuda/numba/cuda/cudadrv/driver.py +132 -511
  33. numba_cuda/numba/cuda/cudadrv/dummyarray.py +4 -0
  34. numba_cuda/numba/cuda/cudadrv/nvrtc.py +16 -0
  35. numba_cuda/numba/cuda/cudaimpl.py +0 -12
  36. numba_cuda/numba/cuda/debuginfo.py +104 -10
  37. numba_cuda/numba/cuda/descriptor.py +1 -1
  38. numba_cuda/numba/cuda/device_init.py +4 -7
  39. numba_cuda/numba/cuda/dispatcher.py +36 -32
  40. numba_cuda/numba/cuda/intrinsics.py +150 -1
  41. numba_cuda/numba/cuda/lowering.py +64 -29
  42. numba_cuda/numba/cuda/memory_management/nrt.py +10 -14
  43. numba_cuda/numba/cuda/np/arrayobj.py +54 -0
  44. numba_cuda/numba/cuda/np/numpy_support.py +26 -0
  45. numba_cuda/numba/cuda/printimpl.py +20 -0
  46. numba_cuda/numba/cuda/serialize.py +10 -0
  47. numba_cuda/numba/cuda/stubs.py +0 -11
  48. numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +21 -4
  49. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +1 -2
  50. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +130 -48
  51. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +6 -2
  52. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +3 -1
  53. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +5 -6
  54. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +11 -12
  55. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +27 -19
  56. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +47 -0
  57. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +10 -0
  58. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +89 -0
  59. numba_cuda/numba/cuda/tests/cudapy/test_device_array_capture.py +243 -0
  60. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +3 -3
  61. numba_cuda/numba/cuda/tests/cudapy/test_numba_interop.py +35 -0
  62. numba_cuda/numba/cuda/tests/cudapy/test_print.py +51 -0
  63. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +116 -1
  64. numba_cuda/numba/cuda/tests/doc_examples/test_globals.py +111 -0
  65. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +61 -0
  66. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +31 -0
  67. numba_cuda/numba/cuda/typing/context.py +3 -1
  68. numba_cuda/numba/cuda/typing/typeof.py +56 -0
  69. {numba_cuda-0.21.1.dist-info → numba_cuda-0.23.0.dist-info}/METADATA +1 -1
  70. {numba_cuda-0.21.1.dist-info → numba_cuda-0.23.0.dist-info}/RECORD +74 -74
  71. numba_cuda/numba/cuda/cext/_devicearray.cp313-win_amd64.pyd +0 -0
  72. numba_cuda/numba/cuda/cext/_devicearray.cpp +0 -159
  73. numba_cuda/numba/cuda/cext/_devicearray.h +0 -29
  74. numba_cuda/numba/cuda/intrinsic_wrapper.py +0 -41
  75. {numba_cuda-0.21.1.dist-info → numba_cuda-0.23.0.dist-info}/WHEEL +0 -0
  76. {numba_cuda-0.21.1.dist-info → numba_cuda-0.23.0.dist-info}/licenses/LICENSE +0 -0
  77. {numba_cuda-0.21.1.dist-info → numba_cuda-0.23.0.dist-info}/licenses/LICENSE.numba +0 -0
  78. {numba_cuda-0.21.1.dist-info → numba_cuda-0.23.0.dist-info}/top_level.txt +0 -0
@@ -1,159 +0,0 @@
1
- // SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
- // SPDX-License-Identifier: BSD-2-Clause
3
-
4
- /* This file contains the base class implementation for all device arrays. The
5
- * base class is implemented in C so that computing typecodes for device arrays
6
- * can be implemented efficiently. */
7
-
8
- #include "_pymodule.h"
9
-
10
-
11
- /* Include _devicearray.h, but make sure we don't get the definitions intended
12
- * for consumers of the Device Array API.
13
- */
14
- #define NUMBA_IN_DEVICEARRAY_CPP_
15
- #include "_devicearray.h"
16
-
17
- /* DeviceArray PyObject implementation. Note that adding more members here is
18
- * presently prohibited because mapped and managed arrays derive from both
19
- * DeviceArray and NumPy's ndarray, which is also a C extension class - the
20
- * layout of the object cannot be resolved if this class also has members beyond
21
- * PyObject_HEAD. */
22
- class DeviceArray {
23
- PyObject_HEAD
24
- };
25
-
26
- /* Trivial traversal - DeviceArray instances own nothing. */
27
- static int
28
- DeviceArray_traverse(DeviceArray *self, visitproc visit, void *arg)
29
- {
30
- return 0;
31
- }
32
-
33
- /* Trivial clear of all references - DeviceArray instances own nothing. */
34
- static int
35
- DeviceArray_clear(DeviceArray *self)
36
- {
37
- return 0;
38
- }
39
-
40
- /* The _devicearray.DeviceArray type */
41
- PyTypeObject DeviceArrayType = {
42
- PyVarObject_HEAD_INIT(NULL, 0)
43
- "_devicearray.DeviceArray", /* tp_name */
44
- sizeof(DeviceArray), /* tp_basicsize */
45
- 0, /* tp_itemsize */
46
- 0, /* tp_dealloc */
47
- 0, /* tp_vectorcall_offset */
48
- 0, /* tp_getattr */
49
- 0, /* tp_setattr */
50
- 0, /* tp_as_async */
51
- 0, /* tp_repr */
52
- 0, /* tp_as_number */
53
- 0, /* tp_as_sequence */
54
- 0, /* tp_as_mapping */
55
- 0, /* tp_hash */
56
- 0, /* tp_call*/
57
- 0, /* tp_str*/
58
- 0, /* tp_getattro*/
59
- 0, /* tp_setattro*/
60
- 0, /* tp_as_buffer*/
61
- Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
62
- /* tp_flags*/
63
- "DeviceArray object", /* tp_doc */
64
- (traverseproc) DeviceArray_traverse, /* tp_traverse */
65
- (inquiry) DeviceArray_clear, /* tp_clear */
66
- 0, /* tp_richcompare */
67
- 0, /* tp_weaklistoffset */
68
- 0, /* tp_iter */
69
- 0, /* tp_iternext */
70
- 0, /* tp_methods */
71
- 0, /* tp_members */
72
- 0, /* tp_getset */
73
- 0, /* tp_base */
74
- 0, /* tp_dict */
75
- 0, /* tp_descr_get */
76
- 0, /* tp_descr_set */
77
- 0, /* tp_dictoffset */
78
- 0, /* tp_init */
79
- 0, /* tp_alloc */
80
- 0, /* tp_new */
81
- 0, /* tp_free */
82
- 0, /* tp_is_gc */
83
- 0, /* tp_bases */
84
- 0, /* tp_mro */
85
- 0, /* tp_cache */
86
- 0, /* tp_subclasses */
87
- 0, /* tp_weaklist */
88
- 0, /* tp_del */
89
- 0, /* tp_version_tag */
90
- 0, /* tp_finalize */
91
- 0, /* tp_vectorcall */
92
- #if (PY_MAJOR_VERSION == 3) && (PY_MINOR_VERSION == 12)
93
- /* This was introduced first in 3.12
94
- * https://github.com/python/cpython/issues/91051
95
- */
96
- 0, /* tp_watched */
97
- #endif
98
-
99
- /* WARNING: Do not remove this, only modify it! It is a version guard to
100
- * act as a reminder to update this struct on Python version update! */
101
- #if (PY_MAJOR_VERSION == 3)
102
- #if ! (NB_SUPPORTED_PYTHON_MINOR)
103
- #error "Python minor version is not supported."
104
- #endif
105
- #else
106
- #error "Python major version is not supported."
107
- #endif
108
- /* END WARNING*/
109
- };
110
-
111
- /* CUDA device array C API */
112
- static void *_DeviceArray_API[1] = {
113
- (void*)&DeviceArrayType
114
- };
115
-
116
- MOD_INIT(_devicearray) {
117
- PyObject *m = nullptr;
118
- PyObject *d = nullptr;
119
- PyObject *c_api = nullptr;
120
- int error = 0;
121
-
122
- MOD_DEF(m, "_devicearray", "No docs", NULL)
123
- if (m == NULL)
124
- goto error_occurred;
125
-
126
- c_api = PyCapsule_New((void *)_DeviceArray_API, NUMBA_DEVICEARRAY_IMPORT_NAME "._DEVICEARRAY_API", NULL);
127
- if (c_api == NULL)
128
- goto error_occurred;
129
-
130
- DeviceArrayType.tp_new = PyType_GenericNew;
131
- if (PyType_Ready(&DeviceArrayType) < 0)
132
- goto error_occurred;
133
-
134
- Py_INCREF(&DeviceArrayType);
135
- error = PyModule_AddObject(m, "DeviceArray", (PyObject*)(&DeviceArrayType));
136
- if (error)
137
- goto error_occurred;
138
-
139
- d = PyModule_GetDict(m);
140
- if (d == NULL)
141
- goto error_occurred;
142
-
143
- error = PyDict_SetItemString(d, "_DEVICEARRAY_API", c_api);
144
- /* Decref and set c_api to NULL, Py_XDECREF in error_occurred will have no
145
- * effect. */
146
- Py_CLEAR(c_api);
147
-
148
- if (error)
149
- goto error_occurred;
150
-
151
- return MOD_SUCCESS_VAL(m);
152
-
153
- error_occurred:
154
- Py_XDECREF(m);
155
- Py_XDECREF(c_api);
156
- Py_XDECREF((PyObject*)&DeviceArrayType);
157
-
158
- return MOD_ERROR_VAL;
159
- }
@@ -1,29 +0,0 @@
1
- // SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
- // SPDX-License-Identifier: BSD-2-Clause
3
-
4
- #ifndef NUMBA_DEVICEARRAY_H_
5
- #define NUMBA_DEVICEARRAY_H_
6
-
7
- #ifdef __cplusplus
8
- extern "C" {
9
- #endif
10
-
11
- #define NUMBA_DEVICEARRAY_IMPORT_NAME "numba.cuda.cext._devicearray"
12
- /* These definitions should only be used by consumers of the Device Array API.
13
- * Consumers access the API through the opaque pointer stored in
14
- * _devicearray._DEVICEARRAY_API. We don't want these definitions in
15
- * _devicearray.cpp itself because they would conflict with the actual
16
- * implementations there.
17
- */
18
- #ifndef NUMBA_IN_DEVICEARRAY_CPP_
19
-
20
- extern void **DeviceArray_API;
21
- #define DeviceArrayType (*(PyTypeObject*)DeviceArray_API[0])
22
-
23
- #endif /* ndef NUMBA_IN_DEVICEARRAY_CPP_ */
24
-
25
- #ifdef __cplusplus
26
- }
27
- #endif
28
-
29
- #endif /* NUMBA_DEVICEARRAY_H_ */
@@ -1,41 +0,0 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
- # SPDX-License-Identifier: BSD-2-Clause
3
-
4
- from .decorators import jit
5
- import numba
6
-
7
-
8
- @jit(device=True)
9
- def all_sync(mask, predicate):
10
- """
11
- If for all threads in the masked warp the predicate is true, then
12
- a non-zero value is returned, otherwise 0 is returned.
13
- """
14
- return numba.cuda.vote_sync_intrinsic(mask, 0, predicate)[1]
15
-
16
-
17
- @jit(device=True)
18
- def any_sync(mask, predicate):
19
- """
20
- If for any thread in the masked warp the predicate is true, then
21
- a non-zero value is returned, otherwise 0 is returned.
22
- """
23
- return numba.cuda.vote_sync_intrinsic(mask, 1, predicate)[1]
24
-
25
-
26
- @jit(device=True)
27
- def eq_sync(mask, predicate):
28
- """
29
- If for all threads in the masked warp the boolean predicate is the same,
30
- then a non-zero value is returned, otherwise 0 is returned.
31
- """
32
- return numba.cuda.vote_sync_intrinsic(mask, 2, predicate)[1]
33
-
34
-
35
- @jit(device=True)
36
- def ballot_sync(mask, predicate):
37
- """
38
- Returns a mask of all threads in the warp whose predicate is true,
39
- and are within the given mask.
40
- """
41
- return numba.cuda.vote_sync_intrinsic(mask, 3, predicate)[0]