numba-cuda 0.19.1__py3-none-any.whl → 0.20.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of numba-cuda might be problematic. Click here for more details.

Files changed (172)
  1. numba_cuda/VERSION +1 -1
  2. numba_cuda/numba/cuda/__init__.py +1 -1
  3. numba_cuda/numba/cuda/_internal/cuda_bf16.py +12706 -1470
  4. numba_cuda/numba/cuda/_internal/cuda_fp16.py +2653 -8769
  5. numba_cuda/numba/cuda/api.py +6 -1
  6. numba_cuda/numba/cuda/bf16.py +285 -2
  7. numba_cuda/numba/cuda/cgutils.py +2 -2
  8. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  9. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  10. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  11. numba_cuda/numba/cuda/codegen.py +1 -1
  12. numba_cuda/numba/cuda/compiler.py +373 -30
  13. numba_cuda/numba/cuda/core/analysis.py +319 -0
  14. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  15. numba_cuda/numba/cuda/core/annotations/type_annotations.py +304 -0
  16. numba_cuda/numba/cuda/core/base.py +1289 -0
  17. numba_cuda/numba/cuda/core/bytecode.py +727 -0
  18. numba_cuda/numba/cuda/core/caching.py +2 -2
  19. numba_cuda/numba/cuda/core/compiler.py +6 -14
  20. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  21. numba_cuda/numba/cuda/core/config.py +747 -0
  22. numba_cuda/numba/cuda/core/consts.py +124 -0
  23. numba_cuda/numba/cuda/core/cpu.py +370 -0
  24. numba_cuda/numba/cuda/core/environment.py +68 -0
  25. numba_cuda/numba/cuda/core/event.py +511 -0
  26. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  27. numba_cuda/numba/cuda/core/inline_closurecall.py +1889 -0
  28. numba_cuda/numba/cuda/core/interpreter.py +48 -26
  29. numba_cuda/numba/cuda/core/ir_utils.py +15 -26
  30. numba_cuda/numba/cuda/core/options.py +262 -0
  31. numba_cuda/numba/cuda/core/postproc.py +249 -0
  32. numba_cuda/numba/cuda/core/pythonapi.py +1868 -0
  33. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  34. numba_cuda/numba/cuda/core/rewrites/ir_print.py +90 -0
  35. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  36. numba_cuda/numba/cuda/core/rewrites/static_binop.py +40 -0
  37. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +187 -0
  38. numba_cuda/numba/cuda/core/rewrites/static_raise.py +98 -0
  39. numba_cuda/numba/cuda/core/ssa.py +496 -0
  40. numba_cuda/numba/cuda/core/targetconfig.py +329 -0
  41. numba_cuda/numba/cuda/core/tracing.py +231 -0
  42. numba_cuda/numba/cuda/core/transforms.py +952 -0
  43. numba_cuda/numba/cuda/core/typed_passes.py +738 -7
  44. numba_cuda/numba/cuda/core/typeinfer.py +1948 -0
  45. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  46. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  47. numba_cuda/numba/cuda/core/unsafe/eh.py +66 -0
  48. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  49. numba_cuda/numba/cuda/core/untyped_passes.py +1983 -0
  50. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  51. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  52. numba_cuda/numba/cuda/cpython/numbers.py +1474 -0
  53. numba_cuda/numba/cuda/cuda_paths.py +422 -246
  54. numba_cuda/numba/cuda/cudadecl.py +1 -1
  55. numba_cuda/numba/cuda/cudadrv/__init__.py +1 -1
  56. numba_cuda/numba/cuda/cudadrv/devicearray.py +2 -1
  57. numba_cuda/numba/cuda/cudadrv/driver.py +11 -140
  58. numba_cuda/numba/cuda/cudadrv/dummyarray.py +111 -24
  59. numba_cuda/numba/cuda/cudadrv/libs.py +5 -5
  60. numba_cuda/numba/cuda/cudadrv/mappings.py +1 -1
  61. numba_cuda/numba/cuda/cudadrv/nvrtc.py +19 -8
  62. numba_cuda/numba/cuda/cudadrv/nvvm.py +1 -4
  63. numba_cuda/numba/cuda/cudadrv/runtime.py +1 -1
  64. numba_cuda/numba/cuda/cudaimpl.py +5 -1
  65. numba_cuda/numba/cuda/debuginfo.py +85 -2
  66. numba_cuda/numba/cuda/decorators.py +3 -3
  67. numba_cuda/numba/cuda/descriptor.py +3 -4
  68. numba_cuda/numba/cuda/deviceufunc.py +66 -2
  69. numba_cuda/numba/cuda/dispatcher.py +18 -39
  70. numba_cuda/numba/cuda/flags.py +141 -1
  71. numba_cuda/numba/cuda/fp16.py +0 -2
  72. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  73. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  74. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  75. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  76. numba_cuda/numba/cuda/lowering.py +7 -144
  77. numba_cuda/numba/cuda/mathimpl.py +2 -1
  78. numba_cuda/numba/cuda/memory_management/nrt.py +43 -17
  79. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  80. numba_cuda/numba/cuda/models.py +9 -1
  81. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  82. numba_cuda/numba/cuda/np/npyfuncs.py +1807 -0
  83. numba_cuda/numba/cuda/np/numpy_support.py +553 -0
  84. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +59 -0
  85. numba_cuda/numba/cuda/nvvmutils.py +1 -1
  86. numba_cuda/numba/cuda/printimpl.py +12 -1
  87. numba_cuda/numba/cuda/random.py +1 -1
  88. numba_cuda/numba/cuda/serialize.py +1 -1
  89. numba_cuda/numba/cuda/simulator/__init__.py +1 -1
  90. numba_cuda/numba/cuda/simulator/api.py +1 -1
  91. numba_cuda/numba/cuda/simulator/compiler.py +4 -0
  92. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +1 -1
  93. numba_cuda/numba/cuda/simulator/kernelapi.py +1 -1
  94. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +14 -2
  95. numba_cuda/numba/cuda/target.py +35 -17
  96. numba_cuda/numba/cuda/testing.py +7 -19
  97. numba_cuda/numba/cuda/tests/__init__.py +1 -1
  98. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  99. numba_cuda/numba/cuda/tests/core/test_serialize.py +4 -4
  100. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +1 -1
  101. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +1 -1
  102. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +1 -1
  103. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +6 -3
  104. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +1 -1
  105. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +18 -2
  106. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +2 -1
  107. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +1 -1
  108. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +1 -1
  109. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +1 -1
  110. numba_cuda/numba/cuda/tests/cudapy/test_array.py +2 -1
  111. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1 -1
  112. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +539 -2
  113. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +81 -1
  114. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +1 -3
  115. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +1 -1
  116. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +1 -1
  117. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +2 -3
  118. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +130 -0
  119. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +1 -1
  120. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +1 -1
  121. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +293 -4
  122. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +1 -1
  123. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +1 -1
  124. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +1 -1
  125. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +1 -1
  126. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +2 -1
  127. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +18 -8
  128. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +23 -21
  129. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +10 -37
  130. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +1 -1
  131. numba_cuda/numba/cuda/tests/cudapy/test_math.py +1 -1
  132. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -1
  133. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +1 -1
  134. numba_cuda/numba/cuda/tests/cudapy/test_print.py +20 -0
  135. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +1 -1
  136. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +1 -1
  137. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +1 -1
  138. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +1 -1
  139. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +453 -0
  140. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +1 -1
  141. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  142. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +263 -2
  143. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +1 -1
  144. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +1 -1
  145. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +112 -6
  146. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +1 -1
  147. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +1 -1
  148. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +0 -2
  149. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +3 -2
  150. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +0 -2
  151. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +0 -2
  152. numba_cuda/numba/cuda/tests/nocuda/test_import.py +3 -1
  153. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +24 -12
  154. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +2 -1
  155. numba_cuda/numba/cuda/tests/support.py +55 -15
  156. numba_cuda/numba/cuda/tests/test_tracing.py +200 -0
  157. numba_cuda/numba/cuda/types.py +56 -0
  158. numba_cuda/numba/cuda/typing/__init__.py +9 -1
  159. numba_cuda/numba/cuda/typing/cffi_utils.py +55 -0
  160. numba_cuda/numba/cuda/typing/context.py +751 -0
  161. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  162. numba_cuda/numba/cuda/typing/npydecl.py +658 -0
  163. numba_cuda/numba/cuda/typing/templates.py +7 -6
  164. numba_cuda/numba/cuda/ufuncs.py +3 -3
  165. numba_cuda/numba/cuda/utils.py +6 -112
  166. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/METADATA +4 -3
  167. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/RECORD +171 -116
  168. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +0 -60
  169. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/WHEEL +0 -0
  170. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/licenses/LICENSE +0 -0
  171. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/licenses/LICENSE.numba +0 -0
  172. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,553 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import collections
5
+ import numpy as np
6
+ import re
7
+
8
+ from numba.core import types, errors
9
+ from numba.cuda.typing.templates import signature
10
+ from numba.cuda.np import npdatetime_helpers
11
+
12
# (major, minor) of the installed NumPy, as a tuple of ints.
numpy_version = tuple(int(part) for part in np.__version__.split(".")[:2])


# Direct NumPy dtype -> Numba type table. from_dtype() consults it first and
# only inspects the dtype's character code when the dtype is not listed here.
FROM_DTYPE = {
    np.dtype("bool"): types.boolean,
    np.dtype("int8"): types.int8,
    np.dtype("int16"): types.int16,
    np.dtype("int32"): types.int32,
    np.dtype("int64"): types.int64,
    np.dtype("uint8"): types.uint8,
    np.dtype("uint16"): types.uint16,
    np.dtype("uint32"): types.uint32,
    np.dtype("uint64"): types.uint64,
    np.dtype("float32"): types.float32,
    np.dtype("float64"): types.float64,
    np.dtype("float16"): types.float16,
    np.dtype("complex64"): types.complex64,
    np.dtype("complex128"): types.complex128,
    np.dtype(object): types.pyobject,
}


# Applied to ``dtype.str``: a byte-order marker, one type letter (group 1)
# and an optional decimal size (group 2).
re_typestr = re.compile(r"[<>=\|]([a-z])(\d+)?$", re.IGNORECASE)
# Applied to ``dtype.str`` of datetime-like dtypes: group 1 is the "m"/"M"
# type letter and group 3 is the bracketed unit, when one is present.
re_datetimestr = re.compile(r"[<>=\|]([mM])8?(\[([a-z]+)\])?$", re.IGNORECASE)

# Item size, in bytes, of a one-character NumPy unicode dtype.
sizeof_unicode_char = np.dtype("U1").itemsize
38
+
39
+
40
def _from_str_dtype(dtype):
    """
    Convert a NumPy string dtype ("U" or "S") to the matching Numba type.

    Returns ``types.UnicodeCharSeq`` for unicode dtypes and ``types.CharSeq``
    for byte-string dtypes. NumbaNotImplementedError is raised when
    ``dtype.str`` does not match ``re_typestr``, when the type letter is
    neither "U" nor "S", or when a unicode dtype has a non-native byte order.
    """
    match = re_typestr.match(dtype.str)
    if not match:
        raise errors.NumbaNotImplementedError(dtype)
    kind, declared_size = match.groups()

    if kind == "U":
        # Only native ("=") or not-applicable ("|") byte order is handled.
        if dtype.byteorder not in "=|":
            raise errors.NumbaNotImplementedError(
                "Does not support non-native byteorder"
            )
        n_chars = dtype.itemsize // sizeof_unicode_char
        # Sanity check: the size spelled in dtype.str must agree with itemsize.
        assert n_chars == int(declared_size), "Unicode char size mismatch"
        return types.UnicodeCharSeq(n_chars)

    if kind == "S":
        n_bytes = dtype.itemsize
        assert n_bytes == int(declared_size), "Char size mismatch"
        return types.CharSeq(n_bytes)

    raise errors.NumbaNotImplementedError(dtype)
64
+
65
+
66
def _from_datetime_dtype(dtype):
    """
    Convert a NumPy datetime64 / timedelta64 dtype to the matching Numba type.

    The unit is taken from ``dtype.str`` (empty string when no unit is given).
    Returns ``types.NPTimedelta`` for type letter "m" and ``types.NPDatetime``
    for "M"; NumbaNotImplementedError is raised for anything else.
    """
    match = re_datetimestr.match(dtype.str)
    if not match:
        raise errors.NumbaNotImplementedError(dtype)

    kind = match.group(1)
    # Group 3 is None for a unit-less dtype; normalise that to "".
    unit = match.group(3) or ""

    if kind == "m":
        return types.NPTimedelta(unit)
    if kind == "M":
        return types.NPDatetime(unit)
    raise errors.NumbaNotImplementedError(dtype)
79
+
80
+
81
def from_dtype(dtype):
    """
    Return the Numba Type instance corresponding to the NumPy *dtype*.

    *dtype* may be a NumPy scalar class (a subclass of ``np.generic``), which
    is first converted with ``np.dtype``. Objects whose ``fields`` attribute is
    not None are handed to ``from_struct_dtype``.

    NumbaNotImplementedError is raised on unsupported NumPy dtypes.
    """
    is_scalar_class = type(dtype) is type and issubclass(dtype, np.generic)
    if is_scalar_class:
        dtype = np.dtype(dtype)
    elif getattr(dtype, "fields", None) is not None:
        return from_struct_dtype(dtype)

    # Fast path: plain scalar dtypes listed in the lookup table.
    try:
        return FROM_DTYPE[dtype]
    except KeyError:
        pass

    # Slow path: dispatch on the dtype's character code, if it has one.
    try:
        code = dtype.char
    except AttributeError:
        pass
    else:
        if code in "SU":
            return _from_str_dtype(dtype)
        if code in "mM":
            return _from_datetime_dtype(dtype)
        if code in "V" and dtype.subdtype is not None:
            # Sub-array dtype: convert the element dtype recursively.
            element_type = from_dtype(dtype.subdtype[0])
            return types.NestedArray(element_type, dtype.shape)

    raise errors.NumbaNotImplementedError(dtype)
110
+
111
+
112
# dtype-string prefix for the Numba types that as_dtype() converts by
# formatting a dtype string; keyed by the exact Numba type class.
_as_dtype_letters = {
    types.NPDatetime: "M8",
    types.NPTimedelta: "m8",
    types.CharSeq: "S",
    types.UnicodeCharSeq: "U",
}
118
+
119
+
120
def as_struct_dtype(rec):
    """
    Convert a Numba Record type to a NumPy structured dtype.

    Members for which ``rec.is_title`` is true are skipped; every other member
    contributes its name, converted format, offset and title. The alignment
    of the result is validated with ``_check_struct_alignment`` before the
    dtype is built.
    """
    assert isinstance(rec, types.Record)

    # One (name, format, offset, title) row per non-title member.
    rows = [
        (name, as_dtype(member_ty), rec.offset(name), rec.fields[name].title)
        for name, member_ty in rec.members
        if not rec.is_title(name)
    ]

    spec = {
        "names": [row[0] for row in rows],
        "formats": [row[1] for row in rows],
        "offsets": [row[2] for row in rows],
        "itemsize": rec.size,
        "titles": [row[3] for row in rows],
    }
    _check_struct_alignment(rec, spec)
    return np.dtype(spec, align=rec.aligned)
144
+
145
+
146
+ def _check_struct_alignment(rec, fields):
147
+ """Check alignment compatibility with Numpy"""
148
+ if rec.aligned:
149
+ for k, dt in zip(fields["names"], fields["formats"]):
150
+ llvm_align = rec.alignof(k)
151
+ npy_align = dt.alignment
152
+ if llvm_align is not None and npy_align != llvm_align:
153
+ msg = (
154
+ "NumPy is using a different alignment ({}) "
155
+ "than Numba/LLVM ({}) for {}. "
156
+ "This is likely a NumPy bug."
157
+ )
158
+ raise ValueError(msg.format(npy_align, llvm_align, dt))
159
+
160
+
161
def as_dtype(nbtype):
    """
    Return the NumPy dtype instance corresponding to the given Numba type.

    Literal types are first reduced with ``types.unliteral``.
    NumbaNotImplementedError is raised if no correspondence is known.
    """
    nbtype = types.unliteral(nbtype)

    # Numeric scalars: the Numba type's string form is passed to np.dtype.
    if isinstance(nbtype, (types.Complex, types.Integer, types.Float)):
        return np.dtype(str(nbtype))
    if isinstance(nbtype, types.Boolean):
        return np.dtype("?")

    if isinstance(nbtype, (types.NPDatetime, types.NPTimedelta)):
        prefix = _as_dtype_letters[type(nbtype)]
        if not nbtype.unit:
            return np.dtype(prefix)
        return np.dtype("%s[%s]" % (prefix, nbtype.unit))

    if isinstance(nbtype, (types.CharSeq, types.UnicodeCharSeq)):
        prefix = _as_dtype_letters[type(nbtype)]
        return np.dtype("%s%d" % (prefix, nbtype.count))

    if isinstance(nbtype, types.Record):
        return as_struct_dtype(nbtype)

    # These types carry the type to convert in their ``dtype`` attribute.
    if isinstance(
        nbtype, (types.EnumMember, types.npytypes.DType, types.NumberClass)
    ):
        return as_dtype(nbtype.dtype)

    if isinstance(nbtype, types.NestedArray):
        return np.dtype((as_dtype(nbtype.dtype), tuple(nbtype.shape)))
    if isinstance(nbtype, types.PyObject):
        return np.dtype(object)

    msg = f"{nbtype} cannot be represented as a NumPy dtype"
    raise errors.NumbaNotImplementedError(msg)
196
+
197
+
198
def select_array_wrapper(inputs):
    """
    Pick the input that should wrap the output of an operation (e.g. ufunc).

    Among the entries of *inputs* that are ``types.ArrayCompatible``, the one
    with the highest ``array_priority`` wins; ties go to the earliest entry,
    as in Numpy.

    An index into *inputs* is returned.
    """
    best_index = None
    best_priority = float("-inf")
    for position, candidate in enumerate(inputs):
        if not isinstance(candidate, types.ArrayCompatible):
            continue
        # Strict comparison keeps the first of several equal priorities.
        if candidate.array_priority > best_priority:
            best_index = position
            best_priority = candidate.array_priority

    assert best_index is not None
    return best_index
219
+
220
+
221
def resolve_output_type(context, inputs, formal_output):
    """
    Resolve the actual output type of an array operation (e.g. ufunc).

    *inputs* are the array-compatible input types, *formal_output* is the
    operation's formal output type (a types.Array instance) and *context* is
    the typing context used to resolve ``__array_wrap__``.

    This uses a mechanism compatible with Numpy's __array_priority__ /
    __array_wrap__. TypingError is raised when ``__array_wrap__`` cannot be
    resolved for an input whose priority differs from a regular array's.
    """
    wrapper_input = inputs[select_array_wrapper(inputs)]
    args = (wrapper_input, formal_output)
    sig = context.resolve_function_type("__array_wrap__", args, {})
    if sig is not None:
        return sig.return_type

    if wrapper_input.array_priority != types.Array.array_priority:
        raise errors.TypingError("__array_wrap__ failed for %s" % (args,))

    # Same priority as a regular array: assume the output is returned
    # unchanged. (__array_wrap__ can't be defined explicitly for
    # types.Buffer, as that would be inherited by most array-compatible
    # objects.)
    return formal_output
242
+
243
+
244
def supported_ufunc_loop(ufunc, loop):
    """Return whether the *loop* for the *ufunc* is supported -in nopython-.

    *loop* should be a UFuncLoopSpec instance, and *ufunc* a numpy ufunc.

    For ufuncs that have an entry in the ufunc_db, the loop is supported if
    that entry contains a lowering definition for the loop's signature.

    For other ufuncs the decision is type based: the loop is considered valid
    if it only involves the letter types '?bBhHiIlLqQfd'. This is legacy;
    when implementing new ufuncs the ufunc_db should be preferred, as it
    allows for more fine-grained incremental support.
    """
    # NOTE: Assuming ufunc for the CPUContext
    from numba.np import ufunc_db

    try:
        # Not every ufunc has an entry in the ufunc_db; a missing entry
        # surfaces as KeyError.
        return loop.ufunc_sig in ufunc_db.get_ufunc_info(ufunc)
    except KeyError:
        # Fall back to checking every type letter involved in the loop.
        supported_types = "?bBhHiIlLqQfd"
        operand_dtypes = loop.numpy_inputs + loop.numpy_outputs
        return all(dt.char in supported_types for dt in operand_dtypes)
278
+
279
+
280
_UFuncLoopSpecBase = collections.namedtuple(
    "_UFuncLoopSpec", ("inputs", "outputs", "ufunc_sig")
)


class UFuncLoopSpec(_UFuncLoopSpecBase):
    """
    Description of the inner types of a ufunc loop. Fields:
    - inputs: the inputs' Numba types
    - outputs: the outputs' Numba types
    - ufunc_sig: the string representing the ufunc's type signature, in
      Numpy format (e.g. "ii->i")
    """

    # No per-instance __dict__; the tuple fields are the only state.
    __slots__ = ()

    @property
    def numpy_inputs(self):
        """The input types converted to NumPy dtypes."""
        return list(map(as_dtype, self.inputs))

    @property
    def numpy_outputs(self):
        """The output types converted to NumPy dtypes."""
        return list(map(as_dtype, self.outputs))
300
+
301
+
302
def _ufunc_loop_sig(out_tys, in_tys):
    """
    Build the signature of a ufunc loop from its output and input types.

    A single output is used directly as the return type; any other number of
    outputs is wrapped in a ``types.Tuple``.
    """
    return_type = out_tys[0] if len(out_tys) == 1 else types.Tuple(out_tys)
    return signature(return_type, *in_tys)
307
+
308
+
309
def ufunc_can_cast(from_, to, has_mixed_inputs, casting="safe"):
    """
    A variant of np.can_cast() that can allow casting any integer to
    any real or complex type, in case the operation has mixed-kind
    inputs.

    For example we want `np.power(float32, int32)` to be computed using
    SP arithmetic and return `float32`.
    However, `np.sqrt(int32)` should use DP arithmetic and return `float64`.

    *from_* and *to* are anything ``np.dtype`` accepts; *casting* is passed
    through to ``np.can_cast`` when the integer shortcut does not apply.
    """
    source = np.dtype(from_)
    target = np.dtype(to)

    # With mixed-kind inputs, every integer is treated as castable to any
    # real or complex type.
    int_to_inexact = source.kind in "iu" and target.kind in "cf"
    if has_mixed_inputs and int_to_inexact:
        return True

    return np.can_cast(source, target, casting)
325
+
326
+
327
def ufunc_find_matching_loop(ufunc, arg_types):
    """Find the appropriate loop to be used for a ufunc based on the types
    of the operands

    ufunc        - The ufunc we want to check
    arg_types    - The tuple of arguments to the ufunc, including any
                   explicit output(s).
    return value - A UFuncLoopSpec identifying the loop, or None
                   if no matching loop is found.
    """
    nin = ufunc.nin

    # Separate logical input from explicit output arguments
    input_types = arg_types[:nin]
    output_types = arg_types[nin:]
    assert len(input_types) == nin

    # An operand with no NumPy dtype equivalent cannot match any loop.
    try:
        np_input_types = [as_dtype(ty) for ty in input_types]
        np_output_types = [as_dtype(ty) for ty in output_types]
    except errors.NumbaNotImplementedError:
        return None

    # Whether the inputs are mixed integer / floating-point
    input_kinds = [dt.kind for dt in np_input_types]
    has_mixed_inputs = any(kind in "iu" for kind in input_kinds) and any(
        kind in "cf" for kind in input_kinds
    )

    def choose_types(numba_types, ufunc_letters):
        """
        Return a list of Numba types representing *ufunc_letters*,
        except when the letter designates a datetime64 or timedelta64,
        in which case the type is taken from *numba_types*.
        """
        n_given = len(numba_types)
        assert len(ufunc_letters) >= n_given
        chosen = []
        for position, letter in enumerate(ufunc_letters):
            if position < n_given and letter in "mM":
                chosen.append(numba_types[position])
            else:
                # Either a non-datetime letter, or a letter beyond the
                # supplied types (presumably an implicit output).
                chosen.append(from_dtype(np.dtype(letter)))
        return chosen

    def set_output_dt_units(inputs, outputs, ufunc_inputs, ufunc_name):
        """
        Sets the output unit of a datetime type based on the input units

        Timedelta is a special dtype that requires the time unit to be
        specified (day, month, etc). Not every operation with timedelta inputs
        leads to an output of timedelta output. However, for those that do,
        the unit of output must be inferred based on the units of the inputs.

        At the moment this function takes care of these cases:
        a) where all inputs are timedelta with the same unit (mm), and
        therefore the output has the same unit.
        This case is used for arr.sum, and for arr1+arr2 where all arrays
        are timedeltas.
        If in the future this needs to be extended to a case with mixed units,
        the rules should be implemented in `npdatetime_helpers` and called
        from this function to set the correct output unit.
        b) where one operand is a timedelta and the other a datetime ("mM"
        or "Mm"); the output unit is obtained from
        `npdatetime_helpers.combine_datetime_timedelta_units`.
        c) where left operand is a timedelta, i.e. the "m?" case. This case
        is used for division, eg timedelta / int.

        At the time of writing, Numba does not support addition of timedelta
        and other types, so this function does not consider the case "?m",
        i.e. where timedelta is the right operand to a non-timedelta left
        operand. To extend it in the future, just add another elif clause.
        """

        def make_specific(outputs, unit):
            # Give unit-less timedelta outputs the supplied unit.
            return [
                types.NPTimedelta(unit)
                if isinstance(out, types.NPTimedelta) and out.unit == ""
                else out
                for out in outputs
            ]

        def make_datetime_specific(outputs, dt_unit, td_unit):
            # Give unit-less datetime outputs the combined unit of the
            # datetime and timedelta operands.
            resolved = []
            for out in outputs:
                if not (isinstance(out, types.NPDatetime) and out.unit == ""):
                    resolved.append(out)
                    continue
                unit = npdatetime_helpers.combine_datetime_timedelta_units(
                    dt_unit, td_unit
                )
                if unit is None:
                    raise errors.TypingError(
                        f"ufunc '{ufunc_name}' is not "
                        + "supported between "
                        + f"datetime64[{dt_unit}] "
                        + f"and timedelta64[{td_unit}]"
                    )
                resolved.append(types.NPDatetime(unit))
            return resolved

        if ufunc_inputs == "mm":
            first_unit = inputs[0].unit
            if all(inp.unit == first_unit for inp in inputs):
                return make_specific(outputs, first_unit)
            # Operations on different units are not adjusted for now but
            # might need to be added in the future.
            return outputs
        if ufunc_inputs == "mM":
            # timedelta on the left, datetime on the right
            return make_datetime_specific(
                outputs, inputs[1].unit, inputs[0].unit
            )
        if ufunc_inputs == "Mm":
            # datetime on the left, timedelta on the right
            return make_datetime_specific(
                outputs, inputs[0].unit, inputs[1].unit
            )
        if ufunc_inputs[0] == "m":
            # case where the left operand has timedelta type
            return make_specific(outputs, inputs[0].unit)
        # No other input pattern is handled here (implicitly returns None).

    def inputs_castable(letters):
        # True when every actual input can be passed to a loop expecting
        # *letters*.
        for actual, letter in zip(np_input_types, letters):
            if actual.char in "mM" or letter in "mM":
                # For datetime64 and timedelta64, we want to retain
                # precise typing (i.e. the units); therefore we look for
                # an exact match.
                if actual.char != letter:
                    return False
            elif not ufunc_can_cast(
                actual.char, letter, has_mixed_inputs, "safe"
            ):
                return False
        return True

    def outputs_castable(letters):
        # Can the loop's results be cast back to the explicit output types?
        return all(
            actual.char in "mM"
            or ufunc_can_cast(
                letter, actual.char, has_mixed_inputs, "same_kind"
            )
            for actual, letter in zip(np_output_types, letters)
        )

    # In NumPy, the loops are evaluated from first to last. The first one
    # that is viable is the one used. One loop is viable if it is possible
    # to cast every input operand to the one expected by the ufunc.
    # Also under NumPy 1.10+ the output must be able to be cast back
    # to a close enough type ("same_kind").
    for candidate in ufunc.types:
        in_letters = candidate[:nin]
        out_letters = candidate[-ufunc.nout :] if ufunc.nout else []

        # Skip float16 loops (no implementation for them) and object loops.
        if "e" in in_letters or "O" in in_letters:
            continue
        if not inputs_castable(in_letters):
            continue
        if not outputs_castable(out_letters):
            continue

        # Viable loop: determine the Numba types for its inputs and outputs.
        try:
            inputs = choose_types(input_types, in_letters)
            outputs = choose_types(output_types, out_letters)
            # if the left operand or both are timedeltas, or the first
            # argument is datetime and the second argument is timedelta,
            # then the output units need to be determined.
            if in_letters[0] == "m" or in_letters == "Mm":
                outputs = set_output_dt_units(
                    inputs, outputs, in_letters, ufunc.__name__
                )
        except errors.NumbaNotImplementedError:
            # One of the selected dtypes isn't supported by Numba
            # (e.g. float16), try other candidates
            continue
        return UFuncLoopSpec(inputs, outputs, candidate)

    return None
521
+
522
+
523
+ def _is_aligned_struct(struct):
524
+ return struct.isalignedstruct
525
+
526
+
527
def from_struct_dtype(dtype):
    """
    Convert a NumPy structured dtype to a Numba Record type.

    Each field is converted with ``from_dtype`` and recorded together with
    its offset and optional title. NumbaNotImplementedError is raised when
    the dtype contains objects.
    """
    if dtype.hasobject:
        msg = "dtypes that contain object are not supported."
        raise errors.NumbaNotImplementedError(msg)

    fields = []
    for name, info in dtype.fields.items():
        # *info* is (dtype, offset) optionally followed by a title.
        elemdtype, offset, *extra = info
        title = extra[0] if len(extra) == 1 else None
        member = {
            "type": from_dtype(elemdtype),
            "offset": offset,
            "title": title,
        }
        fields.append((name, member))

    # Note: dtype.alignment is not consistent.
    #       It is different after passing into a recarray.
    #       recarray(N, dtype=mydtype).dtype.alignment != mydtype.alignment
    return types.Record(fields, dtype.itemsize, _is_aligned_struct(dtype))
@@ -0,0 +1,59 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ from numba.cuda.core import sigutils
5
+ from numba.np.ufunc import _internal
6
+
7
+ # Utility functions
8
+
9
+
10
def _compile_element_wise_function(nb_func, targetoptions, sig):
    """
    Compile *nb_func* for *sig* and return what was compiled.

    *targetoptions* is passed to ``nb_func.compile`` as keyword arguments.
    Returns a ``(cres, args, return_type)`` tuple, where *args* and
    *return_type* come from ``sigutils.normalize_signature(sig)``; the
    compile result is returned so that callers can test it.
    """
    compiled = nb_func.compile(sig, **targetoptions)
    arg_types, ret_type = sigutils.normalize_signature(sig)
    return compiled, arg_types, ret_type
16
+
17
+
18
+ # Class definitions
19
+
20
+
21
+ class _BaseUFuncBuilder(object):
22
+ def add(self, sig=None):
23
+ if hasattr(self, "targetoptions"):
24
+ targetoptions = self.targetoptions
25
+ else:
26
+ targetoptions = self.nb_func.targetoptions
27
+ cres, args, return_type = _compile_element_wise_function(
28
+ self.nb_func, targetoptions, sig
29
+ )
30
+ sig = self._finalize_signature(cres, args, return_type)
31
+ self._sigs.append(sig)
32
+ self._cres[sig] = cres
33
+ return cres
34
+
35
+ def disable_compile(self):
36
+ """
37
+ Disable the compilation of new signatures at call time.
38
+ """
39
+ # Override this for implementations that support lazy compilation
40
+
41
+
42
# Accepted identity values mapped to the constants exposed by ``_internal``.
_identities = {
    0: _internal.PyUFunc_Zero,
    1: _internal.PyUFunc_One,
    None: _internal.PyUFunc_None,
    "reorderable": _internal.PyUFunc_ReorderableNone,
}


def parse_identity(identity):
    """
    Parse an identity value and return the corresponding low-level value
    for Numpy.

    ValueError is raised when *identity* is not a key of ``_identities``.
    """
    try:
        low_level = _identities[identity]
    except KeyError:
        raise ValueError("Invalid identity value %r" % (identity,))
    return low_level
@@ -3,7 +3,7 @@
3
3
 
4
4
  import itertools
5
5
  from llvmlite import ir
6
- from numba.core import targetconfig
6
+ from numba.cuda.core import targetconfig
7
7
  from numba.cuda import cgutils
8
8
  from .cudadrv import nvvm
9
9
 
@@ -8,7 +8,7 @@ from numba.cuda import cgutils
8
8
  from numba.core.errors import NumbaWarning
9
9
  from numba.core.imputils import Registry
10
10
  from numba.cuda import nvvmutils
11
- from numba.cuda.types import Dim3
11
+ from numba.cuda.types import Dim3, Bfloat16
12
12
  from warnings import warn
13
13
 
14
14
  registry = Registry()
@@ -51,6 +51,17 @@ def real_print_impl(ty, context, builder, val):
51
51
  return "%f", [lld]
52
52
 
53
53
 
54
@print_item.register(Bfloat16)
def bfloat16_print_impl(ty, context, builder, val):
    """
    Produce the printf format and argument list for a Bfloat16 value.

    The value is converted by hand: zero-extended to 32 bits, shifted left
    by 16, reinterpreted as a float32 and finally extended to a double.
    Returns the ``"%f"`` format string and a one-element argument list.
    """
    i32 = ir.IntType(32)
    widened = builder.zext(val, i32)
    shifted = builder.shl(widened, ir.Constant(i32, 16))
    as_f32 = builder.bitcast(shifted, ir.FloatType())
    # printf("%f") expects a double; promote to f64 to match the vararg
    # expectation.
    as_f64 = builder.fpext(as_f32, ir.DoubleType())
    return "%f", [as_f64]
63
+
64
+
54
65
  @print_item.register(types.StringLiteral)
55
66
  def const_print_impl(ty, context, builder, sigval):
56
67
  pyval = ty.literal_value
@@ -4,7 +4,6 @@
4
4
  import math
5
5
 
6
6
  from numba import (
7
- config,
8
7
  cuda,
9
8
  float32,
10
9
  float64,
@@ -14,6 +13,7 @@ from numba import (
14
13
  from_dtype,
15
14
  jit,
16
15
  )
16
+ from numba.cuda import config
17
17
 
18
18
  import numpy as np
19
19
 
@@ -12,7 +12,7 @@ import copyreg
12
12
 
13
13
 
14
14
  import pickle
15
- from numba import cloudpickle
15
+ from numba.cuda import cloudpickle
16
16
  from llvmlite import ir
17
17
 
18
18
 
@@ -20,7 +20,7 @@ from .cudadrv import devicearray
20
20
  from .cudadrv.devices import require_context, gpus
21
21
  from .cudadrv.devices import get_context as current_context
22
22
  from .cudadrv.runtime import runtime
23
- from numba.core import config
23
+ from numba.cuda.core import config
24
24
 
25
25
  reduce = Reduce
26
26
 
@@ -20,7 +20,7 @@ from .cudadrv.linkable_code import (
20
20
  LTOIR, # noqa: F401
21
21
  ) # noqa: F401
22
22
  from .kernel import FakeCUDAKernel
23
- from numba.core import config
23
+ from numba.cuda.core import config
24
24
  from numba.cuda.core.sigutils import is_signature
25
25
  from warnings import warn
26
26
  from ..args import In, Out, InOut # noqa: F401