numba-cuda 0.19.1__py3-none-any.whl → 0.20.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of numba-cuda might be problematic. Click here for more details.

Files changed (171)
  1. numba_cuda/VERSION +1 -1
  2. numba_cuda/numba/cuda/__init__.py +1 -1
  3. numba_cuda/numba/cuda/_internal/cuda_bf16.py +12706 -1470
  4. numba_cuda/numba/cuda/_internal/cuda_fp16.py +2653 -8769
  5. numba_cuda/numba/cuda/api.py +6 -1
  6. numba_cuda/numba/cuda/bf16.py +285 -2
  7. numba_cuda/numba/cuda/cgutils.py +2 -2
  8. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  9. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  10. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  11. numba_cuda/numba/cuda/codegen.py +1 -1
  12. numba_cuda/numba/cuda/compiler.py +373 -30
  13. numba_cuda/numba/cuda/core/analysis.py +319 -0
  14. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  15. numba_cuda/numba/cuda/core/annotations/type_annotations.py +304 -0
  16. numba_cuda/numba/cuda/core/base.py +1289 -0
  17. numba_cuda/numba/cuda/core/bytecode.py +727 -0
  18. numba_cuda/numba/cuda/core/caching.py +2 -2
  19. numba_cuda/numba/cuda/core/compiler.py +6 -14
  20. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  21. numba_cuda/numba/cuda/core/config.py +747 -0
  22. numba_cuda/numba/cuda/core/consts.py +124 -0
  23. numba_cuda/numba/cuda/core/cpu.py +370 -0
  24. numba_cuda/numba/cuda/core/environment.py +68 -0
  25. numba_cuda/numba/cuda/core/event.py +511 -0
  26. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  27. numba_cuda/numba/cuda/core/inline_closurecall.py +1889 -0
  28. numba_cuda/numba/cuda/core/interpreter.py +48 -26
  29. numba_cuda/numba/cuda/core/ir_utils.py +15 -26
  30. numba_cuda/numba/cuda/core/options.py +262 -0
  31. numba_cuda/numba/cuda/core/postproc.py +249 -0
  32. numba_cuda/numba/cuda/core/pythonapi.py +1868 -0
  33. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  34. numba_cuda/numba/cuda/core/rewrites/ir_print.py +90 -0
  35. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  36. numba_cuda/numba/cuda/core/rewrites/static_binop.py +40 -0
  37. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +187 -0
  38. numba_cuda/numba/cuda/core/rewrites/static_raise.py +98 -0
  39. numba_cuda/numba/cuda/core/ssa.py +496 -0
  40. numba_cuda/numba/cuda/core/targetconfig.py +329 -0
  41. numba_cuda/numba/cuda/core/tracing.py +231 -0
  42. numba_cuda/numba/cuda/core/transforms.py +952 -0
  43. numba_cuda/numba/cuda/core/typed_passes.py +738 -7
  44. numba_cuda/numba/cuda/core/typeinfer.py +1948 -0
  45. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  46. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  47. numba_cuda/numba/cuda/core/unsafe/eh.py +66 -0
  48. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  49. numba_cuda/numba/cuda/core/untyped_passes.py +1983 -0
  50. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  51. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  52. numba_cuda/numba/cuda/cpython/numbers.py +1474 -0
  53. numba_cuda/numba/cuda/cuda_paths.py +422 -246
  54. numba_cuda/numba/cuda/cudadecl.py +1 -1
  55. numba_cuda/numba/cuda/cudadrv/__init__.py +1 -1
  56. numba_cuda/numba/cuda/cudadrv/devicearray.py +2 -1
  57. numba_cuda/numba/cuda/cudadrv/driver.py +11 -140
  58. numba_cuda/numba/cuda/cudadrv/dummyarray.py +111 -24
  59. numba_cuda/numba/cuda/cudadrv/libs.py +5 -5
  60. numba_cuda/numba/cuda/cudadrv/mappings.py +1 -1
  61. numba_cuda/numba/cuda/cudadrv/nvrtc.py +19 -8
  62. numba_cuda/numba/cuda/cudadrv/nvvm.py +1 -4
  63. numba_cuda/numba/cuda/cudadrv/runtime.py +1 -1
  64. numba_cuda/numba/cuda/cudaimpl.py +5 -1
  65. numba_cuda/numba/cuda/debuginfo.py +85 -2
  66. numba_cuda/numba/cuda/decorators.py +3 -3
  67. numba_cuda/numba/cuda/descriptor.py +3 -4
  68. numba_cuda/numba/cuda/deviceufunc.py +66 -2
  69. numba_cuda/numba/cuda/dispatcher.py +18 -39
  70. numba_cuda/numba/cuda/flags.py +141 -1
  71. numba_cuda/numba/cuda/fp16.py +0 -2
  72. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  73. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  74. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  75. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  76. numba_cuda/numba/cuda/lowering.py +7 -144
  77. numba_cuda/numba/cuda/mathimpl.py +2 -1
  78. numba_cuda/numba/cuda/memory_management/nrt.py +43 -17
  79. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  80. numba_cuda/numba/cuda/models.py +9 -1
  81. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  82. numba_cuda/numba/cuda/np/npyfuncs.py +1807 -0
  83. numba_cuda/numba/cuda/np/numpy_support.py +553 -0
  84. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +59 -0
  85. numba_cuda/numba/cuda/nvvmutils.py +1 -1
  86. numba_cuda/numba/cuda/printimpl.py +12 -1
  87. numba_cuda/numba/cuda/random.py +1 -1
  88. numba_cuda/numba/cuda/serialize.py +1 -1
  89. numba_cuda/numba/cuda/simulator/__init__.py +1 -1
  90. numba_cuda/numba/cuda/simulator/api.py +1 -1
  91. numba_cuda/numba/cuda/simulator/compiler.py +4 -0
  92. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +1 -1
  93. numba_cuda/numba/cuda/simulator/kernelapi.py +1 -1
  94. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +14 -2
  95. numba_cuda/numba/cuda/target.py +35 -17
  96. numba_cuda/numba/cuda/testing.py +4 -19
  97. numba_cuda/numba/cuda/tests/__init__.py +1 -1
  98. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  99. numba_cuda/numba/cuda/tests/core/test_serialize.py +4 -4
  100. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +1 -1
  101. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +1 -1
  102. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +1 -1
  103. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +6 -3
  104. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +1 -1
  105. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +18 -2
  106. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +2 -1
  107. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +1 -1
  108. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +1 -1
  109. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +1 -1
  110. numba_cuda/numba/cuda/tests/cudapy/test_array.py +2 -1
  111. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1 -1
  112. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +539 -2
  113. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +81 -1
  114. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +1 -3
  115. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +1 -1
  116. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +1 -1
  117. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +2 -3
  118. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +130 -0
  119. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +1 -1
  120. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +1 -1
  121. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +293 -4
  122. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +1 -1
  123. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +1 -1
  124. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +1 -1
  125. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +1 -1
  126. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +2 -1
  127. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +18 -8
  128. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +10 -37
  129. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +1 -1
  130. numba_cuda/numba/cuda/tests/cudapy/test_math.py +1 -1
  131. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -1
  132. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +1 -1
  133. numba_cuda/numba/cuda/tests/cudapy/test_print.py +20 -0
  134. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +1 -1
  135. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +1 -1
  136. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +1 -1
  137. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +1 -1
  138. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +453 -0
  139. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +1 -1
  140. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  141. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +263 -2
  142. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +1 -1
  143. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +1 -1
  144. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +112 -6
  145. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +1 -1
  146. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +1 -1
  147. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +0 -2
  148. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +3 -2
  149. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +0 -2
  150. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +0 -2
  151. numba_cuda/numba/cuda/tests/nocuda/test_import.py +3 -1
  152. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +24 -12
  153. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +2 -1
  154. numba_cuda/numba/cuda/tests/support.py +55 -15
  155. numba_cuda/numba/cuda/tests/test_tracing.py +200 -0
  156. numba_cuda/numba/cuda/types.py +56 -0
  157. numba_cuda/numba/cuda/typing/__init__.py +9 -1
  158. numba_cuda/numba/cuda/typing/cffi_utils.py +55 -0
  159. numba_cuda/numba/cuda/typing/context.py +751 -0
  160. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  161. numba_cuda/numba/cuda/typing/npydecl.py +658 -0
  162. numba_cuda/numba/cuda/typing/templates.py +7 -6
  163. numba_cuda/numba/cuda/ufuncs.py +3 -3
  164. numba_cuda/numba/cuda/utils.py +6 -112
  165. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.0.dist-info}/METADATA +2 -1
  166. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.0.dist-info}/RECORD +170 -115
  167. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +0 -60
  168. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.0.dist-info}/WHEEL +0 -0
  169. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.0.dist-info}/licenses/LICENSE +0 -0
  170. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.0.dist-info}/licenses/LICENSE.numba +0 -0
  171. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,249 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+ from functools import cached_property
4
+ from numba.core import ir, analysis
5
+ from numba.cuda.core import ir_utils, transforms
6
+
7
+
8
class YieldPoint:
    """
    Pairs one `ir.Yield` instruction with the `ir.Block` that contains it.

    `live_vars` and `weak_live_vars` start out as None; they are assigned
    later by `PostProcessor._compute_generator_info`.
    """

    def __init__(self, block, inst):
        # Validate both arguments before storing anything on the instance.
        assert isinstance(block, ir.Block)
        assert isinstance(inst, ir.Yield)
        self.block, self.inst = block, inst
        # Not known yet at construction time.
        self.live_vars = self.weak_live_vars = None
16
+
17
+
18
class GeneratorInfo:
    """
    Holds the per-generator data: its yield points and its state variables.
    """

    def __init__(self):
        # yield_points: { index: YieldPoint }
        # state_vars: ordered list of variable names
        self.yield_points, self.state_vars = {}, []

    def get_yield_points(self):
        """
        Return an iterable over the stored YieldPoint instances
        (the values view of `yield_points`).
        """
        points = self.yield_points
        return points.values()
30
+
31
+
32
class VariableLifetime:
    """
    Lazily computed variable-lifetime information for a set of IR blocks.

    Each analysis is a `cached_property`: it is computed on first access via
    the `analysis` module and then stored on the instance.
    """

    def __init__(self, blocks):
        self._blocks = blocks

    @cached_property
    def cfg(self):
        blocks = self._blocks
        return analysis.compute_cfg_from_blocks(blocks)

    @cached_property
    def usedefs(self):
        blocks = self._blocks
        return analysis.compute_use_defs(blocks)

    @cached_property
    def livemap(self):
        # cfg is resolved before usedefs, matching the argument order below.
        cfg = self.cfg
        blocks = self._blocks
        usedefs = self.usedefs
        return analysis.compute_live_map(
            cfg, blocks, usedefs.usemap, usedefs.defmap
        )

    @cached_property
    def deadmaps(self):
        # Resolve the dependencies in the same order the call consumes them.
        cfg = self.cfg
        blocks = self._blocks
        livemap = self.livemap
        defmap = self.usedefs.defmap
        return analysis.compute_dead_maps(cfg, blocks, livemap, defmap)
59
+
60
+
61
+ # Hook registry: packages that define new IR node types register a del-insertion function here.
62
+ # Format: {node_type: function}; called as function(stmt, dead_set), its result is subtracted from the pending dead sets.
63
+ ir_extension_insert_dels = {}
64
+
65
+
66
class PostProcessor(object):
    """
    A post-processor for Numba IR.

    Wraps a function IR object (`func_ir`) and annotates it in place with
    variable lifetime information, generator info and, on request, explicit
    `del` nodes.
    """

    def __init__(self, func_ir):
        self.func_ir = func_ir

    def run(self, emit_dels: bool = False, extend_lifetimes: bool = False):
        """
        Run the following passes over Numba IR, in this order:
        - canonicalize the CFG
        - compute lifetime of variables
        - compute generator info (if function is a generator function)
        - emit explicit `del` instructions for variables (only when
          `emit_dels` is true)

        `extend_lifetimes` is forwarded to the del insertion step; when true
        the dels of a block are placed just ahead of its terminator.

        Returns None; all results are stored on `self.func_ir`.
        """
        self.func_ir.blocks = transforms.canonicalize_cfg(self.func_ir.blocks)
        vlt = VariableLifetime(self.func_ir.blocks)
        self.func_ir.variable_lifetime = vlt

        bev = analysis.compute_live_variables(
            vlt.cfg,
            self.func_ir.blocks,
            vlt.usedefs.defmap,
            vlt.deadmaps.combined,
        )
        for offset, ir_block in self.func_ir.blocks.items():
            self.func_ir.block_entry_vars[ir_block] = bev[offset]

        if self.func_ir.is_generator:
            self.func_ir.generator_info = GeneratorInfo()
            self._compute_generator_info()
        else:
            self.func_ir.generator_info = None

        # Emit del nodes, do this last as the generator info parsing generates
        # and then strips dels as part of its analysis.
        if emit_dels:
            self._insert_var_dels(extend_lifetimes=extend_lifetimes)

    def _populate_generator_info(self):
        """
        Fill `index` for the Yield instruction and create YieldPoints.

        Indices start at 1 and follow the iteration order of the blocks and
        of the statements inside each block.
        """
        dct = self.func_ir.generator_info.yield_points
        assert not dct, "rerunning _populate_generator_info"
        for block in self.func_ir.blocks.values():
            for inst in block.body:
                if isinstance(inst, ir.Assign):
                    yieldinst = inst.value
                    if isinstance(yieldinst, ir.Yield):
                        index = len(dct) + 1
                        yieldinst.index = index
                        yp = YieldPoint(block, yieldinst)
                        dct[yieldinst.index] = yp

    def _compute_generator_info(self):
        """
        Compute the generator's state variables as the union of live variables
        at all yield points.

        Raises AssertionError if a recorded yield instruction cannot be found
        in the body of its block.
        """
        # generate del info, it's used in analysis here, strip it out at the end
        self._insert_var_dels()
        self._populate_generator_info()
        gi = self.func_ir.generator_info
        for yp in gi.get_yield_points():
            live_vars = set(self.func_ir.get_block_entry_vars(yp.block))
            weak_live_vars = set()
            # A single iterator is shared by both loops below so the second
            # loop resumes right after the yield statement.
            stmts = iter(yp.block.body)
            for stmt in stmts:
                if isinstance(stmt, ir.Assign):
                    if stmt.value is yp.inst:
                        break
                    live_vars.add(stmt.target.name)
                elif isinstance(stmt, ir.Del):
                    live_vars.remove(stmt.value)
            else:
                # Explicit raise rather than `assert 0` so the check is not
                # stripped when Python runs with -O.
                raise AssertionError("couldn't find yield point")
            # Try to optimize out any live vars that are deleted immediately
            # after the yield point.
            for stmt in stmts:
                if isinstance(stmt, ir.Del):
                    name = stmt.value
                    if name in live_vars:
                        live_vars.remove(name)
                        weak_live_vars.add(name)
                else:
                    break
            yp.live_vars = live_vars
            yp.weak_live_vars = weak_live_vars

        st = set()
        for yp in gi.get_yield_points():
            st |= yp.live_vars
            st |= yp.weak_live_vars
        gi.state_vars = sorted(st)
        self.remove_dels()

    def _insert_var_dels(self, extend_lifetimes=False):
        """
        Insert del statements for each variable.

        The blocks of `self.func_ir` are modified in place and nothing is
        returned. The dead maps are taken from
        `self.func_ir.variable_lifetime`.

        The algorithm avoids relying on explicit knowledge on loops and
        distinguish between variables that are defined locally vs variables that
        come from incoming blocks.
        We start with simple usage (variable reference) and definition (variable
        creation) maps on each block. Propagate the liveness info to predecessor
        blocks until it stabilize, at which point we know which variables must
        exist before entering each block. Then, we compute the end of variable
        lives and insert del statements accordingly. Variables are deleted after
        the last use. Variable referenced by terminators (e.g. conditional
        branch and return) are deleted by the successors or the caller.
        """
        vlt = self.func_ir.variable_lifetime
        self._patch_var_dels(
            vlt.deadmaps.internal,
            vlt.deadmaps.escaping,
            extend_lifetimes=extend_lifetimes,
        )

    def _patch_var_dels(
        self, internal_dead_map, escaping_dead_map, extend_lifetimes=False
    ):
        """
        Insert delete in each block

        `internal_dead_map` and `escaping_dead_map` are indexed by the same
        keys as `self.func_ir.blocks`. Each block body is rebuilt with
        `ir.Del` nodes added; existing `ir.Del` statements are dropped.
        With `extend_lifetimes` the new dels are gathered just ahead of the
        block terminator instead of right after the last use.
        """
        for offset, ir_block in self.func_ir.blocks.items():
            # for each internal var, insert delete after the last use
            internal_dead_set = internal_dead_map[offset].copy()
            delete_pts = []
            # for each statement in reverse order (terminator excluded)
            for stmt in reversed(ir_block.body[:-1]):
                # internal vars that are used here
                live_set = {v.name for v in stmt.list_vars()}
                dead_set = live_set & internal_dead_set
                for T, def_func in ir_extension_insert_dels.items():
                    if isinstance(stmt, T):
                        done_dels = def_func(stmt, dead_set)
                        dead_set -= done_dels
                        internal_dead_set -= done_dels
                # used here but not afterwards
                delete_pts.append((stmt, dead_set))
                internal_dead_set -= dead_set

            # rewrite body and insert dels
            terminator = ir_block.body[-1]
            body = []
            del_store = []
            for stmt, delete_set in reversed(delete_pts):
                # If using extended lifetimes then the Dels are all put at the
                # block end just ahead of the terminator, so associate their
                # location with the terminator.
                del_loc = terminator.loc if extend_lifetimes else stmt.loc
                # Ignore dels (assuming no user inserted deletes)
                if not isinstance(stmt, ir.Del):
                    body.append(stmt)
                # note: the reverse sort is not necessary for correctness
                # it is just to minimize changes to test for now
                for var_name in sorted(delete_set, reverse=True):
                    delnode = ir.Del(var_name, loc=del_loc)
                    if extend_lifetimes:
                        del_store.append(delnode)
                    else:
                        body.append(delnode)
            if extend_lifetimes:
                body.extend(del_store)
            body.append(terminator)
            ir_block.body = body

            # vars to delete at the start
            escape_dead_set = escaping_dead_map[offset]
            for var_name in sorted(escape_dead_set):
                ir_block.prepend(ir.Del(var_name, loc=ir_block.body[0].loc))

    def remove_dels(self):
        """
        Strips the IR of Del nodes
        """
        ir_utils.remove_dels(self.func_ir.blocks)