numba-cuda 0.19.1__py3-none-any.whl → 0.20.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of numba-cuda might be problematic. Click here for more details.

Files changed (172) hide show
  1. numba_cuda/VERSION +1 -1
  2. numba_cuda/numba/cuda/__init__.py +1 -1
  3. numba_cuda/numba/cuda/_internal/cuda_bf16.py +12706 -1470
  4. numba_cuda/numba/cuda/_internal/cuda_fp16.py +2653 -8769
  5. numba_cuda/numba/cuda/api.py +6 -1
  6. numba_cuda/numba/cuda/bf16.py +285 -2
  7. numba_cuda/numba/cuda/cgutils.py +2 -2
  8. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  9. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  10. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  11. numba_cuda/numba/cuda/codegen.py +1 -1
  12. numba_cuda/numba/cuda/compiler.py +373 -30
  13. numba_cuda/numba/cuda/core/analysis.py +319 -0
  14. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  15. numba_cuda/numba/cuda/core/annotations/type_annotations.py +304 -0
  16. numba_cuda/numba/cuda/core/base.py +1289 -0
  17. numba_cuda/numba/cuda/core/bytecode.py +727 -0
  18. numba_cuda/numba/cuda/core/caching.py +2 -2
  19. numba_cuda/numba/cuda/core/compiler.py +6 -14
  20. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  21. numba_cuda/numba/cuda/core/config.py +747 -0
  22. numba_cuda/numba/cuda/core/consts.py +124 -0
  23. numba_cuda/numba/cuda/core/cpu.py +370 -0
  24. numba_cuda/numba/cuda/core/environment.py +68 -0
  25. numba_cuda/numba/cuda/core/event.py +511 -0
  26. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  27. numba_cuda/numba/cuda/core/inline_closurecall.py +1889 -0
  28. numba_cuda/numba/cuda/core/interpreter.py +48 -26
  29. numba_cuda/numba/cuda/core/ir_utils.py +15 -26
  30. numba_cuda/numba/cuda/core/options.py +262 -0
  31. numba_cuda/numba/cuda/core/postproc.py +249 -0
  32. numba_cuda/numba/cuda/core/pythonapi.py +1868 -0
  33. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  34. numba_cuda/numba/cuda/core/rewrites/ir_print.py +90 -0
  35. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  36. numba_cuda/numba/cuda/core/rewrites/static_binop.py +40 -0
  37. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +187 -0
  38. numba_cuda/numba/cuda/core/rewrites/static_raise.py +98 -0
  39. numba_cuda/numba/cuda/core/ssa.py +496 -0
  40. numba_cuda/numba/cuda/core/targetconfig.py +329 -0
  41. numba_cuda/numba/cuda/core/tracing.py +231 -0
  42. numba_cuda/numba/cuda/core/transforms.py +952 -0
  43. numba_cuda/numba/cuda/core/typed_passes.py +738 -7
  44. numba_cuda/numba/cuda/core/typeinfer.py +1948 -0
  45. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  46. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  47. numba_cuda/numba/cuda/core/unsafe/eh.py +66 -0
  48. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  49. numba_cuda/numba/cuda/core/untyped_passes.py +1983 -0
  50. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  51. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  52. numba_cuda/numba/cuda/cpython/numbers.py +1474 -0
  53. numba_cuda/numba/cuda/cuda_paths.py +422 -246
  54. numba_cuda/numba/cuda/cudadecl.py +1 -1
  55. numba_cuda/numba/cuda/cudadrv/__init__.py +1 -1
  56. numba_cuda/numba/cuda/cudadrv/devicearray.py +2 -1
  57. numba_cuda/numba/cuda/cudadrv/driver.py +11 -140
  58. numba_cuda/numba/cuda/cudadrv/dummyarray.py +111 -24
  59. numba_cuda/numba/cuda/cudadrv/libs.py +5 -5
  60. numba_cuda/numba/cuda/cudadrv/mappings.py +1 -1
  61. numba_cuda/numba/cuda/cudadrv/nvrtc.py +19 -8
  62. numba_cuda/numba/cuda/cudadrv/nvvm.py +1 -4
  63. numba_cuda/numba/cuda/cudadrv/runtime.py +1 -1
  64. numba_cuda/numba/cuda/cudaimpl.py +5 -1
  65. numba_cuda/numba/cuda/debuginfo.py +85 -2
  66. numba_cuda/numba/cuda/decorators.py +3 -3
  67. numba_cuda/numba/cuda/descriptor.py +3 -4
  68. numba_cuda/numba/cuda/deviceufunc.py +66 -2
  69. numba_cuda/numba/cuda/dispatcher.py +18 -39
  70. numba_cuda/numba/cuda/flags.py +141 -1
  71. numba_cuda/numba/cuda/fp16.py +0 -2
  72. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  73. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  74. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  75. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  76. numba_cuda/numba/cuda/lowering.py +7 -144
  77. numba_cuda/numba/cuda/mathimpl.py +2 -1
  78. numba_cuda/numba/cuda/memory_management/nrt.py +43 -17
  79. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  80. numba_cuda/numba/cuda/models.py +9 -1
  81. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  82. numba_cuda/numba/cuda/np/npyfuncs.py +1807 -0
  83. numba_cuda/numba/cuda/np/numpy_support.py +553 -0
  84. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +59 -0
  85. numba_cuda/numba/cuda/nvvmutils.py +1 -1
  86. numba_cuda/numba/cuda/printimpl.py +12 -1
  87. numba_cuda/numba/cuda/random.py +1 -1
  88. numba_cuda/numba/cuda/serialize.py +1 -1
  89. numba_cuda/numba/cuda/simulator/__init__.py +1 -1
  90. numba_cuda/numba/cuda/simulator/api.py +1 -1
  91. numba_cuda/numba/cuda/simulator/compiler.py +4 -0
  92. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +1 -1
  93. numba_cuda/numba/cuda/simulator/kernelapi.py +1 -1
  94. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +14 -2
  95. numba_cuda/numba/cuda/target.py +35 -17
  96. numba_cuda/numba/cuda/testing.py +7 -19
  97. numba_cuda/numba/cuda/tests/__init__.py +1 -1
  98. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  99. numba_cuda/numba/cuda/tests/core/test_serialize.py +4 -4
  100. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +1 -1
  101. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +1 -1
  102. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +1 -1
  103. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +6 -3
  104. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +1 -1
  105. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +18 -2
  106. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +2 -1
  107. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +1 -1
  108. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +1 -1
  109. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +1 -1
  110. numba_cuda/numba/cuda/tests/cudapy/test_array.py +2 -1
  111. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1 -1
  112. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +539 -2
  113. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +81 -1
  114. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +1 -3
  115. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +1 -1
  116. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +1 -1
  117. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +2 -3
  118. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +130 -0
  119. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +1 -1
  120. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +1 -1
  121. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +293 -4
  122. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +1 -1
  123. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +1 -1
  124. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +1 -1
  125. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +1 -1
  126. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +2 -1
  127. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +18 -8
  128. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +23 -21
  129. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +10 -37
  130. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +1 -1
  131. numba_cuda/numba/cuda/tests/cudapy/test_math.py +1 -1
  132. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -1
  133. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +1 -1
  134. numba_cuda/numba/cuda/tests/cudapy/test_print.py +20 -0
  135. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +1 -1
  136. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +1 -1
  137. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +1 -1
  138. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +1 -1
  139. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +453 -0
  140. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +1 -1
  141. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  142. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +263 -2
  143. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +1 -1
  144. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +1 -1
  145. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +112 -6
  146. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +1 -1
  147. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +1 -1
  148. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +0 -2
  149. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +3 -2
  150. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +0 -2
  151. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +0 -2
  152. numba_cuda/numba/cuda/tests/nocuda/test_import.py +3 -1
  153. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +24 -12
  154. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +2 -1
  155. numba_cuda/numba/cuda/tests/support.py +55 -15
  156. numba_cuda/numba/cuda/tests/test_tracing.py +200 -0
  157. numba_cuda/numba/cuda/types.py +56 -0
  158. numba_cuda/numba/cuda/typing/__init__.py +9 -1
  159. numba_cuda/numba/cuda/typing/cffi_utils.py +55 -0
  160. numba_cuda/numba/cuda/typing/context.py +751 -0
  161. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  162. numba_cuda/numba/cuda/typing/npydecl.py +658 -0
  163. numba_cuda/numba/cuda/typing/templates.py +7 -6
  164. numba_cuda/numba/cuda/ufuncs.py +3 -3
  165. numba_cuda/numba/cuda/utils.py +6 -112
  166. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/METADATA +4 -3
  167. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/RECORD +171 -116
  168. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +0 -60
  169. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/WHEEL +0 -0
  170. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/licenses/LICENSE +0 -0
  171. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/licenses/LICENSE.numba +0 -0
  172. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,727 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ from collections import namedtuple, OrderedDict
5
+ import dis
6
+ import inspect
7
+ import itertools
8
+
9
+ from types import CodeType, ModuleType
10
+
11
+ from numba.core import errors, serialize
12
+ from numba.cuda import utils
13
+ from numba.cuda.utils import PYVERSION
14
+
15
+
16
# Python 3.12 introduced cache slots after some opcodes; the per-opcode table
# from ``opcode`` is needed to step over them when decoding.
if PYVERSION in ((3, 12), (3, 13)):
    from opcode import _inline_cache_entries

    # Instruction/opcode length in bytes
    INSTR_LEN = 2
elif PYVERSION not in ((3, 9), (3, 10), (3, 11)):
    # Anything outside the known interpreter versions is rejected at import.
    raise NotImplementedError(PYVERSION)
25
+
26
+
27
# Lightweight record types used by the bytecode decoder.
opcode_info = namedtuple("opcode_info", ("argsize",))
_ExceptionTableEntry = namedtuple(
    "_ExceptionTableEntry",
    ("start", "end", "target", "depth", "lasti"),
)

# The following offset is used as a hack to inject a NOP at the start of the
# bytecode, so that a function starting with `while True` will not have block-0
# as a jump target. The Lowerer puts argument initialization at block-0.
_FIXED_OFFSET = 2
36
+
37
+
38
def get_function_object(obj):
    """
    Return the Python function wrapped by *obj*.

    Objects that wrap a function should provide a "__numba__" magic
    attribute holding the name of the attribute that contains the actual
    Python function object. When that marker is missing (or falsy), *obj*
    itself is returned.
    """
    wrapped_attr_name = getattr(obj, "__numba__", None)
    return getattr(obj, wrapped_attr_name) if wrapped_attr_name else obj
48
+
49
+
50
def get_code_object(obj):
    """
    Return the code object of *obj* (shamelessly borrowed from llpython).

    ``__code__`` is preferred, ``func_code`` is the fallback, and ``None`` is
    returned when neither attribute exists.
    """
    legacy_code = getattr(obj, "func_code", None)
    return getattr(obj, "__code__", legacy_code)
53
+
54
+
55
def _as_opcodes(seq):
    """
    Translate opcode names into opcode numbers.

    Names that are not present in ``dis.opmap`` for the running interpreter
    are silently skipped; the order of the remaining ones is preserved.
    """
    return [dis.opmap[name] for name in seq if name in dis.opmap]
62
+
63
+
64
# Opcode classification sets, taken from what ``dis`` reports for the running
# interpreter.
JREL_OPS = frozenset(dis.hasjrel)
JABS_OPS = frozenset(dis.hasjabs)
JUMP_OPS = JREL_OPS.union(JABS_OPS)
# Opcodes treated as terminators; names unknown to this interpreter are
# skipped.
TERM_OPS = frozenset(
    dis.opmap[name]
    for name in ("RETURN_VALUE", "RAISE_VARARGS")
    if name in dis.opmap
)
EXTENDED_ARG = dis.EXTENDED_ARG
HAVE_ARGUMENT = dis.HAVE_ARGUMENT
70
+
71
+
72
class ByteCodeInst(object):
    """
    A single decoded bytecode instruction.

    Attributes
    ----------
    - offset:
        byte offset of opcode
    - next:
        byte offset of the instruction that follows
    - opcode:
        opcode integer value
    - opname:
        name of the opcode, looked up in ``dis.opname``
    - arg:
        instruction arg
    - lineno:
        -1 means unknown
    """

    __slots__ = ("offset", "next", "opcode", "opname", "arg", "lineno")

    def __init__(self, offset, opcode, arg, nextoffset):
        self.offset = offset
        self.next = nextoffset
        self.opcode = opcode
        self.opname = dis.opname[opcode]
        self.arg = arg
        # The line number is unknown until it is filled in later.
        self.lineno = -1

    @property
    def is_jump(self):
        """True when the opcode is a relative or absolute jump."""
        return self.opcode in JUMP_OPS

    @property
    def is_terminator(self):
        """True when the opcode is one of the terminator opcodes."""
        return self.opcode in TERM_OPS

    def get_jump_target(self):
        """
        Return the bytecode offset this jump instruction leads to.

        Raises ``NotImplementedError`` for an unsupported Python version.
        """
        # With Python 3.10 the addressing of "bytecode" instructions has
        # changed from using bytes to using 16-bit words instead. As a
        # consequence the code to determine where a jump will lead had to be
        # adapted.
        # See also:
        # https://bugs.python.org/issue26647
        # https://bugs.python.org/issue27129
        # https://github.com/python/cpython/pull/25069
        assert self.is_jump

        # Names of the opcodes that jump backwards on this interpreter.
        if PYVERSION == (3, 13):
            backward_names = ("JUMP_BACKWARD", "JUMP_BACKWARD_NO_INTERRUPT")
        elif PYVERSION == (3, 12):
            backward_names = ("JUMP_BACKWARD",)
        elif PYVERSION == (3, 11):
            backward_names = (
                "JUMP_BACKWARD",
                "POP_JUMP_BACKWARD_IF_TRUE",
                "POP_JUMP_BACKWARD_IF_FALSE",
                "POP_JUMP_BACKWARD_IF_NONE",
                "POP_JUMP_BACKWARD_IF_NOT_NONE",
            )
        elif PYVERSION in ((3, 9), (3, 10)):
            backward_names = ()
        else:
            raise NotImplementedError(PYVERSION)

        if self.opcode in [dis.opmap[name] for name in backward_names]:
            if PYVERSION == (3, 13):
                return self.next - (self.arg * 2)
            return self.offset - (self.arg - 1) * 2

        is_relative = self.opcode in JREL_OPS
        if not is_relative:
            assert self.opcode in JABS_OPS

        if PYVERSION == (3, 9):
            # Arguments are expressed in bytes.
            return self.next + self.arg if is_relative else self.arg
        # Arguments are expressed in 16-bit words; absolute targets are
        # computed as ``arg * 2 - 2``.
        return self.next + self.arg * 2 if is_relative else self.arg * 2 - 2

    def __repr__(self):
        return "%s(arg=%s, lineno=%d)" % (self.opname, self.arg, self.lineno)

    @property
    def block_effect(self):
        """Effect of the block stack
        Returns +1 (push), 0 (none) or -1 (pop)
        """
        name = self.opname
        if name == "POP_BLOCK":
            return -1
        return 1 if name.startswith("SETUP_") else 0
172
+
173
+
174
# Number of bytes consumed from the code string for the opcode, for its
# argument, and for the filler of an argument-less instruction.
CODE_LEN = ARG_LEN = NO_ARG_LEN = 1

OPCODE_NOP = dis.opmap["NOP"]
179
+
180
+
181
if PYVERSION in ((3, 13),):

    def _unpack_opargs(code):
        """
        Yield 4-tuples of
        (bytecode offset, opcode, argument, offset of next bytecode),
        built on top of ``dis._unpack_opargs``.
        """
        decoded = [
            (start, op, arg) for _, start, op, arg in dis._unpack_opargs(code)
        ]
        # An instruction ends where its successor starts; the last one ends
        # at the end of the code string.
        successors = [entry[0] for entry in decoded[1:]]
        successors.append(len(code))
        for (start, op, arg), following in zip(decoded, successors):
            yield (start, op, arg, following)

elif PYVERSION in ((3, 9), (3, 10), (3, 11), (3, 12)):
    # Adapted from Lib/dis.py
    def _unpack_opargs(code):
        """
        Returns a 4-int-tuple of
        (bytecode offset, opcode, argument, offset of next bytecode).
        """

        def cache_bytes(op):
            # Python 3.12 introduced cache slots. We need to account for
            # cache slots when we determine the offset of the next
            # opcode. The number of cache slots is specific to each
            # opcode and can be looked up in the _inline_cache_entries
            # dictionary.
            if PYVERSION in ((3, 12),):
                return _inline_cache_entries[op] * INSTR_LEN
            elif PYVERSION in ((3, 9), (3, 10), (3, 11)):
                return 0
            raise NotImplementedError(PYVERSION)

        end = len(code)
        pos = inst_start = 0
        carried = 0  # bits contributed by preceding EXTENDED_ARG opcodes
        while pos < end:
            op = code[pos]
            pos += CODE_LEN
            if op < HAVE_ARGUMENT:
                arg = None
                pos += NO_ARG_LEN
                pos += cache_bytes(op)
            else:
                arg = code[pos] | carried
                for k in range(ARG_LEN):
                    arg |= code[pos + k] << (8 * k)
                pos += ARG_LEN
                pos += cache_bytes(op)
                if op == EXTENDED_ARG:
                    # This is a deviation from what dis does...
                    # In python 3.11 it seems like EXTENDED_ARGs appear more
                    # often and are also used as jump targets. So as to not
                    # have to do "book keeping" for where EXTENDED_ARGs have
                    # been "skipped" they are replaced with NOPs so as to
                    # provide a legal jump target and also ensure that the
                    # bytecode offsets are correct.
                    yield (inst_start, OPCODE_NOP, arg, pos)
                    carried = arg << 8 * ARG_LEN
                    inst_start = pos
                    continue

            carried = 0
            yield (inst_start, op, arg, pos)
            inst_start = pos  # Mark inst offset at first extended
else:
    raise NotImplementedError(PYVERSION)
255
+
256
+
257
def _patched_opargs(bc_stream):
    """Patch the bytecode stream.

    - Adds a NOP bytecode at the start to avoid jump target being at the entry.

    *bc_stream* yields ``(offset, opcode, arg, nextoffset)`` tuples; the same
    shape is yielded back with both offsets shifted by ``_FIXED_OFFSET``.
    """
    # Injected NOP
    yield (0, OPCODE_NOP, None, _FIXED_OFFSET)
    for start, op, operand, following in bc_stream:
        # An absolute jump target has to move together with the stream.
        if op in JABS_OPS:
            operand = operand + _FIXED_OFFSET
        yield (start + _FIXED_OFFSET, op, operand, following + _FIXED_OFFSET)
270
+
271
+
272
class ByteCodeIter(object):
    """
    Iterator over the instructions of a code object.

    Each step yields an ``(offset, ByteCodeInst)`` pair, decoded from
    ``code.co_code`` after the stream has been patched by
    ``_patched_opargs``.
    """

    def __init__(self, code):
        self.code = code
        raw_stream = _unpack_opargs(self.code.co_code)
        self.iter = iter(_patched_opargs(raw_stream))

    def __iter__(self):
        return self

    def _fetch_opcode(self):
        """Pull the next raw tuple from the patched stream."""
        return next(self.iter)

    def next(self):
        """Return the next ``(offset, ByteCodeInst)`` pair."""
        offset, opcode, arg, nextoffset = self._fetch_opcode()
        inst = ByteCodeInst(
            offset=offset, opcode=opcode, arg=arg, nextoffset=nextoffset
        )
        return offset, inst

    __next__ = next

    def read_arg(self, size):
        """Assemble *size* items from the stream into a little-endian int."""
        # NOTE(review): ``self.iter`` yields 4-tuples, so this two-name
        # unpacking looks like it would raise ValueError if ever called --
        # confirm whether this method is still used before relying on it.
        buf = 0
        for shift_index in range(size):
            _offset, byte = next(self.iter)
            buf |= byte << (8 * shift_index)
        return buf
297
+
298
+
299
class _ByteCode(object):
    """
    The decoded bytecode of a function, and related information.
    """

    __slots__ = (
        "func_id",
        "co_names",
        "co_varnames",
        "co_consts",
        "co_cellvars",
        "co_freevars",
        "exception_entries",
        "table",
        "labels",
    )

    def __init__(self, func_id):
        code = func_id.code

        # Jump targets reported by dis, shifted to account for the injected
        # NOP; offset 0 is always a label.
        labels = {
            target + _FIXED_OFFSET for target in dis.findlabels(code.co_code)
        }
        labels.add(0)

        # A map of {offset: ByteCodeInst}
        table = OrderedDict(ByteCodeIter(code))
        self._compute_lineno(table, code)

        self.func_id = func_id
        for attr in (
            "co_names",
            "co_varnames",
            "co_consts",
            "co_cellvars",
            "co_freevars",
        ):
            setattr(self, attr, getattr(code, attr))

        self.table = table
        self.labels = sorted(labels)

    @classmethod
    def _compute_lineno(cls, table, code):
        """
        Compute the line numbers for all bytecode instructions.

        *table* is updated in place and also returned.
        """
        for raw_offset, lineno in dis.findlinestarts(code):
            inst = table.get(raw_offset + _FIXED_OFFSET)
            if inst is not None:
                inst.lineno = lineno
        # Instructions without their own line number inherit the most recent
        # known one, starting from the first line of the code object.
        current = code.co_firstlineno
        for inst in table.values():
            if inst.lineno is None or inst.lineno < 0:
                inst.lineno = current
            else:
                current = inst.lineno
        return table

    def __iter__(self):
        return iter(self.table.values())

    def __getitem__(self, offset):
        return self.table[offset]

    def __contains__(self, offset):
        return offset in self.table

    def dump(self):
        """
        Return a textual listing of the instruction table, one instruction
        per line; labels are marked with ">" and CACHE entries are omitted.
        """
        rows = []
        for offset, inst in self.table.items():
            if inst.opname == "CACHE":
                continue
            marker = ">" if inst.offset in self.labels else " "
            rows.append("%s %10s\t%s" % (marker, offset, inst))
        return "\n".join(rows)

    @classmethod
    def _compute_used_globals(cls, func, table, co_consts, co_names):
        """
        Compute the globals used by the function with the given
        bytecode table.
        """
        used = {}
        globs = func.__globals__
        builtins = globs.get("__builtins__", utils.builtins)
        if isinstance(builtins, ModuleType):
            builtins = builtins.__dict__
        # Look for LOAD_GLOBALs in the bytecode
        for inst in table.values():
            if inst.opname != "LOAD_GLOBAL":
                continue
            name = co_names[_fix_LOAD_GLOBAL_arg(inst.arg)]
            if name in used:
                continue
            try:
                used[name] = globs[name]
            except KeyError:
                # Not a module global: fall back to the builtins namespace.
                used[name] = builtins[name]
        # Add globals used by any nested code object
        for const in co_consts:
            if isinstance(const, CodeType):
                nested_table = OrderedDict(ByteCodeIter(const))
                used.update(
                    cls._compute_used_globals(
                        func, nested_table, const.co_consts, const.co_names
                    )
                )
        return used

    def get_used_globals(self):
        """
        Get a {name: value} map of the globals used by this code
        object and any nested code objects.
        """
        return self._compute_used_globals(
            self.func_id.func, self.table, self.co_consts, self.co_names
        )
417
+
418
+
419
def _fix_LOAD_GLOBAL_arg(arg):
    """
    Convert a LOAD_GLOBAL operand into the value used to index ``co_names``.

    On Python 3.9 and 3.10 the operand is returned unchanged; on 3.11-3.13
    its lowest bit is dropped. Raises ``NotImplementedError`` for any other
    version.
    """
    if PYVERSION in ((3, 9), (3, 10)):
        return arg
    if PYVERSION in ((3, 11), (3, 12), (3, 13)):
        return arg >> 1
    raise NotImplementedError(PYVERSION)
429
+
430
+
431
class ByteCodePy311(_ByteCode):
    """
    Decoded bytecode that additionally carries the exception table entries
    reported by ``dis.Bytecode``, shifted for the injected NOP.
    """

    def __init__(self, func_id):
        super().__init__(func_id)
        raw_entries = dis.Bytecode(func_id.code).exception_entries
        self.exception_entries = tuple(self.fixup_eh(e) for e in raw_entries)

    @staticmethod
    def fixup_eh(ent):
        """Return a copy of *ent* with start/end/target shifted."""
        # Patch up the exception table offset
        # because we add a NOP in _patched_opargs
        shift = _FIXED_OFFSET
        return dis._ExceptionTableEntry(
            start=ent.start + shift,
            end=ent.end + shift,
            target=ent.target + shift,
            depth=ent.depth,
            lasti=ent.lasti,
        )

    def find_exception_entry(self, offset):
        """
        Returns the exception entry for the given instruction offset

        Among the entries whose ``[start, end)`` range contains *offset*, the
        maximum ``(depth, entry)`` pair wins; ``None`` is returned when no
        entry covers the offset.
        """
        covering = [
            (ent.depth, ent)
            for ent in self.exception_entries
            if ent.start <= offset < ent.end
        ]
        if covering:
            return max(covering)[1]
        return None
461
+
462
+
463
class ByteCodePy312(ByteCodePy311):
    """
    Decoded bytecode whose exception table is post-processed: entries that
    belong to inlined comprehensions are removed, and for generators the
    entries pointing at the highest-offset handler are dropped.
    """

    def __init__(self, func_id):
        super().__init__(func_id)

        # initialize lazy property
        self._ordered_offsets = None

        # Fixup offsets for all exception entries.
        entries = [
            self.fixup_eh(e)
            for e in dis.Bytecode(func_id.code).exception_entries
        ]

        # Remove exceptions, innermost ones first
        # Can be done by using a stack
        entries = self.remove_build_list_swap_pattern(entries)

        # If this is a generator, we need to skip any exception table entries
        # that point to the exception handler with the highest offset.
        if func_id.is_generator:
            # Get the exception handler with the highest offset.
            max_exception_target = max([e.target for e in entries])
            # Remove any exception table entries that point to that exception
            # handler.
            entries = [e for e in entries if e.target != max_exception_target]

        self.exception_entries = tuple(entries)

    @property
    def ordered_offsets(self):
        """List of the instruction offsets in table order (built lazily)."""
        if not self._ordered_offsets:
            # Get an ordered list of offsets.
            self._ordered_offsets = [o for o in self.table]
        return self._ordered_offsets

    def remove_build_list_swap_pattern(self, entries):
        """Find the following bytecode pattern:

            BUILD_{LIST, MAP, SET}
            SWAP(2)
            FOR_ITER
            ...
            END_FOR
            SWAP(2)

        This pattern indicates that a list/dict/set comprehension has
        been inlined. In this case we can skip the exception blocks
        entirely along with the dead exceptions that it points to.
        A pair of exceptions that sandwiches these exceptions will
        also be merged into a single exception.

        Update for Python 3.13, the ending of the pattern has an extra
        POP_TOP:

            ...
            END_FOR
            POP_TOP
            SWAP(2)

        Update for Python 3.13.1, there's now a GET_ITER before FOR_ITER.
        This patches the GET_ITER to NOP to minimize changes downstream
        (e.g. array-comprehension).

        Parameters
        ----------
        entries : list
            Exception table entries, already shifted by ``fixup_eh``. The
            list may be mutated in place.

        Returns
        -------
        list
            The remaining exception table entries. As a side effect, every
            GET_ITER that is part of a *matched* pattern is replaced by a NOP
            in ``self.table``.
        """

        def pop_and_merge_exceptions(
            entries: list, entry_to_remove: _ExceptionTableEntry
        ):
            position = entries.index(entry_to_remove)
            lower_entry_idx = position - 1
            upper_entry_idx = position + 1

            # Merge the upper and lower exceptions if possible.
            if lower_entry_idx >= 0 and upper_entry_idx < len(entries):
                lower_entry = entries[lower_entry_idx]
                upper_entry = entries[upper_entry_idx]
                if lower_entry.target == upper_entry.target:
                    entries[lower_entry_idx] = _ExceptionTableEntry(
                        lower_entry.start,
                        upper_entry.end,
                        lower_entry.target,
                        lower_entry.depth,
                        upper_entry.lasti,
                    )
                    entries.remove(upper_entry)

            # Remove the exception entry.
            entries.remove(entry_to_remove)
            # Remove dead exceptions, if any, that the entry above may point to.
            entries = [
                e for e in entries if not e.start == entry_to_remove.target
            ]
            return entries

        # NOTE(review): the three SWAP checks below are written exactly as
        # they always were: ``not a == "SWAP" and b == 2`` binds as
        # ``(a != "SWAP") and (b == 2)``, which is presumably not the
        # intended ``not (a == "SWAP" and b == 2)``. The behaviour is kept
        # unchanged on purpose; tightening it could reject patterns that are
        # accepted today -- verify against real comprehension bytecode first.

        change_to_nop = set()
        work_remaining = True
        while work_remaining:
            # Temporarily set work_remaining to False, if we find a pattern
            # then work is not complete, hence we set it again to True.
            work_remaining = False
            for entry in entries.copy():
                # GET_ITER candidate seen while examining *this* entry only.
                # It must not survive a rejected entry, otherwise an
                # unrelated GET_ITER would be turned into a NOP once some
                # other entry matches.
                pending_nop = None

                # Check start of pattern, three instructions.
                # Work out the index of the instruction.
                index = self.ordered_offsets.index(entry.start)
                # If there is a BUILD_{LIST, MAP, SET} instruction at this
                # location.
                curr_inst = self.table[self.ordered_offsets[index]]
                if curr_inst.opname not in (
                    "BUILD_LIST",
                    "BUILD_MAP",
                    "BUILD_SET",
                ):
                    continue
                # Check if the BUILD_{LIST, MAP, SET} instruction is followed
                # by a SWAP(2).
                next_inst = self.table[self.ordered_offsets[index + 1]]
                if next_inst.opname != "SWAP" and next_inst.arg == 2:
                    continue
                next_inst = self.table[self.ordered_offsets[index + 2]]
                # Check if the SWAP is followed by a FOR_ITER
                # BUT Python3.13.1 introduced an extra GET_ITER.
                # If we see a GET_ITER here, check if the next thing is a
                # FOR_ITER.
                if next_inst.opname == "GET_ITER":
                    # Remember the inst to potentially be replaced by a NOP
                    pending_nop = next_inst
                    # Look up next instruction.
                    next_inst = self.table[self.ordered_offsets[index + 3]]

                if not next_inst.opname == "FOR_ITER":
                    continue

                if PYVERSION in ((3, 13),):
                    # Check end of pattern, two instructions.
                    # Check for the corresponding END_FOR, exception table end
                    # is non-inclusive, so subtract one.
                    index = self.ordered_offsets.index(entry.end)
                    curr_inst = self.table[self.ordered_offsets[index - 2]]
                    if not curr_inst.opname == "END_FOR":
                        continue
                    next_inst = self.table[self.ordered_offsets[index - 1]]
                    if not next_inst.opname == "POP_TOP":
                        continue
                    # END_FOR must be followed by SWAP(2)
                    next_inst = self.table[self.ordered_offsets[index]]
                    if next_inst.opname != "SWAP" and next_inst.arg == 2:
                        continue
                elif PYVERSION in ((3, 9), (3, 10), (3, 11), (3, 12)):
                    # Check end of pattern, two instructions.
                    # Check for the corresponding END_FOR, exception table end
                    # is non-inclusive, so subtract one.
                    index = self.ordered_offsets.index(entry.end)
                    curr_inst = self.table[self.ordered_offsets[index - 1]]
                    if not curr_inst.opname == "END_FOR":
                        continue
                    # END_FOR must be followed by SWAP(2)
                    next_inst = self.table[self.ordered_offsets[index]]
                    if next_inst.opname != "SWAP" and next_inst.arg == 2:
                        continue
                else:
                    raise NotImplementedError(PYVERSION)
                # If all conditions are met that means this exception entry
                # is for a list/dict/set comprehension and can be removed.
                # Also if there exist exception entries above and below this
                # entry pointing to the same target. those can be merged into
                # a single bigger exception block.
                entries = pop_and_merge_exceptions(entries, entry)
                work_remaining = True

                # Commit the NOP fix since we confirmed the suspect belongs
                # to comprehension code.
                if pending_nop is not None:
                    change_to_nop.add(pending_nop)

        # Complete fixes to NOPs
        for inst in change_to_nop:
            self.table[inst.offset] = ByteCodeInst(
                inst.offset, dis.opmap["NOP"], None, inst.next
            )
        return entries
641
+
642
+
643
# Select the bytecode class that matches the running interpreter.
if PYVERSION in ((3, 12), (3, 13)):
    ByteCode = ByteCodePy312
elif PYVERSION == (3, 11):
    ByteCode = ByteCodePy311
elif PYVERSION < (3, 11):
    ByteCode = _ByteCode
else:
    raise NotImplementedError(PYVERSION)
654
+
655
+
656
class FunctionIdentity(serialize.ReduceMixin):
    """
    A function's identity and metadata.

    Note this typically represents a function whose bytecode is
    being compiled, not necessarily the top-level user function
    (the two might be distinct).
    """

    # Shared counter handing out one id per created identity.
    _unique_ids = itertools.count(1)

    @classmethod
    def from_function(cls, pyfunc):
        """
        Create the FunctionIdentity of the given function.

        Raises ``errors.ByteCodeSupportError`` when no code object can be
        found for the function.
        """
        func = get_function_object(pyfunc)
        code = get_code_object(func)
        pysig = utils.pysignature(func)
        if not code:
            raise errors.ByteCodeSupportError(
                "%s does not provide its bytecode" % func
            )

        try:
            qualname = func.__qualname__
        except AttributeError:
            qualname = func.__name__

        module = inspect.getmodule(func)

        identity = cls()
        identity.func = func
        identity.func_qualname = qualname
        identity.func_name = qualname.split(".")[-1]
        identity.code = code
        identity.module = module
        if module is None:
            identity.modname = utils._dynamic_modname
        else:
            identity.modname = module.__name__
        identity.is_generator = inspect.isgeneratorfunction(func)
        identity.pysig = pysig
        identity.filename = code.co_filename
        identity.firstlineno = code.co_firstlineno
        identity.arg_count = len(pysig.parameters)
        identity.arg_names = list(pysig.parameters)

        # Even the same function definition can be compiled into
        # several different function objects with distinct closure
        # variables, so we make sure to disambiguate using an unique id.
        uid = next(cls._unique_ids)
        identity.unique_name = "{}${}".format(identity.func_qualname, uid)
        identity.unique_id = uid

        return identity

    def derive(self):
        """Copy the object and increment the unique counter."""
        return self.from_function(self.func)

    def _reduce_states(self):
        """
        NOTE: part of ReduceMixin protocol
        """
        return {"pyfunc": self.func}

    @classmethod
    def _rebuild(cls, pyfunc):
        """
        NOTE: part of ReduceMixin protocol
        """
        return cls.from_function(pyfunc)