pyvex 9.2.193__cp310-cp310-macosx_10_12_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. pyvex/__init__.py +92 -0
  2. pyvex/_register_info.py +1800 -0
  3. pyvex/arches.py +94 -0
  4. pyvex/block.py +697 -0
  5. pyvex/const.py +426 -0
  6. pyvex/const_val.py +26 -0
  7. pyvex/data_ref.py +55 -0
  8. pyvex/enums.py +156 -0
  9. pyvex/errors.py +31 -0
  10. pyvex/expr.py +974 -0
  11. pyvex/include/libvex.h +1029 -0
  12. pyvex/include/libvex_basictypes.h +236 -0
  13. pyvex/include/libvex_emnote.h +142 -0
  14. pyvex/include/libvex_guest_amd64.h +252 -0
  15. pyvex/include/libvex_guest_arm.h +224 -0
  16. pyvex/include/libvex_guest_arm64.h +203 -0
  17. pyvex/include/libvex_guest_mips32.h +175 -0
  18. pyvex/include/libvex_guest_mips64.h +173 -0
  19. pyvex/include/libvex_guest_offsets.h +941 -0
  20. pyvex/include/libvex_guest_ppc32.h +298 -0
  21. pyvex/include/libvex_guest_ppc64.h +343 -0
  22. pyvex/include/libvex_guest_riscv64.h +148 -0
  23. pyvex/include/libvex_guest_s390x.h +201 -0
  24. pyvex/include/libvex_guest_tilegx.h +149 -0
  25. pyvex/include/libvex_guest_x86.h +322 -0
  26. pyvex/include/libvex_ir.h +3113 -0
  27. pyvex/include/libvex_s390x_common.h +123 -0
  28. pyvex/include/libvex_trc_values.h +99 -0
  29. pyvex/include/pyvex.h +96 -0
  30. pyvex/lib/libpyvex.dylib +0 -0
  31. pyvex/lifting/__init__.py +18 -0
  32. pyvex/lifting/gym/README.md +7 -0
  33. pyvex/lifting/gym/__init__.py +5 -0
  34. pyvex/lifting/gym/aarch64_spotter.py +40 -0
  35. pyvex/lifting/gym/arm_spotter.py +427 -0
  36. pyvex/lifting/gym/x86_spotter.py +129 -0
  37. pyvex/lifting/libvex.py +117 -0
  38. pyvex/lifting/lift_function.py +304 -0
  39. pyvex/lifting/lifter.py +124 -0
  40. pyvex/lifting/post_processor.py +16 -0
  41. pyvex/lifting/util/__init__.py +14 -0
  42. pyvex/lifting/util/instr_helper.py +422 -0
  43. pyvex/lifting/util/lifter_helper.py +154 -0
  44. pyvex/lifting/util/syntax_wrapper.py +312 -0
  45. pyvex/lifting/util/vex_helper.py +301 -0
  46. pyvex/lifting/zerodivision.py +71 -0
  47. pyvex/native.py +63 -0
  48. pyvex/py.typed +1 -0
  49. pyvex/stmt.py +740 -0
  50. pyvex/types.py +48 -0
  51. pyvex/utils.py +63 -0
  52. pyvex/vex_ffi.py +1453 -0
  53. pyvex-9.2.193.dist-info/METADATA +181 -0
  54. pyvex-9.2.193.dist-info/RECORD +59 -0
  55. pyvex-9.2.193.dist-info/WHEEL +6 -0
  56. pyvex-9.2.193.dist-info/licenses/LICENSE +24 -0
  57. pyvex-9.2.193.dist-info/licenses/pyvex_c/LICENSE +339 -0
  58. pyvex-9.2.193.dist-info/licenses/vex/LICENSE.GPL +340 -0
  59. pyvex-9.2.193.dist-info/licenses/vex/LICENSE.README +23 -0
@@ -0,0 +1,304 @@
1
+ import logging
2
+ from collections import defaultdict
3
+ from typing import DefaultDict
4
+
5
+ from pyvex import const
6
+ from pyvex.block import IRSB
7
+ from pyvex.const import vex_int_class
8
+ from pyvex.errors import LiftingException, NeedStatementsNotification, PyVEXError, SkipStatementsError
9
+ from pyvex.expr import Const
10
+ from pyvex.native import ffi
11
+ from pyvex.types import LiftSource, PyLiftSource
12
+
13
+ from .lifter import Lifter
14
+ from .post_processor import Postprocessor
15
+
16
log = logging.getLogger(__name__)

# Registries filled in by register(). Both are keyed by architecture name (the value compared against
# ``arch.name`` when lifting) and hold classes, not instances, in registration order.
# Being defaultdicts, looking up an architecture with nothing registered yields an empty list.
lifters: DefaultDict[str, list[type[Lifter]]] = defaultdict(list)
postprocessors: DefaultDict[str, list[type[Postprocessor]]] = defaultdict(list)
20
+
21
+
22
def lift(
    data: LiftSource,
    addr,
    arch,
    max_bytes=None,
    max_inst=None,
    bytes_offset=0,
    opt_level=1,
    traceflags=0,
    strict_block_end=True,
    inner=False,
    skip_stmts=False,
    collect_data_refs=False,
    cross_insn_opt=True,
    load_from_ro_regions=False,
    const_prop=False,
):
    """
    Recursively lifts blocks using the registered lifters and postprocessors. Tries each lifter in the order in
    which they are registered on the data to lift.

    If a lifter raises a LiftingException on the data, it is skipped.
    If it succeeds and returns a block with a jumpkind of Ijk_NoDecode, all of the lifters are tried on the rest
    of the data and if they work, their output is appended to the first block.

    :param data:            The bytes to lift as either a python string of bytes or a cffi buffer object.
    :param addr:            The starting address of the block. Affects the IMarks.
    :param arch:            The arch to lift the data as.
    :param max_bytes:       The maximum number of bytes to lift. If set to None, no byte limit is used. Required
                            when `data` is a cffi pointer.
    :param max_inst:        The maximum number of instructions to lift. If set to None, no instruction limit is used.
    :param bytes_offset:    The offset into `data` to start lifting at.
    :param opt_level:       The level of optimization to apply to the IR, -1 through 2. -1 is the strictest
                            unoptimized level, 0 is unoptimized but will perform some lookahead/lookbehind
                            optimizations, 1 performs constant propagation, and 2 performs loop unrolling,
                            which honestly doesn't make much sense in the context of pyvex. The default is 1.
    :param traceflags:      The libVEX traceflags, controlling VEX debug prints.
    :param strict_block_end: Should the LibVEX arm-thumb split block at some instructions, for example CB{N}Z.
    :param inner:           True when this call lifts the continuation of an enclosing block. Postprocessors are
                            only run by the outermost (``inner=False``) call.
    :param skip_stmts:      Should the lifter skip transferring IRStmts from C to Python. The block is re-lifted
                            with statements if a lifter or postprocessor turns out to need them.
    :param collect_data_refs: Should the LibVEX lifter collect data references in C.
    :param cross_insn_opt:  If cross-instruction-boundary optimizations are allowed or not.
    :param load_from_ro_regions: Forwarded unchanged to every lifter; its meaning is defined by the lifter.
    :param const_prop:      Forwarded unchanged to every lifter; its meaning is defined by the lifter.
    :return:                The lifted IRSB. If no registered lifter could lift anything, an empty block with
                            a jumpkind of Ijk_NoDecode is returned.
    :raises PyVEXError:     If there is no data to lift, or a cffi pointer is given without `max_bytes`.
    :raises TypeError:      If `data` is a unicode string, or a postprocessor asks for statements although
                            they are already available.
    :raises RuntimeError:   If a registered lifter sets neither REQUIRE_DATA_C nor REQUIRE_DATA_PY.

    .. note:: Explicitly specifying the number of instructions to lift (`max_inst`) may not always work
              exactly as expected. For example, on MIPS, it is meaningless to lift a branch or jump
              instruction without its delay slot. VEX attempts to Do The Right Thing by possibly decoding
              fewer instructions than requested. Specifically, this means that lifting a branch or jump
              on MIPS as a single instruction (`max_inst=1`) will result in an empty IRSB, and subsequent
              attempts to run this block will raise `SimIRSBError('Empty IRSB passed to SimIRSB.')`.

    .. note:: If no instruction and byte limit is used, pyvex will continue lifting the block until the block
              ends properly or until it runs out of data to lift.
    """
    if max_bytes is not None and max_bytes <= 0:
        raise PyVEXError("Cannot lift block with no data (max_bytes <= 0)")

    if not data:
        raise PyVEXError("Cannot lift block with no data (data is empty)")

    if isinstance(data, str):
        raise TypeError("Cannot pass unicode string as data to lifter")

    py_data: PyLiftSource | None
    if isinstance(data, (bytes, bytearray, memoryview)):
        py_data = data
        c_data = None
    else:
        if max_bytes is None:
            raise PyVEXError("Cannot lift block with ffi pointer and no size (max_bytes is None)")
        c_data = data
        py_data = None

    # Options that every recursive lift() call must inherit unchanged. Keeping them in one place guarantees
    # that none of them (e.g. cross_insn_opt) is accidentally dropped on a re-lift or a continuation lift.
    # The caller's original opt_level is forwarded (not the clamped one below), so that opt_level == -1 keeps
    # disabling arch optimizations in recursive calls as well.
    inherited_options = {
        "bytes_offset": bytes_offset,
        "opt_level": opt_level,
        "traceflags": traceflags,
        "strict_block_end": strict_block_end,
        "collect_data_refs": collect_data_refs,
        "cross_insn_opt": cross_insn_opt,
        "load_from_ro_regions": load_from_ro_regions,
        "const_prop": const_prop,
    }

    # Keyword options handed to Lifter.lift() for every lifter that is tried.
    lifter_options = {
        "collect_data_refs": collect_data_refs,
        "cross_insn_opt": cross_insn_opt,
        "load_from_ro_regions": load_from_ro_regions,
        "const_prop": const_prop,
    }

    allow_arch_optimizations = True
    vex_opt_level = opt_level
    # In order to attempt to preserve the property that
    # VEX lifts the same bytes to the same IR at all times when optimizations are disabled
    # we hack off all of VEX's non-IROpt optimizations when opt_level == -1.
    # This is intended to enable comparisons of the lifted IR between code that happens to be
    # found in different contexts.
    if opt_level < 0:
        allow_arch_optimizations = False
        vex_opt_level = 0

    for lifter in lifters[arch.name]:
        try:
            u_data: LiftSource = data
            if lifter.REQUIRE_DATA_C:
                if c_data is None:
                    assert py_data is not None
                    if isinstance(py_data, (bytearray, memoryview)):
                        u_data = ffi.from_buffer(ffi.BVoidP, py_data)
                    else:
                        # bytes objects are copied with 8 trailing NUL bytes before being handed to C
                        u_data = ffi.from_buffer(ffi.BVoidP, py_data + b"\0" * 8)
                    max_bytes = min(len(py_data), max_bytes) if max_bytes is not None else len(py_data)
                else:
                    u_data = c_data
                skip = 0
            elif lifter.REQUIRE_DATA_PY:
                if bytes_offset and arch.name.startswith("ARM") and (addr & 1) == 1:
                    # odd address on ARM: keep one extra byte in front of the slice
                    skip = bytes_offset - 1
                else:
                    skip = bytes_offset
                if py_data is None:
                    assert c_data is not None
                    if max_bytes is None:
                        log.debug("Cannot create py_data from c_data when no max length is given")
                        continue
                    u_data = ffi.buffer(c_data + skip, max_bytes)[:]
                else:
                    if max_bytes is None:
                        u_data = py_data[skip:]
                    else:
                        u_data = py_data[skip : skip + max_bytes]
            else:
                # `lifter` is a class, so report its own name rather than the name of its metaclass
                raise RuntimeError(
                    "Incorrect lifter configuration. What type of data does %s expect?" % lifter.__name__
                )

            lifter_args = (
                u_data,
                bytes_offset - skip,
                max_bytes,
                max_inst,
                vex_opt_level,
                traceflags,
                allow_arch_optimizations,
                strict_block_end,
            )
            try:
                final_irsb = lifter(arch, addr).lift(*lifter_args, skip_stmts=skip_stmts, **lifter_options)
            except SkipStatementsError:
                # The lifter cannot work without statements: retry the same lifter with statements enabled.
                assert skip_stmts is True
                final_irsb = lifter(arch, addr).lift(*lifter_args, skip_stmts=False, **lifter_options)
            break
        except LiftingException as ex:
            log.debug("Lifting Exception: %s", ex)
            continue
    else:
        # No lifter succeeded: report an empty, undecodable block at addr.
        final_irsb = IRSB.empty_block(
            arch,
            addr,
            size=0,
            nxt=Const(const.vex_int_class(arch.bits)(addr)),
            jumpkind="Ijk_NoDecode",
        )
        final_irsb.invalidate_direct_next()
        return final_irsb

    if final_irsb.size > 0 and final_irsb.jumpkind == "Ijk_NoDecode":
        # We have decoded a few bytes before we hit an undecodeable instruction.

        # Determine if this is an intentional NoDecode, like the ud2 instruction on AMD64
        nodecode_addr_expr = final_irsb.next
        if type(nodecode_addr_expr) is Const:
            nodecode_addr = nodecode_addr_expr.con.value
            next_irsb_start_addr = addr + final_irsb.size
            if nodecode_addr != next_irsb_start_addr:
                # The last instruction of the IRSB has a non-zero length. This is an intentional NoDecode.
                # The very last instruction has been decoded
                # NOTE(review): the two assignments below look like no-ops; kept as in the original.
                final_irsb.jumpkind = "Ijk_NoDecode"
                final_irsb.next = final_irsb.next
                final_irsb.invalidate_direct_next()
                return final_irsb

        # Decode more bytes
        if skip_stmts:
            # When gymrat will be invoked, we will merge future basic blocks to the current basic block. In this case,
            # statements are usually required.
            # TODO: In the future, we may further optimize it to handle cases where getting statements in gymrat is not
            # TODO: required.
            return lift(
                data,
                addr,
                arch,
                max_bytes=max_bytes,
                max_inst=max_inst,
                inner=inner,
                skip_stmts=False,
                **inherited_options,
            )

        next_addr = addr + final_irsb.size
        if max_bytes is not None:
            max_bytes -= final_irsb.size
        if isinstance(data, (bytes, bytearray, memoryview)):
            data_left = data[final_irsb.size :]
        else:
            data_left = data + final_irsb.size
        if max_inst is not None:
            max_inst -= final_irsb.instructions
        if (max_bytes is None or max_bytes > 0) and (max_inst is None or max_inst > 0) and data_left:
            more_irsb = lift(
                data_left,
                next_addr,
                arch,
                max_bytes=max_bytes,
                max_inst=max_inst,
                inner=True,
                skip_stmts=False,
                **inherited_options,
            )
            if more_irsb.size:
                # Successfully decoded more bytes
                final_irsb.extend(more_irsb)
        elif max_bytes == 0:
            # We have no more bytes left. Mark the jumpkind of the IRSB as Ijk_Boring
            if final_irsb.size > 0 and final_irsb.jumpkind == "Ijk_NoDecode":
                final_irsb.jumpkind = "Ijk_Boring"
                final_irsb.next = Const(vex_int_class(arch.bits)(final_irsb.addr + final_irsb.size))

    if not inner:
        for postprocessor in postprocessors[arch.name]:
            try:
                postprocessor(final_irsb).postprocess()
            except NeedStatementsNotification as e:
                # The post-processor cannot work without statements. Re-lift the current block with skip_stmts=False
                if not skip_stmts:
                    # sanity check
                    # Why does the post-processor raise NeedStatementsNotification when skip_stmts is False?
                    raise TypeError(
                        "Bad post-processor %s: "
                        "NeedStatementsNotification is raised when statements are available."
                        % postprocessor.__name__
                    ) from e

                # Re-lift the current IRSB. max_bytes/max_inst have not been decremented at this point: the
                # continuation path above is never taken while skip_stmts is True.
                return lift(
                    data,
                    addr,
                    arch,
                    max_bytes=max_bytes,
                    max_inst=max_inst,
                    inner=inner,
                    skip_stmts=False,
                    **inherited_options,
                )
            except LiftingException:
                continue

    return final_irsb
289
+
290
+
291
def register(lifter, arch_name):
    """
    Registers a Lifter or Postprocessor to be used by pyvex. Lifters are given priority based on the order
    in which they are registered. Postprocessors will be run in registration order.

    A class that derives from both Lifter and Postprocessor is registered in both roles. A class that derives
    from neither is not registered; a warning is logged instead of silently ignoring it.

    :param lifter:      The Lifter or Postprocessor class (not an instance) to register.
    :type lifter:       :class:`Lifter` or :class:`Postprocessor`
    :param arch_name:   The name of the architecture to register the class for.
    :type arch_name:    str
    """
    registered = False
    if issubclass(lifter, Lifter):
        log.debug("Registering lifter %s for architecture %s.", lifter.__name__, arch_name)
        lifters[arch_name].append(lifter)
        registered = True
    if issubclass(lifter, Postprocessor):
        log.debug("Registering postprocessor %s for architecture %s.", lifter.__name__, arch_name)
        postprocessors[arch_name].append(lifter)
        registered = True
    if not registered:
        log.warning(
            "Not registering %s for architecture %s: it is neither a Lifter nor a Postprocessor.",
            lifter.__name__,
            arch_name,
        )
@@ -0,0 +1,124 @@
1
+ from pyvex.block import IRSB
2
+ from pyvex.types import Arch, LiftSource
3
+
4
+ # pylint:disable=attribute-defined-outside-init
5
+
6
+
7
class Lifter:
    """
    A lifter is a class of methods for processing a block.

    :ivar arch:             The architecture the data is lifted as.
    :ivar addr:             The starting address of the block.
    :ivar data:             The bytes to lift as either a python string of bytes or a cffi buffer object.
    :ivar bytes_offset:     The offset into `data` to start lifting at.
    :ivar max_bytes:        The maximum number of bytes to lift. If set to None, no byte limit is used.
    :ivar max_inst:         The maximum number of instructions to lift. If set to None, no instruction limit is used.
    :ivar opt_level:        The level of optimization to apply to the IR, 0-2. Most likely will be ignored in any lifter
                            other than LibVEX.
    :ivar traceflags:       The libVEX traceflags, controlling VEX debug prints. Most likely will be ignored in any
                            lifter other than LibVEX.
    :ivar allow_arch_optimizations:   Should the LibVEX lifter be allowed to perform lift-time preprocessing
                            optimizations (e.g., lookback ITSTATE optimization on THUMB)
                            Most likely will be ignored in any lifter other than LibVEX.
    :ivar strict_block_end: Should the LibVEX arm-thumb split block at some instructions, for example CB{N}Z.
    :ivar skip_stmts:       Should LibVEX ignore statements.
    :ivar collect_data_refs: Should the LibVEX lifter collect data references in C.
    :ivar cross_insn_opt:   If cross-instruction-boundary optimizations are allowed or not.
    :ivar load_from_ro_regions: Stored as passed to `lift`; its meaning is defined by the concrete lifter.
    :ivar const_prop:       Stored as passed to `lift`; its meaning is defined by the concrete lifter.
    :ivar disasm:           Should the GymratLifter generate disassembly during lifting.
    :ivar dump_irsb:        Should the GymratLifter log the lifted IRSB.
    :ivar irsb:             The block being built; `_lift` is expected to leave the lifted IRSB here.
    """

    # NOTE: the docstring must stay the first statement of the class body, otherwise it is not picked up
    # as ``Lifter.__doc__``.
    __slots__ = (
        "data",
        "bytes_offset",
        "opt_level",
        "traceflags",
        "allow_arch_optimizations",
        "strict_block_end",
        "collect_data_refs",
        "max_inst",
        "max_bytes",
        "skip_stmts",
        "irsb",
        "arch",
        "addr",
        "cross_insn_opt",
        "load_from_ro_regions",
        "const_prop",
        "disasm",
        "dump_irsb",
    )

    # Subclasses set one of these flags to declare which form of `data` they expect:
    # REQUIRE_DATA_C for a cffi buffer object, REQUIRE_DATA_PY for python bytes.
    REQUIRE_DATA_C = False
    REQUIRE_DATA_PY = False

    def __init__(self, arch: Arch, addr: int):
        """
        :param arch: The architecture to lift the data as.
        :param addr: The starting address of the block.
        """
        self.arch: Arch = arch
        self.addr: int = addr

    def lift(
        self,
        data: LiftSource,
        bytes_offset: int | None = None,
        max_bytes: int | None = None,
        max_inst: int | None = None,
        opt_level: int | float = 1,
        traceflags: int | None = None,
        allow_arch_optimizations: bool | None = None,
        strict_block_end: bool | None = None,
        skip_stmts: bool = False,
        collect_data_refs: bool = False,
        cross_insn_opt: bool = True,
        load_from_ro_regions: bool = False,
        const_prop: bool = False,
        disasm: bool = False,
        dump_irsb: bool = False,
    ):
        """
        Wrapper around the `_lift` method on Lifters. Should not be overridden in child classes.

        Stores every argument on the instance, starts from an empty block at ``self.addr``, calls `_lift`
        and returns ``self.irsb``.

        :param data:                The bytes to lift as either a python string of bytes or a cffi buffer object.
        :param bytes_offset:        The offset into `data` to start lifting at.
        :param max_bytes:           The maximum number of bytes to lift. If set to None, no byte limit is used.
        :param max_inst:            The maximum number of instructions to lift. If set to None, no instruction limit is
                                    used.
        :param opt_level:           The level of optimization to apply to the IR, 0-2. Most likely will be ignored in
                                    any lifter other than LibVEX.
        :param traceflags:          The libVEX traceflags, controlling VEX debug prints. Most likely will be ignored in
                                    any lifter other than LibVEX.
        :param allow_arch_optimizations:   Should the LibVEX lifter be allowed to perform lift-time preprocessing
                                    optimizations (e.g., lookback ITSTATE optimization on THUMB) Most likely will be
                                    ignored in any lifter other than LibVEX.
        :param strict_block_end:    Should the LibVEX arm-thumb split block at some instructions, for example CB{N}Z.
        :param skip_stmts:          Should the lifter skip transferring IRStmts from C to Python.
        :param collect_data_refs:   Should the LibVEX lifter collect data references in C.
        :param cross_insn_opt:      If cross-instruction-boundary optimizations are allowed or not.
        :param load_from_ro_regions: Stored for `_lift` to consult; its meaning is defined by the concrete lifter.
        :param const_prop:          Stored for `_lift` to consult; its meaning is defined by the concrete lifter.
        :param disasm:              Should the GymratLifter generate disassembly during lifting.
        :param dump_irsb:           Should the GymratLifter log the lifted IRSB.
        :return:                    The lifted block, i.e. ``self.irsb`` after `_lift` has run.
        """
        irsb: IRSB = IRSB.empty_block(self.arch, self.addr)
        self.data = data
        self.bytes_offset = bytes_offset
        self.opt_level = opt_level
        self.traceflags = traceflags
        self.allow_arch_optimizations = allow_arch_optimizations
        self.strict_block_end = strict_block_end
        self.collect_data_refs = collect_data_refs
        self.max_inst = max_inst
        self.max_bytes = max_bytes
        self.skip_stmts = skip_stmts
        self.irsb = irsb
        self.cross_insn_opt = cross_insn_opt
        self.load_from_ro_regions = load_from_ro_regions
        self.const_prop = const_prop
        self.disasm = disasm
        self.dump_irsb = dump_irsb
        self._lift()
        return self.irsb

    def _lift(self):
        """
        Lifts the data using the information passed into `lift`. Should be overridden in child classes.

        Should set the lifted IRSB to self.irsb.
        If a lifter raises a LiftingException on the data, this signals that the lifter cannot lift this data and arch
        and the lifter is skipped.
        If a lifter can lift any amount of data, it should lift it and return the lifted block with a jumpkind of
        Ijk_NoDecode, signalling to pyvex that other lifters should be used on the undecodable data.

        :raises NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError()
@@ -0,0 +1,16 @@
1
+ #
2
+ # The post-processor base class
3
+ #
4
+
5
+
6
class Postprocessor:
    """
    Base class for post-processors, which get a chance to modify an IRSB after it has been lifted.

    An instance wraps exactly one block, available as ``self.irsb``.
    """

    def __init__(self, irsb):
        # the block this post-processor operates on
        self.irsb = irsb

    def postprocess(self):
        """
        Modify the irsb

        All of the postprocessors will be used in the order that they are registered

        The base implementation does nothing; subclasses override it.
        """
        return None
@@ -0,0 +1,14 @@
1
+ from .instr_helper import Instruction
2
+ from .lifter_helper import GymratLifter, ParseError
3
+ from .syntax_wrapper import VexValue
4
+ from .vex_helper import JumpKind, Type
5
+
6
# Public names re-exported by this package; each name is listed exactly once.
__all__ = [
    "Type",
    "JumpKind",
    "VexValue",
    "ParseError",
    "Instruction",
    "GymratLifter",
]
+ ]