pyvex 9.2.189__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. pyvex/__init__.py +92 -0
  2. pyvex/_register_info.py +1800 -0
  3. pyvex/arches.py +94 -0
  4. pyvex/block.py +697 -0
  5. pyvex/const.py +426 -0
  6. pyvex/const_val.py +26 -0
  7. pyvex/data_ref.py +55 -0
  8. pyvex/enums.py +156 -0
  9. pyvex/errors.py +31 -0
  10. pyvex/expr.py +974 -0
  11. pyvex/include/libvex.h +1029 -0
  12. pyvex/include/libvex_basictypes.h +236 -0
  13. pyvex/include/libvex_emnote.h +142 -0
  14. pyvex/include/libvex_guest_amd64.h +252 -0
  15. pyvex/include/libvex_guest_arm.h +224 -0
  16. pyvex/include/libvex_guest_arm64.h +203 -0
  17. pyvex/include/libvex_guest_mips32.h +175 -0
  18. pyvex/include/libvex_guest_mips64.h +173 -0
  19. pyvex/include/libvex_guest_offsets.h +941 -0
  20. pyvex/include/libvex_guest_ppc32.h +298 -0
  21. pyvex/include/libvex_guest_ppc64.h +343 -0
  22. pyvex/include/libvex_guest_riscv64.h +148 -0
  23. pyvex/include/libvex_guest_s390x.h +201 -0
  24. pyvex/include/libvex_guest_tilegx.h +149 -0
  25. pyvex/include/libvex_guest_x86.h +322 -0
  26. pyvex/include/libvex_ir.h +3113 -0
  27. pyvex/include/libvex_s390x_common.h +123 -0
  28. pyvex/include/libvex_trc_values.h +99 -0
  29. pyvex/include/pyvex.h +96 -0
  30. pyvex/lib/pyvex.dll +0 -0
  31. pyvex/lib/pyvex.lib +0 -0
  32. pyvex/lifting/__init__.py +18 -0
  33. pyvex/lifting/gym/README.md +7 -0
  34. pyvex/lifting/gym/__init__.py +5 -0
  35. pyvex/lifting/gym/aarch64_spotter.py +40 -0
  36. pyvex/lifting/gym/arm_spotter.py +427 -0
  37. pyvex/lifting/gym/x86_spotter.py +129 -0
  38. pyvex/lifting/libvex.py +117 -0
  39. pyvex/lifting/lift_function.py +304 -0
  40. pyvex/lifting/lifter.py +124 -0
  41. pyvex/lifting/post_processor.py +16 -0
  42. pyvex/lifting/util/__init__.py +14 -0
  43. pyvex/lifting/util/instr_helper.py +422 -0
  44. pyvex/lifting/util/lifter_helper.py +154 -0
  45. pyvex/lifting/util/syntax_wrapper.py +312 -0
  46. pyvex/lifting/util/vex_helper.py +301 -0
  47. pyvex/lifting/zerodivision.py +71 -0
  48. pyvex/native.py +63 -0
  49. pyvex/py.typed +1 -0
  50. pyvex/stmt.py +740 -0
  51. pyvex/types.py +48 -0
  52. pyvex/utils.py +63 -0
  53. pyvex/vex_ffi.py +1452 -0
  54. pyvex-9.2.189.dist-info/METADATA +181 -0
  55. pyvex-9.2.189.dist-info/RECORD +60 -0
  56. pyvex-9.2.189.dist-info/WHEEL +5 -0
  57. pyvex-9.2.189.dist-info/licenses/LICENSE +24 -0
  58. pyvex-9.2.189.dist-info/licenses/pyvex_c/LICENSE +339 -0
  59. pyvex-9.2.189.dist-info/licenses/vex/LICENSE.GPL +340 -0
  60. pyvex-9.2.189.dist-info/licenses/vex/LICENSE.README +23 -0
@@ -0,0 +1,422 @@
1
+ import abc
2
+ import string
3
+
4
+ import bitstring
5
+
6
+ from pyvex.expr import IRExpr, RdTmp
7
+
8
+ from .lifter_helper import ParseError
9
+ from .syntax_wrapper import VexValue
10
+ from .vex_helper import IRSBCustomizer, JumpKind, vex_int_class
11
+
12
+
13
class Instruction(metaclass=abc.ABCMeta):
    """
    Base class for an Instruction.

    You should make a subclass of this for each instruction you want to lift. These classes will contain the
    "semantics" of the instruction, that is, what it _does_, in terms of the VEX IR.

    You may want to subclass this for your architecture, and add arch-specific handling for parsing, argument
    resolution, etc., and have instructions subclass that instead.

    The core parsing functionality is done via ``bin_format``. Each instruction should be a subclass of
    ``Instruction`` and will be parsed by comparing bits in the provided bitstream to symbols in the ``bin_format``
    member of the class.
    "Bin formats" are strings of symbols, like those you'd find in an ISA document, such as "0010rrrrddddffmm".
    0 or 1 specify hard-coded bits that must match for an instruction to match.
    Any letters specify arguments, grouped by letter, which will be parsed and provided as bitstrings in the
    ``data`` member of the class as a dictionary.
    So, in our example, the bits ``0010110101101001``, applied to format string ``0010rrrrddddffmm``
    will result in the following in ``self.data``:

        {'r': '1101',
         'd': '0110',
         'f': '10',
         'm': '01'}

    Implement compute_result to provide the "meat" of what your instruction does.
    You can also implement it in your arch-specific subclass of ``Instruction``, to handle things common to all
    instructions, and provide instruction implementations elsewhere.

    We provide the ``VexValue`` syntax wrapper to make expressing instruction semantics easy.
    You first convert the bitstring arguments into ``VexValue``s using the provided convenience methods
    (``self.get/put/load/store/etc.``)
    This loads the register from the actual registers into a temporary value we can work with.
    You can then write it back to a register when you're done.
    For example, if you have the register in ``r``, as above, you can make a ``VexValue`` like this:

        r = int(self.data['r'], 2)  # we get bits corresponding to `r` bits and convert it to an int
        r_vv = self.get(r, Type.int_32)

    If you then had an instruction to increment ``r``, you could simply:

        r_vv += 1

    You could then write it back to the register like this:

        self.put(r_vv, r)

    Note that most architectures have special flags that get set differently for each instruction, make sure to
    implement those as well (override ``compute_flags()``).

    Override ``parse()`` to extend parsing.
    For example, in MSP430, this allows us to grab extra words from the bitstream
    when extra immediate words are present.

    All architectures are different enough that there's no magic recipe for how to write a lifter.
    See the examples provided by gymrat for ideas of how to use this to build your own lifters quickly and easily.
    """

    # Mapping from each bin_format letter to the bits captured for it, filled in by parse().
    data: dict[str, str]
    # The customizer VEX statements are emitted into; only set once lift() runs.
    irsb_c: IRSBCustomizer

    def __init__(self, bitstrm, arch, addr):
        """
        Create an instance of the instruction by parsing it out of the bitstream.

        :param bitstrm: The bitstream to decode instructions from
        :param arch: The architecture object; ``parse`` reads its ``instruction_endness``
        :param addr: The address of the instruction to be lifted, used only for jumps and branches
        :raises ParseError: (from ``parse``) if the bits do not match ``bin_format``
        """
        self.addr = addr
        self.arch = arch
        self.bitwidth = len(self.bin_format)
        self.data = self.parse(bitstrm)

    @property
    @abc.abstractmethod
    def bin_format(self) -> str:
        """
        Read the documentation of the class to understand what a bin format string is

        :return: str bin format string
        """

    @property
    @abc.abstractmethod
    def name(self) -> str:
        """
        Name of the instruction

        Can be useful to name the instruction when there's an error related to it
        """

    def __call__(self, irsb_c, past_instructions, future_instructions):
        """Calling the instruction is shorthand for ``lift`` with the same arguments."""
        self.lift(irsb_c, past_instructions, future_instructions)

    def mark_instruction_start(self):
        """Emit an IMark for this instruction at ``self.addr`` spanning ``self.bytewidth`` bytes."""
        self.irsb_c.imark(self.addr, self.bytewidth, 0)

    def fetch_operands(self):  # pylint: disable=no-self-use
        """
        Get the operands out of memory or registers
        Return a tuple of operands for the instruction
        """
        return ()

    def lift(self, irsb_c: IRSBCustomizer, past_instructions, future_instructions):  # pylint: disable=unused-argument
        """
        This is the main body of the "lifting" for the instruction.
        This can/should be overridden to provide the general flow of how instructions in your arch work.
        For example, in MSP430, this is:

        - Figure out what your operands are by parsing the addressing, and load them into temporary registers
        - Do the actual operation, and commit the result, if needed.
        - Compute the flags

        :param irsb_c: The IRSBCustomizer to put VEX instructions into
        :param past_instructions: Unused by this base implementation
        :param future_instructions: Unused by this base implementation
        """
        self.irsb_c = irsb_c
        # Always call this first!
        self.mark_instruction_start()
        # Then do the actual stuff.
        inputs = self.fetch_operands()
        retval = self.compute_result(*inputs)  # pylint: disable=assignment-from-none
        if retval is not None:
            self.commit_result(retval)
        # Flags are computed even when there is no result; retval is then passed as None.
        vals = list(inputs) + [retval]
        self.compute_flags(*vals)

    def commit_result(self, res):
        """
        This where the result of the operation is written to a destination.
        This happens only if compute_result does not return None, and happens before compute_flags is called.
        Override this to specify how to write out the result.
        The results of fetch_operands can be used to resolve various addressing modes for the write outward.
        A common pattern is to return a function from fetch_operands which will be called here to perform the write.

        :param res: The value returned by compute_result
        """

    def compute_result(self, *args):  # pylint: disable=unused-argument,no-self-use
        """
        This is where the actual operation performed by your instruction, excluding the calculation of flags, should
        be performed. Return the VexValue of the "result" of the instruction, which may
        be used to calculate the flags later.
        For example, for a simple add, with arguments src and dst, you can simply write:

            return src + dst

        :param args: The operands returned by fetch_operands
        :return: A VexValue containing the "result" of the operation.
        """
        return None

    def compute_flags(self, *args):
        """
        Most CPU architectures have "flags" that should be computed for many instructions.
        Override this to specify how that happens. One common pattern is to define this method to call specific
        methods to update each flag, which can then be overridden in the actual classes for each instruction.

        :param args: The operands returned by fetch_operands, followed by the result of compute_result
        """

    def match_instruction(self, data, bitstrm):  # pylint: disable=unused-argument,no-self-use
        """
        Override this to extend the parsing functionality.
        This is great for if your arch has instruction "formats" that have an opcode that has to match.
        It should raise if the instruction does not match; ``parse`` ignores the return value.

        :param data: The letter -> bits dictionary built by ``parse``
        :param bitstrm: The bitstream being decoded (not yet advanced past this instruction)
        :return: data
        """
        return data

    def parse(self, bitstrm):
        """
        Match the next ``self.bitwidth`` bits of the stream against ``bin_format`` and consume them on success.

        :param bitstrm: The bitstream to decode from
        :return: A dict mapping each letter of ``bin_format`` to the string of bits captured for it
        :raises ParseError: if a hard-coded ``0``/``1`` in ``bin_format`` differs from the instruction bit
        :raises ValueError: if ``bin_format`` contains a character that is neither ``0``/``1`` nor an ASCII letter
        """
        if self.arch.instruction_endness == "Iend_LE":
            # This arch stores its instructions in memory endian-flipped compared to the ISA.
            # To enable natural lifter-writing, we let the user write them like in the manual, and correct for
            # endness here.
            instr_bits = self._load_le_instr(bitstrm, self.bitwidth)
        else:
            instr_bits = bitstrm.peek("bin:%d" % self.bitwidth)

        data = {c: "" for c in self.bin_format if c in string.ascii_letters}
        for c, b in zip(self.bin_format, instr_bits):
            if c in "01":
                if b != c:
                    raise ParseError("Mismatch between format bit %c and instruction bit %c" % (c, b))
            elif c in string.ascii_letters:
                data[c] += b
            else:
                raise ValueError("Invalid bin_format character %c" % c)

        # Hook for extra matching functionality; it should raise if the instruction is not right.
        self.match_instruction(data, bitstrm)

        # Use up the bits once we're sure it's right
        self.rawbits = bitstrm.read("hex:%d" % self.bitwidth)

        # Optional hook for extra parsing functionality (e.g., trailers); only subclasses define it.
        if hasattr(self, "_extra_parsing"):
            data = self._extra_parsing(data, bitstrm)  # pylint: disable=no-member

        return data

    @property
    def bytewidth(self):
        """
        The instruction width in units of ``self.arch.byte_width`` bits.

        :raises ValueError: if ``self.bitwidth`` is not a whole number of such bytes
        """
        if self.bitwidth % self.arch.byte_width != 0:
            raise ValueError("Instruction is not a multiple of bytes wide!")
        return self.bitwidth // self.arch.byte_width

    def disassemble(self):
        """
        Return the disassembly of this instruction.
        Override this in subclasses.

        :return: A tuple of the address (self.addr), the instruction's name, and a list of its operands, as strings.
                 This base implementation reports the name "UNK" and the raw hex bits as the only operand.
        """
        return self.addr, "UNK", [self.rawbits]

    # These methods should be called in subclasses to do register and memory operations

    def load(self, addr, ty):
        """
        Load a value from memory into a VEX temporary register.

        :param addr: The VexValue containing the addr to load from.
        :param ty: The Type of the resulting data
        :return: a VexValue
        """
        rdt = self.irsb_c.load(addr.rdt, ty)
        return VexValue(self.irsb_c, rdt)

    def constant(self, val, ty):
        """
        Creates a constant as a VexValue

        :param val: The value, as an integer
        :param ty: The type of the resulting VexValue
        :return: a VexValue
        :raises TypeError: if ``val`` is already a VexValue or an IRExpr rather than a plain value
        """
        # Both wrappers are rejected, as the message says: a constant must be built from a raw value.
        if isinstance(val, (VexValue, IRExpr)):
            raise TypeError("Constant cannot be made from VexValue or IRExpr")
        rdt = self.irsb_c.mkconst(val, ty)
        return VexValue(self.irsb_c, rdt)

    @staticmethod
    def _lookup_register(arch, reg):
        """
        Resolve a register given by integer index or by name to its offset via ``arch.get_register_offset``.

        :param arch: The architecture object to resolve against
        :param reg: Register number as an integer, or register string name
        :return: Whatever ``arch.get_register_offset`` returns for the resolved register
        """
        # TODO: This is a hack to make it work with archinfo where we use
        # register indices instead of names
        if isinstance(reg, int):
            if hasattr(arch, "register_index"):
                reg = arch.register_index[reg]
            else:
                reg = arch.register_list[reg].name
        return arch.get_register_offset(reg)

    def get(self, reg, ty):
        """
        Load a value from a machine register into a VEX temporary register.
        All values must be loaded out of registers before they can be used with operations, etc
        and stored back into them when the instruction is over. See put().

        If the register resolves to the arch's ``ip_offset``, no register read is emitted; a constant holding
        ``self.addr`` is returned instead.

        :param reg: Register number as an integer, or register string name
        :param ty: The Type to use.
        :return: A VexValue of the gotten value.
        """
        arch = self.irsb_c.irsb.arch
        offset = self._lookup_register(arch, reg)
        if offset == arch.ip_offset:
            return self.constant(self.addr, ty)
        rdt = self.irsb_c.rdreg(offset, ty)
        return VexValue(self.irsb_c, rdt)

    def put(self, val, reg):
        """
        Puts a value from a VEX temporary register into a machine register.
        This is how the results of operations done to registers get committed to the machine's state.

        :param val: The VexValue to store (Want to store a constant? See constant() first)
        :param reg: The integer register number to store into, or register name
        :return: None
        """
        offset = self._lookup_register(self.irsb_c.irsb.arch, reg)
        self.irsb_c.put(val.rdt, offset)

    def put_conditional(self, cond, valiftrue, valiffalse, reg):
        """
        Like put, except it checks a condition
        to decide what to put in the destination register.

        :param cond: The VexValue representing the logical expression for the condition
            (if your expression only has constants, don't use this method!)
        :param valiftrue: the VexValue to put in reg if cond evals as true
        :param valiffalse: the VexValue to put in reg if cond evals as false
        :param reg: The integer register number to store into, or register name
        :return: None
        """
        val = self.irsb_c.ite(cond.rdt, valiftrue.rdt, valiffalse.rdt)
        offset = self._lookup_register(self.irsb_c.irsb.arch, reg)
        self.irsb_c.put(val, offset)

    def store(self, val, addr):
        """
        Store a VexValue in memory at the specified location.

        :param val: The VexValue of the value to store
        :param addr: The VexValue of the address to store into
        :return: None
        """
        self.irsb_c.store(addr.rdt, val.rdt)

    def jump(self, condition, to_addr, jumpkind=JumpKind.Boring, ip_offset=None):
        """
        Jump to a specified destination, under the specified condition.
        Used for branches, jumps, calls, returns, etc.

        :param condition: The VexValue representing the expression for the guard, or None for an unconditional jump
        :param to_addr: The address to jump to, as a VexValue, an int, or an RdTmp.
        :param jumpkind: The JumpKind to use. See the VEX docs for what these are; you only need them for things
                         that aren't normal jumps (e.g., calls, interrupts, program exits, etc etc)
        :param ip_offset: The instruction-pointer offset handed to ``add_exit`` for a conditional jump;
                          defaults to ``self.arch.ip_offset``
        :return: None
        :raises TypeError: if ``to_addr`` is not a VexValue, int, or RdTmp
        """
        to_addr_ty = None
        if isinstance(to_addr, VexValue):
            # Unpack a VV
            to_addr_rdt = to_addr.rdt
            to_addr_ty = to_addr.ty
        elif isinstance(to_addr, int):
            # Direct jump to an int, make an RdT and Ty
            to_addr_ty = vex_int_class(self.irsb_c.irsb.arch.bits).type
            to_addr = self.constant(to_addr, to_addr_ty)  # TODO archinfo may be changing
            to_addr_rdt = to_addr.rdt
        elif isinstance(to_addr, RdTmp):
            # An RdT; just get the Ty of the arch's pointer type
            to_addr_ty = vex_int_class(self.irsb_c.irsb.arch.bits).type
            to_addr_rdt = to_addr
        else:
            raise TypeError("Jump destination has unknown type: " + repr(type(to_addr)))

        if condition:
            # Conditional: add a side exit to the fall-through address guarded by the *negated* condition,
            # and make the jump target the block's default exit below.
            # EDG says: We should make sure folks set ArchXYZ.ip_offset like they're supposed to
            if ip_offset is None:
                ip_offset = self.arch.ip_offset
            assert ip_offset is not None

            negated_condition_rdt = self.ite(
                condition, self.constant(0, condition.ty), self.constant(1, condition.ty)
            )
            # Fall-through is this instruction's address plus the same length the IMark was emitted with.
            direct_exit_target = self.constant(self.addr + self.bytewidth, to_addr_ty)
            self.irsb_c.add_exit(negated_condition_rdt, direct_exit_target.rdt, jumpkind, ip_offset)

        # In both cases the destination becomes the default exit of the block.
        self.irsb_c.irsb.jumpkind = jumpkind
        self.irsb_c.irsb.next = to_addr_rdt

    def ite(self, cond, t, f):
        """
        Build an if-then-else expression from three VexValues.

        :param cond: The VexValue of the condition
        :param t: The VexValue selected when the condition holds
        :param f: The VexValue selected otherwise
        :return: Whatever ``irsb_c.ite`` returns (not wrapped in a VexValue)
        """
        return self.irsb_c.ite(cond.rdt, t.rdt, f.rdt)

    @staticmethod
    def _unwrap_args(args):
        """Return ``args`` as a tuple with every VexValue replaced by its ``rdt``; other items pass through."""
        return tuple(arg.rdt if isinstance(arg, VexValue) else arg for arg in args)

    def ccall(self, ret_type, func_name, args):
        """
        Creates a CCall operation.
        A CCall is a procedure that calculates a value at *runtime*, not at lift-time.
        You can use these for flags, unresolvable jump targets, etc.
        We caution you to avoid using them when at all possible though.

        :param ret_type: The return type of the CCall
        :param func_name: The name of the helper function to call. If you're using angr, this should be added (or
                          monkeypatched) into ``angr.engines.vex.claripy.ccall``.
        :param args: List of arguments to the function
        :return: A VexValue of the result.
        """
        cc = self.irsb_c.op_ccall(ret_type, func_name, self._unwrap_args(args))
        return VexValue(self.irsb_c, cc)

    def dirty(self, ret_type, func_name, args) -> VexValue:
        """
        Creates a dirty call operation.

        These are like ccalls (clean calls) but their implementations are theoretically allowed to read or write to
        or from any part of the state, making them a nightmare for static analysis to reason about. Avoid their use
        at all costs.

        :param ret_type: The return type of the dirty call, or None if the dirty call doesn't return anything.
        :param func_name: The name of the helper function to call. If you're using angr, this should be added (or
                          monkeypatched) into ``angr.engines.vex.heavy.dirty``.
        :param args: List of arguments to the function
        :return: A VexValue of the result.
        """
        rdt = self.irsb_c.dirty(ret_type, func_name, self._unwrap_args(args))
        return VexValue(self.irsb_c, rdt)

    def _load_le_instr(self, bitstream: bitstring.ConstBitStream, numbits: int) -> str:
        """
        Peek ``numbits`` bits as a little-endian unsigned integer and return them as a binary string.

        :param bitstream: The stream to peek from (its position is not advanced)
        :param numbits: How many bits to read; also the length of the returned string
        :return: The bits as a string of ``0``/``1`` characters
        """
        return bitstring.Bits(uint=bitstream.peek("uintle:%d" % numbits), length=numbits).bin
@@ -0,0 +1,154 @@
1
+ import logging
2
+ from typing import TYPE_CHECKING
3
+
4
+ import bitstring
5
+
6
+ from pyvex.const import vex_int_class
7
+ from pyvex.errors import LiftingException
8
+ from pyvex.lifting.lifter import Lifter
9
+
10
+ from .vex_helper import IRSBCustomizer, JumpKind
11
+
12
+ if TYPE_CHECKING:
13
+ from .instr_helper import Instruction
14
+
15
+ log = logging.getLogger(__name__)
16
+
17
+
18
+ def is_empty(bitstrm):
19
+ try:
20
+ bitstrm.peek(1)
21
+ return False
22
+ except bitstring.ReadError:
23
+ return True
24
+
25
+
26
+ class ParseError(Exception):
27
+ pass
28
+
29
+
30
+ class GymratLifter(Lifter):
31
+ """
32
+ This is a base class for lifters that use Gymrat.
33
+ For most architectures, all you need to do is subclass this, and set the property "instructions"
34
+ to be a list of classes that define each instruction.
35
+ By default, a lifter will decode instructions by attempting to instantiate every class until one works.
36
+ This will use an IRSBCustomizer, which will, if it succeeds, add the appropriate VEX instructions to a pyvex IRSB.
37
+ pyvex, when lifting a block of code for this architecture, will call the method "lift", which will produce the IRSB
38
+ of the lifted code.
39
+ """
40
+
41
+ __slots__ = (
42
+ "bitstrm",
43
+ "errors",
44
+ "thedata",
45
+ "disassembly",
46
+ )
47
+
48
+ REQUIRE_DATA_PY = True
49
+ instrs: list[type["Instruction"]]
50
+
51
+ def __init__(self, arch, addr):
52
+ super().__init__(arch, addr)
53
+ self.bitstrm = None
54
+ self.errors = None
55
+ self.thedata = None
56
+ self.disassembly = None
57
+
58
+ def create_bitstrm(self):
59
+ self.bitstrm = bitstring.ConstBitStream(bytes=self.thedata)
60
+
61
+ def _decode_next_instruction(self, addr):
62
+ # Try every instruction until one works
63
+ for possible_instr in self.instrs:
64
+ try:
65
+ log.debug("Trying %s", possible_instr.name)
66
+ return possible_instr(self.bitstrm, self.irsb.arch, addr)
67
+ # a ParserError signals that this instruction did not match
68
+ # we need to try other instructions, so we ignore this error
69
+ except ParseError:
70
+ pass # l.exception(repr(possible_instr))
71
+ # if we are out of input, ignore.
72
+ # there may be other, shorter instructions that still match,
73
+ # so we continue with the loop
74
+ except (bitstring.ReadError, bitstring.InterpretError):
75
+ pass
76
+
77
+ # If no instruction matches, log an error
78
+ errorstr = "Unknown instruction at bit position %d" % self.bitstrm.bitpos
79
+ log.debug(errorstr)
80
+ log.debug("Address: %#08x" % addr)
81
+
82
+ def decode(self):
83
+ try:
84
+ self.create_bitstrm()
85
+ count = 0
86
+ disas = []
87
+ addr = self.irsb.addr
88
+ log.debug("Starting block at address: " + hex(addr))
89
+ bytepos = self.bitstrm.bytepos
90
+
91
+ while not is_empty(self.bitstrm):
92
+ instr = self._decode_next_instruction(addr)
93
+ if not instr:
94
+ break
95
+ disas.append(instr)
96
+ log.debug("Matched " + instr.name)
97
+ addr += self.bitstrm.bytepos - bytepos
98
+ bytepos = self.bitstrm.bytepos
99
+ count += 1
100
+ return disas
101
+ except Exception as e:
102
+ self.errors = str(e)
103
+ log.exception(f"Error decoding block at offset {bytepos:#x} (address {addr:#x}):")
104
+ raise
105
+
106
+ def _lift(self):
107
+ self.thedata = (
108
+ self.data[: self.max_bytes]
109
+ if isinstance(self.data, (bytes, bytearray, memoryview))
110
+ else self.data[: self.max_bytes].encode()
111
+ )
112
+ log.debug(repr(self.thedata))
113
+ instructions = self.decode()
114
+
115
+ if self.disasm:
116
+ self.disassembly = [instr.disassemble() for instr in instructions]
117
+ self.irsb.jumpkind = JumpKind.Invalid
118
+ irsb_c = IRSBCustomizer(self.irsb)
119
+ log.debug("Decoding complete.")
120
+ for i, instr in enumerate(instructions[: self.max_inst]):
121
+ log.debug("Lifting instruction %s", instr.name)
122
+ instr(irsb_c, instructions[:i], instructions[i + 1 :])
123
+ if irsb_c.irsb.jumpkind != JumpKind.Invalid:
124
+ break
125
+ if (i + 1) == self.max_inst: # if we are on our last iteration
126
+ instr.jump(None, irsb_c.irsb.addr + irsb_c.irsb.size)
127
+ break
128
+ else:
129
+ if len(irsb_c.irsb.statements) == 0:
130
+ raise LiftingException("Could not decode any instructions")
131
+ irsb_c.irsb.jumpkind = JumpKind.NoDecode
132
+ dst = irsb_c.irsb.addr + irsb_c.irsb.size
133
+ dst_ty = vex_int_class(irsb_c.irsb.arch.bits).type
134
+ irsb_c.irsb.next = irsb_c.mkconst(dst, dst_ty)
135
+ log.debug(str(self.irsb))
136
+ if self.dump_irsb:
137
+ self.irsb.pp()
138
+ return self.irsb
139
+
140
+ def pp_disas(self):
141
+ disasstr = ""
142
+ insts = self.disassemble()
143
+ for addr, name, args in insts:
144
+ args_str = ",".join(str(a) for a in args)
145
+ disasstr += f"{addr:#08x}:\t{name} {args_str}\n"
146
+ print(disasstr)
147
+
148
+ def error(self):
149
+ return self.errors
150
+
151
+ def disassemble(self):
152
+ if self.disassembly is None:
153
+ self.lift(self.data, disasm=True)
154
+ return self.disassembly