angr 9.2.139__py3-none-manylinux2014_x86_64.whl → 9.2.140__py3-none-manylinux2014_x86_64.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (68)
  1. angr/__init__.py +1 -1
  2. angr/analyses/calling_convention/calling_convention.py +48 -21
  3. angr/analyses/cfg/cfg_base.py +13 -0
  4. angr/analyses/cfg/cfg_fast.py +11 -0
  5. angr/analyses/decompiler/ail_simplifier.py +67 -52
  6. angr/analyses/decompiler/clinic.py +68 -43
  7. angr/analyses/decompiler/decompiler.py +17 -7
  8. angr/analyses/decompiler/expression_narrower.py +1 -1
  9. angr/analyses/decompiler/optimization_passes/const_prop_reverter.py +8 -7
  10. angr/analyses/decompiler/optimization_passes/ite_region_converter.py +21 -13
  11. angr/analyses/decompiler/optimization_passes/optimization_pass.py +16 -10
  12. angr/analyses/decompiler/optimization_passes/return_duplicator_base.py +2 -2
  13. angr/analyses/decompiler/region_simplifiers/expr_folding.py +259 -108
  14. angr/analyses/decompiler/region_simplifiers/region_simplifier.py +27 -12
  15. angr/analyses/decompiler/structuring/dream.py +21 -17
  16. angr/analyses/decompiler/structuring/phoenix.py +152 -40
  17. angr/analyses/decompiler/structuring/recursive_structurer.py +1 -0
  18. angr/analyses/decompiler/structuring/structurer_base.py +36 -10
  19. angr/analyses/decompiler/structuring/structurer_nodes.py +4 -1
  20. angr/analyses/decompiler/utils.py +60 -1
  21. angr/analyses/deobfuscator/api_obf_finder.py +8 -5
  22. angr/analyses/deobfuscator/api_obf_type2_finder.py +18 -10
  23. angr/analyses/deobfuscator/string_obf_finder.py +105 -18
  24. angr/analyses/forward_analysis/forward_analysis.py +1 -1
  25. angr/analyses/propagator/top_checker_mixin.py +6 -6
  26. angr/analyses/reaching_definitions/__init__.py +2 -1
  27. angr/analyses/reaching_definitions/dep_graph.py +1 -12
  28. angr/analyses/reaching_definitions/engine_vex.py +36 -31
  29. angr/analyses/reaching_definitions/function_handler.py +15 -2
  30. angr/analyses/reaching_definitions/rd_state.py +1 -37
  31. angr/analyses/reaching_definitions/reaching_definitions.py +13 -24
  32. angr/analyses/s_propagator.py +6 -41
  33. angr/analyses/s_reaching_definitions/s_rda_model.py +7 -1
  34. angr/analyses/stack_pointer_tracker.py +36 -22
  35. angr/analyses/typehoon/simple_solver.py +45 -7
  36. angr/analyses/typehoon/typeconsts.py +18 -5
  37. angr/analyses/variable_recovery/engine_base.py +7 -5
  38. angr/block.py +69 -107
  39. angr/callable.py +14 -7
  40. angr/calling_conventions.py +15 -1
  41. angr/distributed/__init__.py +1 -1
  42. angr/engines/__init__.py +7 -8
  43. angr/engines/engine.py +1 -120
  44. angr/engines/failure.py +2 -2
  45. angr/engines/hook.py +2 -2
  46. angr/engines/light/engine.py +2 -2
  47. angr/engines/pcode/engine.py +2 -14
  48. angr/engines/procedure.py +2 -2
  49. angr/engines/soot/engine.py +2 -2
  50. angr/engines/soot/statements/switch.py +1 -1
  51. angr/engines/successors.py +124 -11
  52. angr/engines/syscall.py +2 -2
  53. angr/engines/unicorn.py +3 -3
  54. angr/engines/vex/heavy/heavy.py +3 -15
  55. angr/factory.py +4 -19
  56. angr/knowledge_plugins/key_definitions/atoms.py +8 -4
  57. angr/knowledge_plugins/key_definitions/live_definitions.py +41 -103
  58. angr/sim_type.py +19 -17
  59. angr/state_plugins/plugin.py +19 -4
  60. angr/storage/memory_mixins/memory_mixin.py +1 -1
  61. angr/storage/memory_mixins/paged_memory/pages/multi_values.py +10 -5
  62. angr/utils/ssa/__init__.py +119 -4
  63. {angr-9.2.139.dist-info → angr-9.2.140.dist-info}/METADATA +6 -6
  64. {angr-9.2.139.dist-info → angr-9.2.140.dist-info}/RECORD +68 -68
  65. {angr-9.2.139.dist-info → angr-9.2.140.dist-info}/LICENSE +0 -0
  66. {angr-9.2.139.dist-info → angr-9.2.140.dist-info}/WHEEL +0 -0
  67. {angr-9.2.139.dist-info → angr-9.2.140.dist-info}/entry_points.txt +0 -0
  68. {angr-9.2.139.dist-info → angr-9.2.140.dist-info}/top_level.txt +0 -0
@@ -22,6 +22,7 @@ try:
22
22
  from angr.engines import pcode
23
23
  except ImportError:
24
24
  pypcode = None
25
+ pcode = None
25
26
 
26
27
  if TYPE_CHECKING:
27
28
  from angr.block import Block
@@ -93,6 +94,11 @@ class Register:
93
94
  return self.offset == other.offset
94
95
  return False
95
96
 
97
+ def __add__(self, other) -> OffsetVal:
98
+ if type(other) is Constant:
99
+ return OffsetVal(self, other.val)
100
+ raise CouldNotResolveException
101
+
96
102
  def __repr__(self):
97
103
  return str(self.offset)
98
104
 
@@ -232,6 +238,7 @@ class StackPointerTrackerState:
232
238
  def give_up_on_memory_tracking(self):
233
239
  self.memory = {}
234
240
  self.is_tracking_memory = False
241
+ return self
235
242
 
236
243
  def store(self, addr, val):
237
244
  # strong update
@@ -370,7 +377,8 @@ class StackPointerTracker(Analysis, ForwardAnalysis):
370
377
  self._mem_merge_cache = {}
371
378
 
372
379
  if initial_reg_values:
373
- self._reg_value_at_block_start[func.addr if func is not None else block.addr] = initial_reg_values
380
+ block_start_addr = func.addr if func is not None else block.addr # type: ignore
381
+ self._reg_value_at_block_start[block_start_addr] = initial_reg_values
374
382
 
375
383
  _l.debug("Running on function %r", self._func)
376
384
  self._analyze()
@@ -461,9 +469,13 @@ class StackPointerTracker(Analysis, ForwardAnalysis):
461
469
  return any(self.inconsistent_for(r) for r in self.reg_offsets)
462
470
 
463
471
  def inconsistent_for(self, reg):
472
+ if self._func is None:
473
+ raise ValueError("inconsistent_for() is only supported in function mode")
464
474
  return any(self.offset_after_block(endpoint.addr, reg) is TOP for endpoint in self._func.endpoints)
465
475
 
466
476
  def offsets_for(self, reg):
477
+ if self._func is None:
478
+ raise ValueError("offsets_for() is only supported in function mode")
467
479
  return [
468
480
  o for block in self._func.blocks if (o := self.offset_after_block(block.addr, reg)) not in (TOP, BOTTOM)
469
481
  ]
@@ -481,7 +493,7 @@ class StackPointerTracker(Analysis, ForwardAnalysis):
481
493
  def _post_analysis(self):
482
494
  pass
483
495
 
484
- def _get_register(self, offset):
496
+ def _get_register(self, offset) -> Register:
485
497
  name = self.project.arch.register_names[offset]
486
498
  size = self.project.arch.registers[name][1]
487
499
  return Register(offset, size * self.project.arch.byte_width)
@@ -557,7 +569,7 @@ class StackPointerTracker(Analysis, ForwardAnalysis):
557
569
  output_state = state.freeze()
558
570
  return None, output_state
559
571
 
560
- def _process_vex_irsb(self, node, vex_block: pyvex.IRSB, state: StackPointerTrackerState) -> int:
572
+ def _process_vex_irsb(self, node, vex_block: pyvex.IRSB, state: StackPointerTrackerState) -> int | None:
561
573
  tmps = {}
562
574
  curr_stmt_start_addr = None
563
575
 
@@ -704,21 +716,16 @@ class StackPointerTracker(Analysis, ForwardAnalysis):
704
716
  if callees:
705
717
  if len(callees) == 1:
706
718
  callee = callees[0]
719
+ track_rax = False
720
+ if (
721
+ (callee.info.get("is_rust_probestack", False) and self.project.arch.name == "AMD64")
722
+ or (callee.info.get("is_alloca_probe", False) and self.project.arch.name == "AMD64")
723
+ or callee.name == "__chkstk"
724
+ ):
725
+ # sp = sp - rax right after returning from the call
726
+ track_rax = True
707
727
 
708
- if callee.info.get("is_rust_probestack", False) is True and self.project.arch.name == "AMD64":
709
- # special-case for rust_probestack: sp = sp - rax right after returning from the call, so we
710
- # need to keep track of rax
711
- for stmt in reversed(vex_block.statements):
712
- if (
713
- isinstance(stmt, pyvex.IRStmt.Put)
714
- and stmt.offset == self.project.arch.registers["rax"][0]
715
- and isinstance(stmt.data, pyvex.IRExpr.Const)
716
- ):
717
- state.put(stmt.offset, Constant(stmt.data.con.value), force=True)
718
- break
719
- elif callee.name == "__chkstk":
720
- # special-case for __chkstk: sp = sp - rax right after returning from the call, so we need to
721
- # keep track of rax
728
+ if track_rax:
722
729
  for stmt in reversed(vex_block.statements):
723
730
  if (
724
731
  isinstance(stmt, pyvex.IRStmt.Put)
@@ -737,18 +744,20 @@ class StackPointerTracker(Analysis, ForwardAnalysis):
737
744
  # found callee clean-up cases...
738
745
  try:
739
746
  v = state.get(self.project.arch.sp_offset)
747
+ incremented = None
740
748
  if v is BOTTOM:
741
749
  incremented = BOTTOM
742
750
  elif callee_cleanups[0].prototype is not None:
743
751
  num_args = len(callee_cleanups[0].prototype.args)
744
752
  incremented = v + Constant(self.project.arch.bytes * num_args)
745
- state.put(self.project.arch.sp_offset, incremented)
753
+ if incremented is not None:
754
+ state.put(self.project.arch.sp_offset, incremented)
746
755
  except CouldNotResolveException:
747
756
  pass
748
757
 
749
758
  return curr_stmt_start_addr
750
759
 
751
- def _process_pcode_irsb(self, node, pcode_irsb: pcode.lifter.IRSB, state: StackPointerTrackerState) -> int:
760
+ def _process_pcode_irsb(self, node, pcode_irsb: pcode.lifter.IRSB, state: StackPointerTrackerState) -> int | None:
752
761
  unique = {}
753
762
  curr_stmt_start_addr = None
754
763
 
@@ -830,18 +839,20 @@ class StackPointerTracker(Analysis, ForwardAnalysis):
830
839
  # found callee clean-up cases...
831
840
  try:
832
841
  v = state.get(self.project.arch.sp_offset)
842
+ incremented = None
833
843
  if v is BOTTOM:
834
844
  incremented = BOTTOM
835
845
  elif callee_cleanups[0].prototype is not None:
836
846
  num_args = len(callee_cleanups[0].prototype.args)
837
847
  incremented = v + Constant(self.project.arch.bytes * num_args)
838
- state.put(self.project.arch.sp_offset, incremented)
848
+ if incremented is not None:
849
+ state.put(self.project.arch.sp_offset, incremented)
839
850
  except CouldNotResolveException:
840
851
  pass
841
852
 
842
853
  return curr_stmt_start_addr
843
854
 
844
- def _widen_states(self, *states):
855
+ def _widen_states(self, *states: FrozenStackPointerTrackerState):
845
856
  assert len(states) == 2
846
857
  merged, _ = self._merge_states(None, *states)
847
858
  if len(merged.memory) > 5:
@@ -849,13 +860,16 @@ class StackPointerTracker(Analysis, ForwardAnalysis):
849
860
  merged = merged.unfreeze().give_up_on_memory_tracking().freeze()
850
861
  return merged
851
862
 
852
- def _merge_states(self, node, *states: StackPointerTrackerState):
863
+ def _merge_states(self, node, *states: FrozenStackPointerTrackerState):
853
864
  merged_state = states[0]
854
865
  for other in states[1:]:
855
866
  merged_state = merged_state.merge(other, node.addr, self._reg_merge_cache, self._mem_merge_cache)
856
867
  return merged_state, merged_state == states[0]
857
868
 
858
869
  def _find_callees(self, node) -> list[Function]:
870
+ if self._func is None:
871
+ raise ValueError("find_callees() is only supported in function mode")
872
+
859
873
  callees: list[Function] = []
860
874
  for _, dst, data in self._func.transition_graph.out_edges(node, data=True):
861
875
  if data.get("type") == "call" and isinstance(dst, Function):
@@ -5,6 +5,7 @@ from collections import defaultdict
5
5
  import logging
6
6
 
7
7
  import networkx
8
+ from sortedcontainers import SortedDict
8
9
 
9
10
  from angr.utils.constants import MAX_POINTSTO_BITS
10
11
  from .typevars import (
@@ -1165,25 +1166,45 @@ class SimpleSolver:
1165
1166
  # this might be a struct
1166
1167
  fields = {}
1167
1168
 
1168
- candidate_bases = defaultdict(set)
1169
+ candidate_bases = SortedDict()
1169
1170
 
1170
1171
  for labels, _succ in path_and_successors:
1171
1172
  last_label = labels[-1] if labels else None
1172
1173
  if isinstance(last_label, HasField):
1173
1174
  # TODO: Really determine the maximum possible size of the field when MAX_POINTSTO_BITS is in use
1175
+ if last_label.offset not in candidate_bases:
1176
+ candidate_bases[last_label.offset] = set()
1174
1177
  candidate_bases[last_label.offset].add(
1175
1178
  1 if last_label.bits == MAX_POINTSTO_BITS else (last_label.bits // 8)
1176
1179
  )
1177
1180
 
1181
+ # determine possible bases and map each offset to its base
1182
+ offset_to_base = SortedDict()
1183
+ for start_offset, sizes in candidate_bases.items():
1184
+ for size in sizes:
1185
+ for i in range(size):
1186
+ access_off = start_offset + i
1187
+ if access_off not in offset_to_base:
1188
+ offset_to_base[access_off] = start_offset
1189
+
1190
+ # determine again the maximum size of each field (at each offset)
1191
+ offset_to_maxsize = defaultdict(int)
1192
+ offset_to_sizes = defaultdict(set) # we do not consider offsets to each base offset
1193
+ for labels, _succ in path_and_successors:
1194
+ last_label = labels[-1] if labels else None
1195
+ if isinstance(last_label, HasField):
1196
+ base = offset_to_base[last_label.offset]
1197
+ access_size = 1 if last_label.bits == MAX_POINTSTO_BITS else (last_label.bits // 8)
1198
+ offset_to_maxsize[base] = max(offset_to_maxsize[base], (last_label.offset - base) + access_size)
1199
+ offset_to_sizes[base].add(access_size)
1200
+
1178
1201
  node_to_base = {}
1179
1202
 
1180
1203
  for labels, succ in path_and_successors:
1181
1204
  last_label = labels[-1] if labels else None
1182
1205
  if isinstance(last_label, HasField):
1183
- for start_offset, sizes in candidate_bases.items():
1184
- for size in sizes:
1185
- if last_label.offset > start_offset and last_label.offset < start_offset + size: # ???
1186
- node_to_base[succ] = start_offset
1206
+ prev_offset = next(offset_to_base.irange(maximum=last_label.offset, reverse=True))
1207
+ node_to_base[succ] = offset_to_base[prev_offset]
1187
1208
 
1188
1209
  node_by_offset = defaultdict(set)
1189
1210
 
@@ -1195,16 +1216,33 @@ class SimpleSolver:
1195
1216
  else:
1196
1217
  node_by_offset[last_label.offset].add(succ)
1197
1218
 
1198
- for offset, child_nodes in node_by_offset.items():
1219
+ sorted_offsets: list[int] = sorted(node_by_offset)
1220
+ for i in range(len(sorted_offsets)): # pylint:disable=consider-using-enumerate
1221
+ offset = sorted_offsets[i]
1222
+
1223
+ child_nodes = node_by_offset[offset]
1199
1224
  sol = self._determine(equivalent_classes, the_typevar, sketch, solution, nodes=child_nodes)
1200
1225
  if isinstance(sol, TopType):
1201
- sol = int_type(min(candidate_bases[offset]) * 8)
1226
+ # make it an array if possible
1227
+ elem_size = min(offset_to_sizes[offset])
1228
+ array_size = offset_to_maxsize[offset]
1229
+ if array_size % elem_size != 0:
1230
+ # fall back to byte_t
1231
+ elem_size = 1
1232
+ elem_type = int_type(elem_size * 8)
1233
+ sol = elem_type if array_size == elem_size else Array(elem_type, array_size // elem_size)
1202
1234
  fields[offset] = sol
1203
1235
 
1204
1236
  if not fields:
1205
1237
  result = Top_
1206
1238
  for node in nodes:
1207
1239
  self._solution_cache[node.typevar] = result
1240
+ solution[node.typevar] = result
1241
+ elif any(off < 0 for off in fields):
1242
+ result = self._pointer_class()(Bottom_)
1243
+ for node in nodes:
1244
+ self._solution_cache[node.typevar] = result
1245
+ solution[node.typevar] = result
1208
1246
  else:
1209
1247
  # back-patch
1210
1248
  struct_type.fields = fields
@@ -42,7 +42,7 @@ class TypeConstant:
42
42
  raise NotImplementedError
43
43
  return self.SIZE
44
44
 
45
- def __repr__(self, memo=None):
45
+ def __repr__(self, memo=None) -> str:
46
46
  raise NotImplementedError
47
47
 
48
48
 
@@ -57,7 +57,7 @@ class BottomType(TypeConstant):
57
57
 
58
58
 
59
59
  class Int(TypeConstant):
60
- def __repr__(self, memo=None):
60
+ def __repr__(self, memo=None) -> str:
61
61
  return "intbase"
62
62
 
63
63
 
@@ -82,14 +82,14 @@ class Int16(Int):
82
82
  class Int32(Int):
83
83
  SIZE = 4
84
84
 
85
- def __repr__(self, memo=None):
85
+ def __repr__(self, memo=None) -> str:
86
86
  return "int32"
87
87
 
88
88
 
89
89
  class Int64(Int):
90
90
  SIZE = 8
91
91
 
92
- def __repr__(self, memo=None):
92
+ def __repr__(self, memo=None) -> str:
93
93
  return "int64"
94
94
 
95
95
 
@@ -115,7 +115,7 @@ class Int512(Int):
115
115
 
116
116
 
117
117
  class FloatBase(TypeConstant):
118
- def __repr__(self, memo=None):
118
+ def __repr__(self, memo=None) -> str:
119
119
  return "floatbase"
120
120
 
121
121
 
@@ -185,6 +185,12 @@ class Array(TypeConstant):
185
185
  self.element: TypeConstant | None = element
186
186
  self.count: int | None = count
187
187
 
188
+ @property
189
+ def size(self) -> int:
190
+ if not self.count or not self.element:
191
+ return 0
192
+ return self.element.size * self.count
193
+
188
194
  @memoize
189
195
  def __repr__(self, memo=None):
190
196
  if self.count is None:
@@ -221,6 +227,13 @@ class Struct(TypeConstant):
221
227
  tpl = tuple((k, self.fields[k]._hash(visited) if self.fields[k] is not None else None) for k in keys)
222
228
  return hash(tpl)
223
229
 
230
+ @property
231
+ def size(self) -> int:
232
+ if not self.fields:
233
+ return 0
234
+ max_field_off = max(self.fields.keys())
235
+ return max_field_off + self.fields[max_field_off].size
236
+
224
237
  @memoize
225
238
  def __repr__(self, memo=None):
226
239
  prefix = "struct"
@@ -430,6 +430,7 @@ class SimEngineVRBase(
430
430
  self.state.variable_manager[self.func_addr].add_variable("register", vvar.oident, variable)
431
431
  elif vvar.was_tmp:
432
432
  # FIXME: we treat all tmp vvars as registers
433
+ assert vvar.tmp_idx is not None
433
434
  variable = SimRegisterVariable(
434
435
  4096 + vvar.tmp_idx,
435
436
  vvar.size,
@@ -503,7 +504,7 @@ class SimEngineVRBase(
503
504
  self.state.variable_manager[self.func_addr].remove_variable_by_atom(codeloc, existing_var, atom)
504
505
 
505
506
  # storing to a location specified by a pointer whose value cannot be determined at this point
506
- self._store_to_variable(richr_addr, size)
507
+ self._store_to_variable(richr_addr, data, size)
507
508
 
508
509
  def _store_to_stack(
509
510
  self, stack_offset, data: RichR[claripy.ast.BV | claripy.ast.FP], size, offset=0, atom=None, endness=None
@@ -669,7 +670,7 @@ class SimEngineVRBase(
669
670
  self.state.add_type_constraint(typevars.Subtype(store_typevar, typeconsts.TopType()))
670
671
  self.state.add_type_constraint(typevars.Subtype(data.typevar, store_typevar))
671
672
 
672
- def _store_to_variable(self, richr_addr: RichR[claripy.ast.BV], size: int):
673
+ def _store_to_variable(self, richr_addr: RichR[claripy.ast.BV], data: RichR, size: int):
673
674
  addr_variable = richr_addr.variable
674
675
  codeloc = self._codeloc()
675
676
 
@@ -691,7 +692,8 @@ class SimEngineVRBase(
691
692
  store_typevar = self._create_access_typevar(base_typevar, True, size, field_offset)
692
693
  if addr_variable is not None:
693
694
  self.state.typevars.add_type_variable(addr_variable, codeloc, typevar)
694
- self.state.add_type_constraint(typevars.Subtype(store_typevar, typeconsts.TopType()))
695
+ data_typevar = data.typevar if data.typevar is not None else typeconsts.TopType()
696
+ self.state.add_type_constraint(typevars.Subtype(store_typevar, data_typevar))
695
697
 
696
698
  def _load(self, richr_addr: RichR[claripy.ast.BV], size: int, expr=None):
697
699
  """
@@ -941,13 +943,13 @@ class SimEngineVRBase(
941
943
  # it's an array!
942
944
  if offset.concrete:
943
945
  concrete_offset = offset.concrete_value * elem_size
944
- load_typevar = self._create_access_typevar(typevar, True, size, concrete_offset)
946
+ load_typevar = self._create_access_typevar(typevar, False, size, concrete_offset)
945
947
  self.state.add_type_constraint(typevars.Subtype(load_typevar, typeconsts.TopType()))
946
948
  else:
947
949
  # FIXME: This is a hack
948
950
  for i in range(4):
949
951
  concrete_offset = size * i
950
- load_typevar = self._create_access_typevar(typevar, True, size, concrete_offset)
952
+ load_typevar = self._create_access_typevar(typevar, False, size, concrete_offset)
951
953
  self.state.add_type_constraint(typevars.Subtype(load_typevar, typeconsts.TopType()))
952
954
 
953
955
  return RichR(self.state.top(size * self.project.arch.byte_width), typevar=typevar)
angr/block.py CHANGED
@@ -130,19 +130,23 @@ class Block(Serializable):
130
130
  BLOCK_MAX_SIZE = 4096
131
131
 
132
132
  __slots__ = [
133
+ "_backup_state",
133
134
  "_bytes",
134
135
  "_capstone",
135
136
  "_collect_data_refs",
136
137
  "_const_prop",
137
138
  "_cross_insn_opt",
138
139
  "_disassembly",
140
+ "_extra_stop_points",
139
141
  "_initial_regs",
140
142
  "_instruction_addrs",
141
143
  "_instructions",
142
144
  "_load_from_ro_regions",
145
+ "_max_size",
143
146
  "_opt_level",
144
147
  "_project",
145
148
  "_strict_block_end",
149
+ "_traceflags",
146
150
  "_vex",
147
151
  "_vex_nostmt",
148
152
  "addr",
@@ -155,11 +159,10 @@ class Block(Serializable):
155
159
  self,
156
160
  addr,
157
161
  project=None,
158
- arch: Arch = None,
162
+ arch: Arch | None = None,
159
163
  size=None,
160
164
  max_size=None,
161
165
  byte_string=None,
162
- vex=None,
163
166
  thumb=False,
164
167
  backup_state=None,
165
168
  extra_stop_points=None,
@@ -174,14 +177,11 @@ class Block(Serializable):
174
177
  initial_regs=None,
175
178
  skip_stmts=False,
176
179
  ):
177
- # set up arch
178
- self.arch: Arch
179
- if project is not None:
180
+ if arch is not None:
181
+ self.arch = arch
182
+ elif project is not None:
180
183
  self.arch = project.arch
181
184
  else:
182
- self.arch = arch
183
-
184
- if self.arch is None:
185
185
  raise ValueError('Either "project" or "arch" has to be specified.')
186
186
 
187
187
  if project is not None and backup_state is None and project.kb.patches.values():
@@ -195,63 +195,23 @@ class Block(Serializable):
195
195
  else:
196
196
  thumb = False
197
197
 
198
- self._project: Project | None = project
199
- self.thumb = thumb
198
+ self._project = project
200
199
  self.addr = addr
200
+ self._backup_state = backup_state
201
+ self.thumb = thumb
201
202
  self._opt_level = opt_level
202
- self._initial_regs: list[tuple[int, int, int]] | None = (
203
- initial_regs if (collect_data_refs or const_prop) else None
204
- )
203
+ self._initial_regs = initial_regs if (collect_data_refs or const_prop) else None
204
+ self._traceflags = traceflags
205
+ self._extra_stop_points = extra_stop_points
206
+ self._max_size = max_size if max_size is not None else self.BLOCK_MAX_SIZE
205
207
 
206
208
  if self._project is None and byte_string is None:
207
209
  raise ValueError('"byte_string" has to be specified if "project" is not provided.')
208
210
 
209
- if size is None:
210
- if byte_string is not None:
211
- size = len(byte_string)
212
- elif vex is not None:
213
- size = vex.size
214
- else:
215
- if self._initial_regs:
216
- self.set_initial_regs()
217
- clemory = None
218
- if project is not None:
219
- clemory = (
220
- project.loader.memory_ro_view
221
- if project.loader.memory_ro_view is not None
222
- else project.loader.memory
223
- )
224
- vex = self._vex_engine.lift_vex(
225
- clemory=clemory,
226
- state=backup_state,
227
- insn_bytes=byte_string,
228
- addr=addr,
229
- size=max_size,
230
- thumb=thumb,
231
- extra_stop_points=extra_stop_points,
232
- opt_level=opt_level,
233
- num_inst=num_inst,
234
- traceflags=traceflags,
235
- strict_block_end=strict_block_end,
236
- collect_data_refs=collect_data_refs,
237
- load_from_ro_regions=load_from_ro_regions,
238
- const_prop=const_prop,
239
- cross_insn_opt=cross_insn_opt,
240
- skip_stmts=skip_stmts,
241
- )
242
- if self._initial_regs:
243
- self.reset_initial_regs()
244
- size = vex.size
245
-
246
- if skip_stmts:
247
- self._vex = None
248
- self._vex_nostmt = vex
249
- else:
250
- self._vex = vex
251
- self._vex_nostmt = None
211
+ self._vex = None
212
+ self._vex_nostmt = None
252
213
  self._disassembly = None
253
214
  self._capstone = None
254
- self.size = size
255
215
  self._collect_data_refs = collect_data_refs
256
216
  self._strict_block_end = strict_block_end
257
217
  self._cross_insn_opt = cross_insn_opt
@@ -261,6 +221,23 @@ class Block(Serializable):
261
221
  self._instructions: int | None = num_inst
262
222
  self._instruction_addrs: list[int] = []
263
223
 
224
+ self._bytes = byte_string
225
+ self.size = size
226
+
227
+ if size is None:
228
+ if byte_string is not None:
229
+ size = len(byte_string)
230
+ else:
231
+ vex = self._lift_nocache(skip_stmts)
232
+ size = vex.size
233
+
234
+ if skip_stmts:
235
+ self._vex_nostmt = vex
236
+ else:
237
+ self._vex = vex
238
+
239
+ self.size = size
240
+
264
241
  if skip_stmts:
265
242
  self._parse_vex_info(self._vex_nostmt)
266
243
  else:
@@ -343,50 +320,7 @@ class Block(Serializable):
343
320
  raise ValueError("Project is not set")
344
321
  return self._project.factory.default_engine # type:ignore
345
322
 
346
- @property
347
- def vex(self) -> IRSB | PcodeIRSB:
348
- if not self._vex:
349
- if self._initial_regs:
350
- self.set_initial_regs()
351
- clemory = None
352
- if self._project is not None:
353
- clemory = (
354
- self._project.loader.memory_ro_view
355
- if self._project.loader.memory_ro_view is not None
356
- else self._project.loader.memory
357
- )
358
- self._vex = self._vex_engine.lift_vex(
359
- clemory=clemory,
360
- insn_bytes=self._bytes,
361
- addr=self.addr,
362
- thumb=self.thumb,
363
- size=self.size,
364
- num_inst=self._instructions,
365
- opt_level=self._opt_level,
366
- arch=self.arch,
367
- collect_data_refs=self._collect_data_refs,
368
- strict_block_end=self._strict_block_end,
369
- cross_insn_opt=self._cross_insn_opt,
370
- load_from_ro_regions=self._load_from_ro_regions,
371
- const_prop=self._const_prop,
372
- )
373
- if self._initial_regs:
374
- self.reset_initial_regs()
375
- self._parse_vex_info(self._vex)
376
-
377
- assert self._vex is not None
378
- return self._vex
379
-
380
- @property
381
- def vex_nostmt(self):
382
- if self._vex_nostmt:
383
- return self._vex_nostmt
384
-
385
- if self._vex:
386
- return self._vex
387
-
388
- if self._initial_regs:
389
- self.set_initial_regs()
323
+ def _lift_nocache(self, skip_stmts: bool) -> IRSB | PcodeIRSB:
390
324
  clemory = None
391
325
  if self._project is not None:
392
326
  clemory = (
@@ -394,25 +328,53 @@ class Block(Serializable):
394
328
  if self._project.loader.memory_ro_view is not None
395
329
  else self._project.loader.memory
396
330
  )
397
- self._vex_nostmt = self._vex_engine.lift_vex(
331
+
332
+ if self._initial_regs:
333
+ self.set_initial_regs()
334
+
335
+ vex = self._vex_engine.lift_vex(
336
+ addr=self.addr,
337
+ state=self._backup_state,
398
338
  clemory=clemory,
399
339
  insn_bytes=self._bytes,
400
- addr=self.addr,
401
- thumb=self.thumb,
340
+ arch=self.arch,
402
341
  size=self.size,
403
342
  num_inst=self._instructions,
343
+ traceflags=self._traceflags,
344
+ thumb=self.thumb,
345
+ extra_stop_points=self._extra_stop_points,
404
346
  opt_level=self._opt_level,
405
- arch=self.arch,
406
- skip_stmts=True,
407
- collect_data_refs=self._collect_data_refs,
408
347
  strict_block_end=self._strict_block_end,
348
+ skip_stmts=skip_stmts,
349
+ collect_data_refs=self._collect_data_refs,
409
350
  cross_insn_opt=self._cross_insn_opt,
410
351
  load_from_ro_regions=self._load_from_ro_regions,
411
352
  const_prop=self._const_prop,
412
353
  )
354
+
413
355
  if self._initial_regs:
414
356
  self.reset_initial_regs()
357
+
358
+ return vex
359
+
360
+ @property
361
+ def vex(self) -> IRSB | PcodeIRSB:
362
+ if not self._vex:
363
+ self._vex = self._lift_nocache(False)
364
+ self._parse_vex_info(self._vex)
365
+
366
+ return self._vex
367
+
368
+ @property
369
+ def vex_nostmt(self):
370
+ if self._vex_nostmt:
371
+ return self._vex_nostmt
372
+ if self._vex:
373
+ return self._vex
374
+
375
+ self._vex_nostmt = self._lift_nocache(True)
415
376
  self._parse_vex_info(self._vex_nostmt)
377
+
416
378
  return self._vex_nostmt
417
379
 
418
380
  @property