angr 9.2.95__py3-none-macosx_10_9_x86_64.whl → 9.2.97__py3-none-macosx_10_9_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (56)
  1. angr/__init__.py +1 -1
  2. angr/analyses/cfg/cfg_fast.py +9 -6
  3. angr/analyses/cfg/indirect_jump_resolvers/const_resolver.py +6 -1
  4. angr/analyses/complete_calling_conventions.py +27 -11
  5. angr/analyses/decompiler/ail_simplifier.py +30 -8
  6. angr/analyses/decompiler/ccall_rewriters/amd64_ccalls.py +20 -7
  7. angr/analyses/decompiler/clinic.py +21 -5
  8. angr/analyses/decompiler/condition_processor.py +11 -0
  9. angr/analyses/decompiler/decompiler.py +58 -46
  10. angr/analyses/decompiler/optimization_passes/__init__.py +11 -5
  11. angr/analyses/decompiler/optimization_passes/flip_boolean_cmp.py +13 -7
  12. angr/analyses/decompiler/optimization_passes/optimization_pass.py +31 -11
  13. angr/analyses/decompiler/optimization_passes/{return_duplicator.py → return_duplicator_base.py} +54 -102
  14. angr/analyses/decompiler/optimization_passes/return_duplicator_high.py +57 -0
  15. angr/analyses/decompiler/optimization_passes/return_duplicator_low.py +121 -0
  16. angr/analyses/decompiler/region_identifier.py +13 -0
  17. angr/analyses/decompiler/seq_to_blocks.py +19 -0
  18. angr/analyses/decompiler/structured_codegen/c.py +21 -0
  19. angr/analyses/decompiler/structuring/phoenix.py +28 -4
  20. angr/analyses/decompiler/structuring/recursive_structurer.py +35 -1
  21. angr/analyses/decompiler/structuring/structurer_base.py +3 -0
  22. angr/analyses/decompiler/utils.py +41 -6
  23. angr/analyses/disassembly.py +4 -1
  24. angr/analyses/find_objects_static.py +15 -10
  25. angr/analyses/forward_analysis/forward_analysis.py +15 -1
  26. angr/analyses/propagator/engine_ail.py +40 -0
  27. angr/analyses/propagator/propagator.py +6 -3
  28. angr/analyses/reaching_definitions/engine_ail.py +16 -24
  29. angr/analyses/reaching_definitions/rd_state.py +14 -1
  30. angr/analyses/reaching_definitions/reaching_definitions.py +19 -2
  31. angr/analyses/variable_recovery/engine_ail.py +6 -6
  32. angr/analyses/variable_recovery/engine_base.py +22 -4
  33. angr/analyses/variable_recovery/variable_recovery_base.py +4 -1
  34. angr/engines/light/engine.py +8 -1
  35. angr/knowledge_plugins/key_definitions/atoms.py +4 -2
  36. angr/knowledge_plugins/key_definitions/environment.py +11 -0
  37. angr/knowledge_plugins/key_definitions/live_definitions.py +41 -8
  38. angr/knowledge_plugins/key_definitions/uses.py +18 -4
  39. angr/knowledge_plugins/propagations/states.py +22 -3
  40. angr/knowledge_plugins/types.py +6 -0
  41. angr/knowledge_plugins/variables/variable_manager.py +54 -5
  42. angr/lib/angr_native.dylib +0 -0
  43. angr/simos/simos.py +2 -0
  44. angr/storage/memory_mixins/__init__.py +3 -0
  45. angr/storage/memory_mixins/multi_value_merger_mixin.py +22 -11
  46. angr/storage/memory_mixins/paged_memory/paged_memory_mixin.py +20 -2
  47. angr/storage/memory_mixins/paged_memory/pages/mv_list_page.py +81 -44
  48. angr/utils/cowdict.py +4 -2
  49. angr/utils/funcid.py +6 -0
  50. angr/utils/mp.py +1 -1
  51. {angr-9.2.95.dist-info → angr-9.2.97.dist-info}/METADATA +6 -6
  52. {angr-9.2.95.dist-info → angr-9.2.97.dist-info}/RECORD +56 -53
  53. {angr-9.2.95.dist-info → angr-9.2.97.dist-info}/LICENSE +0 -0
  54. {angr-9.2.95.dist-info → angr-9.2.97.dist-info}/WHEEL +0 -0
  55. {angr-9.2.95.dist-info → angr-9.2.97.dist-info}/entry_points.txt +0 -0
  56. {angr-9.2.95.dist-info → angr-9.2.97.dist-info}/top_level.txt +0 -0
angr/__init__.py CHANGED
@@ -1,7 +1,7 @@
1
1
  # pylint: disable=wildcard-import
2
2
  # pylint: disable=wrong-import-position
3
3
 
4
- __version__ = "9.2.95"
4
+ __version__ = "9.2.97"
5
5
 
6
6
  if bytes is str:
7
7
  raise Exception(
@@ -1948,7 +1948,8 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
1948
1948
  entries: List[CFGJob] = []
1949
1949
 
1950
1950
  if (
1951
- self.functions.contains_addr(cfg_job.src_node.addr)
1951
+ cfg_job.src_node is not None
1952
+ and self.functions.contains_addr(cfg_job.src_node.addr)
1952
1953
  and self.functions[cfg_job.src_node.addr].is_default_name
1953
1954
  and cfg_job.src_node.addr not in self.kb.labels
1954
1955
  and cfg_job.jumpkind == "Ijk_Boring"
@@ -3561,14 +3562,16 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
3561
3562
  # Graph utils
3562
3563
  #
3563
3564
 
3564
- def _graph_add_edge(self, cfg_node, src_node, src_jumpkind, src_ins_addr, src_stmt_idx):
3565
+ def _graph_add_edge(
3566
+ self, cfg_node: CFGNode, src_node: Optional[CFGNode], src_jumpkind: str, src_ins_addr: int, src_stmt_idx: int
3567
+ ):
3565
3568
  """
3566
3569
  Add edge between nodes, or add node if entry point
3567
3570
 
3568
- :param CFGNode cfg_node: node which is jumped to
3569
- :param CFGNode src_node: node which is jumped from none if entry point
3570
- :param str src_jumpkind: what type of jump the edge takes
3571
- :param int or str src_stmt_idx: source statements ID
3571
+ :param cfg_node: node which is jumped to
3572
+ :param src_node: node which is jumped from none if entry point
3573
+ :param src_jumpkind: what type of jump the edge takes
3574
+ :param src_stmt_idx: source statements ID
3572
3575
  :return: None
3573
3576
  """
3574
3577
 
@@ -69,6 +69,12 @@ class ConstantResolver(IndirectJumpResolver):
69
69
  :param jumpkind: VEX jumpkind (Ijk_Boring or Ijk_Call)
70
70
  :return: Bool tuple with replacement address
71
71
  """
72
+ if not cfg.functions.contains_addr(func_addr):
73
+ # the function does not exist
74
+ return False, []
75
+
76
+ func = cfg.functions.get_by_addr(func_addr)
77
+
72
78
  vex_block = block.vex
73
79
  if isinstance(vex_block.next, pyvex.expr.RdTmp):
74
80
  # what does the jump rely on? slice it back and see
@@ -105,7 +111,6 @@ class ConstantResolver(IndirectJumpResolver):
105
111
  return False, []
106
112
  break
107
113
 
108
- func = cfg.functions[func_addr]
109
114
  _l.debug("ConstantResolver: Propagating for %r at %#x.", func, addr)
110
115
  prop = self.project.analyses.Propagator(
111
116
  func=func,
@@ -1,3 +1,4 @@
1
+ # pylint:disable=import-outside-toplevel
1
2
  from typing import Tuple, Optional, Callable, Iterable, Dict, Set, TYPE_CHECKING
2
3
  import queue
3
4
  import threading
@@ -10,6 +11,7 @@ import networkx
10
11
  import claripy
11
12
 
12
13
  from angr.utils.graph import GraphUtils
14
+ from angr.simos import SimWindows
13
15
  from ..utils.mp import mp_context, Initializer
14
16
  from ..knowledge_plugins.cfg import CFGModel
15
17
  from . import Analysis, register_analysis, VariableRecoveryFast, CallingConventionAnalysis
@@ -88,11 +90,13 @@ class CompleteCallingConventionsAnalysis(Analysis):
88
90
  self._results = []
89
91
  if workers > 0:
90
92
  self._remaining_funcs = _mp_context.Value("i", 0)
91
- self._func_queue = _mp_context.Queue()
92
93
  self._results = _mp_context.Queue()
94
+ self._results_lock = _mp_context.Lock()
95
+ self._func_queue = _mp_context.Queue()
93
96
  self._func_queue_lock = _mp_context.Lock()
94
97
  else:
95
98
  self._remaining_funcs = None # not needed
99
+ self._results_lock = None # not needed
96
100
  self._func_queue = None # not needed
97
101
  self._func_queue_lock = threading.Lock()
98
102
 
@@ -205,9 +209,7 @@ class CompleteCallingConventionsAnalysis(Analysis):
205
209
  dependents[callee].add(func_addr)
206
210
 
207
211
  # enqueue all leaf functions
208
- for func_addr in list(
209
- k for k in depends_on if not depends_on[k]
210
- ): # pylint:disable=consider-using-dict-items
212
+ for func_addr in [k for k in depends_on if not depends_on[k]]: # pylint:disable=consider-using-dict-items
211
213
  self._func_queue.put((func_addr, None))
212
214
  del depends_on[func_addr]
213
215
 
@@ -215,11 +217,17 @@ class CompleteCallingConventionsAnalysis(Analysis):
215
217
  cc_callback = self._cc_callback
216
218
  self._cc_callback = None
217
219
 
220
+ if self.project.simos is not None and isinstance(self.project.simos, SimWindows):
221
+ # delayed import
222
+ from angr.procedures.definitions import load_win32api_definitions
223
+
224
+ Initializer.get().register(load_win32api_definitions)
225
+
218
226
  # spawn workers to perform the analysis
219
227
  with self._func_queue_lock:
220
228
  procs = [
221
- _mp_context.Process(target=self._worker_routine, args=(Initializer.get(),), daemon=True)
222
- for _ in range(self._workers)
229
+ _mp_context.Process(target=self._worker_routine, args=(worker_id, Initializer.get()), daemon=True)
230
+ for worker_id in range(self._workers)
223
231
  ]
224
232
  for proc_idx, proc in enumerate(procs):
225
233
  self._update_progress(0, text=f"Spawning worker {proc_idx}...")
@@ -231,7 +239,13 @@ class CompleteCallingConventionsAnalysis(Analysis):
231
239
  self._update_progress(0)
232
240
  idx = 0
233
241
  while idx < total_funcs:
234
- func_addr, cc, proto, proto_libname, varman = self._results.get(True)
242
+ try:
243
+ with self._results_lock:
244
+ func_addr, cc, proto, proto_libname, varman = self._results.get(True, timeout=0.01)
245
+ except queue.Empty:
246
+ time.sleep(0.1)
247
+ continue
248
+
235
249
  func = self.kb.functions.get_by_addr(func_addr)
236
250
  if cc is not None or proto is not None:
237
251
  func.calling_convention = cc
@@ -260,13 +274,14 @@ class CompleteCallingConventionsAnalysis(Analysis):
260
274
  depends_on[dependent].discard(func_addr)
261
275
  if not depends_on[dependent]:
262
276
  callee_prototypes = self._get_callees_cc_prototypes(dependent)
263
- self._func_queue.put((dependent, callee_prototypes))
277
+ with self._func_queue_lock:
278
+ self._func_queue.put((dependent, callee_prototypes))
264
279
  del depends_on[dependent]
265
280
 
266
281
  for proc in procs:
267
282
  proc.join()
268
283
 
269
- def _worker_routine(self, initializer: Initializer):
284
+ def _worker_routine(self, worker_id: int, initializer: Initializer):
270
285
  initializer.initialize()
271
286
  idx = 0
272
287
  while self._remaining_funcs.value > 0:
@@ -293,9 +308,10 @@ class CompleteCallingConventionsAnalysis(Analysis):
293
308
  try:
294
309
  cc, proto, proto_libname, varman = self._analyze_core(func_addr)
295
310
  except Exception: # pylint:disable=broad-except
296
- _l.error("Exception occurred during _analyze_core().", exc_info=True)
311
+ _l.error("Worker %d: Exception occurred during _analyze_core().", worker_id, exc_info=True)
297
312
  cc, proto, proto_libname, varman = None, None, None, None
298
- self._results.put((func_addr, cc, proto, proto_libname, varman))
313
+ with self._results_lock:
314
+ self._results.put((func_addr, cc, proto, proto_libname, varman))
299
315
 
300
316
  def _analyze_core(
301
317
  self, func_addr: int
@@ -1,3 +1,4 @@
1
+ # pylint:disable=too-many-boolean-expressions
1
2
  from typing import Set, Dict, List, Tuple, Any, Optional, TYPE_CHECKING
2
3
  from collections import defaultdict
3
4
  import logging
@@ -183,6 +184,7 @@ class AILSimplifier(Analysis):
183
184
  observe_all=False,
184
185
  use_callee_saved_regs_at_return=self._use_callee_saved_regs_at_return,
185
186
  track_tmps=True,
187
+ element_limit=1,
186
188
  ).model
187
189
  self._reaching_definitions = rd
188
190
  return rd
@@ -233,6 +235,16 @@ class AILSimplifier(Analysis):
233
235
  sorted_defs = sorted(rd.all_definitions, key=lambda d: d.codeloc, reverse=True)
234
236
  for def_ in (d_ for d_ in sorted_defs if d_.codeloc.context is None):
235
237
  if isinstance(def_.atom, atoms.Register):
238
+ # only do this for general purpose register
239
+ skip_def = False
240
+ for reg in self.project.arch.register_list:
241
+ if not reg.artificial and reg.vex_offset == def_.atom.reg_offset and not reg.general_purpose:
242
+ skip_def = True
243
+ break
244
+
245
+ if skip_def:
246
+ continue
247
+
236
248
  needs_narrowing, to_size, use_exprs = self._narrowing_needed(def_, rd, addr_and_idx_to_block)
237
249
  if needs_narrowing:
238
250
  # replace the definition
@@ -494,7 +506,9 @@ class AILSimplifier(Analysis):
494
506
 
495
507
  first_op = walker.operations[0]
496
508
  if isinstance(first_op, Convert):
497
- return first_op.to_bits // self.project.arch.byte_width, ("convert", (first_op,))
509
+ if first_op.to_bits >= self.project.arch.byte_width:
510
+ # we need at least one byte!
511
+ return first_op.to_bits // self.project.arch.byte_width, ("convert", (first_op,))
498
512
  if isinstance(first_op, BinaryOp):
499
513
  second_op = None
500
514
  if len(walker.operations) >= 2:
@@ -516,6 +530,7 @@ class AILSimplifier(Analysis):
516
530
  and first_op.op not in {"Shr", "Sar"}
517
531
  and isinstance(second_op, Convert)
518
532
  and second_op.from_bits == expr.bits
533
+ and second_op.to_bits >= self.project.arch.byte_width # we need at least one byte!
519
534
  ):
520
535
  return min(expr.bits, second_op.to_bits) // self.project.arch.byte_width, (
521
536
  "binop-convert",
@@ -711,13 +726,13 @@ class AILSimplifier(Analysis):
711
726
  ):
712
727
  continue
713
728
 
714
- # Make sure the register is never updated across this function
715
- if any(
716
- (def_ != the_def and def_.atom == the_def.atom)
717
- for def_ in rd.all_definitions
718
- if isinstance(def_.atom, atoms.Register) and rd.all_uses.get_uses(def_)
719
- ):
720
- continue
729
+ # Make sure the register is never updated across this function
730
+ if any(
731
+ (def_ != the_def and def_.atom == the_def.atom)
732
+ for def_ in rd.all_definitions
733
+ if isinstance(def_.atom, atoms.Register) and rd.all_uses.get_uses(def_)
734
+ ):
735
+ continue
721
736
 
722
737
  # find all its uses
723
738
  all_arg_copy_var_uses: Set[Tuple[CodeLocation, Any]] = set(
@@ -1204,6 +1219,13 @@ class AILSimplifier(Analysis):
1204
1219
  continue
1205
1220
 
1206
1221
  uses = rd.all_uses.get_uses(def_)
1222
+ if (
1223
+ isinstance(def_.atom, atoms.Register)
1224
+ and def_.atom.reg_offset in self.project.arch.artificial_registers_offsets
1225
+ ):
1226
+ if len(uses) == 1 and next(iter(uses)) == def_.codeloc:
1227
+ # cc_ndep = amd64g_calculate_condition(..., cc_ndep)
1228
+ uses = set()
1207
1229
 
1208
1230
  if not uses:
1209
1231
  if not isinstance(def_.codeloc, ExternalCodeLocation):
@@ -151,14 +151,14 @@ class AMD64CCallRewriter(CCallRewriterBase):
151
151
  **ccall.tags,
152
152
  )
153
153
  return Expr.Convert(None, r.bits, ccall.bits, False, r, **ccall.tags)
154
- elif cond_v == AMD64_CondTypes["CondZ"]:
154
+ elif cond_v in {AMD64_CondTypes["CondZ"], AMD64_CondTypes["CondNZ"]}:
155
155
  if op_v in {
156
156
  AMD64_OpTypes["G_CC_OP_SUBB"],
157
157
  AMD64_OpTypes["G_CC_OP_SUBW"],
158
158
  AMD64_OpTypes["G_CC_OP_SUBL"],
159
159
  AMD64_OpTypes["G_CC_OP_SUBQ"],
160
160
  }:
161
- # dep_1 - dep_2 == 0
161
+ # dep_1 - dep_2 == 0 or dep_1 - dep_2 != 0
162
162
 
163
163
  dep_1 = self._fix_size(
164
164
  dep_1,
@@ -176,8 +176,9 @@ class AMD64CCallRewriter(CCallRewriterBase):
176
176
  AMD64_OpTypes["G_CC_OP_SUBL"],
177
177
  ccall.tags,
178
178
  )
179
+ expr_op = "CmpEQ" if cond_v == AMD64_CondTypes["CondZ"] else "CmpNE"
179
180
 
180
- r = Expr.BinaryOp(ccall.idx, "CmpEQ", (dep_1, dep_2), False, **ccall.tags)
181
+ r = Expr.BinaryOp(ccall.idx, expr_op, (dep_1, dep_2), False, **ccall.tags)
181
182
  return Expr.Convert(None, r.bits, ccall.bits, False, r, **ccall.tags)
182
183
  elif op_v in {
183
184
  AMD64_OpTypes["G_CC_OP_LOGICB"],
@@ -185,7 +186,7 @@ class AMD64CCallRewriter(CCallRewriterBase):
185
186
  AMD64_OpTypes["G_CC_OP_LOGICL"],
186
187
  AMD64_OpTypes["G_CC_OP_LOGICQ"],
187
188
  }:
188
- # dep_1 == 0
189
+ # dep_1 == 0 or dep_1 != 0
189
190
 
190
191
  dep_1 = self._fix_size(
191
192
  dep_1,
@@ -195,9 +196,10 @@ class AMD64CCallRewriter(CCallRewriterBase):
195
196
  AMD64_OpTypes["G_CC_OP_LOGICL"],
196
197
  ccall.tags,
197
198
  )
199
+ expr_op = "CmpEQ" if cond_v == AMD64_CondTypes["CondZ"] else "CmpNE"
198
200
 
199
201
  r = Expr.BinaryOp(
200
- ccall.idx, "CmpEQ", (dep_1, Expr.Const(None, None, 0, dep_1.bits)), False, **ccall.tags
202
+ ccall.idx, expr_op, (dep_1, Expr.Const(None, None, 0, dep_1.bits)), False, **ccall.tags
201
203
  )
202
204
  return Expr.Convert(None, r.bits, ccall.bits, False, r, **ccall.tags)
203
205
  elif op_v in {
@@ -206,7 +208,7 @@ class AMD64CCallRewriter(CCallRewriterBase):
206
208
  AMD64_OpTypes["G_CC_OP_SHRL"],
207
209
  AMD64_OpTypes["G_CC_OP_SHRQ"],
208
210
  }:
209
- # dep_1 == 0
211
+ # dep_1 == 0 or dep_1 != 0
210
212
 
211
213
  dep_1 = self._fix_size(
212
214
  dep_1,
@@ -216,9 +218,20 @@ class AMD64CCallRewriter(CCallRewriterBase):
216
218
  AMD64_OpTypes["G_CC_OP_SHRL"],
217
219
  ccall.tags,
218
220
  )
221
+ expr_op = "CmpEQ" if cond_v == AMD64_CondTypes["CondZ"] else "CmpNE"
219
222
 
220
223
  zero = Expr.Const(None, None, 0, dep_1.bits)
221
- r = Expr.BinaryOp(ccall.idx, "CmpEQ", (dep_1, zero), False, **ccall.tags)
224
+ r = Expr.BinaryOp(ccall.idx, expr_op, (dep_1, zero), False, **ccall.tags)
225
+ return Expr.Convert(None, r.bits, ccall.bits, False, r, **ccall.tags)
226
+ elif op_v == AMD64_OpTypes["G_CC_OP_COPY"]:
227
+ # dep_1 & G_CC_MASK_Z == 0 or dep_1 & G_CC_MASK_Z != 0
228
+
229
+ flag = Expr.Const(None, None, AMD64_CondBitMasks["G_CC_MASK_Z"], dep_1.bits)
230
+ masked_dep = Expr.BinaryOp(None, "And", [dep_1, flag], False, **ccall.tags)
231
+ zero = Expr.Const(None, None, 0, dep_1.bits)
232
+ expr_op = "CmpEQ" if cond_v == AMD64_CondTypes["CondZ"] else "CmpNE"
233
+
234
+ r = Expr.BinaryOp(ccall.idx, expr_op, (masked_dep, zero), False, **ccall.tags)
222
235
  return Expr.Convert(None, r.bits, ccall.bits, False, r, **ccall.tags)
223
236
  elif cond_v == AMD64_CondTypes["CondL"]:
224
237
  if op_v in {
@@ -34,7 +34,13 @@ from .. import Analysis, register_analysis
34
34
  from ..cfg.cfg_base import CFGBase
35
35
  from ..reaching_definitions import ReachingDefinitionsAnalysis
36
36
  from .ailgraph_walker import AILGraphWalker, RemoveNodeNotice
37
- from .optimization_passes import get_default_optimization_passes, OptimizationPassStage, RegisterSaveAreaSimplifier
37
+ from .optimization_passes import (
38
+ get_default_optimization_passes,
39
+ OptimizationPassStage,
40
+ RegisterSaveAreaSimplifier,
41
+ DUPLICATING_OPTS,
42
+ CONDENSING_OPTS,
43
+ )
38
44
 
39
45
  if TYPE_CHECKING:
40
46
  from angr.knowledge_plugins.cfg import CFGModel
@@ -102,6 +108,7 @@ class Clinic(Analysis):
102
108
 
103
109
  self.graph = None
104
110
  self.cc_graph: Optional[networkx.DiGraph] = None
111
+ self.unoptimized_graph: Optional[networkx.DiGraph] = None
105
112
  self.arg_list = None
106
113
  self.variable_kb = variable_kb
107
114
  self.externs: Set[SimMemoryVariable] = set()
@@ -297,7 +304,7 @@ class Clinic(Analysis):
297
304
  _, stackarg_offsets = self._make_callsites(ail_graph, stack_pointer_tracker=spt)
298
305
 
299
306
  # Run simplification passes
300
- self._update_progress(65.0, text="Running simplifications 2")
307
+ self._update_progress(53.0, text="Running simplifications 2")
301
308
  ail_graph = self._run_simplification_passes(ail_graph, stage=OptimizationPassStage.AFTER_MAKING_CALLSITES)
302
309
 
303
310
  # Simplify the entire function for the second time
@@ -443,7 +450,8 @@ class Clinic(Analysis):
443
450
  self.externs = None
444
451
  self.data_refs: Dict[int, List[DataRefDesc]] = self._collect_data_refs(ail_graph)
445
452
 
446
- def copy_graph(self) -> networkx.DiGraph:
453
+ @staticmethod
454
+ def _copy_graph(graph: networkx.DiGraph) -> networkx.DiGraph:
447
455
  """
448
456
  Copy AIL Graph.
449
457
 
@@ -452,7 +460,7 @@ class Clinic(Analysis):
452
460
  graph_copy = networkx.DiGraph()
453
461
  block_mapping = {}
454
462
  # copy all blocks
455
- for block in self.graph.nodes():
463
+ for block in graph.nodes():
456
464
  new_block = copy.copy(block)
457
465
  new_stmts = copy.copy(block.statements)
458
466
  new_block.statements = new_stmts
@@ -460,12 +468,15 @@ class Clinic(Analysis):
460
468
  graph_copy.add_node(new_block)
461
469
 
462
470
  # copy all edges
463
- for src, dst, data in self.graph.edges(data=True):
471
+ for src, dst, data in graph.edges(data=True):
464
472
  new_src = block_mapping[src]
465
473
  new_dst = block_mapping[dst]
466
474
  graph_copy.add_edge(new_src, new_dst, **data)
467
475
  return graph_copy
468
476
 
477
+ def copy_graph(self) -> networkx.DiGraph:
478
+ return self._copy_graph(self.graph)
479
+
469
480
  @timethis
470
481
  def _set_function_graph(self):
471
482
  self._func_graph = self.function.graph_ex(exception_edges=self._exception_edges)
@@ -926,6 +937,11 @@ class Clinic(Analysis):
926
937
  if pass_.STAGE != stage:
927
938
  continue
928
939
 
940
+ if pass_ in DUPLICATING_OPTS + CONDENSING_OPTS and self.unoptimized_graph is None:
941
+ # we should save a copy at the first time any optimization that could alter the structure
942
+ # of the graph is applied
943
+ self.unoptimized_graph = self._copy_graph(ail_graph)
944
+
929
945
  a = pass_(
930
946
  self.function,
931
947
  blocks_by_addr=addr_to_blocks,
@@ -77,6 +77,12 @@ def _dummy_bvs(condition, condition_mapping, name_suffix=""):
77
77
  return var
78
78
 
79
79
 
80
+ def _dummy_bools(condition, condition_mapping, name_suffix=""):
81
+ var = claripy.BoolS(f"ailexpr_{repr(condition)}{name_suffix}", explicit_name=True)
82
+ condition_mapping[var.args[0]] = condition
83
+ return var
84
+
85
+
80
86
  _ail2claripy_op_mapping = {
81
87
  "LogicalAnd": lambda expr, conv, _: claripy.And(conv(expr.operands[0]), conv(expr.operands[1])),
82
88
  "LogicalOr": lambda expr, conv, _: claripy.Or(conv(expr.operands[0]), conv(expr.operands[1])),
@@ -117,6 +123,7 @@ _ail2claripy_op_mapping = {
117
123
  "Carry": lambda expr, _, m: _dummy_bvs(expr, m),
118
124
  "SCarry": lambda expr, _, m: _dummy_bvs(expr, m),
119
125
  "SBorrow": lambda expr, _, m: _dummy_bvs(expr, m),
126
+ "ExpCmpNE": lambda expr, _, m: _dummy_bools(expr, m),
120
127
  }
121
128
 
122
129
  #
@@ -482,6 +489,8 @@ class ConditionProcessor:
482
489
  continue
483
490
  raise EmptyBlockNotice()
484
491
  if type(block) is LoopNode:
492
+ if block.sequence_node is None:
493
+ raise EmptyBlockNotice()
485
494
  return cls.get_last_statements(block.sequence_node)
486
495
  if type(block) is ConditionalBreakNode:
487
496
  return [block]
@@ -757,6 +766,8 @@ class ConditionProcessor:
757
766
  var = claripy.BoolV(condition.value)
758
767
  else:
759
768
  var = claripy.BVV(condition.value, condition.bits)
769
+ if isinstance(var, claripy.Bits) and var.size() == 1:
770
+ var = claripy.true if var.concrete_value == 1 else claripy.false
760
771
  return var
761
772
  elif isinstance(condition, ailment.Expr.Tmp):
762
773
  l.warning("Left-over ailment.Tmp variable %s.", condition)
@@ -63,6 +63,7 @@ class Decompiler(Analysis):
63
63
  decompile=True,
64
64
  regen_clinic=True,
65
65
  update_memory_data: bool = True,
66
+ generate_code: bool = True,
66
67
  ):
67
68
  if not isinstance(func, Function):
68
69
  func = self.kb.functions[func]
@@ -85,13 +86,15 @@ class Decompiler(Analysis):
85
86
  self._binop_operators = binop_operators
86
87
  self._regen_clinic = regen_clinic
87
88
  self._update_memory_data = update_memory_data
89
+ self._generate_code = generate_code
88
90
 
89
91
  self.clinic = None # mostly for debugging purposes
90
92
  self.codegen: Optional["CStructuredCodeGenerator"] = None
91
93
  self.cache: Optional[DecompilationCache] = None
92
94
  self.options_by_class = None
93
- self.seq_node = None
94
- self.unmodified_clinic_graph = None
95
+ self.seq_node: Optional["SequenceNode"] = None
96
+ self.unoptimized_ail_graph: Optional[networkx.DiGraph] = None
97
+ self.ail_graph: Optional[networkx.DiGraph] = None
95
98
 
96
99
  if decompile:
97
100
  self._decompile()
@@ -185,9 +188,11 @@ class Decompiler(Analysis):
185
188
  # the function is empty
186
189
  return
187
190
 
188
- # expose a copy of the graph before structuring optimizations happen
191
+ # expose a copy of the graph before any optimizations that may change the graph occur;
189
192
  # use this graph if you need a reference of exact mapping of instructions to AIL statements
190
- self.unmodified_clinic_graph = clinic.copy_graph()
193
+ self.unoptimized_ail_graph = (
194
+ clinic.unoptimized_graph if clinic.unoptimized_graph is not None else clinic.copy_graph()
195
+ )
191
196
  cond_proc = ConditionProcessor(self.project.arch)
192
197
 
193
198
  clinic.graph = self._run_graph_simplification_passes(
@@ -212,54 +217,61 @@ class Decompiler(Analysis):
212
217
 
213
218
  # save the graph before structuring happens (for AIL view)
214
219
  clinic.cc_graph = remove_labels(clinic.copy_graph())
215
- self._update_progress(75.0, text="Structuring code")
216
-
217
- # structure it
218
- rs = self.project.analyses[RecursiveStructurer].prep(kb=self.kb)(
219
- ri.region,
220
- cond_proc=cond_proc,
221
- func=self.func,
222
- **self._recursive_structurer_params,
223
- )
224
- self._update_progress(80.0, text="Simplifying regions")
225
220
 
226
- # simplify it
227
- s = self.project.analyses.RegionSimplifier(
228
- self.func,
229
- rs.result,
230
- kb=self.kb,
231
- variable_kb=clinic.variable_kb,
232
- **self.options_to_params(self.options_by_class["region_simplifier"]),
233
- )
234
- seq_node = s.result
235
- seq_node = self._run_post_structuring_simplification_passes(
236
- seq_node, binop_operators=cache.binop_operators, goto_manager=s.goto_manager, graph=clinic.graph
237
- )
238
- self._update_progress(85.0, text="Generating code")
221
+ codegen = None
222
+ seq_node = None
223
+ # in the event that the decompiler is used without code generation as the target, we should avoid all
224
+ # heavy analysis that is used only for the purpose of code generation
225
+ if self._generate_code:
226
+ self._update_progress(75.0, text="Structuring code")
227
+
228
+ # structure it
229
+ rs = self.project.analyses[RecursiveStructurer].prep(kb=self.kb)(
230
+ ri.region,
231
+ cond_proc=cond_proc,
232
+ func=self.func,
233
+ **self._recursive_structurer_params,
234
+ )
235
+ self._update_progress(80.0, text="Simplifying regions")
236
+
237
+ # simplify it
238
+ s = self.project.analyses.RegionSimplifier(
239
+ self.func,
240
+ rs.result,
241
+ kb=self.kb,
242
+ variable_kb=clinic.variable_kb,
243
+ **self.options_to_params(self.options_by_class["region_simplifier"]),
244
+ )
245
+ seq_node = s.result
246
+ seq_node = self._run_post_structuring_simplification_passes(
247
+ seq_node, binop_operators=cache.binop_operators, goto_manager=s.goto_manager, graph=clinic.graph
248
+ )
249
+ # update memory data
250
+ if self._cfg is not None and self._update_memory_data:
251
+ self.find_data_references_and_update_memory_data(seq_node)
239
252
 
240
- # update memory data
241
- if self._cfg is not None and self._update_memory_data:
242
- self.find_data_references_and_update_memory_data(seq_node)
253
+ self._update_progress(85.0, text="Generating code")
254
+ codegen = self.project.analyses.StructuredCodeGenerator(
255
+ self.func,
256
+ seq_node,
257
+ cfg=self._cfg,
258
+ ail_graph=clinic.graph,
259
+ flavor=self._flavor,
260
+ func_args=clinic.arg_list,
261
+ kb=self.kb,
262
+ variable_kb=clinic.variable_kb,
263
+ expr_comments=old_codegen.expr_comments if old_codegen is not None else None,
264
+ stmt_comments=old_codegen.stmt_comments if old_codegen is not None else None,
265
+ const_formats=old_codegen.const_formats if old_codegen is not None else None,
266
+ externs=clinic.externs,
267
+ **self.options_to_params(self.options_by_class["codegen"]),
268
+ )
243
269
 
244
- codegen = self.project.analyses.StructuredCodeGenerator(
245
- self.func,
246
- seq_node,
247
- cfg=self._cfg,
248
- ail_graph=clinic.graph,
249
- flavor=self._flavor,
250
- func_args=clinic.arg_list,
251
- kb=self.kb,
252
- variable_kb=clinic.variable_kb,
253
- expr_comments=old_codegen.expr_comments if old_codegen is not None else None,
254
- stmt_comments=old_codegen.stmt_comments if old_codegen is not None else None,
255
- const_formats=old_codegen.const_formats if old_codegen is not None else None,
256
- externs=clinic.externs,
257
- **self.options_to_params(self.options_by_class["codegen"]),
258
- )
259
270
  self._update_progress(90.0, text="Finishing up")
260
-
261
271
  self.seq_node = seq_node
262
272
  self.codegen = codegen
273
+ # save a copy of the AIL graph that is optimized but not modified by region identification
274
+ self.ail_graph = clinic.cc_graph
263
275
  self.cache.codegen = codegen
264
276
  self.cache.clinic = self.clinic
265
277
 
@@ -13,7 +13,8 @@ from .lowered_switch_simplifier import LoweredSwitchSimplifier
13
13
  from .multi_simplifier import MultiSimplifier
14
14
  from .div_simplifier import DivSimplifier
15
15
  from .mod_simplifier import ModSimplifier
16
- from .return_duplicator import ReturnDuplicator
16
+ from .return_duplicator_low import ReturnDuplicatorLow
17
+ from .return_duplicator_high import ReturnDuplicatorHigh
17
18
  from .const_derefs import ConstantDereferencesSimplifier
18
19
  from .register_save_area_simplifier import RegisterSaveAreaSimplifier
19
20
  from .ret_addr_save_simplifier import RetAddrSaveSimplifier
@@ -40,9 +41,10 @@ _all_optimization_passes = [
40
41
  (ITERegionConverter, True),
41
42
  (ITEExprConverter, True),
42
43
  (ExprOpSwapper, True),
44
+ (ReturnDuplicatorHigh, True),
43
45
  (SwitchDefaultCaseDuplicator, True),
44
- (ReturnDuplicator, True),
45
46
  (LoweredSwitchSimplifier, False),
47
+ (ReturnDuplicatorLow, True),
46
48
  (ReturnDeduplicator, True),
47
49
  (CodeMotionOptimization, True),
48
50
  (CrossJumpReverter, True),
@@ -50,7 +52,7 @@ _all_optimization_passes = [
50
52
  ]
51
53
 
52
54
  # these passes may duplicate code to remove gotos or improve the structure of the graph
53
- DUPLICATING_OPTS = [ReturnDuplicator, CrossJumpReverter]
55
+ DUPLICATING_OPTS = [ReturnDuplicatorLow, ReturnDuplicatorHigh, CrossJumpReverter]
54
56
  # these passes may destroy blocks by merging them into semantically equivalent blocks
55
57
  CONDENSING_OPTS = [CodeMotionOptimization, ReturnDeduplicator]
56
58
 
@@ -74,7 +76,9 @@ def get_optimization_passes(arch, platform):
74
76
  return passes
75
77
 
76
78
 
77
- def get_default_optimization_passes(arch: Union[Arch, str], platform: Optional[str]):
79
+ def get_default_optimization_passes(
80
+ arch: Union[Arch, str], platform: Optional[str], enable_opts=None, disable_opts=None
81
+ ):
78
82
  if isinstance(arch, Arch):
79
83
  arch = arch.name
80
84
 
@@ -84,8 +88,10 @@ def get_default_optimization_passes(arch: Union[Arch, str], platform: Optional[s
84
88
  platform = "windows" # sigh
85
89
 
86
90
  passes = []
91
+ enable_opts = enable_opts or []
92
+ disable_opts = disable_opts or []
87
93
  for pass_, default in _all_optimization_passes:
88
- if not default:
94
+ if (not default and pass_ not in enable_opts) or pass_ in disable_opts:
89
95
  continue
90
96
  if (pass_.ARCHES is None or arch in pass_.ARCHES) and (
91
97
  pass_.PLATFORMS is None or platform is None or platform in pass_.PLATFORMS