angr 9.2.96__py3-none-win_amd64.whl → 9.2.98__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of angr might be problematic.

Files changed (52)
  1. angr/__init__.py +1 -1
  2. angr/analyses/cfg/cfg_base.py +14 -1
  3. angr/analyses/cfg/indirect_jump_resolvers/propagator_utils.py +10 -6
  4. angr/analyses/complete_calling_conventions.py +27 -11
  5. angr/analyses/decompiler/ail_simplifier.py +20 -8
  6. angr/analyses/decompiler/condition_processor.py +2 -0
  7. angr/analyses/decompiler/optimization_passes/__init__.py +2 -0
  8. angr/analyses/decompiler/optimization_passes/inlined_string_transformation_simplifier.py +380 -0
  9. angr/analyses/decompiler/optimization_passes/x86_gcc_getpc_simplifier.py +4 -1
  10. angr/analyses/decompiler/peephole_optimizations/__init__.py +1 -0
  11. angr/analyses/decompiler/peephole_optimizations/inlined_strcpy.py +71 -3
  12. angr/analyses/decompiler/peephole_optimizations/inlined_wstrcpy.py +162 -0
  13. angr/analyses/decompiler/structured_codegen/__init__.py +1 -1
  14. angr/analyses/decompiler/structured_codegen/c.py +72 -99
  15. angr/analyses/decompiler/utils.py +5 -1
  16. angr/analyses/find_objects_static.py +15 -10
  17. angr/analyses/forward_analysis/forward_analysis.py +15 -1
  18. angr/analyses/propagator/engine_ail.py +2 -0
  19. angr/analyses/propagator/engine_vex.py +15 -0
  20. angr/analyses/propagator/propagator.py +6 -3
  21. angr/analyses/reaching_definitions/engine_vex.py +6 -0
  22. angr/analyses/reaching_definitions/rd_state.py +14 -1
  23. angr/analyses/reaching_definitions/reaching_definitions.py +19 -2
  24. angr/analyses/variable_recovery/engine_ail.py +6 -6
  25. angr/analyses/variable_recovery/engine_vex.py +6 -0
  26. angr/analyses/variable_recovery/irsb_scanner.py +12 -0
  27. angr/analyses/variable_recovery/variable_recovery_base.py +4 -1
  28. angr/engines/light/engine.py +134 -16
  29. angr/knowledge_plugins/functions/function.py +4 -0
  30. angr/knowledge_plugins/key_definitions/environment.py +11 -0
  31. angr/knowledge_plugins/key_definitions/live_definitions.py +41 -8
  32. angr/knowledge_plugins/key_definitions/uses.py +18 -4
  33. angr/knowledge_plugins/propagations/states.py +22 -3
  34. angr/knowledge_plugins/types.py +6 -0
  35. angr/knowledge_plugins/variables/variable_manager.py +31 -5
  36. angr/lib/angr_native.dll +0 -0
  37. angr/simos/simos.py +2 -0
  38. angr/storage/memory_mixins/__init__.py +3 -0
  39. angr/storage/memory_mixins/multi_value_merger_mixin.py +22 -11
  40. angr/storage/memory_mixins/paged_memory/paged_memory_mixin.py +20 -2
  41. angr/storage/memory_mixins/paged_memory/pages/list_page.py +20 -5
  42. angr/storage/memory_mixins/paged_memory/pages/mv_list_page.py +82 -44
  43. angr/storage/memory_mixins/simple_interface_mixin.py +4 -0
  44. angr/utils/cowdict.py +4 -2
  45. angr/utils/funcid.py +6 -0
  46. angr/utils/mp.py +1 -1
  47. {angr-9.2.96.dist-info → angr-9.2.98.dist-info}/METADATA +6 -6
  48. {angr-9.2.96.dist-info → angr-9.2.98.dist-info}/RECORD +52 -50
  49. {angr-9.2.96.dist-info → angr-9.2.98.dist-info}/LICENSE +0 -0
  50. {angr-9.2.96.dist-info → angr-9.2.98.dist-info}/WHEEL +0 -0
  51. {angr-9.2.96.dist-info → angr-9.2.98.dist-info}/entry_points.txt +0 -0
  52. {angr-9.2.96.dist-info → angr-9.2.98.dist-info}/top_level.txt +0 -0
angr/__init__.py CHANGED
@@ -1,7 +1,7 @@
1
1
  # pylint: disable=wildcard-import
2
2
  # pylint: disable=wrong-import-position
3
3
 
4
- __version__ = "9.2.96"
4
+ __version__ = "9.2.98"
5
5
 
6
6
  if bytes is str:
7
7
  raise Exception(
@@ -8,7 +8,7 @@ from sortedcontainers import SortedDict
8
8
 
9
9
  import pyvex
10
10
  from claripy.utils.orderedset import OrderedSet
11
- from cle import ELF, PE, Blob, TLSObject, MachO, ExternObject, KernelObject, FunctionHintSource, Hex, Coff, SRec
11
+ from cle import ELF, PE, Blob, TLSObject, MachO, ExternObject, KernelObject, FunctionHintSource, Hex, Coff, SRec, XBE
12
12
  from cle.backends import NamedRegion
13
13
  import archinfo
14
14
  from archinfo.arch_soot import SootAddressDescriptor
@@ -778,6 +778,17 @@ class CFGBase(Analysis):
778
778
  tpl = (section.min_addr, section.max_addr + 1)
779
779
  memory_regions.append(tpl)
780
780
 
781
+ elif isinstance(b, XBE):
782
+ # some XBE files will mark the data sections as executable
783
+ for section in b.sections:
784
+ if (
785
+ section.is_executable
786
+ and not section.is_writable
787
+ and section.name not in {".data", ".rdata", ".rodata"}
788
+ ):
789
+ tpl = (section.min_addr, section.max_addr + 1)
790
+ memory_regions.append(tpl)
791
+
781
792
  elif isinstance(b, MachO):
782
793
  if b.segments:
783
794
  # Get all executable segments
@@ -797,9 +808,11 @@ class CFGBase(Analysis):
797
808
  # a blob is entirely executable
798
809
  tpl = (b.min_addr, b.max_addr + 1)
799
810
  memory_regions.append(tpl)
811
+
800
812
  elif isinstance(b, NamedRegion):
801
813
  # NamedRegions have no content! Ignore
802
814
  pass
815
+
803
816
  elif isinstance(b, self._cle_pseudo_objects):
804
817
  pass
805
818
 
@@ -13,10 +13,14 @@ class PropagatorLoadCallback:
13
13
  # only allow loading if the address falls into a read-only region
14
14
  if isinstance(addr, claripy.ast.BV) and addr.op == "BVV":
15
15
  addr_v = addr.args[0]
16
- section = self.project.loader.find_section_containing(addr_v)
17
- if section is not None:
18
- return section.is_readable and not section.is_writable
19
- segment = self.project.loader.find_segment_containing(addr_v)
20
- if segment is not None:
21
- return segment.is_readable and not segment.is_writable
16
+ elif isinstance(addr, int):
17
+ addr_v = addr
18
+ else:
19
+ return False
20
+ section = self.project.loader.find_section_containing(addr_v)
21
+ if section is not None:
22
+ return section.is_readable and not section.is_writable
23
+ segment = self.project.loader.find_segment_containing(addr_v)
24
+ if segment is not None:
25
+ return segment.is_readable and not segment.is_writable
22
26
  return False
@@ -1,3 +1,4 @@
1
+ # pylint:disable=import-outside-toplevel
1
2
  from typing import Tuple, Optional, Callable, Iterable, Dict, Set, TYPE_CHECKING
2
3
  import queue
3
4
  import threading
@@ -10,6 +11,7 @@ import networkx
10
11
  import claripy
11
12
 
12
13
  from angr.utils.graph import GraphUtils
14
+ from angr.simos import SimWindows
13
15
  from ..utils.mp import mp_context, Initializer
14
16
  from ..knowledge_plugins.cfg import CFGModel
15
17
  from . import Analysis, register_analysis, VariableRecoveryFast, CallingConventionAnalysis
@@ -88,11 +90,13 @@ class CompleteCallingConventionsAnalysis(Analysis):
88
90
  self._results = []
89
91
  if workers > 0:
90
92
  self._remaining_funcs = _mp_context.Value("i", 0)
91
- self._func_queue = _mp_context.Queue()
92
93
  self._results = _mp_context.Queue()
94
+ self._results_lock = _mp_context.Lock()
95
+ self._func_queue = _mp_context.Queue()
93
96
  self._func_queue_lock = _mp_context.Lock()
94
97
  else:
95
98
  self._remaining_funcs = None # not needed
99
+ self._results_lock = None # not needed
96
100
  self._func_queue = None # not needed
97
101
  self._func_queue_lock = threading.Lock()
98
102
 
@@ -205,9 +209,7 @@ class CompleteCallingConventionsAnalysis(Analysis):
205
209
  dependents[callee].add(func_addr)
206
210
 
207
211
  # enqueue all leaf functions
208
- for func_addr in list(
209
- k for k in depends_on if not depends_on[k]
210
- ): # pylint:disable=consider-using-dict-items
212
+ for func_addr in [k for k in depends_on if not depends_on[k]]: # pylint:disable=consider-using-dict-items
211
213
  self._func_queue.put((func_addr, None))
212
214
  del depends_on[func_addr]
213
215
 
@@ -215,11 +217,17 @@ class CompleteCallingConventionsAnalysis(Analysis):
215
217
  cc_callback = self._cc_callback
216
218
  self._cc_callback = None
217
219
 
220
+ if self.project.simos is not None and isinstance(self.project.simos, SimWindows):
221
+ # delayed import
222
+ from angr.procedures.definitions import load_win32api_definitions
223
+
224
+ Initializer.get().register(load_win32api_definitions)
225
+
218
226
  # spawn workers to perform the analysis
219
227
  with self._func_queue_lock:
220
228
  procs = [
221
- _mp_context.Process(target=self._worker_routine, args=(Initializer.get(),), daemon=True)
222
- for _ in range(self._workers)
229
+ _mp_context.Process(target=self._worker_routine, args=(worker_id, Initializer.get()), daemon=True)
230
+ for worker_id in range(self._workers)
223
231
  ]
224
232
  for proc_idx, proc in enumerate(procs):
225
233
  self._update_progress(0, text=f"Spawning worker {proc_idx}...")
@@ -231,7 +239,13 @@ class CompleteCallingConventionsAnalysis(Analysis):
231
239
  self._update_progress(0)
232
240
  idx = 0
233
241
  while idx < total_funcs:
234
- func_addr, cc, proto, proto_libname, varman = self._results.get(True)
242
+ try:
243
+ with self._results_lock:
244
+ func_addr, cc, proto, proto_libname, varman = self._results.get(True, timeout=0.01)
245
+ except queue.Empty:
246
+ time.sleep(0.1)
247
+ continue
248
+
235
249
  func = self.kb.functions.get_by_addr(func_addr)
236
250
  if cc is not None or proto is not None:
237
251
  func.calling_convention = cc
@@ -260,13 +274,14 @@ class CompleteCallingConventionsAnalysis(Analysis):
260
274
  depends_on[dependent].discard(func_addr)
261
275
  if not depends_on[dependent]:
262
276
  callee_prototypes = self._get_callees_cc_prototypes(dependent)
263
- self._func_queue.put((dependent, callee_prototypes))
277
+ with self._func_queue_lock:
278
+ self._func_queue.put((dependent, callee_prototypes))
264
279
  del depends_on[dependent]
265
280
 
266
281
  for proc in procs:
267
282
  proc.join()
268
283
 
269
- def _worker_routine(self, initializer: Initializer):
284
+ def _worker_routine(self, worker_id: int, initializer: Initializer):
270
285
  initializer.initialize()
271
286
  idx = 0
272
287
  while self._remaining_funcs.value > 0:
@@ -293,9 +308,10 @@ class CompleteCallingConventionsAnalysis(Analysis):
293
308
  try:
294
309
  cc, proto, proto_libname, varman = self._analyze_core(func_addr)
295
310
  except Exception: # pylint:disable=broad-except
296
- _l.error("Exception occurred during _analyze_core().", exc_info=True)
311
+ _l.error("Worker %d: Exception occurred during _analyze_core().", worker_id, exc_info=True)
297
312
  cc, proto, proto_libname, varman = None, None, None, None
298
- self._results.put((func_addr, cc, proto, proto_libname, varman))
313
+ with self._results_lock:
314
+ self._results.put((func_addr, cc, proto, proto_libname, varman))
299
315
 
300
316
  def _analyze_core(
301
317
  self, func_addr: int
@@ -1,3 +1,4 @@
1
+ # pylint:disable=too-many-boolean-expressions
1
2
  from typing import Set, Dict, List, Tuple, Any, Optional, TYPE_CHECKING
2
3
  from collections import defaultdict
3
4
  import logging
@@ -183,6 +184,7 @@ class AILSimplifier(Analysis):
183
184
  observe_all=False,
184
185
  use_callee_saved_regs_at_return=self._use_callee_saved_regs_at_return,
185
186
  track_tmps=True,
187
+ element_limit=1,
186
188
  ).model
187
189
  self._reaching_definitions = rd
188
190
  return rd
@@ -504,7 +506,9 @@ class AILSimplifier(Analysis):
504
506
 
505
507
  first_op = walker.operations[0]
506
508
  if isinstance(first_op, Convert):
507
- return first_op.to_bits // self.project.arch.byte_width, ("convert", (first_op,))
509
+ if first_op.to_bits >= self.project.arch.byte_width:
510
+ # we need at least one byte!
511
+ return first_op.to_bits // self.project.arch.byte_width, ("convert", (first_op,))
508
512
  if isinstance(first_op, BinaryOp):
509
513
  second_op = None
510
514
  if len(walker.operations) >= 2:
@@ -526,6 +530,7 @@ class AILSimplifier(Analysis):
526
530
  and first_op.op not in {"Shr", "Sar"}
527
531
  and isinstance(second_op, Convert)
528
532
  and second_op.from_bits == expr.bits
533
+ and second_op.to_bits >= self.project.arch.byte_width # we need at least one byte!
529
534
  ):
530
535
  return min(expr.bits, second_op.to_bits) // self.project.arch.byte_width, (
531
536
  "binop-convert",
@@ -721,13 +726,13 @@ class AILSimplifier(Analysis):
721
726
  ):
722
727
  continue
723
728
 
724
- # Make sure the register is never updated across this function
725
- if any(
726
- (def_ != the_def and def_.atom == the_def.atom)
727
- for def_ in rd.all_definitions
728
- if isinstance(def_.atom, atoms.Register) and rd.all_uses.get_uses(def_)
729
- ):
730
- continue
729
+ # Make sure the register is never updated across this function
730
+ if any(
731
+ (def_ != the_def and def_.atom == the_def.atom)
732
+ for def_ in rd.all_definitions
733
+ if isinstance(def_.atom, atoms.Register) and rd.all_uses.get_uses(def_)
734
+ ):
735
+ continue
731
736
 
732
737
  # find all its uses
733
738
  all_arg_copy_var_uses: Set[Tuple[CodeLocation, Any]] = set(
@@ -1214,6 +1219,13 @@ class AILSimplifier(Analysis):
1214
1219
  continue
1215
1220
 
1216
1221
  uses = rd.all_uses.get_uses(def_)
1222
+ if (
1223
+ isinstance(def_.atom, atoms.Register)
1224
+ and def_.atom.reg_offset in self.project.arch.artificial_registers_offsets
1225
+ ):
1226
+ if len(uses) == 1 and next(iter(uses)) == def_.codeloc:
1227
+ # cc_ndep = amd64g_calculate_condition(..., cc_ndep)
1228
+ uses = set()
1217
1229
 
1218
1230
  if not uses:
1219
1231
  if not isinstance(def_.codeloc, ExternalCodeLocation):
@@ -766,6 +766,8 @@ class ConditionProcessor:
766
766
  var = claripy.BoolV(condition.value)
767
767
  else:
768
768
  var = claripy.BVV(condition.value, condition.bits)
769
+ if isinstance(var, claripy.Bits) and var.size() == 1:
770
+ var = claripy.true if var.concrete_value == 1 else claripy.false
769
771
  return var
770
772
  elif isinstance(condition, ailment.Expr.Tmp):
771
773
  l.warning("Left-over ailment.Tmp variable %s.", condition)
@@ -25,6 +25,7 @@ from .win_stack_canary_simplifier import WinStackCanarySimplifier
25
25
  from .cross_jump_reverter import CrossJumpReverter
26
26
  from .code_motion import CodeMotionOptimization
27
27
  from .switch_default_case_duplicator import SwitchDefaultCaseDuplicator
28
+ from .inlined_string_transformation_simplifier import InlinedStringTransformationSimplifier
28
29
 
29
30
  # order matters!
30
31
  _all_optimization_passes = [
@@ -49,6 +50,7 @@ _all_optimization_passes = [
49
50
  (CodeMotionOptimization, True),
50
51
  (CrossJumpReverter, True),
51
52
  (FlipBooleanCmp, True),
53
+ (InlinedStringTransformationSimplifier, True),
52
54
  ]
53
55
 
54
56
  # these passes may duplicate code to remove gotos or improve the structure of the graph
@@ -0,0 +1,380 @@
1
+ # pylint:disable=arguments-renamed,too-many-boolean-expressions,no-self-use
2
+ from __future__ import annotations
3
+ from typing import Any, DefaultDict
4
+ from collections import defaultdict
5
+
6
+ from archinfo import Endness
7
+ from ailment.expression import Const, Register, Load, StackBaseOffset, Convert, BinaryOp
8
+ from ailment.statement import Store, ConditionalJump, Jump
9
+ import claripy
10
+
11
+ from angr.engines.light import SimEngineLightAILMixin
12
+ from angr.storage.memory_mixins import (
13
+ SimpleInterfaceMixin,
14
+ DefaultFillerMixin,
15
+ PagedMemoryMixin,
16
+ UltraPagesMixin,
17
+ )
18
+ from angr.code_location import CodeLocation
19
+ from angr.errors import SimMemoryMissingError
20
+ from .optimization_pass import OptimizationPass, OptimizationPassStage
21
+
22
+
23
+ class FasterMemory(
24
+ SimpleInterfaceMixin,
25
+ DefaultFillerMixin,
26
+ UltraPagesMixin,
27
+ PagedMemoryMixin,
28
+ ):
29
+ """
30
+ A fast memory model used in InlinedStringTransformationState.
31
+ """
32
+
33
+
34
+ class InlinedStringTransformationState:
35
+ """
36
+ The abstract state used in InlinedStringTransformationAILEngine.
37
+ """
38
+
39
+ def __init__(self, project):
40
+ self.arch = project.arch
41
+ self.project = project
42
+
43
+ self.registers = FasterMemory(memory_id="reg")
44
+ self.memory = FasterMemory(memory_id="mem")
45
+
46
+ self.registers.set_state(self)
47
+ self.memory.set_state(self)
48
+
49
+ def _get_weakref(self):
50
+ return self
51
+
52
+ def reg_store(self, reg: Register, value: claripy.Bits) -> None:
53
+ self.registers.store(
54
+ reg.reg_offset, value, size=value.size() // self.arch.byte_width, endness=str(self.arch.register_endness)
55
+ )
56
+
57
+ def reg_load(self, reg: Register) -> claripy.Bits | None:
58
+ try:
59
+ return self.registers.load(
60
+ reg.reg_offset, size=reg.size, endness=self.arch.register_endness, fill_missing=False
61
+ )
62
+ except SimMemoryMissingError:
63
+ return None
64
+
65
+ def mem_store(self, addr: int, value: claripy.Bits, endness: str) -> None:
66
+ self.memory.store(addr, value, size=value.size() // self.arch.byte_width, endness=endness)
67
+
68
+ def mem_load(self, addr: int, size: int, endness) -> claripy.Bits | None:
69
+ try:
70
+ return self.memory.load(addr, size=size, endness=str(endness), fill_missing=False)
71
+ except SimMemoryMissingError:
72
+ return None
73
+
74
+
75
+ class InlinedStringTransformationAILEngine(SimEngineLightAILMixin):
76
+ """
77
+ A simple AIL execution engine
78
+ """
79
+
80
+ def __init__(self, project, nodes: dict[int, Any], start: int, end: int, step_limit: int):
81
+ super().__init__()
82
+
83
+ self.arch = project.arch
84
+ self.nodes: dict[int, Any] = nodes
85
+ self.start: int = start
86
+ self.end: int = end
87
+ self.step_limit: int = step_limit
88
+
89
+ self.STACK_BASE = 0x7FFF_FFF0 if self.arch.bits == 32 else 0x7FFF_FFFF_F000
90
+ self.MASK = 0xFFFF_FFFF if self.arch.bits == 32 else 0xFFFF_FFFF_FFFF_FFFF
91
+
92
+ state = InlinedStringTransformationState(project)
93
+ self.stack_accesses: DefaultDict[int, list[tuple[str, CodeLocation, claripy.Bits]]] = defaultdict(list)
94
+ self.finished: bool = False
95
+
96
+ i = 0
97
+ self.pc = self.start
98
+ while i < self.step_limit:
99
+ if self.pc not in self.nodes:
100
+ # jumped to a node that we do not know about
101
+ break
102
+ block = self.nodes[self.pc]
103
+ self._process(state, None, block=block)
104
+ if self.pc is None:
105
+ # not sure where to jump...
106
+ break
107
+ if self.pc == self.end:
108
+ # we reach the end of execution!
109
+ self.finished = True
110
+ break
111
+ i += 1
112
+
113
+ def _process_address(self, addr: Const | StackBaseOffset) -> tuple[int, str] | None:
114
+ if isinstance(addr, Const):
115
+ return addr.value, "mem"
116
+ if isinstance(addr, StackBaseOffset):
117
+ return (addr.offset + self.STACK_BASE) & self.MASK, "stack"
118
+ if isinstance(addr, BinaryOp) and isinstance(addr.operands[0], StackBaseOffset):
119
+ v0_and_type = self._process_address(addr.operands[0])
120
+ if v0_and_type is not None:
121
+ v0 = v0_and_type[0]
122
+ v1 = self._expr(addr.operands[1])
123
+ if isinstance(v1, claripy.Bits) and v1.concrete:
124
+ return (v0 + v1.concrete_value) & self.MASK, "stack"
125
+ return None
126
+
127
+ def _handle_Assignment(self, stmt):
128
+ if isinstance(stmt.dst, Register):
129
+ val = self._expr(stmt.src)
130
+ if isinstance(val, claripy.Bits):
131
+ self.state.reg_store(stmt.dst, val)
132
+
133
+ def _handle_Store(self, stmt):
134
+ addr_and_type = self._process_address(stmt.addr)
135
+ if addr_and_type is not None:
136
+ addr, addr_type = addr_and_type
137
+ val = self._expr(stmt.data)
138
+ if isinstance(val, claripy.ast.BV):
139
+ self.state.mem_store(addr, val, stmt.endness)
140
+ # log it
141
+ if addr_type == "stack":
142
+ for i in range(0, val.size() // self.arch.byte_width):
143
+ byte_off = i
144
+ if self.arch.memory_endness == Endness.LE:
145
+ byte_off = val.size() // self.arch.byte_width - i - 1
146
+ self.stack_accesses[addr + i].append(("store", self._codeloc(), val.get_byte(byte_off)))
147
+
148
+ def _handle_Jump(self, stmt):
149
+ if isinstance(stmt.target, Const):
150
+ self.pc = stmt.target.value
151
+ else:
152
+ self.pc = None
153
+
154
+ def _handle_ConditionalJump(self, stmt):
155
+ self.pc = None
156
+ if isinstance(stmt.true_target, Const) and isinstance(stmt.false_target, Const):
157
+ cond = self._expr(stmt.condition)
158
+ if cond is not None:
159
+ if isinstance(cond, claripy.Bits) and cond.concrete_value == 1:
160
+ self.pc = stmt.true_target.value
161
+ elif isinstance(cond, claripy.Bits) and cond.concrete_value == 0:
162
+ self.pc = stmt.false_target.value
163
+
164
+ def _handle_Const(self, expr):
165
+ return claripy.BVV(expr.value, expr.bits)
166
+
167
+ def _handle_Load(self, expr: Load):
168
+ addr_and_type = self._process_address(expr.addr)
169
+ if addr_and_type is not None:
170
+ addr, addr_type = addr_and_type
171
+ v = self.state.mem_load(addr, expr.size, expr.endness)
172
+ # log it
173
+ if addr_type == "stack" and isinstance(v, claripy.ast.BV):
174
+ for i in range(0, expr.size):
175
+ byte_off = i
176
+ if self.arch.memory_endness == Endness.LE:
177
+ byte_off = expr.size - i - 1
178
+ self.stack_accesses[addr + i].append(("load", self._codeloc(), v.get_byte(byte_off)))
179
+ return v
180
+ return None
181
+
182
+ def _handle_Register(self, expr: Register):
183
+ return self.state.reg_load(expr)
184
+
185
+ def _handle_Convert(self, expr: Convert):
186
+ v = self._expr(expr.operand)
187
+ if isinstance(v, claripy.Bits):
188
+ if expr.to_bits > expr.from_bits:
189
+ if not expr.is_signed:
190
+ return claripy.ZeroExt(expr.to_bits - expr.from_bits, v)
191
+ return claripy.SignExt(expr.to_bits - expr.from_bits, v)
192
+ elif expr.to_bits < expr.from_bits:
193
+ return claripy.Extract(expr.to_bits - 1, 0, v)
194
+ else:
195
+ return v
196
+ return None
197
+
198
+ def _handle_CmpEQ(self, expr):
199
+ op0, op1 = self._expr(expr.operands[0]), self._expr(expr.operands[1])
200
+ if isinstance(op0, claripy.Bits) and isinstance(op1, claripy.Bits) and op0.concrete and op1.concrete:
201
+ return claripy.BVV(1, 1) if op0.concrete_value == op1.concrete_value else claripy.BVV(0, 1)
202
+ return None
203
+
204
+ def _handle_CmpNE(self, expr):
205
+ op0, op1 = self._expr(expr.operands[0]), self._expr(expr.operands[1])
206
+ if isinstance(op0, claripy.Bits) and isinstance(op1, claripy.Bits) and op0.concrete and op1.concrete:
207
+ return claripy.BVV(1, 1) if op0.concrete_value != op1.concrete_value else claripy.BVV(0, 1)
208
+ return None
209
+
210
+ def _handle_CmpLT(self, expr):
211
+ op0, op1 = self._expr(expr.operands[0]), self._expr(expr.operands[1])
212
+ if isinstance(op0, claripy.Bits) and isinstance(op1, claripy.Bits) and op0.concrete and op1.concrete:
213
+ return claripy.BVV(1, 1) if op0.concrete_value < op1.concrete_value else claripy.BVV(0, 1)
214
+ return None
215
+
216
+ def _handle_CmpLE(self, expr):
217
+ op0, op1 = self._expr(expr.operands[0]), self._expr(expr.operands[1])
218
+ if isinstance(op0, claripy.Bits) and isinstance(op1, claripy.Bits) and op0.concrete and op1.concrete:
219
+ return claripy.BVV(1, 1) if op0.concrete_value <= op1.concrete_value else claripy.BVV(0, 1)
220
+ return None
221
+
222
+ def _handle_CmpGT(self, expr):
223
+ op0, op1 = self._expr(expr.operands[0]), self._expr(expr.operands[1])
224
+ if isinstance(op0, claripy.Bits) and isinstance(op1, claripy.Bits) and op0.concrete and op1.concrete:
225
+ return claripy.BVV(1, 1) if op0.concrete_value > op1.concrete_value else claripy.BVV(0, 1)
226
+ return None
227
+
228
+ def _handle_CmpGE(self, expr):
229
+ op0, op1 = self._expr(expr.operands[0]), self._expr(expr.operands[1])
230
+ if isinstance(op0, claripy.Bits) and isinstance(op1, claripy.Bits) and op0.concrete and op1.concrete:
231
+ return claripy.BVV(1, 1) if op0.concrete_value >= op1.concrete_value else claripy.BVV(0, 1)
232
+ return None
233
+
234
+
235
+ class InlineStringTransformationDescriptor:
236
+ """
237
+ Describes an instance of inline string transformation.
238
+ """
239
+
240
+ def __init__(self, store_block, loop_body, stack_accesses, beginning_stack_offset):
241
+ self.store_block = store_block
242
+ self.loop_body = loop_body
243
+ self.stack_accesses = stack_accesses
244
+ self.beginning_stack_offset = beginning_stack_offset
245
+
246
+
247
+ class InlinedStringTransformationSimplifier(OptimizationPass):
248
+ """
249
+ Simplifies inlined string transformation routines.
250
+ """
251
+
252
+ ARCHES = None
253
+ PLATFORMS = None
254
+ STAGE = OptimizationPassStage.AFTER_GLOBAL_SIMPLIFICATION
255
+ NAME = "Simplify string transformations"
256
+ DESCRIPTION = "Simplify string transformations that are commonly used in obfuscated functions."
257
+
258
+ def __init__(self, func, **kwargs):
259
+ super().__init__(func, **kwargs)
260
+ self.analyze()
261
+
262
+ def _check(self):
263
+ string_transformation_descs = self._find_string_transformation_loops()
264
+
265
+ return bool(string_transformation_descs), {"descs": string_transformation_descs}
266
+
267
+ def _analyze(self, cache=None):
268
+ if not cache or "descs" not in cache:
269
+ return
270
+
271
+ for desc in cache["descs"]:
272
+ desc: InlineStringTransformationDescriptor
273
+
274
+ # remove the original statements
275
+ skip_stmt_indices = set()
276
+ for stack_accesses in desc.stack_accesses:
277
+ # the first element is the initial storing statement
278
+ codeloc = stack_accesses[0][1]
279
+ assert codeloc.block_addr == desc.store_block.addr
280
+ skip_stmt_indices.add(codeloc.stmt_idx)
281
+ new_statements = [
282
+ stmt for idx, stmt in enumerate(desc.store_block.statements) if idx not in skip_stmt_indices
283
+ ]
284
+
285
+ # add new statements
286
+ store_statements = []
287
+ for off, stack_accesses in enumerate(desc.stack_accesses):
288
+ # the last element is the final storing statement
289
+ stack_addr = StackBaseOffset(None, self.project.arch.bits, desc.beginning_stack_offset + off)
290
+ new_value_ast = stack_accesses[-1][2]
291
+ new_value = Const(None, None, new_value_ast.concrete_value, self.project.arch.byte_width)
292
+ stmt = Store(
293
+ None,
294
+ stack_addr,
295
+ new_value,
296
+ 1,
297
+ "Iend_LE",
298
+ ins_addr=desc.store_block.addr + desc.store_block.original_size - 1,
299
+ )
300
+ store_statements.append(stmt)
301
+ if new_statements and isinstance(new_statements[-1], (ConditionalJump, Jump)):
302
+ new_statements = new_statements[:-1] + store_statements + new_statements[-1:]
303
+ else:
304
+ new_statements += store_statements
305
+
306
+ new_store_block = desc.store_block.copy(statements=new_statements)
307
+ self._update_block(desc.store_block, new_store_block)
308
+
309
+ # remove the loop node
310
+ # since the loop node has exactly one external predecessor and one external successor, we can get rid of it
311
+ pred = next(iter(nn for nn in self.out_graph.predecessors(desc.loop_body) if nn is not desc.loop_body))
312
+ succ = next(iter(nn for nn in self.out_graph.successors(desc.loop_body) if nn is not desc.loop_body))
313
+
314
+ self.out_graph.remove_node(desc.loop_body)
315
+ self.out_graph.add_edge(pred, succ)
316
+
317
+ if pred.statements and isinstance(pred.statements[-1], ConditionalJump):
318
+ pred.statements[-1] = Jump(
319
+ None,
320
+ Const(None, None, succ.addr, self.project.arch.bits),
321
+ succ.idx,
322
+ **pred.statements[-1].tags,
323
+ )
324
+
325
+ def _find_string_transformation_loops(self):
326
+ # find self loops
327
+ self_loops = []
328
+ for node in self._graph.nodes:
329
+ preds = list(self._graph.predecessors(node))
330
+ succs = list(self._graph.successors(node))
331
+ if len(preds) == 2 and len(succs) == 2 and node in preds and node in succs:
332
+ pred = next(iter(nn for nn in preds if nn is not node))
333
+ succ = next(iter(nn for nn in succs if nn is not node))
334
+ if (
335
+ self._graph.out_degree[pred] == 1
336
+ and self._graph.in_degree[succ] == 1
337
+ or self._graph.out_degree[pred] == 2
338
+ and self._graph.in_degree[succ] == 2
339
+ and self._graph.has_edge(pred, succ)
340
+ ):
341
+ # found it
342
+ self_loops.append(node)
343
+
344
+ if not self_loops:
345
+ return []
346
+
347
+ descs = []
348
+ for loop_node in self_loops:
349
+ pred = next(iter(nn for nn in self._graph.predecessors(loop_node) if nn is not loop_node))
350
+ succ = next(iter(nn for nn in self._graph.successors(loop_node) if nn is not loop_node))
351
+ engine = InlinedStringTransformationAILEngine(
352
+ self.project, {pred.addr: pred, loop_node.addr: loop_node}, pred.addr, succ.addr, 1024
353
+ )
354
+ if engine.finished:
355
+ # find the longest slide where the stack accesses are like the following:
356
+ # "store", code_location_a, value_a
357
+ # "load", code_location_b, value_a
358
+ # "store", code_location_b, value_b
359
+ # where value_a and value_b may be the same
360
+ candidate_stack_addrs = []
361
+ for stack_addr in sorted(engine.stack_accesses.keys()):
362
+ stack_accesses = engine.stack_accesses[stack_addr]
363
+ if len(stack_accesses) == 3:
364
+ item0, item1, item2 = stack_accesses
365
+ if item0[0] == "store" and item1[0] == "load" and item2[0] == "store":
366
+ if item0[1] != item1[1] and item1[1] == item2[1]:
367
+ if item0[2] is item1[2]:
368
+ # found one!
369
+ candidate_stack_addrs.append(stack_addr)
370
+
371
+ if (
372
+ len(candidate_stack_addrs) >= 2
373
+ and candidate_stack_addrs[-1] == candidate_stack_addrs[0] + len(candidate_stack_addrs) - 1
374
+ ):
375
+ filtered_stack_accesses = [engine.stack_accesses[a] for a in candidate_stack_addrs]
376
+ stack_offset = candidate_stack_addrs[0] - engine.STACK_BASE
377
+ info = InlineStringTransformationDescriptor(pred, loop_node, filtered_stack_accesses, stack_offset)
378
+ descs.append(info)
379
+
380
+ return descs
@@ -76,7 +76,10 @@ class X86GccGetPcSimplifier(OptimizationPass):
76
76
  and isinstance(block.statements[-1].target, ailment.Expr.Const)
77
77
  ):
78
78
  call_func_addr = block.statements[-1].target.value
79
- call_func = self.kb.functions.get_by_addr(call_func_addr)
79
+ try:
80
+ call_func = self.kb.functions.get_by_addr(call_func_addr)
81
+ except KeyError:
82
+ continue
80
83
  if "get_pc" in call_func.info:
81
84
  results.append(
82
85
  (key, len(block.statements) - 1, call_func.info["get_pc"], block.addr + block.original_size),
@@ -42,6 +42,7 @@ from .invert_negated_logical_conjuction_disjunction import InvertNegatedLogicalC
42
42
  from .rol_ror import RolRorRewriter
43
43
  from .inlined_strcpy import InlinedStrcpy
44
44
  from .inlined_strcpy_consolidation import InlinedStrcpyConsolidation
45
+ from .inlined_wstrcpy import InlinedWstrcpy
45
46
 
46
47
  from .base import PeepholeOptimizationExprBase, PeepholeOptimizationStmtBase, PeepholeOptimizationMultiStmtBase
47
48