angr 9.2.97__py3-none-win_amd64.whl → 9.2.99__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



Files changed (33)
  1. angr/__init__.py +1 -1
  2. angr/analyses/cfg/cfg_base.py +14 -1
  3. angr/analyses/cfg/cfg_fast.py +3 -3
  4. angr/analyses/cfg/indirect_jump_resolvers/propagator_utils.py +10 -6
  5. angr/analyses/decompiler/clinic.py +2 -40
  6. angr/analyses/decompiler/optimization_passes/__init__.py +2 -0
  7. angr/analyses/decompiler/optimization_passes/inlined_string_transformation_simplifier.py +380 -0
  8. angr/analyses/decompiler/optimization_passes/ite_region_converter.py +10 -2
  9. angr/analyses/decompiler/optimization_passes/x86_gcc_getpc_simplifier.py +4 -1
  10. angr/analyses/decompiler/peephole_optimizations/__init__.py +1 -0
  11. angr/analyses/decompiler/peephole_optimizations/inlined_strcpy.py +71 -3
  12. angr/analyses/decompiler/peephole_optimizations/inlined_wstrcpy.py +162 -0
  13. angr/analyses/decompiler/region_simplifiers/expr_folding.py +5 -3
  14. angr/analyses/decompiler/return_maker.py +71 -0
  15. angr/analyses/decompiler/structured_codegen/__init__.py +1 -1
  16. angr/analyses/decompiler/structured_codegen/c.py +72 -99
  17. angr/analyses/decompiler/utils.py +5 -1
  18. angr/analyses/propagator/engine_vex.py +15 -0
  19. angr/analyses/reaching_definitions/engine_vex.py +6 -0
  20. angr/analyses/variable_recovery/engine_vex.py +6 -0
  21. angr/analyses/variable_recovery/irsb_scanner.py +12 -0
  22. angr/engines/light/engine.py +126 -15
  23. angr/knowledge_plugins/functions/function.py +4 -0
  24. angr/lib/angr_native.dll +0 -0
  25. angr/storage/memory_mixins/paged_memory/pages/list_page.py +20 -5
  26. angr/storage/memory_mixins/paged_memory/pages/mv_list_page.py +2 -1
  27. angr/storage/memory_mixins/simple_interface_mixin.py +4 -0
  28. {angr-9.2.97.dist-info → angr-9.2.99.dist-info}/METADATA +6 -6
  29. {angr-9.2.97.dist-info → angr-9.2.99.dist-info}/RECORD +33 -30
  30. {angr-9.2.97.dist-info → angr-9.2.99.dist-info}/LICENSE +0 -0
  31. {angr-9.2.97.dist-info → angr-9.2.99.dist-info}/WHEEL +0 -0
  32. {angr-9.2.97.dist-info → angr-9.2.99.dist-info}/entry_points.txt +0 -0
  33. {angr-9.2.97.dist-info → angr-9.2.99.dist-info}/top_level.txt +0 -0
angr/__init__.py CHANGED
@@ -1,7 +1,7 @@
  # pylint: disable=wildcard-import
  # pylint: disable=wrong-import-position

- __version__ = "9.2.97"
+ __version__ = "9.2.99"

  if bytes is str:
      raise Exception(
@@ -8,7 +8,7 @@ from sortedcontainers import SortedDict

  import pyvex
  from claripy.utils.orderedset import OrderedSet
- from cle import ELF, PE, Blob, TLSObject, MachO, ExternObject, KernelObject, FunctionHintSource, Hex, Coff, SRec
+ from cle import ELF, PE, Blob, TLSObject, MachO, ExternObject, KernelObject, FunctionHintSource, Hex, Coff, SRec, XBE
  from cle.backends import NamedRegion
  import archinfo
  from archinfo.arch_soot import SootAddressDescriptor

angr/analyses/cfg/cfg_base.py CHANGED
@@ -778,6 +778,17 @@ class CFGBase(Analysis):
                          tpl = (section.min_addr, section.max_addr + 1)
                          memory_regions.append(tpl)

+             elif isinstance(b, XBE):
+                 # some XBE files will mark the data sections as executable
+                 for section in b.sections:
+                     if (
+                         section.is_executable
+                         and not section.is_writable
+                         and section.name not in {".data", ".rdata", ".rodata"}
+                     ):
+                         tpl = (section.min_addr, section.max_addr + 1)
+                         memory_regions.append(tpl)
+
          elif isinstance(b, MachO):
              if b.segments:
                  # Get all executable segments
@@ -797,9 +808,11 @@
                  # a blob is entirely executable
                  tpl = (b.min_addr, b.max_addr + 1)
                  memory_regions.append(tpl)
+
              elif isinstance(b, NamedRegion):
                  # NamedRegions have no content! Ignore
                  pass
+
              elif isinstance(b, self._cle_pseudo_objects):
                  pass

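The new XBE branch above only matters for original-Xbox executables, whose data sections are sometimes flagged executable. As a rough, hedged illustration of the filter it applies (not part of the diff; the binary path is a placeholder, while `XBE`, `is_executable`, `is_writable`, and the excluded section names come straight from the hunk above):

```python
import angr
from cle import XBE  # exported by the cle release paired with angr 9.2.99

proj = angr.Project("default.xbe", auto_load_libs=False)  # placeholder path
obj = proj.loader.main_object

if isinstance(obj, XBE):
    for section in obj.sections:
        # Mirror the filter added in cfg_base.py: executable, not writable,
        # and not one of the well-known data section names.
        treated_as_code = (
            section.is_executable
            and not section.is_writable
            and section.name not in {".data", ".rdata", ".rodata"}
        )
        print(f"{section.name}: {'scanned as code' if treated_as_code else 'skipped'}")
```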

angr/analyses/cfg/cfg_fast.py CHANGED
@@ -3287,7 +3287,7 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin

          removed_nodes = set()

-         a = None  # it always hold the very recent non-removed node
+         a = None  # it always holds the very recent non-removed node
          is_arm = is_arm_arch(self.project.arch)

          for i in range(len(sorted_nodes)):  # pylint:disable=consider-using-enumerate
@@ -3341,7 +3341,7 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
                  # but somehow we thought b is the beginning
                  if a.addr + a.size == b.addr + b.size:
                      in_edges = len([_ for _, _, data in self.graph.in_edges([b], data=True)])
-                     if in_edges == 0:
+                     if in_edges == 0 and b in self.graph:
                          # we use node a to replace node b
                          # link all successors of b to a
                          for _, dst, data in self.graph.out_edges([b], data=True):
@@ -3360,7 +3360,7 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin

                  # next case - if b is directly from function prologue detection, or a basic block that is a successor of
                  # a wrongly identified basic block, we might be totally misdecoding b
-                 if b.instruction_addrs[0] not in a.instruction_addrs:
+                 if b.instruction_addrs[0] not in a.instruction_addrs and b in self.graph:
                      # use a, truncate b

                      new_b_addr = a.addr + a.size  # b starts right after a terminates

angr/analyses/cfg/indirect_jump_resolvers/propagator_utils.py CHANGED
@@ -13,10 +13,14 @@ class PropagatorLoadCallback:
          # only allow loading if the address falls into a read-only region
          if isinstance(addr, claripy.ast.BV) and addr.op == "BVV":
              addr_v = addr.args[0]
-             section = self.project.loader.find_section_containing(addr_v)
-             if section is not None:
-                 return section.is_readable and not section.is_writable
-             segment = self.project.loader.find_segment_containing(addr_v)
-             if segment is not None:
-                 return segment.is_readable and not segment.is_writable
+         elif isinstance(addr, int):
+             addr_v = addr
+         else:
+             return False
+         section = self.project.loader.find_section_containing(addr_v)
+         if section is not None:
+             return section.is_readable and not section.is_writable
+         segment = self.project.loader.find_segment_containing(addr_v)
+         if segment is not None:
+             return segment.is_readable and not segment.is_writable
          return False
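The behavioral change in this hunk is that plain Python int addresses are now accepted alongside concrete claripy bitvectors, and the read-only-region check runs for both. A standalone, hedged restatement of the revised logic (the helper name is made up; `find_section_containing` and `find_segment_containing` are the loader methods the code itself calls):

```python
import claripy


def allows_constant_load(loader, addr) -> bool:
    """Return True only when addr is concrete and falls into a read-only region."""
    if isinstance(addr, claripy.ast.BV) and addr.op == "BVV":
        addr_v = addr.args[0]
    elif isinstance(addr, int):
        addr_v = addr  # new in 9.2.99: plain ints are accepted too
    else:
        return False  # symbolic or otherwise unsupported addresses are rejected
    # Sections take precedence over segments, exactly as in the hunk above.
    section = loader.find_section_containing(addr_v)
    if section is not None:
        return section.is_readable and not section.is_writable
    segment = loader.find_segment_containing(addr_v)
    if segment is not None:
        return segment.is_readable and not segment.is_writable
    return False
```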

angr/analyses/decompiler/clinic.py CHANGED
@@ -33,6 +33,7 @@ from ...procedures.stubs.UnresolvableJumpTarget import UnresolvableJumpTarget
  from .. import Analysis, register_analysis
  from ..cfg.cfg_base import CFGBase
  from ..reaching_definitions import ReachingDefinitionsAnalysis
+ from .return_maker import ReturnMaker
  from .ailgraph_walker import AILGraphWalker, RemoveNodeNotice
  from .optimization_passes import (
      get_default_optimization_passes,
@@ -1054,46 +1055,7 @@ class Clinic(Analysis):
              # unknown calling convention. cannot do much about return expressions.
              return ail_graph

-         # Block walker
-
-         def _handle_Return(
-             stmt_idx: int, stmt: ailment.Stmt.Return, block: Optional[ailment.Block]
-         ):  # pylint:disable=unused-argument
-             if (
-                 block is not None
-                 and not stmt.ret_exprs
-                 and self.function.prototype is not None
-                 and self.function.prototype.returnty is not None
-                 and type(self.function.prototype.returnty) is not SimTypeBottom
-             ):
-                 new_stmt = stmt.copy()
-                 ret_val = self.function.calling_convention.return_val(self.function.prototype.returnty)
-                 if isinstance(ret_val, SimRegArg):
-                     reg = self.project.arch.registers[ret_val.reg_name]
-                     new_stmt.ret_exprs.append(
-                         ailment.Expr.Register(
-                             self._next_atom(),
-                             None,
-                             reg[0],
-                             ret_val.size * self.project.arch.byte_width,
-                             reg_name=self.project.arch.translate_register_name(reg[0], ret_val.size),
-                         )
-                     )
-                 else:
-                     l.warning("Unsupported type of return expression %s.", type(ret_val))
-                 block.statements[stmt_idx] = new_stmt
-
-         def _handler(block):
-             walker = ailment.AILBlockWalker()
-             # we don't need to handle any statement besides Returns
-             walker.stmt_handlers.clear()
-             walker.expr_handlers.clear()
-             walker.stmt_handlers[ailment.Stmt.Return] = _handle_Return
-             walker.walk(block)
-
-         # Graph walker
-
-         AILGraphWalker(ail_graph, _handler, replace_nodes=True).walk()
+         ReturnMaker(self._ail_manager, self.project.arch, self.function, ail_graph)

          return ail_graph


angr/analyses/decompiler/optimization_passes/__init__.py CHANGED
@@ -25,6 +25,7 @@ from .win_stack_canary_simplifier import WinStackCanarySimplifier
  from .cross_jump_reverter import CrossJumpReverter
  from .code_motion import CodeMotionOptimization
  from .switch_default_case_duplicator import SwitchDefaultCaseDuplicator
+ from .inlined_string_transformation_simplifier import InlinedStringTransformationSimplifier

  # order matters!
  _all_optimization_passes = [
@@ -49,6 +50,7 @@ _all_optimization_passes = [
      (CodeMotionOptimization, True),
      (CrossJumpReverter, True),
      (FlipBooleanCmp, True),
+     (InlinedStringTransformationSimplifier, True),
  ]

  # these passes may duplicate code to remove gotos or improve the structure of the graph

angr/analyses/decompiler/optimization_passes/inlined_string_transformation_simplifier.py ADDED
@@ -0,0 +1,380 @@
+ # pylint:disable=arguments-renamed,too-many-boolean-expressions,no-self-use
+ from __future__ import annotations
+ from typing import Any, DefaultDict
+ from collections import defaultdict
+
+ from archinfo import Endness
+ from ailment.expression import Const, Register, Load, StackBaseOffset, Convert, BinaryOp
+ from ailment.statement import Store, ConditionalJump, Jump
+ import claripy
+
+ from angr.engines.light import SimEngineLightAILMixin
+ from angr.storage.memory_mixins import (
+     SimpleInterfaceMixin,
+     DefaultFillerMixin,
+     PagedMemoryMixin,
+     UltraPagesMixin,
+ )
+ from angr.code_location import CodeLocation
+ from angr.errors import SimMemoryMissingError
+ from .optimization_pass import OptimizationPass, OptimizationPassStage
+
+
+ class FasterMemory(
+     SimpleInterfaceMixin,
+     DefaultFillerMixin,
+     UltraPagesMixin,
+     PagedMemoryMixin,
+ ):
+     """
+     A fast memory model used in InlinedStringTransformationState.
+     """
+
+
+ class InlinedStringTransformationState:
+     """
+     The abstract state used in InlinedStringTransformationAILEngine.
+     """
+
+     def __init__(self, project):
+         self.arch = project.arch
+         self.project = project
+
+         self.registers = FasterMemory(memory_id="reg")
+         self.memory = FasterMemory(memory_id="mem")
+
+         self.registers.set_state(self)
+         self.memory.set_state(self)
+
+     def _get_weakref(self):
+         return self
+
+     def reg_store(self, reg: Register, value: claripy.Bits) -> None:
+         self.registers.store(
+             reg.reg_offset, value, size=value.size() // self.arch.byte_width, endness=str(self.arch.register_endness)
+         )
+
+     def reg_load(self, reg: Register) -> claripy.Bits | None:
+         try:
+             return self.registers.load(
+                 reg.reg_offset, size=reg.size, endness=self.arch.register_endness, fill_missing=False
+             )
+         except SimMemoryMissingError:
+             return None
+
+     def mem_store(self, addr: int, value: claripy.Bits, endness: str) -> None:
+         self.memory.store(addr, value, size=value.size() // self.arch.byte_width, endness=endness)
+
+     def mem_load(self, addr: int, size: int, endness) -> claripy.Bits | None:
+         try:
+             return self.memory.load(addr, size=size, endness=str(endness), fill_missing=False)
+         except SimMemoryMissingError:
+             return None
+
+
+ class InlinedStringTransformationAILEngine(SimEngineLightAILMixin):
+     """
+     A simple AIL execution engine
+     """
+
+     def __init__(self, project, nodes: dict[int, Any], start: int, end: int, step_limit: int):
+         super().__init__()
+
+         self.arch = project.arch
+         self.nodes: dict[int, Any] = nodes
+         self.start: int = start
+         self.end: int = end
+         self.step_limit: int = step_limit
+
+         self.STACK_BASE = 0x7FFF_FFF0 if self.arch.bits == 32 else 0x7FFF_FFFF_F000
+         self.MASK = 0xFFFF_FFFF if self.arch.bits == 32 else 0xFFFF_FFFF_FFFF_FFFF
+
+         state = InlinedStringTransformationState(project)
+         self.stack_accesses: DefaultDict[int, list[tuple[str, CodeLocation, claripy.Bits]]] = defaultdict(list)
+         self.finished: bool = False
+
+         i = 0
+         self.pc = self.start
+         while i < self.step_limit:
+             if self.pc not in self.nodes:
+                 # jumped to a node that we do not know about
+                 break
+             block = self.nodes[self.pc]
+             self._process(state, None, block=block)
+             if self.pc is None:
+                 # not sure where to jump...
+                 break
+             if self.pc == self.end:
+                 # we reach the end of execution!
+                 self.finished = True
+                 break
+             i += 1
+
+     def _process_address(self, addr: Const | StackBaseOffset) -> tuple[int, str] | None:
+         if isinstance(addr, Const):
+             return addr.value, "mem"
+         if isinstance(addr, StackBaseOffset):
+             return (addr.offset + self.STACK_BASE) & self.MASK, "stack"
+         if isinstance(addr, BinaryOp) and isinstance(addr.operands[0], StackBaseOffset):
+             v0_and_type = self._process_address(addr.operands[0])
+             if v0_and_type is not None:
+                 v0 = v0_and_type[0]
+                 v1 = self._expr(addr.operands[1])
+                 if isinstance(v1, claripy.Bits) and v1.concrete:
+                     return (v0 + v1.concrete_value) & self.MASK, "stack"
+         return None
+
+     def _handle_Assignment(self, stmt):
+         if isinstance(stmt.dst, Register):
+             val = self._expr(stmt.src)
+             if isinstance(val, claripy.Bits):
+                 self.state.reg_store(stmt.dst, val)
+
+     def _handle_Store(self, stmt):
+         addr_and_type = self._process_address(stmt.addr)
+         if addr_and_type is not None:
+             addr, addr_type = addr_and_type
+             val = self._expr(stmt.data)
+             if isinstance(val, claripy.ast.BV):
+                 self.state.mem_store(addr, val, stmt.endness)
+                 # log it
+                 if addr_type == "stack":
+                     for i in range(0, val.size() // self.arch.byte_width):
+                         byte_off = i
+                         if self.arch.memory_endness == Endness.LE:
+                             byte_off = val.size() // self.arch.byte_width - i - 1
+                         self.stack_accesses[addr + i].append(("store", self._codeloc(), val.get_byte(byte_off)))
+
+     def _handle_Jump(self, stmt):
+         if isinstance(stmt.target, Const):
+             self.pc = stmt.target.value
+         else:
+             self.pc = None
+
+     def _handle_ConditionalJump(self, stmt):
+         self.pc = None
+         if isinstance(stmt.true_target, Const) and isinstance(stmt.false_target, Const):
+             cond = self._expr(stmt.condition)
+             if cond is not None:
+                 if isinstance(cond, claripy.Bits) and cond.concrete_value == 1:
+                     self.pc = stmt.true_target.value
+                 elif isinstance(cond, claripy.Bits) and cond.concrete_value == 0:
+                     self.pc = stmt.false_target.value
+
+     def _handle_Const(self, expr):
+         return claripy.BVV(expr.value, expr.bits)
+
+     def _handle_Load(self, expr: Load):
+         addr_and_type = self._process_address(expr.addr)
+         if addr_and_type is not None:
+             addr, addr_type = addr_and_type
+             v = self.state.mem_load(addr, expr.size, expr.endness)
+             # log it
+             if addr_type == "stack" and isinstance(v, claripy.ast.BV):
+                 for i in range(0, expr.size):
+                     byte_off = i
+                     if self.arch.memory_endness == Endness.LE:
+                         byte_off = expr.size - i - 1
+                     self.stack_accesses[addr + i].append(("load", self._codeloc(), v.get_byte(byte_off)))
+             return v
+         return None
+
+     def _handle_Register(self, expr: Register):
+         return self.state.reg_load(expr)
+
+     def _handle_Convert(self, expr: Convert):
+         v = self._expr(expr.operand)
+         if isinstance(v, claripy.Bits):
+             if expr.to_bits > expr.from_bits:
+                 if not expr.is_signed:
+                     return claripy.ZeroExt(expr.to_bits - expr.from_bits, v)
+                 return claripy.SignExt(expr.to_bits - expr.from_bits, v)
+             elif expr.to_bits < expr.from_bits:
+                 return claripy.Extract(expr.to_bits - 1, 0, v)
+             else:
+                 return v
+         return None
+
+     def _handle_CmpEQ(self, expr):
+         op0, op1 = self._expr(expr.operands[0]), self._expr(expr.operands[1])
+         if isinstance(op0, claripy.Bits) and isinstance(op1, claripy.Bits) and op0.concrete and op1.concrete:
+             return claripy.BVV(1, 1) if op0.concrete_value == op1.concrete_value else claripy.BVV(0, 1)
+         return None
+
+     def _handle_CmpNE(self, expr):
+         op0, op1 = self._expr(expr.operands[0]), self._expr(expr.operands[1])
+         if isinstance(op0, claripy.Bits) and isinstance(op1, claripy.Bits) and op0.concrete and op1.concrete:
+             return claripy.BVV(1, 1) if op0.concrete_value != op1.concrete_value else claripy.BVV(0, 1)
+         return None
+
+     def _handle_CmpLT(self, expr):
+         op0, op1 = self._expr(expr.operands[0]), self._expr(expr.operands[1])
+         if isinstance(op0, claripy.Bits) and isinstance(op1, claripy.Bits) and op0.concrete and op1.concrete:
+             return claripy.BVV(1, 1) if op0.concrete_value < op1.concrete_value else claripy.BVV(0, 1)
+         return None
+
+     def _handle_CmpLE(self, expr):
+         op0, op1 = self._expr(expr.operands[0]), self._expr(expr.operands[1])
+         if isinstance(op0, claripy.Bits) and isinstance(op1, claripy.Bits) and op0.concrete and op1.concrete:
+             return claripy.BVV(1, 1) if op0.concrete_value <= op1.concrete_value else claripy.BVV(0, 1)
+         return None
+
+     def _handle_CmpGT(self, expr):
+         op0, op1 = self._expr(expr.operands[0]), self._expr(expr.operands[1])
+         if isinstance(op0, claripy.Bits) and isinstance(op1, claripy.Bits) and op0.concrete and op1.concrete:
+             return claripy.BVV(1, 1) if op0.concrete_value > op1.concrete_value else claripy.BVV(0, 1)
+         return None
+
+     def _handle_CmpGE(self, expr):
+         op0, op1 = self._expr(expr.operands[0]), self._expr(expr.operands[1])
+         if isinstance(op0, claripy.Bits) and isinstance(op1, claripy.Bits) and op0.concrete and op1.concrete:
+             return claripy.BVV(1, 1) if op0.concrete_value >= op1.concrete_value else claripy.BVV(0, 1)
+         return None
+
+
+ class InlineStringTransformationDescriptor:
+     """
+     Describes an instance of inline string transformation.
+     """
+
+     def __init__(self, store_block, loop_body, stack_accesses, beginning_stack_offset):
+         self.store_block = store_block
+         self.loop_body = loop_body
+         self.stack_accesses = stack_accesses
+         self.beginning_stack_offset = beginning_stack_offset
+
+
+ class InlinedStringTransformationSimplifier(OptimizationPass):
+     """
+     Simplifies inlined string transformation routines.
+     """
+
+     ARCHES = None
+     PLATFORMS = None
+     STAGE = OptimizationPassStage.AFTER_GLOBAL_SIMPLIFICATION
+     NAME = "Simplify string transformations"
+     DESCRIPTION = "Simplify string transformations that are commonly used in obfuscated functions."
+
+     def __init__(self, func, **kwargs):
+         super().__init__(func, **kwargs)
+         self.analyze()
+
+     def _check(self):
+         string_transformation_descs = self._find_string_transformation_loops()
+
+         return bool(string_transformation_descs), {"descs": string_transformation_descs}
+
+     def _analyze(self, cache=None):
+         if not cache or "descs" not in cache:
+             return
+
+         for desc in cache["descs"]:
+             desc: InlineStringTransformationDescriptor
+
+             # remove the original statements
+             skip_stmt_indices = set()
+             for stack_accesses in desc.stack_accesses:
+                 # the first element is the initial storing statement
+                 codeloc = stack_accesses[0][1]
+                 assert codeloc.block_addr == desc.store_block.addr
+                 skip_stmt_indices.add(codeloc.stmt_idx)
+             new_statements = [
+                 stmt for idx, stmt in enumerate(desc.store_block.statements) if idx not in skip_stmt_indices
+             ]
+
+             # add new statements
+             store_statements = []
+             for off, stack_accesses in enumerate(desc.stack_accesses):
+                 # the last element is the final storing statement
+                 stack_addr = StackBaseOffset(None, self.project.arch.bits, desc.beginning_stack_offset + off)
+                 new_value_ast = stack_accesses[-1][2]
+                 new_value = Const(None, None, new_value_ast.concrete_value, self.project.arch.byte_width)
+                 stmt = Store(
+                     None,
+                     stack_addr,
+                     new_value,
+                     1,
+                     "Iend_LE",
+                     ins_addr=desc.store_block.addr + desc.store_block.original_size - 1,
+                 )
+                 store_statements.append(stmt)
+             if new_statements and isinstance(new_statements[-1], (ConditionalJump, Jump)):
+                 new_statements = new_statements[:-1] + store_statements + new_statements[-1:]
+             else:
+                 new_statements += store_statements
+
+             new_store_block = desc.store_block.copy(statements=new_statements)
+             self._update_block(desc.store_block, new_store_block)
+
+             # remote the loop node
+             # since the loop node has exactly one external predecessor and one external successor, we can get rid of it
+             pred = next(iter(nn for nn in self.out_graph.predecessors(desc.loop_body) if nn is not desc.loop_body))
+             succ = next(iter(nn for nn in self.out_graph.successors(desc.loop_body) if nn is not desc.loop_body))
+
+             self.out_graph.remove_node(desc.loop_body)
+             self.out_graph.add_edge(pred, succ)
+
+             if pred.statements and isinstance(pred.statements[-1], ConditionalJump):
+                 pred.statements[-1] = Jump(
+                     None,
+                     Const(None, None, succ.addr, self.project.arch.bits),
+                     succ.idx,
+                     **pred.statements[-1].tags,
+                 )
+
+     def _find_string_transformation_loops(self):
+         # find self loops
+         self_loops = []
+         for node in self._graph.nodes:
+             preds = list(self._graph.predecessors(node))
+             succs = list(self._graph.successors(node))
+             if len(preds) == 2 and len(succs) == 2 and node in preds and node in succs:
+                 pred = next(iter(nn for nn in preds if nn is not node))
+                 succ = next(iter(nn for nn in succs if nn is not node))
+                 if (
+                     self._graph.out_degree[pred] == 1
+                     and self._graph.in_degree[succ] == 1
+                     or self._graph.out_degree[pred] == 2
+                     and self._graph.in_degree[succ] == 2
+                     and self._graph.has_edge(pred, succ)
+                 ):
+                     # found it
+                     self_loops.append(node)
+
+         if not self_loops:
+             return []
+
+         descs = []
+         for loop_node in self_loops:
+             pred = next(iter(nn for nn in self._graph.predecessors(loop_node) if nn is not loop_node))
+             succ = next(iter(nn for nn in self._graph.successors(loop_node) if nn is not loop_node))
+             engine = InlinedStringTransformationAILEngine(
+                 self.project, {pred.addr: pred, loop_node.addr: loop_node}, pred.addr, succ.addr, 1024
+             )
+             if engine.finished:
+                 # find the longest slide where the stack accesses are like the following:
+                 # "store", code_location_a, value_a
+                 # "load", code_location_b, value_a
+                 # "store", code_location_b, value_b
+                 # where value_a and value_b may be the same
+                 candidate_stack_addrs = []
+                 for stack_addr in sorted(engine.stack_accesses.keys()):
+                     stack_accesses = engine.stack_accesses[stack_addr]
+                     if len(stack_accesses) == 3:
+                         item0, item1, item2 = stack_accesses
+                         if item0[0] == "store" and item1[0] == "load" and item2[0] == "store":
+                             if item0[1] != item1[1] and item1[1] == item2[1]:
+                                 if item0[2] is item1[2]:
+                                     # found one!
+                                     candidate_stack_addrs.append(stack_addr)
+
+                 if (
+                     len(candidate_stack_addrs) >= 2
+                     and candidate_stack_addrs[-1] == candidate_stack_addrs[0] + len(candidate_stack_addrs) - 1
+                 ):
+                     filtered_stack_accesses = [engine.stack_accesses[a] for a in candidate_stack_addrs]
+                     stack_offset = candidate_stack_addrs[0] - engine.STACK_BASE
+                     info = InlineStringTransformationDescriptor(pred, loop_node, filtered_stack_accesses, stack_offset)
+                     descs.append(info)
+
+         return descs
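Because this pass is appended to _all_optimization_passes with a default of enabled (see the optimization_passes/__init__.py hunk above), it should be picked up by an ordinary decompilation run with no extra configuration. A minimal, hedged usage sketch; the binary path and function name below are placeholders, not anything shipped with the package:

```python
import angr

proj = angr.Project("./obfuscated.bin", auto_load_libs=False)  # placeholder binary
cfg = proj.analyses.CFGFast(normalize=True)
func = proj.kb.functions["transform_string"]  # placeholder function name

# With no optimization_passes argument, the decompiler falls back to the default
# pass list for the target, which now includes InlinedStringTransformationSimplifier.
dec = proj.analyses.Decompiler(func, cfg=cfg.model)
print(dec.codegen.text)
```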

angr/analyses/decompiler/optimization_passes/ite_region_converter.py CHANGED
@@ -2,7 +2,7 @@
  import logging

  from ailment.statement import ConditionalJump, Assignment, Jump
- from ailment.expression import ITE
+ from ailment.expression import ITE, Const

  from ....utils.graph import subgraph_between_nodes
  from ..utils import remove_labels, to_ail_supergraph
@@ -146,10 +146,11 @@ class ITERegionConverter(OptimizationPass):
          #

          new_region_head = region_head.copy()
+         conditional_jump: ConditionalJump = region_head.statements[-1]
          addr_obj = true_stmt.src if "ins_addr" in true_stmt.src.tags else true_stmt
          ternary_expr = ITE(
              None,
-             region_head.statements[-1].condition,
+             conditional_jump.condition,
              true_stmt.src,
              false_stmt.src,
              ins_addr=addr_obj.ins_addr,
@@ -160,6 +161,13 @@ class ITERegionConverter(OptimizationPass):
          new_assignment.src = ternary_expr
          new_region_head.statements[-1] = new_assignment

+         # add a goto statement to the region tail so it can be transformed into a break or other types of control-flow
+         # transitioning statement in the future
+         goto_stmt = Jump(
+             None, Const(None, None, region_tail.addr, self.project.arch.bits), region_tail.idx, **conditional_jump.tags
+         )
+         new_region_head.statements.append(goto_stmt)
+
          #
          # destroy all the old region blocks
          #

angr/analyses/decompiler/optimization_passes/x86_gcc_getpc_simplifier.py CHANGED
@@ -76,7 +76,10 @@ class X86GccGetPcSimplifier(OptimizationPass):
                  and isinstance(block.statements[-1].target, ailment.Expr.Const)
              ):
                  call_func_addr = block.statements[-1].target.value
-                 call_func = self.kb.functions.get_by_addr(call_func_addr)
+                 try:
+                     call_func = self.kb.functions.get_by_addr(call_func_addr)
+                 except KeyError:
+                     continue
                  if "get_pc" in call_func.info:
                      results.append(
                          (key, len(block.statements) - 1, call_func.info["get_pc"], block.addr + block.original_size),

angr/analyses/decompiler/peephole_optimizations/__init__.py CHANGED
@@ -42,6 +42,7 @@ from .invert_negated_logical_conjuction_disjunction import InvertNegatedLogicalC
  from .rol_ror import RolRorRewriter
  from .inlined_strcpy import InlinedStrcpy
  from .inlined_strcpy_consolidation import InlinedStrcpyConsolidation
+ from .inlined_wstrcpy import InlinedWstrcpy

  from .base import PeepholeOptimizationExprBase, PeepholeOptimizationStmtBase, PeepholeOptimizationMultiStmtBase