angr 9.2.97__py3-none-manylinux2014_x86_64.whl → 9.2.98__py3-none-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (27)
  1. angr/__init__.py +1 -1
  2. angr/analyses/cfg/cfg_base.py +14 -1
  3. angr/analyses/cfg/indirect_jump_resolvers/propagator_utils.py +10 -6
  4. angr/analyses/decompiler/optimization_passes/__init__.py +2 -0
  5. angr/analyses/decompiler/optimization_passes/inlined_string_transformation_simplifier.py +380 -0
  6. angr/analyses/decompiler/optimization_passes/x86_gcc_getpc_simplifier.py +4 -1
  7. angr/analyses/decompiler/peephole_optimizations/__init__.py +1 -0
  8. angr/analyses/decompiler/peephole_optimizations/inlined_strcpy.py +71 -3
  9. angr/analyses/decompiler/peephole_optimizations/inlined_wstrcpy.py +162 -0
  10. angr/analyses/decompiler/structured_codegen/__init__.py +1 -1
  11. angr/analyses/decompiler/structured_codegen/c.py +72 -99
  12. angr/analyses/decompiler/utils.py +5 -1
  13. angr/analyses/propagator/engine_vex.py +15 -0
  14. angr/analyses/reaching_definitions/engine_vex.py +6 -0
  15. angr/analyses/variable_recovery/engine_vex.py +6 -0
  16. angr/analyses/variable_recovery/irsb_scanner.py +12 -0
  17. angr/engines/light/engine.py +126 -15
  18. angr/knowledge_plugins/functions/function.py +4 -0
  19. angr/storage/memory_mixins/paged_memory/pages/list_page.py +20 -5
  20. angr/storage/memory_mixins/paged_memory/pages/mv_list_page.py +2 -1
  21. angr/storage/memory_mixins/simple_interface_mixin.py +4 -0
  22. {angr-9.2.97.dist-info → angr-9.2.98.dist-info}/METADATA +6 -6
  23. {angr-9.2.97.dist-info → angr-9.2.98.dist-info}/RECORD +27 -25
  24. {angr-9.2.97.dist-info → angr-9.2.98.dist-info}/LICENSE +0 -0
  25. {angr-9.2.97.dist-info → angr-9.2.98.dist-info}/WHEEL +0 -0
  26. {angr-9.2.97.dist-info → angr-9.2.98.dist-info}/entry_points.txt +0 -0
  27. {angr-9.2.97.dist-info → angr-9.2.98.dist-info}/top_level.txt +0 -0
angr/__init__.py CHANGED
@@ -1,7 +1,7 @@
1
1
  # pylint: disable=wildcard-import
2
2
  # pylint: disable=wrong-import-position
3
3
 
4
- __version__ = "9.2.97"
4
+ __version__ = "9.2.98"
5
5
 
6
6
  if bytes is str:
7
7
  raise Exception(
@@ -8,7 +8,7 @@ from sortedcontainers import SortedDict
8
8
 
9
9
  import pyvex
10
10
  from claripy.utils.orderedset import OrderedSet
11
- from cle import ELF, PE, Blob, TLSObject, MachO, ExternObject, KernelObject, FunctionHintSource, Hex, Coff, SRec
11
+ from cle import ELF, PE, Blob, TLSObject, MachO, ExternObject, KernelObject, FunctionHintSource, Hex, Coff, SRec, XBE
12
12
  from cle.backends import NamedRegion
13
13
  import archinfo
14
14
  from archinfo.arch_soot import SootAddressDescriptor
@@ -778,6 +778,17 @@ class CFGBase(Analysis):
778
778
  tpl = (section.min_addr, section.max_addr + 1)
779
779
  memory_regions.append(tpl)
780
780
 
781
+ elif isinstance(b, XBE):
782
+ # some XBE files will mark the data sections as executable
783
+ for section in b.sections:
784
+ if (
785
+ section.is_executable
786
+ and not section.is_writable
787
+ and section.name not in {".data", ".rdata", ".rodata"}
788
+ ):
789
+ tpl = (section.min_addr, section.max_addr + 1)
790
+ memory_regions.append(tpl)
791
+
781
792
  elif isinstance(b, MachO):
782
793
  if b.segments:
783
794
  # Get all executable segments
@@ -797,9 +808,11 @@ class CFGBase(Analysis):
797
808
  # a blob is entirely executable
798
809
  tpl = (b.min_addr, b.max_addr + 1)
799
810
  memory_regions.append(tpl)
811
+
800
812
  elif isinstance(b, NamedRegion):
801
813
  # NamedRegions have no content! Ignore
802
814
  pass
815
+
803
816
  elif isinstance(b, self._cle_pseudo_objects):
804
817
  pass
805
818
 
@@ -13,10 +13,14 @@ class PropagatorLoadCallback:
13
13
  # only allow loading if the address falls into a read-only region
14
14
  if isinstance(addr, claripy.ast.BV) and addr.op == "BVV":
15
15
  addr_v = addr.args[0]
16
- section = self.project.loader.find_section_containing(addr_v)
17
- if section is not None:
18
- return section.is_readable and not section.is_writable
19
- segment = self.project.loader.find_segment_containing(addr_v)
20
- if segment is not None:
21
- return segment.is_readable and not segment.is_writable
16
+ elif isinstance(addr, int):
17
+ addr_v = addr
18
+ else:
19
+ return False
20
+ section = self.project.loader.find_section_containing(addr_v)
21
+ if section is not None:
22
+ return section.is_readable and not section.is_writable
23
+ segment = self.project.loader.find_segment_containing(addr_v)
24
+ if segment is not None:
25
+ return segment.is_readable and not segment.is_writable
22
26
  return False
@@ -25,6 +25,7 @@ from .win_stack_canary_simplifier import WinStackCanarySimplifier
25
25
  from .cross_jump_reverter import CrossJumpReverter
26
26
  from .code_motion import CodeMotionOptimization
27
27
  from .switch_default_case_duplicator import SwitchDefaultCaseDuplicator
28
+ from .inlined_string_transformation_simplifier import InlinedStringTransformationSimplifier
28
29
 
29
30
  # order matters!
30
31
  _all_optimization_passes = [
@@ -49,6 +50,7 @@ _all_optimization_passes = [
49
50
  (CodeMotionOptimization, True),
50
51
  (CrossJumpReverter, True),
51
52
  (FlipBooleanCmp, True),
53
+ (InlinedStringTransformationSimplifier, True),
52
54
  ]
53
55
 
54
56
  # these passes may duplicate code to remove gotos or improve the structure of the graph
@@ -0,0 +1,380 @@
1
+ # pylint:disable=arguments-renamed,too-many-boolean-expressions,no-self-use
2
+ from __future__ import annotations
3
+ from typing import Any, DefaultDict
4
+ from collections import defaultdict
5
+
6
+ from archinfo import Endness
7
+ from ailment.expression import Const, Register, Load, StackBaseOffset, Convert, BinaryOp
8
+ from ailment.statement import Store, ConditionalJump, Jump
9
+ import claripy
10
+
11
+ from angr.engines.light import SimEngineLightAILMixin
12
+ from angr.storage.memory_mixins import (
13
+ SimpleInterfaceMixin,
14
+ DefaultFillerMixin,
15
+ PagedMemoryMixin,
16
+ UltraPagesMixin,
17
+ )
18
+ from angr.code_location import CodeLocation
19
+ from angr.errors import SimMemoryMissingError
20
+ from .optimization_pass import OptimizationPass, OptimizationPassStage
21
+
22
+
23
class FasterMemory(SimpleInterfaceMixin, DefaultFillerMixin, UltraPagesMixin, PagedMemoryMixin):
    """
    The memory model that InlinedStringTransformationState instantiates for both its registers and its memory.

    The class only stacks the mixins named in its base list (in that order) and adds no behavior of its own.
    """
32
+
33
+
34
class InlinedStringTransformationState:
    """
    The state object handed to InlinedStringTransformationAILEngine.

    It owns two FasterMemory instances, one for registers and one for memory, and offers thin store/load wrappers
    around them. Loads never fill in missing data: a location that has not been written yields None.
    """

    def __init__(self, project):
        self.project = project
        self.arch = project.arch

        self.registers = FasterMemory(memory_id="reg")
        self.memory = FasterMemory(memory_id="mem")

        # both regions are attached to this state, registers first
        for region in (self.registers, self.memory):
            region.set_state(self)

    def _get_weakref(self):
        # NOTE(review): presumably called by the memory mixins, which expect a weak reference to the state; this
        # state simply hands out itself -- confirm against the mixin implementation
        return self

    def reg_store(self, reg: Register, value: claripy.Bits) -> None:
        """
        Write `value` at the register offset of `reg`, using the architecture's register endness.
        """
        size_in_bytes = value.size() // self.arch.byte_width
        endness = str(self.arch.register_endness)
        self.registers.store(reg.reg_offset, value, size=size_in_bytes, endness=endness)

    def reg_load(self, reg: Register) -> claripy.Bits | None:
        """
        Read `reg.size` bytes at the register offset of `reg`.

        :return: The loaded value, or None if SimMemoryMissingError is raised by the underlying load.
        """
        try:
            loaded = self.registers.load(
                reg.reg_offset,
                size=reg.size,
                endness=self.arch.register_endness,
                fill_missing=False,
            )
        except SimMemoryMissingError:
            return None
        return loaded

    def mem_store(self, addr: int, value: claripy.Bits, endness: str) -> None:
        """
        Write `value` to memory at `addr` with the given endness.
        """
        size_in_bytes = value.size() // self.arch.byte_width
        self.memory.store(addr, value, size=size_in_bytes, endness=endness)

    def mem_load(self, addr: int, size: int, endness) -> claripy.Bits | None:
        """
        Read `size` bytes of memory at `addr`.

        :return: The loaded value, or None if SimMemoryMissingError is raised by the underlying load.
        """
        try:
            loaded = self.memory.load(addr, size=size, endness=str(endness), fill_missing=False)
        except SimMemoryMissingError:
            return None
        return loaded
73
+
74
+
75
class InlinedStringTransformationAILEngine(SimEngineLightAILMixin):
    """
    A small concrete interpreter over AIL blocks.

    The constructor itself drives the execution: starting from `start`, it processes one block from `nodes` per step
    and follows the jump target that the block leaves in `self.pc`. Execution stops when the target is not a key of
    `nodes`, when the target cannot be determined, when `end` is reached (only then is `finished` set to True), or
    once `step_limit` steps have been taken. Every byte stored to or loaded from a stack address is appended to
    `stack_accesses` under its concrete address as a ("store" | "load", code location, byte value) tuple.
    """

    def __init__(self, project, nodes: dict[int, Any], start: int, end: int, step_limit: int):
        super().__init__()

        self.arch = project.arch
        self.nodes: dict[int, Any] = nodes
        self.start: int = start
        self.end: int = end
        self.step_limit: int = step_limit

        # a made-up concrete stack base plus an address mask, both picked by the bit width of the architecture
        if self.arch.bits == 32:
            self.STACK_BASE = 0x7FFF_FFF0
            self.MASK = 0xFFFF_FFFF
        else:
            self.STACK_BASE = 0x7FFF_FFFF_F000
            self.MASK = 0xFFFF_FFFF_FFFF_FFFF

        state = InlinedStringTransformationState(project)
        self.stack_accesses: DefaultDict[int, list[tuple[str, CodeLocation, claripy.Bits]]] = defaultdict(list)
        self.finished: bool = False

        self.pc = self.start
        for _ in range(self.step_limit):
            if self.pc not in self.nodes:
                # we jumped to a node that we do not know about
                break
            self._process(state, None, block=self.nodes[self.pc])
            if self.pc is None:
                # the jump handlers could not determine where to go next
                break
            if self.pc == self.end:
                # the end of the execution is reached
                self.finished = True
                break

    def _process_address(self, addr: Const | StackBaseOffset) -> tuple[int, str] | None:
        """
        Resolve an address expression into a concrete address and its kind.

        :return: (address, "mem") for a constant, (address, "stack") for a stack-base offset or a binary operation
                 whose first operand is a stack-base offset and whose second operand evaluates to a concrete
                 value, and None for anything else.
        """
        if isinstance(addr, Const):
            return addr.value, "mem"
        if isinstance(addr, StackBaseOffset):
            return (addr.offset + self.STACK_BASE) & self.MASK, "stack"
        if not (isinstance(addr, BinaryOp) and isinstance(addr.operands[0], StackBaseOffset)):
            return None

        # NOTE(review): the operator of the BinaryOp is never inspected; the second operand is always added to the
        # base. Confirm that only additions can show up here.
        base_and_type = self._process_address(addr.operands[0])
        if base_and_type is None:
            return None
        delta = self._expr(addr.operands[1])
        if isinstance(delta, claripy.Bits) and delta.concrete:
            return (base_and_type[0] + delta.concrete_value) & self.MASK, "stack"
        return None

    def _handle_Assignment(self, stmt):
        # only assignments to registers are tracked, and only when the source evaluates to a value
        if not isinstance(stmt.dst, Register):
            return
        value = self._expr(stmt.src)
        if isinstance(value, claripy.Bits):
            self.state.reg_store(stmt.dst, value)

    def _handle_Store(self, stmt):
        resolved = self._process_address(stmt.addr)
        if resolved is None:
            return
        addr, addr_type = resolved

        data = self._expr(stmt.data)
        if not isinstance(data, claripy.ast.BV):
            return
        self.state.mem_store(addr, data, stmt.endness)

        if addr_type != "stack":
            return
        # log the store, one entry per byte; addr + i is paired with the byte that lands there
        byte_count = data.size() // self.arch.byte_width
        little_endian = self.arch.memory_endness == Endness.LE
        for i in range(byte_count):
            byte_off = byte_count - i - 1 if little_endian else i
            self.stack_accesses[addr + i].append(("store", self._codeloc(), data.get_byte(byte_off)))

    def _handle_Jump(self, stmt):
        # only constant targets can be followed
        self.pc = stmt.target.value if isinstance(stmt.target, Const) else None

    def _handle_ConditionalJump(self, stmt):
        # the target stays unknown unless both targets are constants and the condition evaluates to 1 or 0
        self.pc = None
        if not (isinstance(stmt.true_target, Const) and isinstance(stmt.false_target, Const)):
            return
        cond = self._expr(stmt.condition)
        if not isinstance(cond, claripy.Bits):
            return
        if cond.concrete_value == 1:
            self.pc = stmt.true_target.value
        elif cond.concrete_value == 0:
            self.pc = stmt.false_target.value

    def _handle_Const(self, expr):
        return claripy.BVV(expr.value, expr.bits)

    def _handle_Load(self, expr: Load):
        resolved = self._process_address(expr.addr)
        if resolved is None:
            return None
        addr, addr_type = resolved

        loaded = self.state.mem_load(addr, expr.size, expr.endness)
        if addr_type == "stack" and isinstance(loaded, claripy.ast.BV):
            # log the load, one entry per byte, mirroring what _handle_Store records
            little_endian = self.arch.memory_endness == Endness.LE
            for i in range(expr.size):
                byte_off = expr.size - i - 1 if little_endian else i
                self.stack_accesses[addr + i].append(("load", self._codeloc(), loaded.get_byte(byte_off)))
        return loaded

    def _handle_Register(self, expr: Register):
        return self.state.reg_load(expr)

    def _handle_Convert(self, expr: Convert):
        operand = self._expr(expr.operand)
        if not isinstance(operand, claripy.Bits):
            return None
        if expr.to_bits == expr.from_bits:
            return operand
        if expr.to_bits < expr.from_bits:
            # narrowing keeps the low bits
            return claripy.Extract(expr.to_bits - 1, 0, operand)
        # widening: sign-extend signed conversions, zero-extend all others
        extend = claripy.SignExt if expr.is_signed else claripy.ZeroExt
        return extend(expr.to_bits - expr.from_bits, operand)

    def _fold_concrete_comparison(self, expr, predicate):
        """
        Evaluate both operands of `expr` and apply `predicate` to their concrete values.

        :return: A 1-bit BVV holding 1 or 0, or None if either operand is missing or not concrete.
        """
        # NOTE(review): the concrete values are compared as plain Python integers; whether signed comparisons need
        # special treatment cannot be told from here.
        lhs, rhs = self._expr(expr.operands[0]), self._expr(expr.operands[1])
        if isinstance(lhs, claripy.Bits) and isinstance(rhs, claripy.Bits) and lhs.concrete and rhs.concrete:
            return claripy.BVV(1, 1) if predicate(lhs.concrete_value, rhs.concrete_value) else claripy.BVV(0, 1)
        return None

    def _handle_CmpEQ(self, expr):
        return self._fold_concrete_comparison(expr, lambda a, b: a == b)

    def _handle_CmpNE(self, expr):
        return self._fold_concrete_comparison(expr, lambda a, b: a != b)

    def _handle_CmpLT(self, expr):
        return self._fold_concrete_comparison(expr, lambda a, b: a < b)

    def _handle_CmpLE(self, expr):
        return self._fold_concrete_comparison(expr, lambda a, b: a <= b)

    def _handle_CmpGT(self, expr):
        return self._fold_concrete_comparison(expr, lambda a, b: a > b)

    def _handle_CmpGE(self, expr):
        return self._fold_concrete_comparison(expr, lambda a, b: a >= b)
233
+
234
+
235
class InlineStringTransformationDescriptor:
    """
    A plain record of one detected inline string transformation.

    It keeps the four constructor arguments as attributes of the same names and does nothing else.
    """

    def __init__(self, store_block, loop_body, stack_accesses, beginning_stack_offset):
        self.store_block, self.loop_body = store_block, loop_body
        self.stack_accesses, self.beginning_stack_offset = stack_accesses, beginning_stack_offset
245
+
246
+
247
class InlinedStringTransformationSimplifier(OptimizationPass):
    """
    Replaces inlined string transformation loops with the bytes that they compute.

    A candidate is a self-looping block with one other predecessor and one other successor. The predecessor and the
    loop are executed concretely by InlinedStringTransformationAILEngine; if the run reaches the successor and a
    consecutive range of stack bytes was stored once and then rewritten by the loop, the initial stores are replaced
    by single-byte stores of the final values and the loop block is removed from the graph.
    """

    ARCHES = None
    PLATFORMS = None
    STAGE = OptimizationPassStage.AFTER_GLOBAL_SIMPLIFICATION
    NAME = "Simplify string transformations"
    DESCRIPTION = "Simplify string transformations that are commonly used in obfuscated functions."

    def __init__(self, func, **kwargs):
        super().__init__(func, **kwargs)
        self.analyze()

    def _check(self):
        found = self._find_string_transformation_loops()
        return bool(found), {"descs": found}

    def _analyze(self, cache=None):
        if not cache or "descs" not in cache:
            return

        arch = self.project.arch
        for desc in cache["descs"]:
            desc: InlineStringTransformationDescriptor
            store_block = desc.store_block

            # remove the original statements: the first access of every byte is its initial store
            dropped_indices = set()
            for accesses in desc.stack_accesses:
                first_loc = accesses[0][1]
                assert first_loc.block_addr == store_block.addr
                dropped_indices.add(first_loc.stmt_idx)
            kept = [stmt for idx, stmt in enumerate(store_block.statements) if idx not in dropped_indices]

            # add new statements: the last access of every byte carries its final value
            ins_addr = store_block.addr + store_block.original_size - 1
            byte_stores = [
                Store(
                    None,
                    StackBaseOffset(None, arch.bits, desc.beginning_stack_offset + off),
                    Const(None, None, accesses[-1][2].concrete_value, arch.byte_width),
                    1,
                    "Iend_LE",
                    ins_addr=ins_addr,
                )
                for off, accesses in enumerate(desc.stack_accesses)
            ]
            if kept and isinstance(kept[-1], (ConditionalJump, Jump)):
                # keep the terminating jump at the end of the block
                rebuilt = kept[:-1] + byte_stores + kept[-1:]
            else:
                rebuilt = kept + byte_stores

            self._update_block(store_block, store_block.copy(statements=rebuilt))

            # remove the loop node
            # since the loop node has exactly one external predecessor and one external successor, we can get rid
            # of it and connect the two directly
            loop = desc.loop_body
            pred = next(nn for nn in self.out_graph.predecessors(loop) if nn is not loop)
            succ = next(nn for nn in self.out_graph.successors(loop) if nn is not loop)

            self.out_graph.remove_node(loop)
            self.out_graph.add_edge(pred, succ)

            if pred.statements:
                last_stmt = pred.statements[-1]
                if isinstance(last_stmt, ConditionalJump):
                    # the predecessor now has a single way out of this construct
                    pred.statements[-1] = Jump(
                        None,
                        Const(None, None, succ.addr, arch.bits),
                        succ.idx,
                        **last_stmt.tags,
                    )

    def _find_string_transformation_loops(self):
        """
        Look for self-looping blocks that transform a consecutive range of stack bytes in place.

        :return: A list of InlineStringTransformationDescriptor objects, one per loop that qualifies.
        """
        graph = self._graph

        # find self loops, together with their only external predecessor and successor
        candidates = []
        for node in graph.nodes:
            preds = list(graph.predecessors(node))
            succs = list(graph.successors(node))
            if not (len(preds) == 2 and len(succs) == 2 and node in preds and node in succs):
                continue
            pred = next(nn for nn in preds if nn is not node)
            succ = next(nn for nn in succs if nn is not node)
            pred_out = graph.out_degree[pred]
            succ_in = graph.in_degree[succ]
            # either the loop is the only path from pred to succ, or pred may also skip the loop and go to succ
            if (pred_out == 1 and succ_in == 1) or (pred_out == 2 and succ_in == 2 and graph.has_edge(pred, succ)):
                candidates.append((pred, node, succ))

        descs = []
        for pred, loop_node, succ in candidates:
            engine = InlinedStringTransformationAILEngine(
                self.project, {pred.addr: pred, loop_node.addr: loop_node}, pred.addr, succ.addr, 1024
            )
            if not engine.finished:
                continue

            # collect the stack addresses whose accesses look like the following:
            #   "store", code_location_a, value_a
            #   "load", code_location_b, value_a
            #   "store", code_location_b, value_b
            # where value_a and value_b may be the same
            matching_addrs = []
            for stack_addr in sorted(engine.stack_accesses.keys()):
                accesses = engine.stack_accesses[stack_addr]
                if len(accesses) != 3:
                    continue
                first, second, third = accesses
                if not (first[0] == "store" and second[0] == "load" and third[0] == "store"):
                    continue
                if not (first[1] != second[1] and second[1] == third[1]):
                    continue
                if first[2] is second[2]:
                    # found one!
                    matching_addrs.append(stack_addr)

            # at least two bytes are required, and the addresses must form one gap-free range
            if len(matching_addrs) < 2:
                continue
            if matching_addrs[-1] != matching_addrs[0] + len(matching_addrs) - 1:
                continue

            descs.append(
                InlineStringTransformationDescriptor(
                    pred,
                    loop_node,
                    [engine.stack_accesses[a] for a in matching_addrs],
                    matching_addrs[0] - engine.STACK_BASE,
                )
            )

        return descs
@@ -76,7 +76,10 @@ class X86GccGetPcSimplifier(OptimizationPass):
76
76
  and isinstance(block.statements[-1].target, ailment.Expr.Const)
77
77
  ):
78
78
  call_func_addr = block.statements[-1].target.value
79
- call_func = self.kb.functions.get_by_addr(call_func_addr)
79
+ try:
80
+ call_func = self.kb.functions.get_by_addr(call_func_addr)
81
+ except KeyError:
82
+ continue
80
83
  if "get_pc" in call_func.info:
81
84
  results.append(
82
85
  (key, len(block.statements) - 1, call_func.info["get_pc"], block.addr + block.original_size),
@@ -42,6 +42,7 @@ from .invert_negated_logical_conjuction_disjunction import InvertNegatedLogicalC
42
42
  from .rol_ror import RolRorRewriter
43
43
  from .inlined_strcpy import InlinedStrcpy
44
44
  from .inlined_strcpy_consolidation import InlinedStrcpyConsolidation
45
+ from .inlined_wstrcpy import InlinedWstrcpy
45
46
 
46
47
  from .base import PeepholeOptimizationExprBase, PeepholeOptimizationStmtBase, PeepholeOptimizationMultiStmtBase
47
48
 
@@ -1,10 +1,10 @@
1
1
  # pylint:disable=arguments-differ
2
- from typing import Tuple, Optional
2
+ from typing import Tuple, Optional, Dict, List
3
3
  import string
4
4
 
5
5
  from archinfo import Endness
6
6
 
7
- from ailment.expression import Const
7
+ from ailment.expression import Const, StackBaseOffset
8
8
  from ailment.statement import Call, Store
9
9
 
10
10
  from .base import PeepholeOptimizationStmtBase
@@ -24,7 +24,7 @@ class InlinedStrcpy(PeepholeOptimizationStmtBase):
24
24
  NAME = "Simplifying inlined strcpy"
25
25
  stmt_classes = (Store,)
26
26
 
27
- def optimize(self, stmt: Store, **kwargs):
27
+ def optimize(self, stmt: Store, stmt_idx: int = None, block=None, **kwargs):
28
28
  if isinstance(stmt.data, Const):
29
29
  r, s = self.is_integer_likely_a_string(stmt.data.value, stmt.data.size, stmt.endness)
30
30
  if r:
@@ -41,8 +41,76 @@ class InlinedStrcpy(PeepholeOptimizationStmtBase):
41
41
  **stmt.tags,
42
42
  )
43
43
 
44
+ # scan forward in the current block to find all consecutive constant stores
45
+ if block is not None and stmt_idx is not None:
46
+ all_constant_stores: Dict[int, Tuple[int, Optional[Const]]] = self.collect_constant_stores(
47
+ block, stmt_idx
48
+ )
49
+ if all_constant_stores:
50
+ offsets = sorted(all_constant_stores.keys())
51
+ next_offset = min(offsets)
52
+ stride = []
53
+ for offset in offsets:
54
+ if next_offset is not None and offset != next_offset:
55
+ next_offset = None
56
+ stride = []
57
+ stmt_idx_, v = all_constant_stores[offset]
58
+ if v is not None:
59
+ stride.append((offset, stmt_idx_, v))
60
+ next_offset = offset + v.size
61
+ else:
62
+ next_offset = None
63
+ stride = []
64
+
65
+ integer, size = self.stride_to_int(stride)
66
+ r, s = self.is_integer_likely_a_string(integer, size, Endness.BE)
67
+ if r:
68
+ # we remove all involved statements whose statement IDs are greater than the current one
69
+ for _, stmt_idx_, _ in reversed(stride):
70
+ if stmt_idx_ <= stmt_idx:
71
+ continue
72
+ block.statements[stmt_idx_] = None
73
+ block.statements = [ss for ss in block.statements if ss is not None]
74
+
75
+ str_id = self.kb.custom_strings.allocate(s.encode("ascii"))
76
+ return Call(
77
+ stmt.idx,
78
+ "strncpy",
79
+ args=[
80
+ stmt.addr,
81
+ Const(None, None, str_id, stmt.addr.bits, custom_string=True),
82
+ Const(None, None, len(s), self.project.arch.bits),
83
+ ],
84
+ **stmt.tags,
85
+ )
86
+
44
87
  return None
45
88
 
89
@staticmethod
def stride_to_int(stride: List[Tuple[int, int, Const]]) -> Tuple[int, int]:
    """
    Pack the constants of a stride into one integer.

    The entries are visited in ascending order of their first element. Each constant shifts the accumulated integer
    left by its own bit width and is then OR-ed into the low bits, so the entry with the lowest first element ends
    up in the most significant position.

    :param stride:  A list of (offset, statement index, constant) tuples.
    :return:        The packed integer and the sum of the `size` attributes of all constants.
    """
    packed = 0
    total_size = 0
    for _offset, _stmt_idx, const in sorted(stride, key=lambda entry: entry[0]):
        packed = (packed << const.bits) | const.value
        total_size += const.size
    return packed, total_size
99
+
100
@staticmethod
def collect_constant_stores(block, starting_stmt_idx: int) -> Dict[int, Tuple[int, Optional[Const]]]:
    """
    Gather the stores to integer stack-base offsets in `block`, starting at statement `starting_stmt_idx`.

    :param block:               The block whose statements are scanned.
    :param starting_stmt_idx:   Statements with a smaller index are ignored.
    :return:                    A dict mapping each stack offset to (statement index, stored data). The data is
                                the stored Const, or None when something other than a Const is stored. A later
                                store to the same offset replaces the earlier entry.
    """
    stores = {}
    for idx, stmt in enumerate(block.statements):
        if idx < starting_stmt_idx:
            continue
        if not isinstance(stmt, Store):
            continue
        if not (isinstance(stmt.addr, StackBaseOffset) and isinstance(stmt.addr.offset, int)):
            continue
        data = stmt.data if isinstance(stmt.data, Const) else None
        stores[stmt.addr.offset] = idx, data

    return stores
113
+
46
114
  @staticmethod
47
115
  def is_integer_likely_a_string(
48
116
  v: int, size: int, endness: Endness, min_length: int = 4