angr 9.2.164__cp310-abi3-macosx_11_0_arm64.whl → 9.2.165__cp310-abi3-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

angr/__init__.py CHANGED
@@ -2,7 +2,7 @@
2
2
  # pylint: disable=wrong-import-position
3
3
  from __future__ import annotations
4
4
 
5
- __version__ = "9.2.164"
5
+ __version__ = "9.2.165"
6
6
 
7
7
  if bytes is str:
8
8
  raise Exception(
angr/analyses/analysis.py CHANGED
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
  import functools
3
+ import os
3
4
  import sys
4
5
  import contextlib
5
6
  from collections import defaultdict
@@ -14,6 +15,8 @@ import logging
14
15
  import time
15
16
  import typing
16
17
 
18
+ import psutil
19
+
17
20
  from rich import progress
18
21
 
19
22
  from angr.misc.plugins import PluginVendor, VendorPreset
@@ -287,6 +290,8 @@ class Analysis:
287
290
  _name: str
288
291
  errors: list[AnalysisLogEntry] = []
289
292
  named_errors: defaultdict[str, list[AnalysisLogEntry]] = defaultdict(list)
293
+ _ram_usage: float | None = None
294
+ _last_ramusage_update: float = 0.0
290
295
  _progress_callback = None
291
296
  _show_progressbar = False
292
297
  _progressbar = None
@@ -295,7 +300,7 @@ class Analysis:
295
300
  _PROGRESS_WIDGETS = [
296
301
  progress.TaskProgressColumn(),
297
302
  progress.BarColumn(),
298
- progress.TextColumn("Elapsed Time:"),
303
+ progress.TextColumn("Elapsed:"),
299
304
  progress.TimeElapsedColumn(),
300
305
  progress.TextColumn("Time:"),
301
306
  progress.TimeRemainingColumn(),
@@ -311,7 +316,9 @@ class Analysis:
311
316
  raise
312
317
  else:
313
318
  error = AnalysisLogEntry("exception occurred", exc_info=True)
314
- l.error("Caught and logged %s with resilience: %s", error.exc_type.__name__, error.exc_value)
319
+ l.error(
320
+ "Caught and logged %s with resilience: %s", error.exc_type.__name__, error.exc_value # type:ignore
321
+ )
315
322
  if name is None:
316
323
  self.errors.append(error)
317
324
  else:
@@ -342,10 +349,12 @@ class Analysis:
342
349
  if self._progressbar is None:
343
350
  self._initialize_progressbar()
344
351
 
352
+ assert self._task is not None
353
+ assert self._progressbar is not None
345
354
  self._progressbar.update(self._task, completed=percentage)
346
355
 
347
- if text is not None and self._progressbar:
348
- self._progressbar.update(self._task, description=text)
356
+ if text is not None and self._progressbar:
357
+ self._progressbar.update(self._task, description=text)
349
358
 
350
359
  if self._progress_callback is not None:
351
360
  self._progress_callback(percentage, text=text, **kwargs) # pylint:disable=not-callable
@@ -360,6 +369,7 @@ class Analysis:
360
369
  if self._progressbar is None:
361
370
  self._initialize_progressbar()
362
371
  if self._progressbar is not None:
372
+ assert self._task is not None
363
373
  self._progressbar.update(self._task, completed=100)
364
374
  self._progressbar.stop()
365
375
  self._progressbar = None
@@ -384,6 +394,19 @@ class Analysis:
384
394
  if ctr != 0 and ctr % freq == 0:
385
395
  time.sleep(sleep_time)
386
396
 
397
@property
def ram_usage(self) -> float:
    """
    Report the resident set size (RSS) of the current Python process, in bytes.

    The measurement is cached on the instance and re-sampled only when more than one second has passed since
    the previous sample.

    :return: The most recently sampled RSS in bytes, or the placeholder ``-0.1`` when no sample has been
             taken yet.
    """

    is_stale = time.time() - self._last_ramusage_update > 1
    if is_stale:
        # remember when this sample was taken so that the next accesses within a second reuse it
        self._last_ramusage_update = time.time()
        self._ram_usage = psutil.Process(os.getpid()).memory_info().rss

    if self._ram_usage is None:
        return -0.1
    return self._ram_usage
409
+
387
410
  def __getstate__(self):
388
411
  d = dict(self.__dict__)
389
412
  d.pop("_progressbar", None)
@@ -846,6 +846,8 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
846
846
  # exception handling
847
847
  self._exception_handling_by_endaddr = SortedDict()
848
848
 
849
+ self.stage: str = ""
850
+
849
851
  #
850
852
  # Variables used during analysis
851
853
  #
@@ -1361,6 +1363,8 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
1361
1363
  return job.addr
1362
1364
 
1363
1365
  def _pre_analysis(self):
1366
+ self.stage = "Pre-analysis"
1367
+
1364
1368
  # Create a read-only memory view in loader for faster data loading
1365
1369
  self.project.loader.gen_ro_memview()
1366
1370
 
@@ -1446,6 +1450,8 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
1446
1450
 
1447
1451
  self._job_ctr = 0
1448
1452
 
1453
+ self.stage = "Analysis (Stage 1)"
1454
+
1449
1455
  def _pre_job_handling(self, job: CFGJob): # pylint:disable=arguments-differ
1450
1456
  """
1451
1457
  Some pre job-processing tasks, like update progress bar.
@@ -1481,7 +1487,13 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
1481
1487
  percentage = min(
1482
1488
  self._seg_list.occupied_size * max_percentage_stage_1 / self._regions_size, max_percentage_stage_1
1483
1489
  )
1484
- self._update_progress(percentage, cfg=self)
1490
+ ram_usage = self.ram_usage / (1024 * 1024)
1491
+ text = (
1492
+ f"{self.stage} | {len(self.functions)} funcs, {len(self.graph)} blocks | "
1493
+ f"{len(self._indirect_jumps_to_resolve)}/{len(self.indirect_jumps)} IJs | "
1494
+ f"{ram_usage:0.2f} MB RAM"
1495
+ )
1496
+ self._update_progress(percentage, text=text, cfg=self)
1485
1497
 
1486
1498
  def _intra_analysis(self):
1487
1499
  pass
@@ -1780,6 +1792,9 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
1780
1792
  self._model.edges_to_repair = remaining_edges_to_repair
1781
1793
 
1782
1794
  def _post_analysis(self):
1795
+
1796
+ self.stage = "Analysis (Stage 2)"
1797
+
1783
1798
  self._repair_edges()
1784
1799
 
1785
1800
  self._make_completed_functions()
@@ -13,8 +13,6 @@ from unique_log_filter import UniqueLogFilter
13
13
 
14
14
 
15
15
  from angr.utils.graph import GraphUtils
16
- from angr.utils.lazy_import import lazy_import
17
- from angr.utils import is_pyinstaller
18
16
  from angr.utils.graph import dominates, inverted_idoms
19
17
  from angr.utils.ail import is_head_controlled_loop_block
20
18
  from angr.block import Block, BlockNode
@@ -37,12 +35,6 @@ from .structuring.structurer_nodes import (
37
35
  from .graph_region import GraphRegion
38
36
  from .utils import peephole_optimize_expr
39
37
 
40
- if is_pyinstaller():
41
- # PyInstaller is not happy with lazy import
42
- import sympy
43
- else:
44
- sympy = lazy_import("sympy")
45
-
46
38
 
47
39
  l = logging.getLogger(__name__)
48
40
  l.addFilter(UniqueLogFilter())
@@ -953,6 +945,9 @@ class ConditionProcessor:
953
945
 
954
946
  @staticmethod
955
947
  def claripy_ast_to_sympy_expr(ast, memo=None):
948
+
949
+ import sympy # pylint:disable=import-outside-toplevel
950
+
956
951
  if ast.op == "And":
957
952
  return sympy.And(*(ConditionProcessor.claripy_ast_to_sympy_expr(arg, memo=memo) for arg in ast.args))
958
953
  if ast.op == "Or":
@@ -974,6 +969,9 @@ class ConditionProcessor:
974
969
 
975
970
  @staticmethod
976
971
  def sympy_expr_to_claripy_ast(expr, memo: dict):
972
+
973
+ import sympy # pylint:disable=import-outside-toplevel
974
+
977
975
  if expr.is_Symbol:
978
976
  return memo[expr]
979
977
  if isinstance(expr, sympy.Or):
@@ -990,6 +988,9 @@ class ConditionProcessor:
990
988
 
991
989
  @staticmethod
992
990
  def simplify_condition(cond, depth_limit=8, variables_limit=8):
991
+
992
+ import sympy # pylint:disable=import-outside-toplevel
993
+
993
994
  memo = {}
994
995
  if cond.depth > depth_limit or len(cond.variables) > variables_limit:
995
996
  return cond
@@ -43,6 +43,7 @@ from .sar_to_signed_div import SarToSignedDiv
43
43
  from .tidy_stack_addr import TidyStackAddr
44
44
  from .invert_negated_logical_conjuction_disjunction import InvertNegatedLogicalConjunctionsAndDisjunctions
45
45
  from .rol_ror import RolRorRewriter
46
+ from .inlined_memcpy import InlinedMemcpy
46
47
  from .inlined_strcpy import InlinedStrcpy
47
48
  from .inlined_strcpy_consolidation import InlinedStrcpyConsolidation
48
49
  from .inlined_wstrcpy import InlinedWstrcpy
@@ -99,6 +100,7 @@ ALL_PEEPHOLE_OPTS: list[type[PeepholeOptimizationExprBase]] = [
99
100
  TidyStackAddr,
100
101
  InvertNegatedLogicalConjunctionsAndDisjunctions,
101
102
  RolRorRewriter,
103
+ InlinedMemcpy,
102
104
  InlinedStrcpy,
103
105
  InlinedStrcpyConsolidation,
104
106
  InlinedWstrcpy,
@@ -0,0 +1,78 @@
1
+ # pylint:disable=arguments-differ
2
+ from __future__ import annotations
3
+
4
+ from angr.ailment.expression import Const, StackBaseOffset, VirtualVariable, Load, UnaryOp
5
+ from angr.ailment.statement import Call, Assignment, Store
6
+ from angr import SIM_LIBRARIES
7
+ from .base import PeepholeOptimizationStmtBase
8
+
9
+
10
class InlinedMemcpy(PeepholeOptimizationStmtBase):
    """
    Simplifies inlined data copying logic into calls to memcpy.

    Two statement shapes are recognized, both copying exactly 16 bytes:

    - an Assignment whose destination is a stack virtual variable and whose source is a Load, and
    - a Store whose address is a StackBaseOffset and whose data is a Load.

    In both cases the Load address must be either a Reference to a stack virtual variable or a StackBaseOffset.
    """

    __slots__ = ()

    NAME = "Simplifying inlined memcpy"
    stmt_classes = (Assignment, Store)

    # the only copy width (in bytes) that is rewritten
    _COPY_SIZE = 16

    def optimize(self, stmt: Assignment | Store, stmt_idx: int | None = None, block=None, **kwargs):
        """
        Rewrite a recognized 16-byte stack copy into a call to memcpy.

        :param stmt:     The statement to inspect.
        :param stmt_idx: Index of the statement in its block; not used by this optimization.
        :param block:    The block containing the statement; not used by this optimization.
        :return:         A Call to memcpy that reuses the idx and tags of ``stmt``, or None if ``stmt`` does not
                         match any recognized pattern.
        """
        dst_offset: int | None = None
        src_offset: int | None = None
        store_size: int | None = None

        if (
            isinstance(stmt, Assignment)
            and isinstance(stmt.dst, VirtualVariable)
            and stmt.dst.was_stack
            and stmt.dst.size == self._COPY_SIZE
            and isinstance(stmt.src, Load)
        ):
            dst_offset = stmt.dst.stack_offset
            store_size = stmt.dst.size
            src_offset = self._stack_offset_of(stmt.src.addr)

        if (
            isinstance(stmt, Store)
            and isinstance(stmt.addr, StackBaseOffset)
            and stmt.size == self._COPY_SIZE
            and isinstance(stmt.data, Load)
        ):
            dst_offset = stmt.addr.offset
            store_size = stmt.size
            src_offset = self._stack_offset_of(stmt.data.addr)

        if dst_offset is None or src_offset is None or store_size is None:
            # either the statement shape or the Load address did not match
            return None

        # replace it with a call to memcpy
        assert self.project is not None
        return Call(
            stmt.idx,
            "memcpy",
            args=[
                StackBaseOffset(None, self.project.arch.bits, dst_offset),
                StackBaseOffset(None, self.project.arch.bits, src_offset),
                Const(None, None, store_size, self.project.arch.bits),
            ],
            prototype=SIM_LIBRARIES["libc.so"][0].get_prototype("memcpy"),
            **stmt.tags,
        )

    @staticmethod
    def _stack_offset_of(addr) -> int | None:
        """
        Extract the stack offset that the address of a Load refers to.

        :param addr: The address expression of the Load.
        :return:     The stack offset, or None if ``addr`` is neither a Reference to a stack virtual variable nor
                     a StackBaseOffset.
        """
        if (
            isinstance(addr, UnaryOp)
            and addr.op == "Reference"
            and isinstance(addr.operand, VirtualVariable)
            # stack_offset is only read once was_stack is confirmed, the same way the destination is handled
            and addr.operand.was_stack
        ):
            return addr.operand.stack_offset
        if isinstance(addr, StackBaseOffset):
            return addr.offset
        return None
@@ -1,11 +1,11 @@
1
- # pylint:disable=arguments-differ
1
+ # pylint:disable=arguments-differ,too-many-boolean-expressions
2
2
  from __future__ import annotations
3
3
  import string
4
4
 
5
5
  from archinfo import Endness
6
6
 
7
- from angr.ailment.expression import Const, StackBaseOffset, VirtualVariable
8
- from angr.ailment.statement import Call, Assignment
7
+ from angr.ailment.expression import Const, StackBaseOffset, VirtualVariable, UnaryOp
8
+ from angr.ailment.statement import Call, Assignment, Store, Statement
9
9
 
10
10
  from angr import SIM_LIBRARIES
11
11
  from angr.utils.endness import ail_const_to_be
@@ -24,24 +24,54 @@ class InlinedStrcpy(PeepholeOptimizationStmtBase):
24
24
  __slots__ = ()
25
25
 
26
26
  NAME = "Simplifying inlined strcpy"
27
- stmt_classes = (Assignment,)
27
+ stmt_classes = (Assignment, Store)
28
+
29
+ def optimize(self, stmt: Assignment | Store, stmt_idx: int | None = None, block=None, **kwargs):
30
+ inlined_strcpy_candidate = False
31
+ src: Const | None = None
32
+ strcpy_dst: StackBaseOffset | UnaryOp | None = None
33
+
34
+ assert self.project is not None
28
35
 
29
- def optimize(self, stmt: Assignment, stmt_idx: int | None = None, block=None, **kwargs):
30
36
  if (
31
- isinstance(stmt.dst, VirtualVariable)
37
+ isinstance(stmt, Assignment)
38
+ and isinstance(stmt.dst, VirtualVariable)
32
39
  and stmt.dst.was_stack
33
40
  and isinstance(stmt.src, Const)
34
41
  and isinstance(stmt.src.value, int)
35
42
  ):
36
- r, s = self.is_integer_likely_a_string(stmt.src.value, stmt.src.size, self.project.arch.memory_endness)
43
+ inlined_strcpy_candidate = True
44
+ src = stmt.src
45
+ strcpy_dst = StackBaseOffset(None, self.project.arch.bits, stmt.dst.stack_offset)
46
+ elif (
47
+ isinstance(stmt, Store)
48
+ and isinstance(stmt.addr, UnaryOp)
49
+ and stmt.addr.op == "Reference"
50
+ and isinstance(stmt.addr.operand, VirtualVariable)
51
+ and stmt.addr.operand.was_stack
52
+ and isinstance(stmt.data, Const)
53
+ and isinstance(stmt.data.value, int)
54
+ ):
55
+ inlined_strcpy_candidate = True
56
+ src = stmt.data
57
+ strcpy_dst = stmt.addr
58
+
59
+ if inlined_strcpy_candidate:
60
+ assert src is not None and strcpy_dst is not None
61
+ assert isinstance(src.value, int)
62
+ assert self.kb is not None
63
+
64
+ r, s = self.is_integer_likely_a_string(src.value, src.size, self.project.arch.memory_endness)
37
65
  if r:
66
+ assert s is not None
67
+
38
68
  # replace it with a call to strncpy
39
69
  str_id = self.kb.custom_strings.allocate(s.encode("ascii"))
40
70
  return Call(
41
71
  stmt.idx,
42
72
  "strncpy",
43
73
  args=[
44
- StackBaseOffset(None, self.project.arch.bits, stmt.dst.stack_offset),
74
+ strcpy_dst,
45
75
  Const(None, None, str_id, self.project.arch.bits, custom_string=True),
46
76
  Const(None, None, len(s), self.project.arch.bits),
47
77
  ],
@@ -68,9 +98,21 @@ class InlinedStrcpy(PeepholeOptimizationStmtBase):
68
98
  next_offset = None
69
99
  stride = []
70
100
 
101
+ if not stride:
102
+ return None
103
+ min_stride_stmt_idx = min(stmt_idx_ for _, stmt_idx_, _ in stride)
104
+ if min_stride_stmt_idx > stmt_idx:
105
+ # the current statement is not involved in the stride. we can't simplify here, otherwise we
106
+ # will incorrectly remove the current statement
107
+ return None
108
+
71
109
  integer, size = self.stride_to_int(stride)
72
- r, s = self.is_integer_likely_a_string(integer, size, Endness.BE)
110
+ prev_stmt = None if stmt_idx == 0 else block.statements[stmt_idx - 1]
111
+ min_str_length = 1 if prev_stmt is not None and self.is_inlined_strcpy(prev_stmt) else 4
112
+ r, s = self.is_integer_likely_a_string(integer, size, Endness.BE, min_length=min_str_length)
73
113
  if r:
114
+ assert s is not None
115
+
74
116
  # we remove all involved statements whose statement IDs are greater than the current one
75
117
  for _, stmt_idx_, _ in reversed(stride):
76
118
  if stmt_idx_ <= stmt_idx:
@@ -83,7 +125,7 @@ class InlinedStrcpy(PeepholeOptimizationStmtBase):
83
125
  stmt.idx,
84
126
  "strncpy",
85
127
  args=[
86
- StackBaseOffset(None, self.project.arch.bits, stmt.dst.stack_offset),
128
+ strcpy_dst,
87
129
  Const(None, None, str_id, self.project.arch.bits, custom_string=True),
88
130
  Const(None, None, len(s), self.project.arch.bits),
89
131
  ],
@@ -101,10 +143,13 @@ class InlinedStrcpy(PeepholeOptimizationStmtBase):
101
143
  for _, _, v in stride:
102
144
  size += v.size
103
145
  n <<= v.bits
146
+ assert isinstance(v.value, int)
104
147
  n |= v.value
105
148
  return n, size
106
149
 
107
150
  def collect_constant_stores(self, block, starting_stmt_idx: int) -> dict[int, tuple[int, Const | None]]:
151
+ assert self.project is not None
152
+
108
153
  r = {}
109
154
  for idx, stmt in enumerate(block.statements):
110
155
  if idx < starting_stmt_idx:
@@ -158,3 +203,15 @@ class InlinedStrcpy(PeepholeOptimizationStmtBase):
158
203
  return False, None
159
204
  return True, "".join(chars)
160
205
  return False, None
206
+
207
@staticmethod
def is_inlined_strcpy(stmt: Statement) -> bool:
    """
    Check whether a statement is a strncpy call whose second argument is a custom-string constant.

    :param stmt: The statement to test.
    :return:     True if ``stmt`` is a Call with the string target "strncpy", exactly three arguments, and a
                 Const second argument that has a ``custom_string`` attribute; False otherwise.
    """
    if not isinstance(stmt, Call):
        return False
    if not isinstance(stmt.target, str) or stmt.target != "strncpy":
        return False

    call_args = stmt.args
    if call_args is None or len(call_args) != 3:
        return False

    str_arg = call_args[1]
    return isinstance(str_arg, Const) and hasattr(str_arg, "custom_string")
@@ -1,7 +1,7 @@
1
1
  # pylint:disable=arguments-differ
2
2
  from __future__ import annotations
3
3
 
4
- from angr.ailment.expression import Expression, BinaryOp, Const, Register, StackBaseOffset
4
+ from angr.ailment.expression import Expression, BinaryOp, Const, Register, StackBaseOffset, UnaryOp, VirtualVariable
5
5
  from angr.ailment.statement import Call, Store
6
6
 
7
7
  from angr import SIM_LIBRARIES
@@ -21,12 +21,12 @@ class InlinedStrcpyConsolidation(PeepholeOptimizationMultiStmtBase):
21
21
 
22
22
  def optimize(self, stmts: list[Call], **kwargs):
23
23
  last_stmt, stmt = stmts
24
- if InlinedStrcpyConsolidation._is_inlined_strcpy(last_stmt):
24
+ if InlinedStrcpy.is_inlined_strcpy(last_stmt):
25
25
  s_last: bytes = self.kb.custom_strings[last_stmt.args[1].value]
26
26
  addr_last = last_stmt.args[0]
27
27
  new_str = None # will be set if consolidation should happen
28
28
 
29
- if isinstance(stmt, Call) and InlinedStrcpyConsolidation._is_inlined_strcpy(stmt):
29
+ if isinstance(stmt, Call) and InlinedStrcpy.is_inlined_strcpy(stmt):
30
30
  # consolidating two calls
31
31
  s_curr: bytes = self.kb.custom_strings[stmt.args[1].value]
32
32
  addr_curr = stmt.args[0]
@@ -74,22 +74,19 @@ class InlinedStrcpyConsolidation(PeepholeOptimizationMultiStmtBase):
74
74
 
75
75
  return None
76
76
 
77
- @staticmethod
78
- def _is_inlined_strcpy(stmt: Call):
79
- return (
80
- isinstance(stmt.target, str)
81
- and stmt.target == "strncpy"
82
- and len(stmt.args) == 3
83
- and isinstance(stmt.args[1], Const)
84
- and hasattr(stmt.args[1], "custom_string")
85
- )
86
-
87
77
  @staticmethod
88
78
  def _parse_addr(addr: Expression) -> tuple[Expression, int]:
89
79
  if isinstance(addr, Register):
90
80
  return addr, 0
91
81
  if isinstance(addr, StackBaseOffset):
92
82
  return StackBaseOffset(None, addr.bits, 0), addr.offset
83
+ if (
84
+ isinstance(addr, UnaryOp)
85
+ and addr.op == "Reference"
86
+ and isinstance(addr.operand, VirtualVariable)
87
+ and addr.operand.was_stack
88
+ ):
89
+ return StackBaseOffset(None, addr.bits, 0), addr.operand.stack_offset
93
90
  if isinstance(addr, BinaryOp):
94
91
  if addr.op == "Add" and isinstance(addr.operands[1], Const):
95
92
  base_0, offset_0 = InlinedStrcpyConsolidation._parse_addr(addr.operands[0])