angr 9.2.77__py3-none-win_amd64.whl → 9.2.79__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (46) hide show
  1. angr/__init__.py +1 -1
  2. angr/__main__.py +34 -0
  3. angr/analyses/calling_convention.py +15 -12
  4. angr/analyses/cfg/cfg_fast.py +12 -0
  5. angr/analyses/complete_calling_conventions.py +5 -2
  6. angr/analyses/decompiler/ail_simplifier.py +2 -2
  7. angr/analyses/decompiler/block_simplifier.py +25 -5
  8. angr/analyses/decompiler/clinic.py +27 -17
  9. angr/analyses/decompiler/optimization_passes/__init__.py +2 -0
  10. angr/analyses/decompiler/optimization_passes/engine_base.py +2 -2
  11. angr/analyses/decompiler/optimization_passes/ite_region_converter.py +2 -2
  12. angr/analyses/decompiler/optimization_passes/win_stack_canary_simplifier.py +105 -12
  13. angr/analyses/decompiler/peephole_optimizations/__init__.py +11 -2
  14. angr/analyses/decompiler/peephole_optimizations/base.py +29 -2
  15. angr/analyses/decompiler/peephole_optimizations/constant_derefs.py +1 -1
  16. angr/analyses/decompiler/peephole_optimizations/inlined_strcpy.py +83 -0
  17. angr/analyses/decompiler/peephole_optimizations/inlined_strcpy_consolidation.py +103 -0
  18. angr/analyses/decompiler/structured_codegen/c.py +20 -4
  19. angr/analyses/decompiler/utils.py +128 -2
  20. angr/analyses/disassembly.py +8 -1
  21. angr/analyses/propagator/engine_ail.py +9 -2
  22. angr/analyses/proximity_graph.py +30 -0
  23. angr/analyses/variable_recovery/engine_ail.py +1 -1
  24. angr/analyses/variable_recovery/engine_vex.py +10 -1
  25. angr/blade.py +14 -2
  26. angr/block.py +4 -0
  27. angr/knowledge_plugins/__init__.py +1 -0
  28. angr/knowledge_plugins/custom_strings.py +40 -0
  29. angr/knowledge_plugins/functions/function.py +58 -38
  30. angr/knowledge_plugins/key_definitions/live_definitions.py +1 -1
  31. angr/knowledge_plugins/propagations/prop_value.py +6 -2
  32. angr/knowledge_plugins/variables/variable_manager.py +1 -1
  33. angr/lib/angr_native.dll +0 -0
  34. angr/sim_state.py +0 -2
  35. angr/sim_type.py +3 -0
  36. angr/storage/memory_mixins/__init__.pyi +49 -0
  37. angr/storage/memory_mixins/paged_memory/pages/multi_values.py +7 -1
  38. angr/utils/graph.py +20 -4
  39. {angr-9.2.77.dist-info → angr-9.2.79.dist-info}/METADATA +6 -6
  40. {angr-9.2.77.dist-info → angr-9.2.79.dist-info}/RECORD +46 -40
  41. {angr-9.2.77.dist-info → angr-9.2.79.dist-info}/WHEEL +1 -1
  42. angr-9.2.79.dist-info/entry_points.txt +2 -0
  43. tests/analyses/cfg/test_cfgemulated.py +1 -1
  44. tests/storage/test_multivalues.py +18 -0
  45. {angr-9.2.77.dist-info → angr-9.2.79.dist-info}/LICENSE +0 -0
  46. {angr-9.2.77.dist-info → angr-9.2.79.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,103 @@
1
+ # pylint:disable=arguments-differ
2
+ from typing import List, Tuple, Optional
3
+
4
+ from ailment.expression import Expression, BinaryOp, Const, Register, StackBaseOffset
5
+ from ailment.statement import Call, Store
6
+
7
+ from .base import PeepholeOptimizationMultiStmtBase
8
+ from .inlined_strcpy import InlinedStrcpy
9
+
10
+
11
class InlinedStrcpyConsolidation(PeepholeOptimizationMultiStmtBase):
    """
    Merge an inlined strncpy call with the statement that directly follows it.

    Two statement shapes are handled (see ``stmt_classes``): a second inlined strncpy call, or a Store of a
    constant. In both cases the second statement must write to the address located right after the bytes written
    by the first call.
    """

    __slots__ = ()

    NAME = "Consolidate multiple inlined strcpy calls"
    stmt_classes = ((Call, Call), (Call, Store))

    def optimize(self, stmts: List[Call], **kwargs):
        """
        Try to fold two adjacent statements into a single strcpy/strncpy call.

        :param stmts:   The two matched statements, in program order.
        :return:        A one-element list holding the replacement Call, or None if the pair cannot be merged.
        """
        first, second = stmts
        if not InlinedStrcpyConsolidation._is_inlined_strcpy(first):
            return None

        prefix: bytes = self.kb.custom_strings[first.args[1].value]
        dst = first.args[0]
        merged = None  # stays None unless the two statements can be consolidated

        if isinstance(second, Call) and InlinedStrcpyConsolidation._is_inlined_strcpy(second):
            # call + call: the second copy must start exactly where the first one ended
            suffix: bytes = self.kb.custom_strings[second.args[1].value]
            if self._get_delta(dst, second.args[0]) == len(prefix):
                merged = prefix + suffix
        elif isinstance(second, Store) and isinstance(second.data, Const):
            # call + store: the store may be writing a string suffix that was too short to be recognized as an
            # inlined strcpy on its own
            if self._get_delta(dst, second.addr) == len(prefix):
                if second.size == 1 and second.data.value == 0:
                    # most likely the terminating null byte
                    is_str, text = True, "\x00"
                else:
                    is_str, text = InlinedStrcpy.is_integer_likely_a_string(
                        second.data.value, second.size, second.endness, min_length=1
                    )
                if is_str:
                    merged = prefix + text.encode("ascii")

        if merged is None:
            return None

        if merged.endswith(b"\x00"):
            # null-terminated: emit strcpy and keep the terminator out of the stored string
            target = "strcpy"
            str_id = self.kb.custom_strings.allocate(merged[:-1])
            call_args = [
                dst,
                Const(None, None, str_id, dst.bits, custom_string=True),
            ]
        else:
            # not terminated: emit strncpy with an explicit length
            target = "strncpy"
            str_id = self.kb.custom_strings.allocate(merged)
            call_args = [
                dst,
                Const(None, None, str_id, dst.bits, custom_string=True),
                Const(None, None, len(merged), self.project.arch.bits),
            ]

        return [Call(second.idx, target, args=call_args, **second.tags)]

    @staticmethod
    def _is_inlined_strcpy(stmt: Call):
        """
        Check whether a call is a three-argument "strncpy" whose second argument is a custom-string constant.
        """
        return (
            isinstance(stmt.target, str)
            and stmt.target == "strncpy"
            and len(stmt.args) == 3
            and isinstance(stmt.args[1], Const)
            and hasattr(stmt.args[1], "custom_string")
        )

    @staticmethod
    def _parse_addr(addr: Expression) -> Tuple[Expression, int]:
        """
        Split an address expression into a base expression and a constant offset.

        Registers have offset 0, stack-base offsets are rebased to offset 0, and Add/Sub with a constant second
        operand are unwrapped recursively. Any other expression is returned unchanged with offset 0.
        """
        if isinstance(addr, Register):
            return addr, 0
        if isinstance(addr, StackBaseOffset):
            return StackBaseOffset(None, addr.bits, 0), addr.offset
        if isinstance(addr, BinaryOp) and addr.op in ("Add", "Sub") and isinstance(addr.operands[1], Const):
            base, offset = InlinedStrcpyConsolidation._parse_addr(addr.operands[0])
            displacement = addr.operands[1].value
            return base, (offset + displacement if addr.op == "Add" else offset - displacement)

        return addr, 0

    @staticmethod
    def _get_delta(addr_0: Expression, addr_1: Expression) -> Optional[int]:
        """
        Return the distance from addr_0 to addr_1, or None when their bases are not alike.
        """
        base_0, offset_0 = InlinedStrcpyConsolidation._parse_addr(addr_0)
        base_1, offset_1 = InlinedStrcpyConsolidation._parse_addr(addr_1)
        return offset_1 - offset_0 if base_0.likes(base_1) else None
@@ -2037,11 +2037,22 @@ class CConstant(CExpression):
2037
2037
  return
2038
2038
  yield hex(self.reference_values[self._type]), self
2039
2039
  elif isinstance(self._type, SimTypePointer) and isinstance(self._type.pts_to, SimTypeChar):
2040
- refval = self.reference_values[self._type] # angr.knowledge_plugin.cfg.MemoryData
2041
- yield CConstant.str_to_c_str(refval.content.decode("utf-8")), self
2040
+ refval = self.reference_values[self._type]
2041
+ if isinstance(refval, MemoryData):
2042
+ v = refval.content.decode("utf-8")
2043
+ else:
2044
+ # it's a string
2045
+ assert isinstance(v, str)
2046
+ v = refval
2047
+ yield CConstant.str_to_c_str(v), self
2042
2048
  elif isinstance(self._type, SimTypePointer) and isinstance(self._type.pts_to, SimTypeWideChar):
2043
- refval = self.reference_values[self._type] # angr.knowledge_plugin.cfg.MemoryData
2044
- yield CConstant.str_to_c_str(refval.content.decode("utf_16_le"), prefix="L"), self
2049
+ refval = self.reference_values[self._type]
2050
+ if isinstance(refval, MemoryData):
2051
+ v = refval.content.decode("utf_16_le")
2052
+ else:
2053
+ # it's a string
2054
+ v = refval
2055
+ yield CConstant.str_to_c_str(v, prefix="L"), self
2045
2056
  else:
2046
2057
  if isinstance(self.reference_values[self._type], int):
2047
2058
  yield self.fmt_int(self.reference_values[self._type]), self
@@ -3199,6 +3210,11 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
3199
3210
  inline_string = False
3200
3211
  function_pointer = False
3201
3212
 
3213
+ if reference_values is None and hasattr(expr, "reference_values"):
3214
+ reference_values = expr.reference_values.copy()
3215
+ if reference_values:
3216
+ type_ = next(iter(reference_values))
3217
+
3202
3218
  if reference_values is None:
3203
3219
  reference_values = {}
3204
3220
  type_ = unpack_typeref(type_)
@@ -1,9 +1,14 @@
1
- # pylint:disable=wrong-import-position
2
- from typing import Optional, Tuple, Any, Union, List
1
+ import pathlib
2
+ from typing import Optional, Tuple, Any, Union, List, Iterable
3
+ import logging
3
4
 
4
5
  import networkx
6
+ from rich.progress import track
5
7
 
6
8
  import ailment
9
+ import angr
10
+
11
+ _l = logging.getLogger(__name__)
7
12
 
8
13
 
9
14
  def remove_last_statement(node):
@@ -533,6 +538,127 @@ def peephole_optimize_stmts(block, stmt_opts):
533
538
  return statements, any_update
534
539
 
535
540
 
541
def match_stmt_classes(all_stmts: List, idx: int, stmt_class_seq: Iterable[type]) -> bool:
    """
    Check whether the statements starting at ``idx`` are instances of the given classes, position by position.

    :param all_stmts:       The full list of statements.
    :param idx:             The index of the first statement to compare.
    :param stmt_class_seq:  The expected class for each consecutive statement.
    :return:                True if every class matches; False on a mismatch or if the list is too short.
    """
    return all(
        idx + offset < len(all_stmts) and isinstance(all_stmts[idx + offset], expected_cls)
        for offset, expected_cls in enumerate(stmt_class_seq)
    )
548
+
549
+
550
def peephole_optimize_multistmts(block, stmt_opts):
    """
    Run multi-statement peephole optimizers over the statements of a block.

    At each statement index, every optimizer is offered the window of statements that matches the first fitting
    entry of its ``stmt_classes``. When an optimizer returns a replacement, the window is substituted and the same
    index is tried again until no optimizer makes a change.

    :param block:       The block to optimize. Its ``statements`` are copied, not modified in place.
    :param stmt_opts:   The multi-statement optimizers to apply.
    :return:            A tuple of (new statement list, whether anything was changed).
    """
    new_stmts = block.statements[::]
    changed = False

    cursor = 0
    while cursor < len(new_stmts):
        # retry at this position for as long as some optimizer keeps rewriting it
        rewritten = True
        while rewritten and cursor < len(new_stmts):
            rewritten = False
            for optimizer in stmt_opts:
                # only the first matching class sequence of each optimizer is considered
                window_len = None
                for class_seq in optimizer.stmt_classes:
                    if match_stmt_classes(new_stmts, cursor, class_seq):
                        window_len = len(class_seq)
                        break
                if window_len is None:
                    continue

                window_end = cursor + window_len
                replacement = optimizer.optimize(new_stmts[cursor:window_end], stmt_idx=cursor, block=block)
                if replacement is None:
                    continue

                # splice the replacement in and start over with the first optimizer
                new_stmts = new_stmts[:cursor] + replacement + new_stmts[window_end:]
                changed = True
                rewritten = True
                break

        # move on to the next statement
        cursor += 1

    return new_stmts, changed
583
+
584
+
585
def decompile_functions(path, functions=None, structurer=None, catch_errors=True) -> Optional[str]:
    """
    Decompile functions of a binary and return the concatenated pseudocode.

    :param path:            The path to the binary to decompile.
    :param functions:       The functions to decompile, given as addresses (ints), names, or strings holding an
                            integer literal (e.g. "0x401000"). If None, all functions in the CFG are decompiled.
    :param structurer:      The name of the structuring algorithm to use. Defaults to "phoenix".
    :param catch_errors:    If True, exceptions raised while decompiling a function are caught and reported as an
                            error comment in the output. If False, they propagate to the caller.
    :return:                The decompilation of all functions appended in order. A function that failed is
                            represented by a ``// [error: ...]`` comment line.
    :raises ValueError:     If a requested function does not exist in the CFG.
    """
    # delayed imports to avoid circular imports
    from angr.analyses.decompiler.decompilation_options import PARAM_TO_OPTION

    structurer = structurer or "phoenix"
    path = pathlib.Path(path).resolve().absolute()
    proj = angr.Project(path, auto_load_libs=False)
    cfg = proj.analyses.CFG(normalize=True, data_references=True)
    proj.analyses.CompleteCallingConventions(recover_variables=True, analyze_callsites=True)

    # collect all functions when None are provided; use addresses so that the lookups below work
    if functions is None:
        functions = [f.addr for f in cfg.functions.values()]

    # normalize the functions that could be ints as names; only strings are parsed, because int() with an
    # explicit base raises TypeError on anything that is not a string
    normalized_functions = []
    for func in functions:
        if isinstance(func, str):
            try:
                func = int(func, 0)
            except ValueError:
                pass  # not an integer literal, keep it as a function name
        normalized_functions.append(func)
    functions = normalized_functions

    # verify that all functions exist
    for func in functions:
        if func not in cfg.functions:
            raise ValueError(f"Function {func} does not exist in the CFG.")

    # decompile all functions
    chunks = []
    dec_options = [
        (PARAM_TO_OPTION["structurer_cls"], structurer),
    ]
    for func in track(functions, description="Decompiling functions", transient=True):
        f = cfg.functions[func]
        if f is None or f.is_plt:
            continue

        exception_string = ""
        dec = None
        if not catch_errors:
            dec = proj.analyses.Decompiler(f, cfg=cfg, options=dec_options)
        else:
            try:
                # TODO: add a timeout
                dec = proj.analyses.Decompiler(f, cfg=cfg, options=dec_options)
            except Exception as e:  # pylint:disable=broad-except
                # deliberate best-effort: a failing function must not abort the whole run
                exception_string = str(e).replace("\n", " ")

        # do sanity checks on decompilation, skip checks if we already errored
        if not exception_string:
            if dec is None or not dec.codegen or not dec.codegen.text:
                exception_string = "Decompilation had no code output (failed in Dec)"
            elif "{\n}" in dec.codegen.text:
                exception_string = "Decompilation outputted an empty function (failed in structuring)"
            elif structurer in ["dream", "combing"] and "goto" in dec.codegen.text:
                exception_string = "Decompilation outputted a goto for a Gotoless algorithm (failed in structuring)"

        if exception_string:
            _l.critical("Failed to decompile %s because %s", str(func), exception_string)
            chunks.append(f"// [error: {func} | {exception_string}]\n")
        else:
            chunks.append(dec.codegen.text + "\n")

    return "".join(chunks)
660
+
661
+
536
662
  # delayed import
537
663
  from .structuring.structurer_nodes import (
538
664
  MultiNode,
@@ -219,6 +219,14 @@ class Instruction(DisassemblyPiece):
219
219
  for operand in dummy_operands:
220
220
  opr_pieces = self.split_op_string(operand)
221
221
  cur_operand = []
222
+
223
+ if not (operand and opr_pieces):
224
+ # opr_pieces may contain empty string when invalid disasm
225
+ # result is generated by capstone
226
+ l.error(f'Failed to parse insn "{self.insn}". Please report.')
227
+ self.operands.clear()
228
+ break
229
+
222
230
  if opr_pieces[0][0].isalpha() and opr_pieces[0] in self.arch.registers:
223
231
  cur_operand.append(Register(opr_pieces[0]))
224
232
  # handle register's suffix (e.g. "sp!", "d0[1]", "v0.16b")
@@ -269,7 +277,6 @@ class Instruction(DisassemblyPiece):
269
277
 
270
278
  if len(self.operands) == 0 and len(self.insn.operands) != 0:
271
279
  l.error("Operand parsing failed for instruction %s at address %x", str(self.insn), self.insn.address)
272
- self.operands = []
273
280
  return
274
281
 
275
282
  @staticmethod
@@ -302,8 +302,15 @@ class SimEnginePropagatorAIL(
302
302
  if 0 in current_reg_value.offset_and_details:
303
303
  detail = current_reg_value.offset_and_details[0]
304
304
  if detail.def_at == def_at:
305
- l.debug("Add a replacement: %s with %s", expr, reg_atom)
306
- self.state.add_replacement(self._codeloc(), expr, reg_atom)
305
+ outdated = False
306
+ outdated_, has_avoid_ = self.is_using_outdated_def(
307
+ detail.expr, detail.def_at, self._codeloc(), avoid=expr
308
+ )
309
+ if outdated_ or has_avoid_:
310
+ outdated = True
311
+ if not outdated:
312
+ l.debug("Add a replacement: %s with %s", expr, reg_atom)
313
+ self.state.add_replacement(self._codeloc(), expr, reg_atom)
307
314
  top = self.state.top(expr.size * self.arch.byte_width)
308
315
  return PropValue.from_value_and_details(top, expr.size, expr, self._codeloc())
309
316
 
@@ -180,6 +180,33 @@ class ProximityGraphAnalysis(Analysis):
180
180
 
181
181
  self._work()
182
182
 
183
def _condense_blank_nodes(self, graph: networkx.DiGraph) -> None:
    """
    Remove empty proximity nodes from the graph, handing each run of consecutive empty nodes to _merge_nodes.

    Nodes are visited in the order of a snapshot of ``graph.nodes`` taken before any node is removed.

    :param graph:   The graph to condense. It is modified in place.
    """
    pending: List[BaseProxiNode] = []

    for candidate in list(graph.nodes):
        if isinstance(candidate, BaseProxiNode) and candidate.type_ == ProxiNodeTypes.Empty:
            pending.append(candidate)
            continue

        # a non-empty node ends the current run of empty nodes
        if pending:
            self._merge_nodes(graph, pending)
            pending = []

    # flush the run that reaches the end of the node list
    if pending:
        self._merge_nodes(graph, pending)
197
+
198
def _merge_nodes(self, graph: networkx.DiGraph, nodes: List[BaseProxiNode]) -> None:
    """
    Remove the given nodes from the graph while keeping their neighbours connected.

    For every removed node, each predecessor is linked to each successor. The new edge carries the data of the
    original predecessor-to-node edge.

    :param graph:   The graph to modify in place.
    :param nodes:   The nodes to remove.
    """
    for victim in nodes:
        incoming = set(graph.predecessors(victim))
        outgoing = set(graph.successors(victim))

        for src in incoming:
            # the data of the (src, victim) edge is the same for every successor
            attrs = graph.get_edge_data(src, victim) or {}
            for dst in outgoing:
                graph.add_edge(src, dst, **attrs)

        graph.remove_node(victim)
209
+
183
210
  def _work(self):
184
211
  self.graph = networkx.DiGraph()
185
212
 
@@ -210,6 +237,9 @@ class ProximityGraphAnalysis(Analysis):
210
237
  self.graph.add_nodes_from(subgraph.nodes())
211
238
  self.graph.add_edges_from(subgraph.edges())
212
239
 
240
+ # condense blank nodes after the graph has been constructed
241
+ self._condense_blank_nodes(self.graph)
242
+
213
243
  def _endnode_connector(self, func: "Function", subgraph: networkx.DiGraph):
214
244
  """
215
245
  Properly connect expanded function call's to proximity graph.
@@ -124,7 +124,7 @@ class SimEngineVRAIL(
124
124
  prototype = stmt.prototype
125
125
  elif isinstance(stmt.target, ailment.Expr.Const):
126
126
  func_addr = stmt.target.value
127
- if func_addr in self.kb.functions:
127
+ if isinstance(func_addr, self.kb.functions.address_types) and func_addr in self.kb.functions:
128
128
  func = self.kb.functions[func_addr]
129
129
  prototype = func.prototype
130
130
 
@@ -179,7 +179,16 @@ class SimEngineVRVEX(
179
179
  if func.prototype is None or func.calling_convention is None:
180
180
  return
181
181
 
182
- for arg_loc in func.calling_convention.arg_locs(func.prototype):
182
+ try:
183
+ arg_locs = func.calling_convention.arg_locs(func.prototype)
184
+ except (TypeError, ValueError):
185
+ func.prototype = None
186
+ return
187
+
188
+ if None in arg_locs:
189
+ return
190
+
191
+ for arg_loc in arg_locs:
183
192
  for loc in arg_loc.get_footprint():
184
193
  if isinstance(loc, SimRegArg):
185
194
  self._read_from_register(self.arch.registers[loc.reg_name][0] + loc.reg_offset, loc.size)
angr/blade.py CHANGED
@@ -1,3 +1,4 @@
1
+ # pylint:disable=unnecessary-dunder-call
1
2
  import itertools
2
3
 
3
4
  import networkx
@@ -10,6 +11,12 @@ from .utils.constants import DEFAULT_STATEMENT
10
11
  from .slicer import SimSlicer
11
12
 
12
13
 
14
class BadJumpkindNotification(Exception):
    """
    Raised to tell the caller that a block has a bad jumpkind (e.g., Ijk_NoDecode).
    """
18
+
19
+
13
20
  class Blade:
14
21
  """
15
22
  Blade is a light-weight program slicer that works with networkx DiGraph containing CFGNodes.
@@ -177,6 +184,8 @@ class Blade:
177
184
  irsb = self.project.factory.block(
178
185
  v, cross_insn_opt=self._cross_insn_opt, backup_state=self._base_state
179
186
  ).vex
187
+ if irsb.jumpkind == "Ijk_NoDecode":
188
+ raise BadJumpkindNotification()
180
189
  self._run_cache[v] = irsb
181
190
  return irsb
182
191
  else:
@@ -248,7 +257,7 @@ class Blade:
248
257
  # Retrieve the target: are we slicing from a register(IRStmt.Put), or a temp(IRStmt.WrTmp)?
249
258
  try:
250
259
  stmts = self._get_irsb(self._dst_run).statements
251
- except SimTranslationError:
260
+ except (SimTranslationError, BadJumpkindNotification):
252
261
  return
253
262
 
254
263
  if self._dst_stmt_idx != -1:
@@ -337,7 +346,10 @@ class Blade:
337
346
  regs = regs.copy()
338
347
 
339
348
  irsb_addr = self._get_addr(run)
340
- stmts = self._get_irsb(run).statements
349
+ try:
350
+ stmts = self._get_irsb(run).statements
351
+ except (SimTranslationError, BadJumpkindNotification):
352
+ return
341
353
 
342
354
  if exit_stmt_idx is None or exit_stmt_idx == DEFAULT_STATEMENT:
343
355
  # Initialize the temps set with whatever in the `next` attribute of this irsb
angr/block.py CHANGED
@@ -427,6 +427,10 @@ class Block(Serializable):
427
427
 
428
428
  @property
429
429
  def instruction_addrs(self):
430
+ if self.size == 0:
431
+ # hooks and other pseudo-functions
432
+ return []
433
+
430
434
  if not self._instruction_addrs and self._vex is None:
431
435
  # initialize instruction addrs
432
436
  _ = self.vex
@@ -15,3 +15,4 @@ from .propagations import PropagationManager
15
15
  from .structured_code import StructuredCodeManager
16
16
  from .types import TypesStore
17
17
  from .callsite_prototypes import CallsitePrototypes
18
+ from .custom_strings import CustomStrings
@@ -0,0 +1,40 @@
1
+ from typing import Dict
2
+
3
+ from .plugin import KnowledgeBasePlugin
4
+
5
+
6
class CustomStrings(KnowledgeBasePlugin):
    """
    A knowledge base plugin holding strings recovered by analyses. Every stored string is identified by an
    integer ID.
    """

    def __init__(self, kb):
        super().__init__()
        self._kb = kb

        # the ID that the next newly stored string will receive
        self.string_id = 0
        # string ID -> string content
        self.strings: Dict[int, bytes] = {}

    def allocate(self, s: bytes) -> int:
        """
        Store a string and return its ID. If an equal string is already stored, its existing ID is returned.

        :param s:   The string content.
        :return:    The ID of the string.
        """
        # de-duplication
        # TODO: Use a reverse map if this becomes a bottle-neck in the future
        existing = next((idx for idx, known in self.strings.items() if known == s), None)
        if existing is not None:
            return existing

        new_id = self.string_id
        self.string_id = new_id + 1
        self.strings[new_id] = s
        return new_id

    def __getitem__(self, idx):
        return self.strings[idx]

    def copy(self):
        """
        Create a new plugin instance for the same knowledge base with a shallow copy of the stored strings.
        """
        duplicate = CustomStrings(self._kb)
        duplicate.string_id = self.string_id
        duplicate.strings = self.strings.copy()
        return duplicate
38
+
39
+
40
+ KnowledgeBasePlugin.register_default("custom_strings", CustomStrings)
@@ -1,7 +1,6 @@
1
1
  import os
2
2
  import logging
3
3
  import networkx
4
- import string
5
4
  import itertools
6
5
  from collections import defaultdict
7
6
  from typing import Union, Optional, Iterable, Set
@@ -14,6 +13,7 @@ from archinfo.arch_arm import get_real_address_if_arm
14
13
  import claripy
15
14
 
16
15
  from angr.block import Block
16
+ from angr.knowledge_plugins.cfg.memory_data import MemoryDataSort
17
17
 
18
18
  from ...codenode import CodeNode, BlockNode, HookNode, SyscallNode
19
19
  from ...serializable import Serializable
@@ -80,6 +80,7 @@ class Function(Serializable):
80
80
  "is_alignment",
81
81
  "is_prototype_guessed",
82
82
  "ran_cca",
83
+ "_cyclomatic_complexity",
83
84
  )
84
85
 
85
86
  def __init__(
@@ -161,6 +162,9 @@ class Function(Serializable):
161
162
  self.info = {} # storing special information, like $gp values for MIPS32
162
163
  self.tags = () # store function tags. can be set manually by performing CodeTagging analysis.
163
164
 
165
+ # Initialize _cyclomatic_complexity to None
166
+ self._cyclomatic_complexity = None
167
+
164
168
  # TODO: Can we remove the following two members?
165
169
  # Register offsets of those arguments passed in registers
166
170
  self._argument_registers = []
@@ -302,6 +306,42 @@ class Function(Serializable):
302
306
  except (SimEngineError, SimMemoryError):
303
307
  pass
304
308
 
309
@property
def cyclomatic_complexity(self):
    """
    The cyclomatic complexity of the function.

    Cyclomatic complexity measures the number of linearly independent paths through a program. The general
    formula is M = E - N + 2P, where E is the number of edges, N the number of nodes, and P the number of
    connected components. This implementation computes E - N + 2 on the transition graph, i.e. it uses P = 1.

    The value is computed on first access and cached afterwards.

    :return: The cyclomatic complexity of the function.
    :rtype: int
    """
    cached = self._cyclomatic_complexity
    if cached is not None:
        return cached

    graph = self.transition_graph
    cached = graph.number_of_edges() - graph.number_of_nodes() + 2
    self._cyclomatic_complexity = cached
    return cached
332
+
333
@property
def xrefs(self):
    """
    Iterate over all xrefs of the current function, block by block.

    :return: angr.knowledge_plugins.xrefs.xref.XRef instances.
    """
    for blk in self.blocks:
        xref_manager = self._function_manager._kb.xrefs
        region_start = blk.addr
        region_end = region_start + blk.size
        yield from xref_manager.get_xrefs_by_ins_addr_region(region_start, region_end)
344
+
305
345
  @property
306
346
  def block_addrs(self):
307
347
  """
@@ -413,49 +453,28 @@ class Function(Serializable):
413
453
  """
414
454
  return FunctionParser.parse_from_cmsg(cmsg, **kwargs)
415
455
 
416
- def string_references(self, minimum_length=2, vex_only=False):
456
+ def string_references(self, minimum_length=2):
417
457
  """
418
458
  All of the constant string references used by this function.
419
459
 
420
460
  :param minimum_length: The minimum length of strings to find (default is 2)
421
- :param vex_only: Only analyze VEX IR, don't interpret the entry state to detect additional constants.
422
- :return: A list of tuples of (address, string) where is address is the location of the string in
423
- memory.
461
+ :return: A generator yielding tuples of (address, string) where address
462
+ is the location of the string in memory.
424
463
  """
425
- strings = []
426
- memory = self._project.loader.memory
427
464
 
428
- # get known instruction addresses and call targets
429
- # these addresses cannot be string references, but show up frequently in the runtime values
430
- known_executable_addresses = set()
431
- for block in self.blocks:
432
- known_executable_addresses.update(block.instruction_addrs)
433
- for function in self._function_manager.values():
434
- known_executable_addresses.update({x.addr for x in function.graph.nodes()})
435
-
436
- # loop over all local runtime values and check if the value points to a printable string
437
- for addr in self.local_runtime_values if not vex_only else self.code_constants:
438
- if not isinstance(addr, claripy.fp.FPV) and addr in memory:
439
- # check that the address isn't an pointing to known executable code
440
- # and that it isn't an indirect pointer to known executable code
441
- try:
442
- possible_pointer = memory.unpack_word(addr)
443
- if addr not in known_executable_addresses and possible_pointer not in known_executable_addresses:
444
- # build string
445
- stn = ""
446
- offset = 0
447
- current_char = chr(memory[addr + offset])
448
- while current_char in string.printable:
449
- stn += current_char
450
- offset += 1
451
- current_char = chr(memory[addr + offset])
452
-
453
- # check that the string was a null terminated string with minimum length
454
- if current_char == "\x00" and len(stn) >= minimum_length:
455
- strings.append((addr, stn))
456
- except KeyError:
457
- pass
458
- return strings
465
+ cfg = self._function_manager._kb.cfgs.get_most_accurate()
466
+
467
+ for x in self.xrefs:
468
+ try:
469
+ md = cfg.memory_data[x.dst]
470
+ except KeyError:
471
+ continue
472
+ if md.sort not in {MemoryDataSort.String, MemoryDataSort.UnicodeString}:
473
+ continue
474
+ if len(md.content) < minimum_length:
475
+ continue
476
+
477
+ yield (md.addr, md.content)
459
478
 
460
479
  @property
461
480
  def local_runtime_values(self):
@@ -574,6 +593,7 @@ class Function(Serializable):
574
593
  s += " Alignment: %s\n" % (self.alignment)
575
594
  s += f" Arguments: reg: {self._argument_registers}, stack: {self._argument_stack_variables}\n"
576
595
  s += " Blocks: [%s]\n" % ", ".join(["%#x" % i for i in self.block_addrs])
596
+ s += " Cyclomatic Complexity: %s\n" % self.cyclomatic_complexity
577
597
  s += " Calling convention: %s" % self.calling_convention
578
598
  return s
579
599
 
@@ -74,7 +74,7 @@ class DefinitionAnnotation(Annotation):
74
74
  and self.eliminatable == other.eliminatable
75
75
  )
76
76
  else:
77
- raise ValueError("DefinitionAnnotation can only check equality with other DefinitionAnnotation")
77
+ return False
78
78
 
79
79
  def __repr__(self):
80
80
  return f"<{self.__class__.__name__}({repr(self.definition)})"