angr 9.2.178__cp310-abi3-manylinux_2_28_x86_64.whl → 9.2.180__cp310-abi3-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

angr/__init__.py CHANGED
@@ -2,7 +2,7 @@
2
2
  # pylint: disable=wrong-import-position
3
3
  from __future__ import annotations
4
4
 
5
- __version__ = "9.2.178"
5
+ __version__ = "9.2.180"
6
6
 
7
7
  if bytes is str:
8
8
  raise Exception(
@@ -55,7 +55,13 @@ class VEXExprConverter(Converter):
55
55
  except UnsupportedIROpError:
56
56
  log.warning("VEXExprConverter: Unsupported IROp %s.", expr.op)
57
57
  return DirtyExpression(
58
- manager.next_atom(), f"unsupported_{expr.op}", [], bits=expr.result_size(manager.tyenv)
58
+ manager.next_atom(),
59
+ f"unsupported_{expr.op}",
60
+ [],
61
+ bits=expr.result_size(manager.tyenv),
62
+ ins_addr=manager.ins_addr,
63
+ vex_block_addr=manager.block_addr,
64
+ vex_stmt_idx=manager.vex_stmt_idx,
59
65
  )
60
66
 
61
67
  log.warning("VEXExprConverter: Unsupported VEX expression of type %s.", type(expr))
@@ -64,7 +70,15 @@ class VEXExprConverter(Converter):
64
70
  except ValueError:
65
71
  # e.g., "ValueError: Type Ity_INVALID does not have size"
66
72
  bits = 0
67
- return DirtyExpression(manager.next_atom(), f"unsupported_{type(expr)!s}", [], bits=bits)
73
+ return DirtyExpression(
74
+ manager.next_atom(),
75
+ f"unsupported_{type(expr)!s}",
76
+ [],
77
+ bits=bits,
78
+ ins_addr=manager.ins_addr,
79
+ vex_block_addr=manager.block_addr,
80
+ vex_stmt_idx=manager.vex_stmt_idx,
81
+ )
68
82
 
69
83
  @staticmethod
70
84
  def convert_list(exprs, manager):
@@ -479,8 +493,22 @@ class VEXStmtConverter(Converter):
479
493
  try:
480
494
  func = STATEMENT_MAPPINGS[type(stmt)]
481
495
  except KeyError:
482
- dirty = DirtyExpression(manager.next_atom(), str(stmt), [], bits=0)
483
- return DirtyStatement(idx, dirty, ins_addr=manager.ins_addr)
496
+ dirty = DirtyExpression(
497
+ manager.next_atom(),
498
+ str(stmt),
499
+ [],
500
+ bits=0,
501
+ ins_addr=manager.ins_addr,
502
+ vex_block_addr=manager.block_addr,
503
+ vex_stmt_idx=manager.vex_stmt_idx,
504
+ )
505
+ return DirtyStatement(
506
+ idx,
507
+ dirty,
508
+ ins_addr=manager.ins_addr,
509
+ vex_block_addr=manager.block_addr,
510
+ vex_stmt_idx=manager.vex_stmt_idx,
511
+ )
484
512
 
485
513
  return func(idx, stmt, manager)
486
514
 
@@ -631,6 +659,9 @@ class VEXStmtConverter(Converter):
631
659
  maddr=maddr,
632
660
  msize=stmt.mSize,
633
661
  bits=bits,
662
+ ins_addr=manager.ins_addr,
663
+ vex_block_addr=manager.block_addr,
664
+ vex_stmt_idx=manager.vex_stmt_idx,
634
665
  )
635
666
 
636
667
  if stmt.tmp == 0xFFFFFFFF:
angr/analyses/cfg/cfb.py CHANGED
@@ -6,6 +6,7 @@ from collections.abc import Callable
6
6
  import cle
7
7
  from cle.backends.externs import KernelObject, ExternObject
8
8
  from cle.backends.tls.elf_tls import ELFTLSObject
9
+ from cle.backends.ihex import Hex
9
10
  from sortedcontainers import SortedDict
10
11
 
11
12
  from angr.analyses import AnalysesHub
@@ -163,6 +164,16 @@ class CFBlanket(Analysis):
163
164
  size = obj.max_addr - obj.min_addr
164
165
  mr = MemoryRegion(obj.min_addr, size, "tls", obj, None)
165
166
  self._regions.append(mr)
167
+ elif isinstance(obj, Hex):
168
+ if obj.segments:
169
+ for segment in obj.segments:
170
+ mr = MemoryRegion(segment.vaddr, segment.memsize, "segment", obj, segment)
171
+ self._regions.append(mr)
172
+ else:
173
+ base_addr = obj.min_addr # but it's always 0
174
+ size = obj.max_addr - base_addr
175
+ mr = MemoryRegion(base_addr, size, "segment", obj, None)
176
+ self._regions.append(mr)
166
177
  else:
167
178
  size = obj.size if hasattr(obj, "size") else obj.max_addr - obj.min_addr
168
179
  type_ = "TODO"
@@ -1248,7 +1248,7 @@ class AILSimplifier(Analysis):
1248
1248
  if to_replace_used_in_refs:
1249
1249
  continue
1250
1250
 
1251
- if any(not isinstance(expr_and_use[0], VirtualVariable) for _, expr_and_use in all_uses_with_def):
1251
+ if any(isinstance(expr_and_use[0], VirtualVariable) for _, expr_and_use in all_uses_with_def):
1252
1252
  # if any of the uses are phi assignments, we skip
1253
1253
  used_in_phi_assignment = False
1254
1254
  for _, expr_and_use in all_uses_with_def:
@@ -2545,6 +2545,11 @@ class Clinic(Analysis):
2545
2545
  intended_head = preds[0]
2546
2546
  other_heads = preds[1:]
2547
2547
 
2548
+ # I've seen cases where there is one more block between the actual intended head and the candidate.
2549
+ # binary 7995a0325b446c462bdb6ae10b692eee2ecadd8e888e9d7729befe4412007afb, block 0x140032760
2550
+ while ail_graph.out_degree[intended_head] == 1 and ail_graph.in_degree[intended_head] == 1:
2551
+ intended_head = next(iter(ail_graph.predecessors(intended_head)))
2552
+
2548
2553
  # now here is the tricky part. there are two cases:
2549
2554
  # Case 1: the intended head and the other heads share the same suffix (of instructions)
2550
2555
  # Example:
@@ -2649,8 +2654,6 @@ class Clinic(Analysis):
2649
2654
  def _get_overlapping_suffix_instructions_compare_conditional_jumps(
2650
2655
  ailblock_0: ailment.Block, ailblock_1: ailment.Block
2651
2656
  ) -> bool:
2652
- # TODO: The logic here is naive and highly customized to the only example I can access. Expand this method
2653
- # later to handle more cases if needed.
2654
2657
  if len(ailblock_0.statements) == 0 or len(ailblock_1.statements) == 0:
2655
2658
  return False
2656
2659
 
@@ -2663,7 +2666,10 @@ class Clinic(Analysis):
2663
2666
 
2664
2667
  last_stmt_0 = ailblock_0.statements[-1]
2665
2668
  last_stmt_1 = ailblock_1.statements[-1]
2666
- if not (isinstance(last_stmt_0, ailment.Stmt.ConditionalJump) and last_stmt_0.likes(last_stmt_1)):
2669
+ if not (
2670
+ isinstance(last_stmt_0, ailment.Stmt.ConditionalJump)
2671
+ and isinstance(last_stmt_1, ailment.Stmt.ConditionalJump)
2672
+ ):
2667
2673
  return False
2668
2674
 
2669
2675
  last_cmp_stmt_0 = next(
@@ -133,7 +133,7 @@ class CASIntrinsics(PeepholeOptimizationMultiStmtBase):
133
133
  ins_addr=cas_stmt.ins_addr,
134
134
  )
135
135
 
136
- assignment_dst = cas_stmt.expd_lo
136
+ assignment_dst = cas_stmt.old_lo
137
137
  stmt = Assignment(cas_stmt.idx, assignment_dst, call_expr, **cas_stmt.tags) # type:ignore
138
138
  return [stmt]
139
139
 
@@ -155,7 +155,7 @@ class CASIntrinsics(PeepholeOptimizationMultiStmtBase):
155
155
  bits=cas_stmt.bits,
156
156
  ins_addr=cas_stmt.ins_addr,
157
157
  )
158
- assignment_dst = cas_stmt.expd_lo
158
+ assignment_dst = cas_stmt.old_lo
159
159
  stmt = Assignment(cas_stmt.idx, assignment_dst, call_expr, **cas_stmt.tags) # type:ignore
160
160
  return [stmt, next_stmt]
161
161
 
@@ -271,12 +271,19 @@ class ExpressionCounter(SequenceWalker):
271
271
  # the current assignment depends on, StatementLocation of the assignment statement, a Boolean variable that
272
272
  # indicates if ExpressionUseFinder has succeeded or not)
273
273
  self.assignments: defaultdict[Any, set[tuple]] = defaultdict(set)
274
- self.uses: dict[int, set[tuple[Expression, LocationBase | None]]] = {}
274
+ self.outerscope_uses: dict[int, set[tuple[Expression, LocationBase | None]]] = {}
275
+ self.all_uses: dict[int, set[tuple[Expression, LocationBase | None]]] = {}
276
+ # _outer_scope is True while we are in the outermost scope and False while we are within one of the inner
277
+ # scopes (e.g., a loop). we only collect assignments in the outermost scope and stop collecting them when we
278
+ # enter inner scopes. we always collect uses into self.all_uses; uses found in the outermost scope are
+ # additionally recorded in self.outerscope_uses
279
+ self._outer_scope: bool = True
275
280
 
276
281
  super().__init__(handlers)
277
282
  self.walk(node)
278
283
 
279
284
  def _handle_Statement(self, idx: int, stmt: Statement, node: ailment.Block | LoopNode):
285
+ if not self._outer_scope:
286
+ return
280
287
  if isinstance(stmt, ailment.Stmt.Assignment):
281
288
  if is_phi_assignment(stmt):
282
289
  return
@@ -312,32 +319,40 @@ class ExpressionCounter(SequenceWalker):
312
319
 
313
320
  def _handle_Block(self, node: ailment.Block, **kwargs):
314
321
  # find assignments and uses of variables
315
- use_finder = ExpressionUseFinder()
316
- for idx, stmt in enumerate(node.statements):
317
- self._handle_Statement(idx, stmt, node)
318
- use_finder.walk_statement(stmt, block=node)
319
-
320
- for varid, content in use_finder.uses.items():
321
- if varid not in self.uses:
322
- self.uses[varid] = set()
323
- self.uses[varid] |= content
322
+ self._collect_uses(node, None)
324
323
 
325
324
  def _collect_assignments(self, expr: Expression, node) -> None:
325
+ if not self._outer_scope:
326
+ return
326
327
  finder = MultiStatementExpressionAssignmentFinder(self._handle_Statement)
327
328
  finder.walk_expression(expr, None, None, node)
328
329
 
329
- def _collect_uses(self, expr: Expression | Statement, loc: LocationBase):
330
+ def _collect_uses(self, thing: Expression | Statement | ailment.Block, loc: LocationBase | None):
330
331
  use_finder = ExpressionUseFinder()
331
- if isinstance(expr, Statement):
332
- use_finder.walk_statement(expr)
332
+ if isinstance(thing, ailment.Block):
333
+ for idx, stmt in enumerate(thing.statements):
334
+ self._handle_Statement(idx, stmt, thing)
335
+ use_finder.walk_statement(stmt, block=thing)
336
+ elif isinstance(thing, Statement):
337
+ use_finder.walk_statement(thing)
333
338
  else:
334
- use_finder.walk_expression(expr, stmt_idx=-1)
339
+ use_finder.walk_expression(thing, stmt_idx=-1)
335
340
 
336
341
  for varid, uses in use_finder.uses.items():
337
342
  for use in uses:
338
- if varid not in self.uses:
339
- self.uses[varid] = set()
340
- self.uses[varid].add((use[0], loc))
343
+ # overwrite the location if loc is specified
344
+ content = (use[0], loc) if loc is not None else use
345
+
346
+ # update all_uses
347
+ if varid not in self.all_uses:
348
+ self.all_uses[varid] = set()
349
+ self.all_uses[varid].add(content)
350
+
351
+ # update outerscope_uses if we are in the outer scope
352
+ if self._outer_scope:
353
+ if varid not in self.outerscope_uses:
354
+ self.outerscope_uses[varid] = set()
355
+ self.outerscope_uses[varid].add(content)
341
356
 
342
357
  def _handle_ConditionalBreak(self, node: ConditionalBreakNode, **kwargs):
343
358
  # collect uses on the condition expression
@@ -366,7 +381,12 @@ class ExpressionCounter(SequenceWalker):
366
381
  if node.condition is not None:
367
382
  self._collect_assignments(node.condition, node)
368
383
  self._collect_uses(node.condition, ConditionLocation(node.addr))
369
- # we do not go ahead and collect into the loop body
384
+
385
+ outer_scope = self._outer_scope
386
+ self._outer_scope = False
387
+ super()._handle_Loop(node, **kwargs)
388
+ self._outer_scope = outer_scope
389
+
370
390
  return None
371
391
 
372
392
  def _handle_SwitchCase(self, node: SwitchCaseNode, **kwargs):
@@ -125,10 +125,16 @@ class RegionSimplifier(Analysis):
125
125
  # before the definition site and the use site.
126
126
  var_with_loads = {}
127
127
  single_use_variables = []
128
- for var, uses in expr_counter.uses.items():
129
- if len(uses) == 1 and var in expr_counter.assignments and len(expr_counter.assignments[var]) == 1:
128
+ for var, outerscope_uses in expr_counter.outerscope_uses.items():
129
+ all_uses = expr_counter.all_uses[var]
130
+ if (
131
+ len(outerscope_uses) == 1
132
+ and len(all_uses) == 1
133
+ and var in expr_counter.assignments
134
+ and len(expr_counter.assignments[var]) == 1
135
+ ):
130
136
  definition, deps, loc, has_loads = next(iter(expr_counter.assignments[var]))
131
- _, use_expr_loc = next(iter(uses))
137
+ _, use_expr_loc = next(iter(outerscope_uses))
132
138
  if isinstance(use_expr_loc, ExpressionLocation) and use_expr_loc.phi_stmt:
133
139
  # we cannot fold expressions that are used in phi statements
134
140
  continue
@@ -169,7 +175,7 @@ class RegionSimplifier(Analysis):
169
175
  definition.ret_expr = definition.ret_expr.copy()
170
176
  definition.ret_expr.variable = None
171
177
  variable_assignments[var] = definition, loc
172
- variable_uses[var] = next(iter(expr_counter.uses[var]))
178
+ variable_uses[var] = next(iter(expr_counter.outerscope_uses[var]))
173
179
  variable_assignment_dependencies[var] = deps
174
180
 
175
181
  # any variable definition that uses an existing to-be-removed variable cannot be folded
@@ -188,7 +188,7 @@ class RewritingAnalysis(ForwardAnalysis[RewritingState, ailment.Block, object, o
188
188
  def _reg_predicate(self, node_: Block, *, reg_offset: int, reg_size: int) -> tuple[bool, Any]:
189
189
  out_state: RewritingState = (
190
190
  self.head_controlled_loop_outstates[(node_.addr, node_.idx)]
191
- if is_head_controlled_loop_block(node_)
191
+ if is_head_controlled_loop_block(node_) and (node_.addr, node_.idx) in self.head_controlled_loop_outstates
192
192
  else self.out_states[(node_.addr, node_.idx)]
193
193
  )
194
194
  if reg_offset in out_state.registers and reg_size in out_state.registers[reg_offset]:
@@ -561,7 +561,10 @@ class CFunction(CConstruct): # pylint:disable=abstract-method
561
561
  continue
562
562
  varname = v.c_repr() if v.type is None else v.variable.name
563
563
  yield "extern ", None
564
- yield from type_to_c_repr_chunks(v.type, name=varname, name_type=v, full=False)
564
+ if v.type is None:
565
+ yield "<unknown-type>", None
566
+ else:
567
+ yield from type_to_c_repr_chunks(v.type, name=varname, name_type=v, full=False)
565
568
  yield ";\n", None
566
569
  yield "\n", None
567
570
 
@@ -1327,9 +1330,10 @@ class CFunctionCall(CStatement, CExpression):
1327
1330
  return True
1328
1331
 
1329
1332
  # FIXME: Handle name mangle
1330
- for func in self.codegen.kb.functions.get_by_name(callee.name):
1331
- if func is not callee and (caller.binary is not callee.binary or func.binary is callee.binary):
1332
- return True
1333
+ if callee is not None:
1334
+ for func in self.codegen.kb.functions.get_by_name(callee.name):
1335
+ if func is not callee and (caller.binary is not callee.binary or func.binary is callee.binary):
1336
+ return True
1333
1337
 
1334
1338
  return False
1335
1339
 
@@ -3194,7 +3198,9 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
3194
3198
  # Handlers
3195
3199
  #
3196
3200
 
3197
- def _handle(self, node, is_expr: bool = True, lvalue: bool = False, likely_signed=False):
3201
+ def _handle(
3202
+ self, node, is_expr: bool = True, lvalue: bool = False, likely_signed=False, type_: SimType | None = None
3203
+ ):
3198
3204
  if (node, is_expr) in self.ailexpr2cnode:
3199
3205
  return self.ailexpr2cnode[(node, is_expr)]
3200
3206
 
@@ -3204,7 +3210,7 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
3204
3210
  converted = (
3205
3211
  handler(node, is_expr=is_expr)
3206
3212
  if isinstance(node, Stmt.Call)
3207
- else handler(node, lvalue=lvalue, likely_signed=likely_signed)
3213
+ else handler(node, lvalue=lvalue, likely_signed=likely_signed, type_=type_)
3208
3214
  )
3209
3215
  self.ailexpr2cnode[(node, is_expr)] = converted
3210
3216
  return converted
@@ -3483,6 +3489,8 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
3483
3489
  and i < len(target_func.prototype.args)
3484
3490
  ):
3485
3491
  type_ = target_func.prototype.args[i].with_arch(self.project.arch)
3492
+ if target_func.prototype_libname is not None:
3493
+ type_ = dereference_simtype_by_lib(type_, target_func.prototype_libname)
3486
3494
 
3487
3495
  if isinstance(arg, Expr.Const):
3488
3496
  if type_ is None or is_machine_word_size_type(type_, self.project.arch):
@@ -3490,7 +3498,7 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
3490
3498
 
3491
3499
  new_arg = self._handle_Expr_Const(arg, type_=type_)
3492
3500
  else:
3493
- new_arg = self._handle(arg)
3501
+ new_arg = self._handle(arg, type_=type_)
3494
3502
  args.append(new_arg)
3495
3503
 
3496
3504
  ret_expr = None
@@ -3737,10 +3745,19 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
3737
3745
  reference_values["offset"] = var_access
3738
3746
  return CConstant(expr.value, type_, reference_values=reference_values, tags=expr.tags, codegen=self)
3739
3747
 
3740
- def _handle_Expr_UnaryOp(self, expr, **kwargs):
3748
+ def _handle_Expr_UnaryOp(self, expr, type_: SimType | None = None, **kwargs):
3749
+ data_type = None
3750
+ if expr.op == "Reference" and isinstance(type_, SimTypePointer) and not isinstance(type_.pts_to, SimTypeBottom):
3751
+ data_type = type_.pts_to
3752
+
3753
+ operand = self._handle(expr.operand, lvalue=expr.op == "Reference", type_=data_type)
3754
+
3755
+ if expr.op == "Reference" and isinstance(operand, CUnaryOp) and operand.op == "Dereference":
3756
+ # cancel out
3757
+ return operand.operand
3741
3758
  return CUnaryOp(
3742
3759
  expr.op,
3743
- self._handle(expr.operand),
3760
+ operand,
3744
3761
  tags=expr.tags,
3745
3762
  codegen=self,
3746
3763
  )
@@ -3847,7 +3864,9 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
3847
3864
  cexpr = self._handle(expr.expr)
3848
3865
  return CMultiStatementExpression(cstmts, cexpr, tags=expr.tags, codegen=self)
3849
3866
 
3850
- def _handle_VirtualVariable(self, expr: Expr.VirtualVariable, **kwargs):
3867
+ def _handle_VirtualVariable(
3868
+ self, expr: Expr.VirtualVariable, lvalue: bool = False, type_: SimType | None = None, **kwargs
3869
+ ):
3851
3870
  def negotiate(old_ty: SimType, proposed_ty: SimType) -> SimType:
3852
3871
  # we do not allow returning a struct for a primitive type
3853
3872
  if old_ty.size == proposed_ty.size and (
@@ -3860,13 +3879,29 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
3860
3879
  if "struct_member_info" in expr.tags:
3861
3880
  offset, var, _ = expr.struct_member_info
3862
3881
  cbasevar = self._variable(var, expr.size, vvar_id=expr.varid)
3882
+ data_type = type_
3883
+ if data_type is None:
3884
+ # try to determine the type of this variable read
3885
+ data_type = cbasevar.type
3886
+ if data_type.size // self.project.arch.byte_width > expr.size:
3887
+ # fallback to a more suitable type
3888
+ data_type = (
3889
+ {
3890
+ 64: SimTypeLongLong(signed=False),
3891
+ 32: SimTypeInt(signed=False),
3892
+ 16: SimTypeShort(signed=False),
3893
+ 8: SimTypeChar(signed=False),
3894
+ }
3895
+ .get(expr.bits, data_type)
3896
+ .with_arch(self.project.arch)
3897
+ )
3863
3898
  cvar = self._access_constant_offset(
3864
- self._get_variable_reference(cbasevar), offset, cbasevar.type, False, negotiate
3899
+ self._get_variable_reference(cbasevar), offset, data_type, lvalue, negotiate
3865
3900
  )
3866
3901
  else:
3867
3902
  cvar = self._variable(expr.variable, None, vvar_id=expr.varid)
3868
3903
 
3869
- if expr.variable.size != expr.size:
3904
+ if not lvalue and expr.variable.size != expr.size:
3870
3905
  l.warning(
3871
3906
  "VirtualVariable size (%d) and variable size (%d) do not match. Force a type cast.",
3872
3907
  expr.size,
@@ -4097,6 +4132,13 @@ class PointerArithmeticFixer(CStructuredCodeWalker):
4097
4132
  a_ptr = a_ptr + 1.
4098
4133
  """
4099
4134
 
4135
+ def handle_CAssignment(self, obj: CAssignment):
4136
+ if "type" in obj.tags and "dst" in obj.tags["type"] and "src" in obj.tags["type"]:
4137
+ # HACK: do not attempt to fix pointer arithmetic if dst and src types are explicitly given
4138
+ # FIXME: Properly propagate dst and src types to lhs and rhs
4139
+ return obj
4140
+ return super().handle_CAssignment(obj)
4141
+
4100
4142
  def handle_CBinaryOp(self, obj: CBinaryOp): # type: ignore
4101
4143
  obj: CBinaryOp = super().handle_CBinaryOp(obj)
4102
4144
  if (