angr 9.2.148__py3-none-manylinux2014_x86_64.whl → 9.2.150__py3-none-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (61) hide show
  1. angr/__init__.py +1 -1
  2. angr/__main__.py +100 -37
  3. angr/analyses/calling_convention/calling_convention.py +42 -2
  4. angr/analyses/cfg/cfg_emulated.py +5 -2
  5. angr/analyses/cfg/cfg_fast.py +48 -46
  6. angr/analyses/decompiler/ail_simplifier.py +65 -32
  7. angr/analyses/decompiler/block_simplifier.py +20 -6
  8. angr/analyses/decompiler/clinic.py +80 -13
  9. angr/analyses/decompiler/dephication/rewriting_engine.py +24 -2
  10. angr/analyses/decompiler/optimization_passes/__init__.py +5 -0
  11. angr/analyses/decompiler/optimization_passes/base_ptr_save_simplifier.py +15 -13
  12. angr/analyses/decompiler/optimization_passes/determine_load_sizes.py +64 -0
  13. angr/analyses/decompiler/optimization_passes/eager_std_string_concatenation.py +165 -0
  14. angr/analyses/decompiler/optimization_passes/engine_base.py +11 -2
  15. angr/analyses/decompiler/optimization_passes/flip_boolean_cmp.py +2 -1
  16. angr/analyses/decompiler/optimization_passes/inlined_string_transformation_simplifier.py +17 -2
  17. angr/analyses/decompiler/optimization_passes/optimization_pass.py +10 -6
  18. angr/analyses/decompiler/optimization_passes/win_stack_canary_simplifier.py +99 -30
  19. angr/analyses/decompiler/peephole_optimizations/__init__.py +6 -0
  20. angr/analyses/decompiler/peephole_optimizations/base.py +43 -3
  21. angr/analyses/decompiler/peephole_optimizations/constant_derefs.py +1 -1
  22. angr/analyses/decompiler/peephole_optimizations/inlined_strcpy.py +3 -0
  23. angr/analyses/decompiler/peephole_optimizations/inlined_strcpy_consolidation.py +4 -1
  24. angr/analyses/decompiler/peephole_optimizations/remove_cxx_destructor_calls.py +32 -0
  25. angr/analyses/decompiler/peephole_optimizations/remove_redundant_bitmasks.py +69 -2
  26. angr/analyses/decompiler/peephole_optimizations/rewrite_conv_mul.py +40 -0
  27. angr/analyses/decompiler/peephole_optimizations/rewrite_cxx_operator_calls.py +90 -0
  28. angr/analyses/decompiler/presets/fast.py +2 -0
  29. angr/analyses/decompiler/presets/full.py +2 -0
  30. angr/analyses/decompiler/ssailification/rewriting_engine.py +51 -4
  31. angr/analyses/decompiler/ssailification/ssailification.py +23 -3
  32. angr/analyses/decompiler/ssailification/traversal_engine.py +15 -1
  33. angr/analyses/decompiler/structured_codegen/c.py +141 -10
  34. angr/analyses/decompiler/utils.py +23 -1
  35. angr/analyses/disassembly.py +2 -1
  36. angr/analyses/patchfinder.py +1 -1
  37. angr/analyses/s_reaching_definitions/s_rda_view.py +1 -0
  38. angr/analyses/typehoon/lifter.py +20 -0
  39. angr/analyses/typehoon/simple_solver.py +42 -9
  40. angr/analyses/typehoon/translator.py +4 -1
  41. angr/analyses/typehoon/typeconsts.py +17 -6
  42. angr/analyses/typehoon/typehoon.py +25 -6
  43. angr/analyses/variable_recovery/engine_ail.py +44 -5
  44. angr/analyses/variable_recovery/engine_base.py +35 -12
  45. angr/analyses/variable_recovery/variable_recovery_fast.py +33 -2
  46. angr/calling_conventions.py +23 -5
  47. angr/engines/light/engine.py +7 -0
  48. angr/engines/pcode/lifter.py +7 -0
  49. angr/knowledge_plugins/functions/function.py +68 -0
  50. angr/knowledge_plugins/propagations/states.py +5 -2
  51. angr/knowledge_plugins/variables/variable_manager.py +3 -3
  52. angr/procedures/definitions/__init__.py +1 -1
  53. angr/procedures/definitions/types_stl.py +22 -0
  54. angr/sim_type.py +251 -130
  55. angr/utils/graph.py +51 -27
  56. {angr-9.2.148.dist-info → angr-9.2.150.dist-info}/METADATA +7 -7
  57. {angr-9.2.148.dist-info → angr-9.2.150.dist-info}/RECORD +61 -55
  58. {angr-9.2.148.dist-info → angr-9.2.150.dist-info}/WHEEL +1 -1
  59. {angr-9.2.148.dist-info → angr-9.2.150.dist-info}/licenses/LICENSE +3 -0
  60. {angr-9.2.148.dist-info → angr-9.2.150.dist-info}/entry_points.txt +0 -0
  61. {angr-9.2.148.dist-info → angr-9.2.150.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,40 @@
1
+ from __future__ import annotations
2
+ from ailment.expression import BinaryOp, Const, Convert
3
+
4
+ from .base import PeepholeOptimizationExprBase
5
+
6
+
7
class RewriteConvMul(PeepholeOptimizationExprBase):
    """
    Moves an outer constant multiplication inside the narrowing conversion it is applied to.

    The matched shape is ``Convert(narrowing, inner * Const<64>) * Const<32>``; the 32-bit constant is widened to
    the conversion's source width, multiplied with the inner product, and the narrowing conversion is re-applied
    on top of the new product.
    """

    __slots__ = ()

    NAME = "Rewrite Conv Mul"
    expr_classes = (BinaryOp,)

    # Conv(64->32, (Conv(32->64, expr) * N<64>)) * N<32>
    # => Conv(64->32, (Conv(32->64, expr) * N<64>) * Conv(32->64, N<32>))
    def optimize(self, expr: BinaryOp, **kwargs):
        """
        Rewrite ``expr`` if it matches the pattern described on the class.

        :param expr: The binary operation to inspect.
        :return:     The rewritten Convert expression, or None when ``expr`` does not match.
        """
        if expr.op != "Mul":
            return None

        narrowing, outer_factor = expr.operands

        # the outer multiplier must be a 32-bit constant
        if not isinstance(outer_factor, Const) or outer_factor.bits != 32:
            return None
        # the left-hand side must be a conversion that shrinks its operand
        if not isinstance(narrowing, Convert) or narrowing.from_bits <= narrowing.to_bits:
            return None

        inner_product = narrowing.operand
        if not isinstance(inner_product, BinaryOp) or inner_product.op != "Mul":
            return None
        inner_factor = inner_product.operands[1]
        if not isinstance(inner_factor, Const) or inner_factor.bits != 64:
            return None

        # widen the outer constant to the width the conversion starts from
        widened_factor = Convert(
            outer_factor.idx, outer_factor.bits, narrowing.from_bits, False, outer_factor, **outer_factor.tags
        )
        wide_product = BinaryOp(expr.idx, "Mul", [inner_product, widened_factor], expr.signed, **expr.tags)
        # re-apply the original narrowing on top of the combined product
        return Convert(wide_product.idx, narrowing.from_bits, narrowing.to_bits, False, wide_product, **expr.tags)
@@ -0,0 +1,90 @@
1
+ # pylint:disable=arguments-differ,too-many-boolean-expressions,no-self-use
2
+ from __future__ import annotations
3
+
4
+ from archinfo import Endness
5
+ from ailment.constant import UNDETERMINED_SIZE
6
+ from ailment.expression import Const, VirtualVariable, BinaryOp, UnaryOp, Load
7
+ from ailment.statement import Call, WeakAssignment
8
+
9
+ from angr.sim_type import SimTypeReference, SimCppClass
10
+ from angr.knowledge_plugins.key_definitions import atoms
11
+ from .base import PeepholeOptimizationStmtBase
12
+
13
+
14
class RewriteCxxOperatorCalls(PeepholeOptimizationStmtBase):
    """
    Rewrite C++ operator function calls into operations.

    Calls whose constant target resolves to a function named ``operator=`` are turned into a WeakAssignment from
    the second argument to the variable referenced by the first argument. Calls to ``operator+`` with three
    arguments are turned into a WeakAssignment of an ``Add`` expression to the call's return expression.
    """

    __slots__ = ()

    NAME = "Rewrite C++ operator function calls into operations"
    stmt_classes = (Call,)

    def optimize(self, stmt: Call, block=None, **kwargs):  # type: ignore
        """
        Dispatch a call statement to the matching operator rewriter.

        :param stmt:  The call statement to inspect.
        :param block: Unused by this optimization.
        :return:      A WeakAssignment replacing the call, or None if the call is left untouched.
        """
        assert self.project is not None

        # are we calling a function that we deem as an overridden operator function?
        if not isinstance(stmt.target, Const):
            return None

        func_addr = stmt.target.value
        if not self.project.kb.functions.contains_addr(func_addr):
            return None
        func = self.project.kb.functions[func_addr]
        demangled_name = func.demangled_name
        if stmt.args is None:
            return None

        # A plain substring test would also accept `operator==` as `operator=` and `operator+=` / `operator++` as
        # `operator+`; those are different operators and must not be rewritten as assignment or addition.
        if self._names_operator(demangled_name, "=", "="):
            return self._optimize_operator_equal(stmt)
        if self._names_operator(demangled_name, "+", "+="):
            return self._optimize_operator_add(stmt)
        # TODO: Support other types of C++ operator functions

        return None

    @staticmethod
    def _names_operator(demangled_name: str, op: str, longer_op_chars: str) -> bool:
        """
        Test whether ``operator<op>`` occurs in a demangled name as a complete operator token.

        :param demangled_name:  The demangled function name to search.
        :param op:              The operator symbol, e.g. ``"="``.
        :param longer_op_chars: Characters that, when immediately following the token, mean it is only the prefix
                                of a longer operator (e.g. ``"="`` turns ``operator=`` into ``operator==``).
        :return:                True if at least one occurrence is not followed by one of ``longer_op_chars``.
        """
        token = "operator" + op
        pos = demangled_name.find(token)
        while pos != -1:
            follower = demangled_name[pos + len(token) : pos + len(token) + 1]
            # an empty follower means the token ends the name; the explicit check is needed because
            # `"" in some_str` is always True
            if not follower or follower not in longer_op_chars:
                return True
            pos = demangled_name.find(token, pos + 1)
        return False

    def _optimize_operator_equal(self, stmt: Call) -> WeakAssignment | None:
        """
        Rewrite ``operator=(&dst, src)`` into ``dst = src`` as a WeakAssignment.

        The call must have exactly two arguments and the first one must be a ``Reference`` unary operation. When
        the referenced operand is a virtual variable, its ID is recorded in ``preserve_vvar_ids`` and, if the
        prototype returns a reference to a uniquely named C++ class, a type hint is appended to ``type_hints``.

        :param stmt: The call statement to rewrite.
        :return:     The replacement WeakAssignment, or None if the call does not have the expected shape.
        """
        if stmt.args and len(stmt.args) == 2 and isinstance(stmt.args[0], UnaryOp) and stmt.args[0].op == "Reference":
            dst = stmt.args[0].operand
            if isinstance(dst, VirtualVariable):
                self.preserve_vvar_ids.add(dst.varid)
                atom = atoms.VirtualVariable(dst.varid, dst.size, dst.category, dst.oident)
                if stmt.prototype is not None and isinstance(stmt.prototype.returnty, SimTypeReference):
                    type_hint = self._type_hint_from_typeref(stmt.prototype.returnty)
                    if type_hint is not None:
                        self.type_hints.append((atom, type_hint))
            # a constant source is treated as an address and wrapped in a load of undetermined size
            arg1 = (
                Load(None, stmt.args[1], UNDETERMINED_SIZE, Endness.BE, **stmt.tags)
                if isinstance(stmt.args[1], Const)
                else stmt.args[1]
            )
            # NOTE(review): when the call has no prototype the statement is tagged with type=None; confirm that
            # consumers of the "type" tag tolerate None.
            type_ = None
            if stmt.prototype is not None:
                dst_ty = stmt.prototype.returnty
                if isinstance(dst_ty, SimTypeReference):
                    dst_ty = dst_ty.refs
                type_ = {"dst": dst_ty, "src": stmt.prototype.args[1]}
            return WeakAssignment(stmt.idx, stmt.args[0].operand, arg1, type=type_, **stmt.tags)  # type:ignore
        return None

    def _optimize_operator_add(self, stmt: Call) -> WeakAssignment | None:
        """
        Rewrite ``ret = operator+(_, &lhs, const)`` into ``ret = lhs + load(const)`` as a WeakAssignment.

        The call must have exactly three arguments: the second must be a ``Reference`` to a virtual variable, the
        third must be a constant, and the call's return expression must be a virtual variable.

        :param stmt: The call statement to rewrite.
        :return:     The replacement WeakAssignment, or None if the call does not have the expected shape.
        """
        if (
            stmt.args
            and len(stmt.args) == 3
            and isinstance(stmt.args[1], UnaryOp)
            and stmt.args[1].op == "Reference"
            and isinstance(stmt.args[1].operand, VirtualVariable)
            and isinstance(stmt.args[2], Const)
            and isinstance(stmt.ret_expr, VirtualVariable)
        ):
            # the constant right-hand side is treated as an address and wrapped in a load of undetermined size
            arg2 = Load(None, stmt.args[2], UNDETERMINED_SIZE, Endness.BE, **stmt.tags)
            addition = BinaryOp(None, "Add", [stmt.args[1].operand, arg2], **stmt.tags)
            type_ = None
            if stmt.prototype is not None:
                dst_ty = stmt.prototype.returnty
                if isinstance(dst_ty, SimTypeReference):
                    dst_ty = dst_ty.refs
                type_ = {"dst": dst_ty, "src": stmt.prototype.args[1]}
            return WeakAssignment(stmt.idx, stmt.ret_expr, addition, type=type_, **stmt.tags)
        return None

    @staticmethod
    def _type_hint_from_typeref(typeref: SimTypeReference) -> str | None:
        """
        Return the unique name of the C++ class that ``typeref`` refers to.

        :param typeref: The reference type to inspect.
        :return:        The referenced class's ``unique_name``, or None if the referenced type is not a
                        SimCppClass or has no unique name.
        """
        if isinstance(typeref.refs, SimCppClass) and typeref.refs.unique_name:
            return typeref.refs.unique_name
        return None
@@ -22,6 +22,7 @@ from angr.analyses.decompiler.optimization_passes import (
22
22
  DeadblockRemover,
23
23
  SwitchReusedEntryRewriter,
24
24
  ConditionConstantPropagation,
25
+ DetermineLoadSizes,
25
26
  )
26
27
 
27
28
 
@@ -49,6 +50,7 @@ preset_fast = DecompilationPreset(
49
50
  InlinedStringTransformationSimplifier,
50
51
  CallStatementRewriter,
51
52
  ConditionConstantPropagation,
53
+ DetermineLoadSizes,
52
54
  ],
53
55
  )
54
56
 
@@ -27,6 +27,7 @@ from angr.analyses.decompiler.optimization_passes import (
27
27
  CallStatementRewriter,
28
28
  SwitchReusedEntryRewriter,
29
29
  ConditionConstantPropagation,
30
+ DetermineLoadSizes,
30
31
  )
31
32
 
32
33
 
@@ -59,6 +60,7 @@ preset_full = DecompilationPreset(
59
60
  CallStatementRewriter,
60
61
  SwitchReusedEntryRewriter,
61
62
  ConditionConstantPropagation,
63
+ DetermineLoadSizes,
62
64
  ],
63
65
  )
64
66
 
@@ -6,7 +6,17 @@ import logging
6
6
  from archinfo import Endness
7
7
  from ailment.block import Block
8
8
  from ailment.manager import Manager
9
- from ailment.statement import Statement, Assignment, Store, Call, Return, ConditionalJump, DirtyStatement, Jump
9
+ from ailment.statement import (
10
+ Statement,
11
+ Assignment,
12
+ Store,
13
+ Call,
14
+ Return,
15
+ ConditionalJump,
16
+ DirtyStatement,
17
+ Jump,
18
+ WeakAssignment,
19
+ )
10
20
  from ailment.expression import (
11
21
  Expression,
12
22
  Register,
@@ -181,6 +191,19 @@ class SimEngineSSARewriting(
181
191
  return new_stmt
182
192
  return None
183
193
 
194
def _handle_stmt_WeakAssignment(self, stmt) -> WeakAssignment | None:
    """
    Rewrite both sides of a weak assignment.

    :param stmt: The weak assignment statement to process.
    :return:     A new WeakAssignment carrying the rewritten side(s), or None if neither side was rewritten.
    """
    rewritten_src = self._expr(stmt.src)
    rewritten_dst = self._expr(stmt.dst)

    if rewritten_src is None and rewritten_dst is None:
        # nothing changed; signal that the original statement can be kept
        return None

    # fall back to the original operand for whichever side was left untouched
    dst = rewritten_dst if rewritten_dst is not None else stmt.dst
    src = rewritten_src if rewritten_src is not None else stmt.src
    return WeakAssignment(stmt.idx, dst, src, **stmt.tags)  # type: ignore
206
+
184
207
  def _handle_stmt_Store(self, stmt: Store) -> Store | Assignment | tuple[Assignment, ...] | None:
185
208
  new_data = self._expr(stmt.data)
186
209
  if stmt.guard is None:
@@ -505,7 +528,28 @@ class SimEngineSSARewriting(
505
528
  return None
506
529
 
507
530
  def _handle_expr_StackBaseOffset(self, expr):
508
- return None
531
+ if expr.offset not in self.state.stackvars:
532
+ # create it on the fly
533
+ vvar_id = self.get_vvid_by_def(
534
+ self.block.addr,
535
+ self.block.idx,
536
+ self.stmt_idx,
537
+ atoms.MemoryLocation(expr.offset, 1, self.project.arch.memory_endness),
538
+ "l",
539
+ )
540
+ vvar = VirtualVariable(
541
+ self.ail_manager.next_atom(),
542
+ vvar_id,
543
+ 1 * self.arch.byte_width,
544
+ category=VirtualVariableCategory.STACK,
545
+ oident=expr.offset,
546
+ **expr.tags,
547
+ )
548
+ self.state.stackvars[expr.offset][1] = vvar
549
+ else:
550
+ sz = 1 if 1 in self.state.stackvars[expr.offset] else max(self.state.stackvars[expr.offset])
551
+ vvar = self.state.stackvars[expr.offset][sz]
552
+ return UnaryOp(expr.idx, "Reference", vvar, bits=expr.bits, **expr.tags)
509
553
 
510
554
  def _handle_expr_VirtualVariable(self, expr):
511
555
  return None
@@ -807,7 +851,8 @@ class SimEngineSSARewriting(
807
851
  and expr.size in self.stackvar_locs[expr.addr.offset]
808
852
  ):
809
853
  if expr.size not in self.state.stackvars[expr.addr.offset]:
810
- # create it on the fly
854
+ # we have not seen its use before (which does not necessarily mean it's never created!), so we create
855
+ # it on the fly and record it in self.state.stackvars
811
856
  vvar_id = self.get_vvid_by_def(
812
857
  self.block.addr,
813
858
  self.block.idx,
@@ -815,7 +860,7 @@ class SimEngineSSARewriting(
815
860
  atoms.MemoryLocation(expr.addr.offset, expr.size, Endness(expr.endness)),
816
861
  "l",
817
862
  )
818
- return VirtualVariable(
863
+ var = VirtualVariable(
819
864
  self.ail_manager.next_atom(),
820
865
  vvar_id,
821
866
  expr.size * self.arch.byte_width,
@@ -823,6 +868,8 @@ class SimEngineSSARewriting(
823
868
  oident=expr.addr.offset,
824
869
  **expr.tags,
825
870
  )
871
+ self.state.stackvars[expr.addr.offset][expr.size] = var
872
+ return var
826
873
 
827
874
  # TODO: Support truncation
828
875
  # TODO: Maybe also support concatenation
@@ -134,7 +134,9 @@ class Ssailification(Analysis): # pylint:disable=abstract-method
134
134
  if self._ssa_stackvars:
135
135
  # for stack variables, we collect all definitions and identify stack variable locations using heuristics
136
136
 
137
- stackvar_locs = self._synthesize_stackvar_locs([def_ for def_, _ in def_to_loc if isinstance(def_, Store)])
137
+ stackvar_locs = self._synthesize_stackvar_locs(
138
+ [def_ for def_, _ in def_to_loc if isinstance(def_, (Store, StackBaseOffset))]
139
+ )
138
140
  # handle function arguments
139
141
  if self._func_args:
140
142
  for func_arg in self._func_args:
@@ -173,6 +175,20 @@ class Ssailification(Analysis): # pylint:disable=abstract-method
173
175
  if def_.size in stackvar_locs[off] and def_.size < full_sz:
174
176
  udef_to_defs[("stack", off, def_.size)].add(def_)
175
177
  udef_to_blockkeys[("stack", off, def_.size)].add((loc.block_addr, loc.block_idx))
178
+ elif isinstance(def_, StackBaseOffset):
179
+ sz = 1
180
+ idx_begin = bisect_left(sorted_stackvar_offs, def_.offset)
181
+ for i in range(idx_begin, len(sorted_stackvar_offs)):
182
+ off = sorted_stackvar_offs[i]
183
+ if off >= def_.offset + sz:
184
+ break
185
+ full_sz = max(stackvar_locs[off])
186
+ udef_to_defs[("stack", off, full_sz)].add(def_)
187
+ udef_to_blockkeys[("stack", off, full_sz)].add((loc.block_addr, loc.block_idx))
188
+ # add a definition for the partial stack variable
189
+ if sz in stackvar_locs[off] and sz < full_sz:
190
+ udef_to_defs[("stack", off, sz)].add(def_)
191
+ udef_to_blockkeys[("stack", off, sz)].add((loc.block_addr, loc.block_idx))
176
192
  elif isinstance(def_, Tmp):
177
193
  # Tmps are local to each block and do not need phi nodes
178
194
  pass
@@ -211,7 +227,7 @@ class Ssailification(Analysis): # pylint:disable=abstract-method
211
227
  return last_frontier
212
228
 
213
229
  @staticmethod
214
- def _synthesize_stackvar_locs(defs: list[Store]) -> dict[int, set[int]]:
230
+ def _synthesize_stackvar_locs(defs: list[Store | StackBaseOffset]) -> dict[int, set[int]]:
215
231
  """
216
232
  Derive potential locations (in terms of offsets and sizes) for stack variables based on all stack variable
217
233
  definitions provided.
@@ -224,7 +240,11 @@ class Ssailification(Analysis): # pylint:disable=abstract-method
224
240
  offs: set[int] = set()
225
241
 
226
242
  for def_ in defs:
227
- if isinstance(def_.addr, StackBaseOffset):
243
+ if isinstance(def_, StackBaseOffset):
244
+ stack_off = def_.offset
245
+ accesses[stack_off].add(1)
246
+ offs.add(stack_off)
247
+ elif isinstance(def_, Store) and isinstance(def_.addr, StackBaseOffset):
228
248
  stack_off = def_.addr.offset
229
249
  accesses[stack_off].add(def_.size)
230
250
  offs.add(stack_off)
@@ -60,6 +60,10 @@ class SimEngineSSATraversal(SimEngineLightAIL[TraversalState, None, None, None])
60
60
 
61
61
  self._expr(stmt.src)
62
62
 
63
def _handle_stmt_WeakAssignment(self, stmt):
    """
    Traverse both operands of a weak assignment, the source first and the destination second.
    """
    for operand in (stmt.src, stmt.dst):
        self._expr(operand)
66
+
63
67
  def _handle_stmt_Store(self, stmt: Store):
64
68
  self._expr(stmt.addr)
65
69
  self._expr(stmt.data)
@@ -149,6 +153,17 @@ class SimEngineSSATraversal(SimEngineLightAIL[TraversalState, None, None, None])
149
153
  self.loc_to_defs[codeloc].add(expr)
150
154
  self.state.live_stackvars.add((expr.addr.offset, expr.size))
151
155
 
156
def _handle_expr_StackBaseOffset(self, expr: StackBaseOffset):
    """
    Record a stack-base-offset expression as a definition at the current code location.

    Nothing is recorded when the offset is not an int or when the (offset, size) pair is already in the state's
    live stack variables.

    :param expr: The stack base offset expression being traversed.
    """
    # the real size is unknown here, so one byte is assumed for now
    assumed_size = 1
    key = (expr.offset, assumed_size)
    if not isinstance(expr.offset, int) or key in self.state.live_stackvars:
        return

    codeloc = self._codeloc()
    self.def_to_loc.append((expr, codeloc))
    if codeloc not in self.loc_to_defs:
        self.loc_to_defs[codeloc] = OrderedSet()
    self.loc_to_defs[codeloc].add(expr)
    self.state.live_stackvars.add(key)
166
+
152
167
  def _handle_expr_Tmp(self, expr: Tmp):
153
168
  if self.use_tmps:
154
169
  codeloc = self._codeloc()
@@ -269,6 +284,5 @@ class SimEngineSSATraversal(SimEngineLightAIL[TraversalState, None, None, None])
269
284
  _handle_expr_Phi = _handle_Dummy
270
285
  _handle_expr_Const = _handle_Dummy
271
286
  _handle_expr_MultiStatementExpression = _handle_Dummy
272
- _handle_expr_StackBaseOffset = _handle_Dummy
273
287
  _handle_expr_BasePointerOffset = _handle_Dummy
274
288
  _handle_expr_Call = _handle_Dummy
@@ -5,8 +5,10 @@ from collections.abc import Callable
5
5
  from collections import defaultdict, Counter
6
6
  import logging
7
7
  import struct
8
+ import re
8
9
 
9
10
  from ailment import Block, Expr, Stmt, Tmp
11
+ from ailment.constant import UNDETERMINED_SIZE
10
12
  from ailment.expression import StackBaseOffset, BinaryOp
11
13
  from unique_log_filter import UniqueLogFilter
12
14
 
@@ -34,6 +36,7 @@ from angr.sim_type import (
34
36
  SimTypeInt128,
35
37
  SimTypeInt256,
36
38
  SimTypeInt512,
39
+ SimCppClass,
37
40
  )
38
41
  from angr.knowledge_plugins.functions import Function
39
42
  from angr.sim_variable import SimVariable, SimTemporaryVariable, SimStackVariable, SimMemoryVariable
@@ -156,6 +159,18 @@ def guess_value_type(value: int, project: angr.Project) -> SimType | None:
156
159
  return None
157
160
 
158
161
 
162
def type_equals(t0: SimType, t1: SimType) -> bool:
    """
    Compare two types for equality, treating the two known spellings of ``std::string`` as the same class.

    :param t0: The first type.
    :param t1: The second type.
    :return:   True if both are C++ classes named with the two different std::string spellings; otherwise the
               result of ``t0 == t1``.
    """
    # special logic for C++ classes
    # TODO: Use the information (class names, etc.) in types_stl
    std_string_spellings = {
        "std::string",
        "class std::basic_string<char, struct std::char_traits<char>, class std::allocator<char>>",
    }
    is_std_string_pair = (
        isinstance(t0, SimCppClass) and isinstance(t1, SimCppClass) and {t1.name, t0.name} == std_string_spellings
    )
    return is_std_string_pair or t0 == t1
172
+
173
+
159
174
  def type_to_c_repr_chunks(ty: SimType, name=None, name_type=None, full=False, indent_str=""):
160
175
  """
161
176
  Helper generator function to turn a SimType into generated tuples of (C-string, AST node).
@@ -164,7 +179,10 @@ def type_to_c_repr_chunks(ty: SimType, name=None, name_type=None, full=False, in
164
179
  if full:
165
180
  # struct def preamble
166
181
  yield indent_str, None
167
- yield "typedef struct ", None
182
+ if isinstance(ty, SimCppClass):
183
+ yield "class ", None
184
+ else:
185
+ yield "typedef struct ", None
168
186
  yield ty.name, ty
169
187
  yield " {\n", None
170
188
 
@@ -1242,6 +1260,7 @@ class CFunctionCall(CStatement, CExpression):
1242
1260
  "callee_func",
1243
1261
  "callee_target",
1244
1262
  "is_expr",
1263
+ "prettify_thiscall",
1245
1264
  "ret_expr",
1246
1265
  "returning",
1247
1266
  "show_demangled_name",
@@ -1258,6 +1277,7 @@ class CFunctionCall(CStatement, CExpression):
1258
1277
  is_expr: bool = False,
1259
1278
  show_demangled_name=True,
1260
1279
  show_disambiguated_name: bool = True,
1280
+ prettify_thiscall: bool = True,
1261
1281
  tags=None,
1262
1282
  codegen=None,
1263
1283
  **kwargs,
@@ -1273,6 +1293,7 @@ class CFunctionCall(CStatement, CExpression):
1273
1293
  self.is_expr = is_expr
1274
1294
  self.show_demangled_name = show_demangled_name
1275
1295
  self.show_disambiguated_name = show_disambiguated_name
1296
+ self.prettify_thiscall = prettify_thiscall
1276
1297
 
1277
1298
  @property
1278
1299
  def prototype(self) -> SimTypeFunction | None: # TODO there should be a prototype for each callsite!
@@ -1313,6 +1334,13 @@ class CFunctionCall(CStatement, CExpression):
1313
1334
 
1314
1335
  return False
1315
1336
 
1337
+ @staticmethod
1338
+ def _is_func_likely_cxx_class_method(func_name: str) -> bool:
1339
+ if "::" not in func_name:
1340
+ return False
1341
+ chunks = func_name.split("::")
1342
+ return re.match(r"[a-zA-Z_][a-zA-Z0-9_]*", chunks[-1]) is not None
1343
+
1316
1344
  def c_repr_chunks(self, indent=0, asexpr: bool = False):
1317
1345
  """
1318
1346
 
@@ -1332,8 +1360,13 @@ class CFunctionCall(CStatement, CExpression):
1332
1360
  func_name = get_cpp_function_name(self.callee_func.demangled_name, specialized=False, qualified=True)
1333
1361
  else:
1334
1362
  func_name = self.callee_func.name
1363
+ if self.prettify_thiscall and self.args and self._is_func_likely_cxx_class_method(func_name):
1364
+ func_name = self.callee_func.short_name
1365
+ yield from self._c_repr_chunks_thiscall(func_name, asexpr=asexpr)
1366
+ return
1335
1367
  if self.show_disambiguated_name and self._is_target_ambiguous(func_name):
1336
1368
  func_name = self.callee_func.get_unambiguous_name(display_name=func_name)
1369
+
1337
1370
  yield func_name, self
1338
1371
  elif isinstance(self.callee_target, str):
1339
1372
  yield self.callee_target, self
@@ -1356,6 +1389,37 @@ class CFunctionCall(CStatement, CExpression):
1356
1389
  yield " /* do not return */", None
1357
1390
  yield "\n", None
1358
1391
 
1392
def _c_repr_chunks_thiscall(self, func_name: str, asexpr: bool = False):
    """
    Yield the C representation chunks of this call written as a member-function call on its first argument.

    The first argument is taken as the ``this`` pointer. When it is a ``Reference`` unary operation, the
    referenced operand is rendered and followed by ``.``; otherwise the argument itself is rendered as a
    pointer-valued expression and followed by ``->``. The remaining arguments are rendered inside the
    parentheses.

    :param func_name: The method name to display.
    :param asexpr:    If True, the statement terminator and trailing newline are not emitted.
    :return:          A generator of (text, AST node) tuples.
    """
    # The first argument is the `this` pointer
    assert self.args
    this_ref, *call_args = self.args
    if isinstance(this_ref, CUnaryOp) and this_ref.op == "Reference":
        # `&obj` as `this`: call the method directly on the object
        yield from CExpression._try_c_repr_chunks(this_ref.operand)
        yield ".", None
    else:
        # `this` is a pointer-valued expression, so member access has to go through `->`
        yield from CExpression._try_c_repr_chunks(this_ref)
        yield "->", None
    yield func_name, self

    # the remaining arguments
    paren = CClosingObject("(")
    yield "(", paren

    for i, arg in enumerate(call_args):
        if i:
            yield ", ", None
        yield from CExpression._try_c_repr_chunks(arg)

    yield ")", paren

    if not self.is_expr and not asexpr:
        yield ";", None
        if not self.returning:
            yield " /* do not return */", None
        yield "\n", None
1422
+
1359
1423
 
1360
1424
  class CReturn(CStatement):
1361
1425
  __slots__ = ("retval",)
@@ -1761,6 +1825,13 @@ class CBinaryOp(CExpression):
1761
1825
  # C spec https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2596.pdf 6.3.1.8 Usual arithmetic conversions
1762
1826
  rhs_ptr = isinstance(rhs_ty, SimTypePointer)
1763
1827
  lhs_ptr = isinstance(lhs_ty, SimTypePointer)
1828
+ rhs_cls = isinstance(unpack_typeref(rhs_ty), SimCppClass)
1829
+ lhs_cls = isinstance(unpack_typeref(lhs_ty), SimCppClass)
1830
+
1831
+ if lhs_cls:
1832
+ return lhs_ty
1833
+ if rhs_cls:
1834
+ return rhs_ty
1764
1835
 
1765
1836
  if op in ("Add", "Sub"):
1766
1837
  if lhs_ptr and rhs_ptr:
@@ -2462,6 +2533,7 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
2462
2533
  # AIL statements
2463
2534
  Stmt.Store: self._handle_Stmt_Store,
2464
2535
  Stmt.Assignment: self._handle_Stmt_Assignment,
2536
+ Stmt.WeakAssignment: self._handle_Stmt_WeakAssignment,
2465
2537
  Stmt.Call: self._handle_Stmt_Call,
2466
2538
  Stmt.Jump: self._handle_Stmt_Jump,
2467
2539
  Stmt.ConditionalJump: self._handle_Stmt_ConditionalJump,
@@ -2798,17 +2870,17 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
2798
2870
 
2799
2871
  if offset == 0:
2800
2872
  data_type = renegotiate_type(data_type, base_type)
2801
- if base_type == data_type or (
2873
+ if type_equals(base_type, data_type) or (
2802
2874
  base_type.size is not None and data_type.size is not None and base_type.size < data_type.size
2803
2875
  ):
2804
2876
  # case 1: we're done because we found it
2805
2877
  # case 2: we're done because we can never find it and we might as well stop early
2806
2878
  if base_expr:
2807
- if base_type != data_type:
2879
+ if not type_equals(base_type, data_type):
2808
2880
  return _force_type_cast(base_type, data_type, base_expr)
2809
2881
  return base_expr
2810
2882
 
2811
- if base_type != data_type:
2883
+ if not type_equals(base_type, data_type):
2812
2884
  return _force_type_cast(base_type, data_type, expr)
2813
2885
  return CUnaryOp("Dereference", expr, codegen=self)
2814
2886
 
@@ -3265,13 +3337,58 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
3265
3337
  csrc = self._handle(stmt.src, lvalue=False)
3266
3338
  cdst = None
3267
3339
 
3340
+ src_type = csrc.type
3341
+ dst_type = src_type
3342
+ if hasattr(stmt, "type"):
3343
+ src_type = stmt.type.get("src", None)
3344
+ dst_type = stmt.type.get("dst", None)
3345
+
3346
+ if isinstance(stmt.dst, Expr.VirtualVariable) and stmt.dst.was_stack:
3347
+
3348
+ def negotiate(old_ty, proposed_ty):
3349
+ # transfer casts from the dst to the src if possible
3350
+ # if we see something like *(size_t*)&v4 = x; where v4 is a pointer, change to v4 = (void*)x;
3351
+ nonlocal csrc
3352
+ if not type_equals(old_ty, proposed_ty) and qualifies_for_simple_cast(old_ty, proposed_ty):
3353
+ csrc = CTypeCast(csrc.type, proposed_ty, csrc, codegen=self)
3354
+ return proposed_ty
3355
+ return old_ty
3356
+
3357
+ if stmt.dst.variable is not None:
3358
+ if "struct_member_info" in stmt.dst.tags:
3359
+ offset, var, _ = stmt.dst.struct_member_info
3360
+ cvar = self._variable(var, stmt.dst.size, vvar_id=stmt.dst.varid)
3361
+ else:
3362
+ cvar = self._variable(stmt.dst.variable, stmt.dst.size, vvar_id=stmt.dst.varid)
3363
+ offset = stmt.dst.variable_offset or 0
3364
+ assert type(offset) is int # I refuse to deal with the alternative
3365
+
3366
+ cdst = self._access_constant_offset(
3367
+ self._get_variable_reference(cvar), offset, dst_type, True, negotiate
3368
+ )
3369
+
3370
+ if cdst is None:
3371
+ cdst = self._handle(stmt.dst, lvalue=True)
3372
+
3373
+ return CAssignment(cdst, csrc, tags=stmt.tags, codegen=self)
3374
+
3375
+ def _handle_Stmt_WeakAssignment(self, stmt, **kwargs):
3376
+ csrc = self._handle(stmt.src, lvalue=False)
3377
+ cdst = None
3378
+
3379
+ src_type = csrc.type
3380
+ dst_type = src_type
3381
+ if hasattr(stmt, "type"):
3382
+ src_type = stmt.type.get("src", None)
3383
+ dst_type = stmt.type.get("dst", None)
3384
+
3268
3385
  if isinstance(stmt.dst, Expr.VirtualVariable) and stmt.dst.was_stack:
3269
3386
 
3270
3387
  def negotiate(old_ty, proposed_ty):
3271
3388
  # transfer casts from the dst to the src if possible
3272
3389
  # if we see something like *(size_t*)&v4 = x; where v4 is a pointer, change to v4 = (void*)x;
3273
3390
  nonlocal csrc
3274
- if old_ty != proposed_ty and qualifies_for_simple_cast(old_ty, proposed_ty):
3391
+ if not type_equals(old_ty, proposed_ty) and qualifies_for_simple_cast(old_ty, proposed_ty):
3275
3392
  csrc = CTypeCast(csrc.type, proposed_ty, csrc, codegen=self)
3276
3393
  return proposed_ty
3277
3394
  return old_ty
@@ -3286,7 +3403,7 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
3286
3403
  assert type(offset) is int # I refuse to deal with the alternative
3287
3404
 
3288
3405
  cdst = self._access_constant_offset(
3289
- self._get_variable_reference(cvar), offset, csrc.type, True, negotiate
3406
+ self._get_variable_reference(cvar), offset, dst_type, True, negotiate
3290
3407
  )
3291
3408
 
3292
3409
  if cdst is None:
@@ -3413,7 +3530,18 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
3413
3530
  return CRegister(expr, tags=expr.tags, codegen=self)
3414
3531
 
3415
3532
  def _handle_Expr_Load(self, expr: Expr.Load, **kwargs):
3416
- ty = self.default_simtype_from_bits(expr.bits)
3533
+ if expr.size == UNDETERMINED_SIZE:
3534
+ # the size is undetermined; we force it to 1
3535
+ expr_size = 1
3536
+ expr_bits = 8
3537
+ else:
3538
+ expr_size = expr.size
3539
+ expr_bits = expr.bits
3540
+
3541
+ if expr.size > 100 and isinstance(expr.addr, Expr.Const):
3542
+ return self._handle_Expr_Const(expr.addr, type_=SimTypePointer(SimTypeChar()).with_arch(self.project.arch))
3543
+
3544
+ ty = self.default_simtype_from_bits(expr_bits)
3417
3545
 
3418
3546
  def negotiate(old_ty: SimType, proposed_ty: SimType) -> SimType:
3419
3547
  # we do not allow returning a struct for a primitive type
@@ -3430,7 +3558,7 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
3430
3558
  offset, var, _ = expr.struct_member_info
3431
3559
  cvar = self._variable(var, var.size)
3432
3560
  else:
3433
- cvar = self._variable(expr.variable, expr.size)
3561
+ cvar = self._variable(expr.variable, expr_size)
3434
3562
  offset = expr.variable_offset or 0
3435
3563
 
3436
3564
  assert type(offset) is int # I refuse to deal with the alternative
@@ -3449,7 +3577,10 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
3449
3577
  inline_string = False
3450
3578
  function_pointer = False
3451
3579
 
3452
- if reference_values is None and hasattr(expr, "reference_values"):
3580
+ if type_ is None and hasattr(expr, "type"):
3581
+ type_ = expr.type
3582
+
3583
+ if type_ is None and reference_values is None and hasattr(expr, "reference_values"):
3453
3584
  reference_values = expr.reference_values.copy()
3454
3585
  if reference_values:
3455
3586
  type_ = next(iter(reference_values))
@@ -3665,7 +3796,7 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
3665
3796
  if expr.variable is not None:
3666
3797
  if "struct_member_info" in expr.tags:
3667
3798
  offset, var, _ = expr.struct_member_info
3668
- cbasevar = self._variable(var, expr.size)
3799
+ cbasevar = self._variable(var, expr.size, vvar_id=expr.varid)
3669
3800
  cvar = self._access_constant_offset(
3670
3801
  self._get_variable_reference(cbasevar), offset, cbasevar.type, False, negotiate
3671
3802
  )