angr 9.2.148__py3-none-win_amd64.whl → 9.2.149__py3-none-win_amd64.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. (The original page linked to further details here; the link target is not preserved in this text.)

Files changed (56):
  1. angr/__init__.py +1 -1
  2. angr/analyses/calling_convention/calling_convention.py +42 -2
  3. angr/analyses/cfg/cfg_emulated.py +5 -2
  4. angr/analyses/cfg/cfg_fast.py +48 -46
  5. angr/analyses/decompiler/ail_simplifier.py +65 -32
  6. angr/analyses/decompiler/block_simplifier.py +20 -6
  7. angr/analyses/decompiler/clinic.py +80 -13
  8. angr/analyses/decompiler/dephication/rewriting_engine.py +24 -2
  9. angr/analyses/decompiler/optimization_passes/__init__.py +5 -0
  10. angr/analyses/decompiler/optimization_passes/base_ptr_save_simplifier.py +15 -13
  11. angr/analyses/decompiler/optimization_passes/determine_load_sizes.py +64 -0
  12. angr/analyses/decompiler/optimization_passes/eager_std_string_concatenation.py +165 -0
  13. angr/analyses/decompiler/optimization_passes/engine_base.py +11 -2
  14. angr/analyses/decompiler/optimization_passes/inlined_string_transformation_simplifier.py +17 -2
  15. angr/analyses/decompiler/optimization_passes/optimization_pass.py +10 -6
  16. angr/analyses/decompiler/optimization_passes/win_stack_canary_simplifier.py +99 -30
  17. angr/analyses/decompiler/peephole_optimizations/__init__.py +6 -0
  18. angr/analyses/decompiler/peephole_optimizations/base.py +43 -3
  19. angr/analyses/decompiler/peephole_optimizations/constant_derefs.py +1 -1
  20. angr/analyses/decompiler/peephole_optimizations/inlined_strcpy.py +3 -0
  21. angr/analyses/decompiler/peephole_optimizations/inlined_strcpy_consolidation.py +4 -1
  22. angr/analyses/decompiler/peephole_optimizations/remove_cxx_destructor_calls.py +32 -0
  23. angr/analyses/decompiler/peephole_optimizations/remove_redundant_bitmasks.py +69 -2
  24. angr/analyses/decompiler/peephole_optimizations/rewrite_conv_mul.py +40 -0
  25. angr/analyses/decompiler/peephole_optimizations/rewrite_cxx_operator_calls.py +90 -0
  26. angr/analyses/decompiler/presets/fast.py +2 -0
  27. angr/analyses/decompiler/presets/full.py +2 -0
  28. angr/analyses/decompiler/ssailification/rewriting_engine.py +51 -4
  29. angr/analyses/decompiler/ssailification/ssailification.py +23 -3
  30. angr/analyses/decompiler/ssailification/traversal_engine.py +15 -1
  31. angr/analyses/decompiler/structured_codegen/c.py +141 -10
  32. angr/analyses/decompiler/utils.py +6 -1
  33. angr/analyses/s_reaching_definitions/s_rda_view.py +1 -0
  34. angr/analyses/typehoon/lifter.py +20 -0
  35. angr/analyses/typehoon/simple_solver.py +42 -9
  36. angr/analyses/typehoon/translator.py +4 -1
  37. angr/analyses/typehoon/typeconsts.py +17 -6
  38. angr/analyses/typehoon/typehoon.py +21 -5
  39. angr/analyses/variable_recovery/engine_ail.py +44 -5
  40. angr/analyses/variable_recovery/engine_base.py +35 -12
  41. angr/analyses/variable_recovery/variable_recovery_fast.py +33 -2
  42. angr/calling_conventions.py +23 -5
  43. angr/engines/light/engine.py +7 -0
  44. angr/knowledge_plugins/functions/function.py +68 -0
  45. angr/knowledge_plugins/propagations/states.py +5 -2
  46. angr/knowledge_plugins/variables/variable_manager.py +3 -3
  47. angr/lib/angr_native.dll +0 -0
  48. angr/procedures/definitions/__init__.py +1 -1
  49. angr/procedures/definitions/types_stl.py +22 -0
  50. angr/sim_type.py +251 -130
  51. {angr-9.2.148.dist-info → angr-9.2.149.dist-info}/METADATA +7 -7
  52. {angr-9.2.148.dist-info → angr-9.2.149.dist-info}/RECORD +56 -50
  53. {angr-9.2.148.dist-info → angr-9.2.149.dist-info}/WHEEL +1 -1
  54. {angr-9.2.148.dist-info → angr-9.2.149.dist-info}/licenses/LICENSE +3 -0
  55. {angr-9.2.148.dist-info → angr-9.2.149.dist-info}/entry_points.txt +0 -0
  56. {angr-9.2.148.dist-info → angr-9.2.149.dist-info}/top_level.txt +0 -0
@@ -18,6 +18,7 @@ from angr.errors import AngrDecompilationError
18
18
  from angr.knowledge_base import KnowledgeBase
19
19
  from angr.knowledge_plugins.functions import Function
20
20
  from angr.knowledge_plugins.cfg.memory_data import MemoryDataSort
21
+ from angr.knowledge_plugins.key_definitions import atoms
21
22
  from angr.codenode import BlockNode
22
23
  from angr.utils import timethis
23
24
  from angr.utils.graph import GraphUtils
@@ -122,7 +123,7 @@ class Clinic(Analysis):
122
123
  desired_variables: set[str] | None = None,
123
124
  force_loop_single_exit: bool = True,
124
125
  complete_successors: bool = False,
125
- max_type_constraints: int = 750,
126
+ max_type_constraints: int = 4000,
126
127
  ):
127
128
  if not func.normalized and mode == ClinicMode.DECOMPILE:
128
129
  raise ValueError("Decompilation must work on normalized function graphs.")
@@ -505,17 +506,29 @@ class Clinic(Analysis):
505
506
  self._update_progress(37.0, text="Tracking stack pointers")
506
507
  spt = self._track_stack_pointers()
507
508
 
509
+ preserve_vvar_ids: set[int] = set()
510
+ type_hints: list[tuple[atoms.VirtualVariable | atoms.MemoryLocation, str]] = []
511
+
508
512
  # Simplify blocks
509
513
  # we never remove dead memory definitions before making callsites. otherwise stack arguments may go missing
510
514
  # before they are recognized as stack arguments.
511
515
  self._update_progress(38.0, text="Simplifying blocks 1")
512
- ail_graph = self._simplify_blocks(ail_graph, stack_pointer_tracker=spt, cache=block_simplification_cache)
516
+ ail_graph = self._simplify_blocks(
517
+ ail_graph,
518
+ stack_pointer_tracker=spt,
519
+ cache=block_simplification_cache,
520
+ preserve_vvar_ids=preserve_vvar_ids,
521
+ type_hints=type_hints,
522
+ )
513
523
  self._rewrite_alloca(ail_graph)
514
524
 
515
525
  # Run simplification passes
516
526
  self._update_progress(40.0, text="Running simplifications 1")
517
527
  ail_graph = self._run_simplification_passes(
518
- ail_graph, stack_items=self.stack_items, stage=OptimizationPassStage.AFTER_SINGLE_BLOCK_SIMPLIFICATION
528
+ ail_graph,
529
+ stack_pointer_tracker=spt,
530
+ stack_items=self.stack_items,
531
+ stage=OptimizationPassStage.AFTER_SINGLE_BLOCK_SIMPLIFICATION,
519
532
  )
520
533
 
521
534
  # Simplify the entire function for the first time
@@ -532,7 +545,19 @@ class Clinic(Analysis):
532
545
  # Run simplification passes again. there might be more chances for peephole optimizations after function-level
533
546
  # simplification
534
547
  self._update_progress(48.0, text="Simplifying blocks 2")
535
- ail_graph = self._simplify_blocks(ail_graph, stack_pointer_tracker=spt, cache=block_simplification_cache)
548
+ ail_graph = self._simplify_blocks(
549
+ ail_graph,
550
+ stack_pointer_tracker=spt,
551
+ cache=block_simplification_cache,
552
+ preserve_vvar_ids=preserve_vvar_ids,
553
+ type_hints=type_hints,
554
+ )
555
+
556
+ # Run simplification passes
557
+ self._update_progress(49.0, text="Running simplifications 2")
558
+ ail_graph = self._run_simplification_passes(
559
+ ail_graph, stage=OptimizationPassStage.BEFORE_SSA_LEVEL1_TRANSFORMATION
560
+ )
536
561
 
537
562
  # rewrite (qualified) stack variables into SSA form
538
563
  ail_graph = self._transform_to_ssa_level1(ail_graph, func_args)
@@ -544,11 +569,13 @@ class Clinic(Analysis):
544
569
  # Rust-specific; only call this on Rust binaries when we can identify language and compiler
545
570
  ail_graph = self._rewrite_rust_probestack_call(ail_graph)
546
571
  # Windows-specific
547
- ail_graph = self._rewrite_windows_stkchk_call(ail_graph)
572
+ ail_graph = self._rewrite_windows_chkstk_call(ail_graph)
548
573
 
549
574
  # Make call-sites
550
575
  self._update_progress(50.0, text="Making callsites")
551
- _, stackarg_offsets, removed_vvar_ids = self._make_callsites(ail_graph, func_args, stack_pointer_tracker=spt)
576
+ _, stackarg_offsets, removed_vvar_ids = self._make_callsites(
577
+ ail_graph, func_args, stack_pointer_tracker=spt, preserve_vvar_ids=preserve_vvar_ids
578
+ )
552
579
 
553
580
  # Run simplification passes
554
581
  self._update_progress(53.0, text="Running simplifications 2")
@@ -565,6 +592,7 @@ class Clinic(Analysis):
565
592
  fold_callexprs_into_conditions=self._fold_callexprs_into_conditions,
566
593
  removed_vvar_ids=removed_vvar_ids,
567
594
  arg_vvars=arg_vvars,
595
+ preserve_vvar_ids=preserve_vvar_ids,
568
596
  )
569
597
 
570
598
  # After global optimization, there might be more chances for peephole optimizations.
@@ -574,10 +602,12 @@ class Clinic(Analysis):
574
602
  ail_graph,
575
603
  stack_pointer_tracker=spt,
576
604
  cache=block_simplification_cache,
605
+ preserve_vvar_ids=preserve_vvar_ids,
606
+ type_hints=type_hints,
577
607
  )
578
608
 
579
609
  # Run simplification passes
580
- self._update_progress(65.0, text="Running simplifications 3 ")
610
+ self._update_progress(65.0, text="Running simplifications 3")
581
611
  ail_graph = self._run_simplification_passes(
582
612
  ail_graph, stack_items=self.stack_items, stage=OptimizationPassStage.AFTER_GLOBAL_SIMPLIFICATION
583
613
  )
@@ -592,6 +622,7 @@ class Clinic(Analysis):
592
622
  narrow_expressions=True,
593
623
  fold_callexprs_into_conditions=self._fold_callexprs_into_conditions,
594
624
  arg_vvars=arg_vvars,
625
+ preserve_vvar_ids=preserve_vvar_ids,
595
626
  )
596
627
 
597
628
  self._update_progress(75.0, text="Simplifying blocks 4")
@@ -599,6 +630,8 @@ class Clinic(Analysis):
599
630
  ail_graph,
600
631
  stack_pointer_tracker=spt,
601
632
  cache=block_simplification_cache,
633
+ preserve_vvar_ids=preserve_vvar_ids,
634
+ type_hints=type_hints,
602
635
  )
603
636
 
604
637
  # Simplify the entire function for the fourth time
@@ -611,6 +644,12 @@ class Clinic(Analysis):
611
644
  narrow_expressions=True,
612
645
  fold_callexprs_into_conditions=self._fold_callexprs_into_conditions,
613
646
  arg_vvars=arg_vvars,
647
+ preserve_vvar_ids=preserve_vvar_ids,
648
+ )
649
+
650
+ self._update_progress(79.0, text="Running simplifications 4")
651
+ ail_graph = self._run_simplification_passes(
652
+ ail_graph, stack_items=self.stack_items, stage=OptimizationPassStage.BEFORE_VARIABLE_RECOVERY
614
653
  )
615
654
 
616
655
  # update arg_list
@@ -623,7 +662,7 @@ class Clinic(Analysis):
623
662
 
624
663
  # Recover variables on AIL blocks
625
664
  self._update_progress(80.0, text="Recovering variables")
626
- variable_kb = self._recover_and_link_variables(ail_graph, arg_list, arg_vvars, vvar2vvar)
665
+ variable_kb = self._recover_and_link_variables(ail_graph, arg_list, arg_vvars, vvar2vvar, type_hints)
627
666
 
628
667
  # Run simplification passes
629
668
  self._update_progress(85.0, text="Running simplifications 4")
@@ -1226,6 +1265,8 @@ class Clinic(Analysis):
1226
1265
  ail_graph: networkx.DiGraph,
1227
1266
  stack_pointer_tracker=None,
1228
1267
  cache: dict[ailment.Block, NamedTuple] | None = None,
1268
+ preserve_vvar_ids: set[int] | None = None,
1269
+ type_hints: list[tuple[atoms.VirtualVariable | atoms.MemoryLocation, str]] | None = None,
1229
1270
  ):
1230
1271
  """
1231
1272
  Simplify all blocks in self._blocks.
@@ -1244,6 +1285,8 @@ class Clinic(Analysis):
1244
1285
  ail_block,
1245
1286
  stack_pointer_tracker=stack_pointer_tracker,
1246
1287
  cache=cache,
1288
+ preserve_vvar_ids=preserve_vvar_ids,
1289
+ type_hints=type_hints,
1247
1290
  )
1248
1291
  key = ail_block.addr, ail_block.idx
1249
1292
  blocks_by_addr_and_idx[key] = simplified
@@ -1259,7 +1302,14 @@ class Clinic(Analysis):
1259
1302
 
1260
1303
  return ail_graph
1261
1304
 
1262
- def _simplify_block(self, ail_block, stack_pointer_tracker=None, cache=None):
1305
+ def _simplify_block(
1306
+ self,
1307
+ ail_block,
1308
+ stack_pointer_tracker=None,
1309
+ cache=None,
1310
+ preserve_vvar_ids: set[int] | None = None,
1311
+ type_hints: list[tuple[atoms.VirtualVariable | atoms.MemoryLocation, str]] | None = None,
1312
+ ):
1263
1313
  """
1264
1314
  Simplify a single AIL block.
1265
1315
 
@@ -1286,6 +1336,8 @@ class Clinic(Analysis):
1286
1336
  peephole_optimizations=self.peephole_optimizations,
1287
1337
  cached_reaching_definitions=cached_rd,
1288
1338
  cached_propagator=cached_prop,
1339
+ preserve_vvar_ids=preserve_vvar_ids,
1340
+ type_hints=type_hints,
1289
1341
  )
1290
1342
  # update the cache
1291
1343
  if cache is not None:
@@ -1308,6 +1360,7 @@ class Clinic(Analysis):
1308
1360
  rewrite_ccalls=True,
1309
1361
  removed_vvar_ids: set[int] | None = None,
1310
1362
  arg_vvars: dict[int, tuple[ailment.Expr.VirtualVariable, SimVariable]] | None = None,
1363
+ preserve_vvar_ids: set[int] | None = None,
1311
1364
  ) -> None:
1312
1365
  """
1313
1366
  Simplify the entire function until it reaches a fixed point.
@@ -1326,6 +1379,7 @@ class Clinic(Analysis):
1326
1379
  rewrite_ccalls=rewrite_ccalls,
1327
1380
  removed_vvar_ids=removed_vvar_ids,
1328
1381
  arg_vvars=arg_vvars,
1382
+ preserve_vvar_ids=preserve_vvar_ids,
1329
1383
  )
1330
1384
  if not simplified:
1331
1385
  break
@@ -1343,6 +1397,7 @@ class Clinic(Analysis):
1343
1397
  rewrite_ccalls=True,
1344
1398
  removed_vvar_ids: set[int] | None = None,
1345
1399
  arg_vvars: dict[int, tuple[ailment.Expr.VirtualVariable, SimVariable]] | None = None,
1400
+ preserve_vvar_ids: set[int] | None = None,
1346
1401
  ):
1347
1402
  """
1348
1403
  Simplify the entire function once.
@@ -1367,6 +1422,7 @@ class Clinic(Analysis):
1367
1422
  removed_vvar_ids=removed_vvar_ids,
1368
1423
  arg_vvars=arg_vvars,
1369
1424
  secondary_stackvars=self.secondary_stackvars,
1425
+ avoid_vvar_ids=preserve_vvar_ids,
1370
1426
  )
1371
1427
  # cache the simplifier's RDA analysis
1372
1428
  self.reaching_definitions = simp._reaching_definitions
@@ -1381,6 +1437,7 @@ class Clinic(Analysis):
1381
1437
  stage: OptimizationPassStage = OptimizationPassStage.AFTER_GLOBAL_SIMPLIFICATION,
1382
1438
  variable_kb=None,
1383
1439
  stack_items: dict[int, StackItem] | None = None,
1440
+ stack_pointer_tracker=None,
1384
1441
  **kwargs,
1385
1442
  ):
1386
1443
  addr_and_idx_to_blocks: dict[tuple[int, int | None], ailment.Block] = {}
@@ -1415,6 +1472,7 @@ class Clinic(Analysis):
1415
1472
  scratch=self.optimization_scratch,
1416
1473
  force_loop_single_exit=self._force_loop_single_exit,
1417
1474
  complete_successors=self._complete_successors,
1475
+ stack_pointer_tracker=stack_pointer_tracker,
1418
1476
  **kwargs,
1419
1477
  )
1420
1478
  if a.out_graph:
@@ -1550,7 +1608,13 @@ class Clinic(Analysis):
1550
1608
  return []
1551
1609
 
1552
1610
  @timethis
1553
- def _make_callsites(self, ail_graph, func_args: set[ailment.Expr.VirtualVariable], stack_pointer_tracker=None):
1611
+ def _make_callsites(
1612
+ self,
1613
+ ail_graph,
1614
+ func_args: set[ailment.Expr.VirtualVariable],
1615
+ stack_pointer_tracker=None,
1616
+ preserve_vvar_ids: set[int] | None = None,
1617
+ ):
1554
1618
  """
1555
1619
  Simplify all function call statements.
1556
1620
  """
@@ -1588,6 +1652,7 @@ class Clinic(Analysis):
1588
1652
  fail_fast=self._fail_fast,
1589
1653
  stack_pointer_tracker=stack_pointer_tracker,
1590
1654
  peephole_optimizations=self.peephole_optimizations,
1655
+ preserve_vvar_ids=preserve_vvar_ids,
1591
1656
  )
1592
1657
  return simp.result_block
1593
1658
  return None
@@ -1663,6 +1728,7 @@ class Clinic(Analysis):
1663
1728
  arg_list: list,
1664
1729
  arg_vvars: dict[int, tuple[ailment.Expr.VirtualVariable, SimVariable]],
1665
1730
  vvar2vvar: dict[int, int],
1731
+ type_hints: list[tuple[atoms.VirtualVariable | atoms.MemoryLocation, str]],
1666
1732
  ):
1667
1733
  # variable recovery
1668
1734
  tmp_kb = KnowledgeBase(self.project) if self.variable_kb is None else self.variable_kb
@@ -1677,6 +1743,7 @@ class Clinic(Analysis):
1677
1743
  unify_variables=False,
1678
1744
  func_arg_vvars=arg_vvars,
1679
1745
  vvar_to_vvar=vvar2vvar,
1746
+ type_hints=type_hints,
1680
1747
  )
1681
1748
  # get ground-truth types
1682
1749
  var_manager = tmp_kb.variables[self.function.addr]
@@ -1710,7 +1777,7 @@ class Clinic(Analysis):
1710
1777
  must_struct = None
1711
1778
  total_type_constraints = sum(len(tc) for tc in vr.type_constraints.values()) if vr.type_constraints else 0
1712
1779
  if total_type_constraints > self._max_type_constraints:
1713
- l.info(
1780
+ l.warning(
1714
1781
  "The number of type constraints (%d) is greater than the threshold (%d). Skipping type inference.",
1715
1782
  total_type_constraints,
1716
1783
  self._max_type_constraints,
@@ -1821,7 +1888,7 @@ class Clinic(Analysis):
1821
1888
  if off in variable_manager.stack_offset_to_struct_member_info:
1822
1889
  stmt.tags["struct_member_info"] = variable_manager.stack_offset_to_struct_member_info[off]
1823
1890
 
1824
- elif stmt_type is ailment.Stmt.Assignment:
1891
+ elif stmt_type is ailment.Stmt.Assignment or stmt_type is ailment.Stmt.WeakAssignment:
1825
1892
  self._link_variables_on_expr(variable_manager, global_variables, block, stmt_idx, stmt, stmt.dst)
1826
1893
  self._link_variables_on_expr(variable_manager, global_variables, block, stmt_idx, stmt, stmt.src)
1827
1894
 
@@ -2937,7 +3004,7 @@ class Clinic(Analysis):
2937
3004
  break
2938
3005
  return ail_graph
2939
3006
 
2940
- def _rewrite_windows_stkchk_call(self, ail_graph) -> networkx.DiGraph:
3007
+ def _rewrite_windows_chkstk_call(self, ail_graph) -> networkx.DiGraph:
2941
3008
  if not (self.project.simos is not None and self.project.simos.name == "Win32"):
2942
3009
  return ail_graph
2943
3010
 
@@ -3,7 +3,16 @@ from __future__ import annotations
3
3
  import logging
4
4
 
5
5
  from ailment.block import Block
6
- from ailment.statement import Statement, Assignment, Store, Call, Return, ConditionalJump, DirtyStatement
6
+ from ailment.statement import (
7
+ Statement,
8
+ Assignment,
9
+ Store,
10
+ Call,
11
+ Return,
12
+ ConditionalJump,
13
+ DirtyStatement,
14
+ WeakAssignment,
15
+ )
7
16
  from ailment.expression import (
8
17
  Expression,
9
18
  VirtualVariable,
@@ -99,6 +108,19 @@ class SimEngineDephiRewriting(SimEngineNostmtAIL[None, Expression | None, Statem
99
108
  return Assignment(stmt.idx, dst, src, **stmt.tags)
100
109
  return None
101
110
 
111
+ def _handle_stmt_WeakAssignment(self, stmt) -> WeakAssignment | None:
112
+ new_src = self._expr(stmt.src)
113
+ new_dst = self._expr(stmt.dst)
114
+
115
+ if new_dst is not None or new_src is not None:
116
+ return WeakAssignment(
117
+ stmt.idx,
118
+ stmt.dst if new_dst is None else new_dst, # type: ignore
119
+ stmt.src if new_src is None else new_src,
120
+ **stmt.tags,
121
+ )
122
+ return None
123
+
102
124
  def _handle_stmt_Store(self, stmt):
103
125
  new_addr = self._expr(stmt.addr)
104
126
  new_data = self._expr(stmt.data)
@@ -299,7 +321,7 @@ class SimEngineDephiRewriting(SimEngineNostmtAIL[None, Expression | None, Statem
299
321
  return VEXCCallExpression(
300
322
  expr.idx,
301
323
  expr.callee,
302
- new_operands,
324
+ tuple(new_operands),
303
325
  bits=expr.bits,
304
326
  **expr.tags,
305
327
  )
@@ -33,6 +33,8 @@ from .call_stmt_rewriter import CallStatementRewriter
33
33
  from .duplication_reverter import DuplicationReverter
34
34
  from .switch_reused_entry_rewriter import SwitchReusedEntryRewriter
35
35
  from .condition_constprop import ConditionConstantPropagation
36
+ from .determine_load_sizes import DetermineLoadSizes
37
+ from .eager_std_string_concatenation import EagerStdStringConcatenationPass
36
38
 
37
39
  if TYPE_CHECKING:
38
40
  from angr.analyses.decompiler.presets import DecompilationPreset
@@ -68,6 +70,8 @@ ALL_OPTIMIZATION_PASSES = [
68
70
  CallStatementRewriter,
69
71
  TagSlicer,
70
72
  ConditionConstantPropagation,
73
+ DetermineLoadSizes,
74
+ EagerStdStringConcatenationPass,
71
75
  ]
72
76
 
73
77
  # these passes may duplicate code to remove gotos or improve the structure of the graph
@@ -122,6 +126,7 @@ __all__ = (
122
126
  "DeadblockRemover",
123
127
  "DivSimplifier",
124
128
  "DuplicationReverter",
129
+ "EagerStdStringConcatenationPass",
125
130
  "ExprOpSwapper",
126
131
  "FlipBooleanCmp",
127
132
  "ITEExprConverter",
@@ -98,20 +98,22 @@ class BasePointerSaveSimplifier(OptimizationPass):
98
98
  and isinstance(stmt.dst, ailment.Expr.VirtualVariable)
99
99
  and stmt.dst.was_stack
100
100
  and stmt.dst.stack_offset < 0
101
- and isinstance(stmt.src, ailment.Expr.VirtualVariable)
102
- and stmt.src.was_reg
103
- and stmt.src.reg_offset == self.project.arch.bp_offset
104
101
  ):
105
- return first_block, idx, stmt.dst
106
- if (
107
- isinstance(stmt, ailment.Stmt.Assignment)
108
- and isinstance(stmt.dst, ailment.Expr.VirtualVariable)
109
- and stmt.dst.was_stack
110
- and stmt.dst.stack_offset < 0
111
- and isinstance(stmt.src, ailment.Expr.StackBaseOffset)
112
- and stmt.src.offset == 0
113
- ):
114
- return first_block, idx, stmt.dst
102
+ if (
103
+ isinstance(stmt.src, ailment.Expr.VirtualVariable)
104
+ and stmt.src.was_reg
105
+ and stmt.src.reg_offset == self.project.arch.bp_offset
106
+ ):
107
+ return first_block, idx, stmt.dst
108
+ if isinstance(stmt.src, ailment.Expr.StackBaseOffset) and stmt.src.offset == 0:
109
+ return first_block, idx, stmt.dst
110
+ if (
111
+ isinstance(stmt.src, ailment.Expr.UnaryOp)
112
+ and isinstance(stmt.src.operand, ailment.Expr.VirtualVariable)
113
+ and stmt.src.operand.was_stack
114
+ and stmt.src.operand.stack_offset == 0
115
+ ):
116
+ return first_block, idx, stmt.dst
115
117
 
116
118
  # Not found
117
119
  return None
@@ -0,0 +1,64 @@
1
+ from __future__ import annotations
2
+ import logging
3
+
4
+ from ailment.constant import UNDETERMINED_SIZE
5
+ from ailment.expression import BinaryOp, Load, Const
6
+ from ailment.statement import Assignment, WeakAssignment
7
+
8
+ from .optimization_pass import OptimizationPass, OptimizationPassStage
9
+
10
+
11
+ _l = logging.getLogger(name=__name__)
12
+
13
+
14
+ class DetermineLoadSizes(OptimizationPass):
15
+ """
16
+ Determine the sizes of Load expressions whose sizes are undetermined.
17
+ """
18
+
19
+ ARCHES = None
20
+ PLATFORMS = None
21
+ STAGE = OptimizationPassStage.AFTER_GLOBAL_SIMPLIFICATION
22
+ NAME = "Determine sizes of loads whose sizes are undetermined"
23
+ DESCRIPTION = __doc__.strip() # type: ignore
24
+
25
+ def __init__(self, func, **kwargs):
26
+ super().__init__(func, **kwargs)
27
+
28
+ self.analyze()
29
+
30
+ def _check(self):
31
+ return True, None
32
+
33
+ def _analyze(self, cache=None):
34
+
35
+ changed = False
36
+
37
+ for block in self._graph.nodes:
38
+ for idx in range(len(block.statements)): # pylint:disable=consider-using-enumerate
39
+ stmt = block.statements[idx]
40
+ if isinstance(stmt, (Assignment, WeakAssignment)):
41
+ if isinstance(stmt.src, BinaryOp) and stmt.src.op == "Add" and stmt.src.operands:
42
+ operands = stmt.src.operands
43
+ elif isinstance(stmt.src, Load):
44
+ operands = [stmt.src]
45
+ else:
46
+ continue
47
+
48
+ for operand in operands:
49
+ if (
50
+ isinstance(operand, Load)
51
+ and isinstance(operand.addr, Const)
52
+ and operand.size == UNDETERMINED_SIZE
53
+ ):
54
+ # probably a string!
55
+ bs = self.project.loader.memory.load_null_terminated_bytes(
56
+ operand.addr.value, max_size=4096
57
+ )
58
+ if bs is not None:
59
+ operand.size = len(bs)
60
+ operand.bits = len(bs) * 8
61
+ changed = True
62
+
63
+ if changed:
64
+ self.out_graph = self._graph
@@ -0,0 +1,165 @@
1
+ # pylint:disable=too-many-boolean-expressions,unused-argument
2
+ from __future__ import annotations
3
+ from typing import TYPE_CHECKING
4
+ import logging
5
+ import re
6
+
7
+ from archinfo import Endness
8
+
9
+ from ailment.constant import UNDETERMINED_SIZE
10
+ from ailment.statement import Assignment, WeakAssignment
11
+ from ailment.expression import VirtualVariable, BinaryOp, Const, Load
12
+
13
+ from .optimization_pass import OptimizationPass, OptimizationPassStage
14
+
15
+ if TYPE_CHECKING:
16
+ from angr.analyses.s_reaching_definitions import SRDAModel
17
+
18
+
19
+ _l = logging.getLogger(name=__name__)
20
+
21
+
22
+ class EagerStdStringConcatenationPass(OptimizationPass):
23
+ """
24
+ TODO: Unfinished
25
+ """
26
+
27
+ ARCHES = None
28
+ PLATFORMS = None
29
+ STAGE = OptimizationPassStage.BEFORE_VARIABLE_RECOVERY
30
+ NAME = "Condense multiple constant std::string creation calls into one when possible"
31
+ DESCRIPTION = __doc__.strip() # type: ignore
32
+
33
+ def __init__(self, func, **kwargs):
34
+ super().__init__(func, **kwargs)
35
+ self.analyze()
36
+
37
+ def _check(self):
38
+ # TODO: ensure func calls std::string::operator+ and std::string::operator=
39
+ return False, {}
40
+
41
+ def _analyze(self, cache=None):
42
+ rd = self.project.analyses.SReachingDefinitions(subject=self._func, func_graph=self._graph).model
43
+ cfg = self.kb.cfgs.get_most_accurate()
44
+ assert cfg is not None
45
+
46
+ # update each block
47
+ for key in list(self.blocks_by_addr_and_idx):
48
+ block = self.blocks_by_addr_and_idx[key]
49
+ new_block = None
50
+ for idx, stmt in enumerate(block.statements):
51
+ if (
52
+ isinstance(stmt, Assignment)
53
+ and hasattr(stmt, "type")
54
+ and "dst" in stmt.type
55
+ and "src" in stmt.type
56
+ and isinstance(stmt.dst, VirtualVariable)
57
+ and isinstance(stmt.src, BinaryOp)
58
+ and stmt.src.op == "Add"
59
+ ):
60
+ dst_ty, src_ty = stmt.type["dst"], stmt.type["src"]
61
+ if self._is_std_string_type(dst_ty.c_repr()) and self._is_std_string_type(src_ty.c_repr()):
62
+ op0, op1 = stmt.src.operands
63
+ if isinstance(op1, VirtualVariable) and isinstance(op0, Load):
64
+ op0, op1 = op1, op0
65
+ if (
66
+ isinstance(op0, VirtualVariable)
67
+ and isinstance(op1, Load)
68
+ and isinstance(op1.addr, Const)
69
+ and isinstance(op1.addr.value, int)
70
+ # is op1 a constant string?
71
+ and op1.addr.value in cfg.memory_data
72
+ and cfg.memory_data[op1.addr.value].sort == "string"
73
+ ):
74
+ op1_str = cfg.memory_data[op1.addr.value].content
75
+ # is op0 also an std::string?
76
+ op0_str = self._get_vvar_def_string(op0.varid, rd, cfg, block.addr, block.idx)
77
+ if op0_str is not None and op1_str is not None:
78
+ # let's create a new string
79
+ final_str = op0_str + op1_str
80
+ str_id = self.kb.custom_strings.allocate(final_str)
81
+ # replace the assignment with a new assignment
82
+ new_stmt = WeakAssignment(
83
+ stmt.idx,
84
+ stmt.dst,
85
+ Load(
86
+ None,
87
+ Const(None, None, str_id, self.project.arch.bits, custom_string=True),
88
+ UNDETERMINED_SIZE,
89
+ Endness.BE,
90
+ ),
91
+ **stmt.tags,
92
+ )
93
+ new_block = block.copy() if new_block is None else new_block
94
+ new_block.statements[idx] = new_stmt
95
+ if new_block is not None:
96
+ self._update_block(block, new_block)
97
+
98
+ def _get_vvar_def_string(self, vvar_id: int, rd: SRDAModel, cfg, block_addr, block_idx) -> bytes | None:
99
+ # search for the closest weak definition of the specified variable
100
+ # TODO: Optimize this logic in the future
101
+
102
+ starting_block = self.blocks_by_addr_and_idx[(block_addr, block_idx)]
103
+ queue = [starting_block]
104
+ visited = set()
105
+ while queue:
106
+ block = queue.pop(0)
107
+ if block in visited:
108
+ continue
109
+ visited.add(block)
110
+
111
+ if not (block.addr == block_addr and block.idx == block_idx):
112
+ for stmt in block.statements:
113
+ if (
114
+ isinstance(stmt, WeakAssignment)
115
+ and isinstance(stmt.dst, VirtualVariable)
116
+ and stmt.dst.varid == vvar_id
117
+ ):
118
+ if (
119
+ isinstance(stmt.src, Load)
120
+ and isinstance(stmt.src.addr, Const)
121
+ and stmt.src.addr.value in cfg.memory_data
122
+ ):
123
+ if cfg.memory_data[stmt.src.addr.value].sort == "string":
124
+ return cfg.memory_data[stmt.src.addr.value].content
125
+ elif (
126
+ isinstance(stmt.src, Const)
127
+ and hasattr(stmt.src, "custom_string")
128
+ and stmt.src.custom_string
129
+ ):
130
+ return self.kb.custom_strings.get(stmt.src.value)
131
+
132
+ preds = list(self._graph.predecessors(block))
133
+ if len(preds) == 1:
134
+ queue.append(preds[0])
135
+
136
+ return None
137
+
138
+ @staticmethod
139
+ def _is_std_string_type(type_str: str) -> bool:
140
+ type_str = type_str.removeprefix("const ")
141
+ return (
142
+ re.match(
143
+ r"class std::basic_string<char a\d+, struct std::char_traits<char> a\d+, class std::allocator<char>>",
144
+ type_str,
145
+ )
146
+ is not None
147
+ )
148
+
149
+ # pcreg_offset = self.project.arch.registers[getpc_reg][0]
150
+
151
+
152
+ #
153
+ # old_block = self.blocks_by_addr_and_idx[block_key]
154
+ # block = old_block.copy()
155
+ # old_stmt = block.statements[stmt_idx]
156
+ # block.statements[stmt_idx] = ailment.Stmt.Assignment(
157
+ # old_stmt.idx,
158
+ # ailment.Expr.Register(None, None, pcreg_offset, 32, reg_name=getpc_reg),
159
+ # ailment.Expr.Const(None, None, getpc_reg_value, 32),
160
+ # **old_stmt.tags,
161
+ # )
162
+ # # remove the statement that pushes return address onto the stack
163
+ # if stmt_idx > 0 and isinstance(block.statements[stmt_idx - 1], ailment.Stmt.Store):
164
+ # block.statements = block.statements[: stmt_idx - 1] + block.statements[stmt_idx:]
165
+ # self._update_block(old_block, block)
@@ -73,7 +73,16 @@ class SimplifierAILEngine(
73
73
  self.state.store_variable(dst, src)
74
74
 
75
75
  if (src, dst) != (stmt.src, stmt.dst):
76
- return ailment.statement.Assignment(stmt.idx, dst, src, **stmt.tags)
76
+ return ailment.statement.Assignment(stmt.idx, dst, src, **stmt.tags) # type:ignore
77
+
78
+ return stmt
79
+
80
+ def _handle_stmt_WeakAssignment(self, stmt: ailment.statement.WeakAssignment):
81
+ src = self._expr(stmt.src)
82
+ dst = self._expr(stmt.dst)
83
+
84
+ if (src, dst) != (stmt.src, stmt.dst):
85
+ return ailment.statement.WeakAssignment(stmt.idx, dst, src, **stmt.tags) # type:ignore
77
86
 
78
87
  return stmt
79
88
 
@@ -150,7 +159,7 @@ class SimplifierAILEngine(
150
159
  def _handle_stmt_DirtyStatement(self, stmt):
151
160
  expr = self._expr(stmt.dirty)
152
161
  if expr != stmt.dirty:
153
- return ailment.statement.DirtyStatement(stmt.idx, expr, **stmt.tags)
162
+ return ailment.statement.DirtyStatement(stmt.idx, expr, **stmt.tags) # type:ignore
154
163
  return stmt
155
164
 
156
165
  def _handle_stmt_Label(self, stmt):