angr 9.2.148__py3-none-manylinux2014_x86_64.whl → 9.2.150__py3-none-manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of angr might be problematic. Click here for more details.
- angr/__init__.py +1 -1
- angr/__main__.py +100 -37
- angr/analyses/calling_convention/calling_convention.py +42 -2
- angr/analyses/cfg/cfg_emulated.py +5 -2
- angr/analyses/cfg/cfg_fast.py +48 -46
- angr/analyses/decompiler/ail_simplifier.py +65 -32
- angr/analyses/decompiler/block_simplifier.py +20 -6
- angr/analyses/decompiler/clinic.py +80 -13
- angr/analyses/decompiler/dephication/rewriting_engine.py +24 -2
- angr/analyses/decompiler/optimization_passes/__init__.py +5 -0
- angr/analyses/decompiler/optimization_passes/base_ptr_save_simplifier.py +15 -13
- angr/analyses/decompiler/optimization_passes/determine_load_sizes.py +64 -0
- angr/analyses/decompiler/optimization_passes/eager_std_string_concatenation.py +165 -0
- angr/analyses/decompiler/optimization_passes/engine_base.py +11 -2
- angr/analyses/decompiler/optimization_passes/flip_boolean_cmp.py +2 -1
- angr/analyses/decompiler/optimization_passes/inlined_string_transformation_simplifier.py +17 -2
- angr/analyses/decompiler/optimization_passes/optimization_pass.py +10 -6
- angr/analyses/decompiler/optimization_passes/win_stack_canary_simplifier.py +99 -30
- angr/analyses/decompiler/peephole_optimizations/__init__.py +6 -0
- angr/analyses/decompiler/peephole_optimizations/base.py +43 -3
- angr/analyses/decompiler/peephole_optimizations/constant_derefs.py +1 -1
- angr/analyses/decompiler/peephole_optimizations/inlined_strcpy.py +3 -0
- angr/analyses/decompiler/peephole_optimizations/inlined_strcpy_consolidation.py +4 -1
- angr/analyses/decompiler/peephole_optimizations/remove_cxx_destructor_calls.py +32 -0
- angr/analyses/decompiler/peephole_optimizations/remove_redundant_bitmasks.py +69 -2
- angr/analyses/decompiler/peephole_optimizations/rewrite_conv_mul.py +40 -0
- angr/analyses/decompiler/peephole_optimizations/rewrite_cxx_operator_calls.py +90 -0
- angr/analyses/decompiler/presets/fast.py +2 -0
- angr/analyses/decompiler/presets/full.py +2 -0
- angr/analyses/decompiler/ssailification/rewriting_engine.py +51 -4
- angr/analyses/decompiler/ssailification/ssailification.py +23 -3
- angr/analyses/decompiler/ssailification/traversal_engine.py +15 -1
- angr/analyses/decompiler/structured_codegen/c.py +141 -10
- angr/analyses/decompiler/utils.py +23 -1
- angr/analyses/disassembly.py +2 -1
- angr/analyses/patchfinder.py +1 -1
- angr/analyses/s_reaching_definitions/s_rda_view.py +1 -0
- angr/analyses/typehoon/lifter.py +20 -0
- angr/analyses/typehoon/simple_solver.py +42 -9
- angr/analyses/typehoon/translator.py +4 -1
- angr/analyses/typehoon/typeconsts.py +17 -6
- angr/analyses/typehoon/typehoon.py +25 -6
- angr/analyses/variable_recovery/engine_ail.py +44 -5
- angr/analyses/variable_recovery/engine_base.py +35 -12
- angr/analyses/variable_recovery/variable_recovery_fast.py +33 -2
- angr/calling_conventions.py +23 -5
- angr/engines/light/engine.py +7 -0
- angr/engines/pcode/lifter.py +7 -0
- angr/knowledge_plugins/functions/function.py +68 -0
- angr/knowledge_plugins/propagations/states.py +5 -2
- angr/knowledge_plugins/variables/variable_manager.py +3 -3
- angr/procedures/definitions/__init__.py +1 -1
- angr/procedures/definitions/types_stl.py +22 -0
- angr/sim_type.py +251 -130
- angr/utils/graph.py +51 -27
- {angr-9.2.148.dist-info → angr-9.2.150.dist-info}/METADATA +7 -7
- {angr-9.2.148.dist-info → angr-9.2.150.dist-info}/RECORD +61 -55
- {angr-9.2.148.dist-info → angr-9.2.150.dist-info}/WHEEL +1 -1
- {angr-9.2.148.dist-info → angr-9.2.150.dist-info}/licenses/LICENSE +3 -0
- {angr-9.2.148.dist-info → angr-9.2.150.dist-info}/entry_points.txt +0 -0
- {angr-9.2.148.dist-info → angr-9.2.150.dist-info}/top_level.txt +0 -0
|
@@ -18,6 +18,7 @@ from angr.errors import AngrDecompilationError
|
|
|
18
18
|
from angr.knowledge_base import KnowledgeBase
|
|
19
19
|
from angr.knowledge_plugins.functions import Function
|
|
20
20
|
from angr.knowledge_plugins.cfg.memory_data import MemoryDataSort
|
|
21
|
+
from angr.knowledge_plugins.key_definitions import atoms
|
|
21
22
|
from angr.codenode import BlockNode
|
|
22
23
|
from angr.utils import timethis
|
|
23
24
|
from angr.utils.graph import GraphUtils
|
|
@@ -122,7 +123,7 @@ class Clinic(Analysis):
|
|
|
122
123
|
desired_variables: set[str] | None = None,
|
|
123
124
|
force_loop_single_exit: bool = True,
|
|
124
125
|
complete_successors: bool = False,
|
|
125
|
-
max_type_constraints: int =
|
|
126
|
+
max_type_constraints: int = 4000,
|
|
126
127
|
):
|
|
127
128
|
if not func.normalized and mode == ClinicMode.DECOMPILE:
|
|
128
129
|
raise ValueError("Decompilation must work on normalized function graphs.")
|
|
@@ -505,17 +506,29 @@ class Clinic(Analysis):
|
|
|
505
506
|
self._update_progress(37.0, text="Tracking stack pointers")
|
|
506
507
|
spt = self._track_stack_pointers()
|
|
507
508
|
|
|
509
|
+
preserve_vvar_ids: set[int] = set()
|
|
510
|
+
type_hints: list[tuple[atoms.VirtualVariable | atoms.MemoryLocation, str]] = []
|
|
511
|
+
|
|
508
512
|
# Simplify blocks
|
|
509
513
|
# we never remove dead memory definitions before making callsites. otherwise stack arguments may go missing
|
|
510
514
|
# before they are recognized as stack arguments.
|
|
511
515
|
self._update_progress(38.0, text="Simplifying blocks 1")
|
|
512
|
-
ail_graph = self._simplify_blocks(
|
|
516
|
+
ail_graph = self._simplify_blocks(
|
|
517
|
+
ail_graph,
|
|
518
|
+
stack_pointer_tracker=spt,
|
|
519
|
+
cache=block_simplification_cache,
|
|
520
|
+
preserve_vvar_ids=preserve_vvar_ids,
|
|
521
|
+
type_hints=type_hints,
|
|
522
|
+
)
|
|
513
523
|
self._rewrite_alloca(ail_graph)
|
|
514
524
|
|
|
515
525
|
# Run simplification passes
|
|
516
526
|
self._update_progress(40.0, text="Running simplifications 1")
|
|
517
527
|
ail_graph = self._run_simplification_passes(
|
|
518
|
-
ail_graph,
|
|
528
|
+
ail_graph,
|
|
529
|
+
stack_pointer_tracker=spt,
|
|
530
|
+
stack_items=self.stack_items,
|
|
531
|
+
stage=OptimizationPassStage.AFTER_SINGLE_BLOCK_SIMPLIFICATION,
|
|
519
532
|
)
|
|
520
533
|
|
|
521
534
|
# Simplify the entire function for the first time
|
|
@@ -532,7 +545,19 @@ class Clinic(Analysis):
|
|
|
532
545
|
# Run simplification passes again. there might be more chances for peephole optimizations after function-level
|
|
533
546
|
# simplification
|
|
534
547
|
self._update_progress(48.0, text="Simplifying blocks 2")
|
|
535
|
-
ail_graph = self._simplify_blocks(
|
|
548
|
+
ail_graph = self._simplify_blocks(
|
|
549
|
+
ail_graph,
|
|
550
|
+
stack_pointer_tracker=spt,
|
|
551
|
+
cache=block_simplification_cache,
|
|
552
|
+
preserve_vvar_ids=preserve_vvar_ids,
|
|
553
|
+
type_hints=type_hints,
|
|
554
|
+
)
|
|
555
|
+
|
|
556
|
+
# Run simplification passes
|
|
557
|
+
self._update_progress(49.0, text="Running simplifications 2")
|
|
558
|
+
ail_graph = self._run_simplification_passes(
|
|
559
|
+
ail_graph, stage=OptimizationPassStage.BEFORE_SSA_LEVEL1_TRANSFORMATION
|
|
560
|
+
)
|
|
536
561
|
|
|
537
562
|
# rewrite (qualified) stack variables into SSA form
|
|
538
563
|
ail_graph = self._transform_to_ssa_level1(ail_graph, func_args)
|
|
@@ -544,11 +569,13 @@ class Clinic(Analysis):
|
|
|
544
569
|
# Rust-specific; only call this on Rust binaries when we can identify language and compiler
|
|
545
570
|
ail_graph = self._rewrite_rust_probestack_call(ail_graph)
|
|
546
571
|
# Windows-specific
|
|
547
|
-
ail_graph = self.
|
|
572
|
+
ail_graph = self._rewrite_windows_chkstk_call(ail_graph)
|
|
548
573
|
|
|
549
574
|
# Make call-sites
|
|
550
575
|
self._update_progress(50.0, text="Making callsites")
|
|
551
|
-
_, stackarg_offsets, removed_vvar_ids = self._make_callsites(
|
|
576
|
+
_, stackarg_offsets, removed_vvar_ids = self._make_callsites(
|
|
577
|
+
ail_graph, func_args, stack_pointer_tracker=spt, preserve_vvar_ids=preserve_vvar_ids
|
|
578
|
+
)
|
|
552
579
|
|
|
553
580
|
# Run simplification passes
|
|
554
581
|
self._update_progress(53.0, text="Running simplifications 2")
|
|
@@ -565,6 +592,7 @@ class Clinic(Analysis):
|
|
|
565
592
|
fold_callexprs_into_conditions=self._fold_callexprs_into_conditions,
|
|
566
593
|
removed_vvar_ids=removed_vvar_ids,
|
|
567
594
|
arg_vvars=arg_vvars,
|
|
595
|
+
preserve_vvar_ids=preserve_vvar_ids,
|
|
568
596
|
)
|
|
569
597
|
|
|
570
598
|
# After global optimization, there might be more chances for peephole optimizations.
|
|
@@ -574,10 +602,12 @@ class Clinic(Analysis):
|
|
|
574
602
|
ail_graph,
|
|
575
603
|
stack_pointer_tracker=spt,
|
|
576
604
|
cache=block_simplification_cache,
|
|
605
|
+
preserve_vvar_ids=preserve_vvar_ids,
|
|
606
|
+
type_hints=type_hints,
|
|
577
607
|
)
|
|
578
608
|
|
|
579
609
|
# Run simplification passes
|
|
580
|
-
self._update_progress(65.0, text="Running simplifications 3
|
|
610
|
+
self._update_progress(65.0, text="Running simplifications 3")
|
|
581
611
|
ail_graph = self._run_simplification_passes(
|
|
582
612
|
ail_graph, stack_items=self.stack_items, stage=OptimizationPassStage.AFTER_GLOBAL_SIMPLIFICATION
|
|
583
613
|
)
|
|
@@ -592,6 +622,7 @@ class Clinic(Analysis):
|
|
|
592
622
|
narrow_expressions=True,
|
|
593
623
|
fold_callexprs_into_conditions=self._fold_callexprs_into_conditions,
|
|
594
624
|
arg_vvars=arg_vvars,
|
|
625
|
+
preserve_vvar_ids=preserve_vvar_ids,
|
|
595
626
|
)
|
|
596
627
|
|
|
597
628
|
self._update_progress(75.0, text="Simplifying blocks 4")
|
|
@@ -599,6 +630,8 @@ class Clinic(Analysis):
|
|
|
599
630
|
ail_graph,
|
|
600
631
|
stack_pointer_tracker=spt,
|
|
601
632
|
cache=block_simplification_cache,
|
|
633
|
+
preserve_vvar_ids=preserve_vvar_ids,
|
|
634
|
+
type_hints=type_hints,
|
|
602
635
|
)
|
|
603
636
|
|
|
604
637
|
# Simplify the entire function for the fourth time
|
|
@@ -611,6 +644,12 @@ class Clinic(Analysis):
|
|
|
611
644
|
narrow_expressions=True,
|
|
612
645
|
fold_callexprs_into_conditions=self._fold_callexprs_into_conditions,
|
|
613
646
|
arg_vvars=arg_vvars,
|
|
647
|
+
preserve_vvar_ids=preserve_vvar_ids,
|
|
648
|
+
)
|
|
649
|
+
|
|
650
|
+
self._update_progress(79.0, text="Running simplifications 4")
|
|
651
|
+
ail_graph = self._run_simplification_passes(
|
|
652
|
+
ail_graph, stack_items=self.stack_items, stage=OptimizationPassStage.BEFORE_VARIABLE_RECOVERY
|
|
614
653
|
)
|
|
615
654
|
|
|
616
655
|
# update arg_list
|
|
@@ -623,7 +662,7 @@ class Clinic(Analysis):
|
|
|
623
662
|
|
|
624
663
|
# Recover variables on AIL blocks
|
|
625
664
|
self._update_progress(80.0, text="Recovering variables")
|
|
626
|
-
variable_kb = self._recover_and_link_variables(ail_graph, arg_list, arg_vvars, vvar2vvar)
|
|
665
|
+
variable_kb = self._recover_and_link_variables(ail_graph, arg_list, arg_vvars, vvar2vvar, type_hints)
|
|
627
666
|
|
|
628
667
|
# Run simplification passes
|
|
629
668
|
self._update_progress(85.0, text="Running simplifications 4")
|
|
@@ -1226,6 +1265,8 @@ class Clinic(Analysis):
|
|
|
1226
1265
|
ail_graph: networkx.DiGraph,
|
|
1227
1266
|
stack_pointer_tracker=None,
|
|
1228
1267
|
cache: dict[ailment.Block, NamedTuple] | None = None,
|
|
1268
|
+
preserve_vvar_ids: set[int] | None = None,
|
|
1269
|
+
type_hints: list[tuple[atoms.VirtualVariable | atoms.MemoryLocation, str]] | None = None,
|
|
1229
1270
|
):
|
|
1230
1271
|
"""
|
|
1231
1272
|
Simplify all blocks in self._blocks.
|
|
@@ -1244,6 +1285,8 @@ class Clinic(Analysis):
|
|
|
1244
1285
|
ail_block,
|
|
1245
1286
|
stack_pointer_tracker=stack_pointer_tracker,
|
|
1246
1287
|
cache=cache,
|
|
1288
|
+
preserve_vvar_ids=preserve_vvar_ids,
|
|
1289
|
+
type_hints=type_hints,
|
|
1247
1290
|
)
|
|
1248
1291
|
key = ail_block.addr, ail_block.idx
|
|
1249
1292
|
blocks_by_addr_and_idx[key] = simplified
|
|
@@ -1259,7 +1302,14 @@ class Clinic(Analysis):
|
|
|
1259
1302
|
|
|
1260
1303
|
return ail_graph
|
|
1261
1304
|
|
|
1262
|
-
def _simplify_block(
|
|
1305
|
+
def _simplify_block(
|
|
1306
|
+
self,
|
|
1307
|
+
ail_block,
|
|
1308
|
+
stack_pointer_tracker=None,
|
|
1309
|
+
cache=None,
|
|
1310
|
+
preserve_vvar_ids: set[int] | None = None,
|
|
1311
|
+
type_hints: list[tuple[atoms.VirtualVariable | atoms.MemoryLocation, str]] | None = None,
|
|
1312
|
+
):
|
|
1263
1313
|
"""
|
|
1264
1314
|
Simplify a single AIL block.
|
|
1265
1315
|
|
|
@@ -1286,6 +1336,8 @@ class Clinic(Analysis):
|
|
|
1286
1336
|
peephole_optimizations=self.peephole_optimizations,
|
|
1287
1337
|
cached_reaching_definitions=cached_rd,
|
|
1288
1338
|
cached_propagator=cached_prop,
|
|
1339
|
+
preserve_vvar_ids=preserve_vvar_ids,
|
|
1340
|
+
type_hints=type_hints,
|
|
1289
1341
|
)
|
|
1290
1342
|
# update the cache
|
|
1291
1343
|
if cache is not None:
|
|
@@ -1308,6 +1360,7 @@ class Clinic(Analysis):
|
|
|
1308
1360
|
rewrite_ccalls=True,
|
|
1309
1361
|
removed_vvar_ids: set[int] | None = None,
|
|
1310
1362
|
arg_vvars: dict[int, tuple[ailment.Expr.VirtualVariable, SimVariable]] | None = None,
|
|
1363
|
+
preserve_vvar_ids: set[int] | None = None,
|
|
1311
1364
|
) -> None:
|
|
1312
1365
|
"""
|
|
1313
1366
|
Simplify the entire function until it reaches a fixed point.
|
|
@@ -1326,6 +1379,7 @@ class Clinic(Analysis):
|
|
|
1326
1379
|
rewrite_ccalls=rewrite_ccalls,
|
|
1327
1380
|
removed_vvar_ids=removed_vvar_ids,
|
|
1328
1381
|
arg_vvars=arg_vvars,
|
|
1382
|
+
preserve_vvar_ids=preserve_vvar_ids,
|
|
1329
1383
|
)
|
|
1330
1384
|
if not simplified:
|
|
1331
1385
|
break
|
|
@@ -1343,6 +1397,7 @@ class Clinic(Analysis):
|
|
|
1343
1397
|
rewrite_ccalls=True,
|
|
1344
1398
|
removed_vvar_ids: set[int] | None = None,
|
|
1345
1399
|
arg_vvars: dict[int, tuple[ailment.Expr.VirtualVariable, SimVariable]] | None = None,
|
|
1400
|
+
preserve_vvar_ids: set[int] | None = None,
|
|
1346
1401
|
):
|
|
1347
1402
|
"""
|
|
1348
1403
|
Simplify the entire function once.
|
|
@@ -1367,6 +1422,7 @@ class Clinic(Analysis):
|
|
|
1367
1422
|
removed_vvar_ids=removed_vvar_ids,
|
|
1368
1423
|
arg_vvars=arg_vvars,
|
|
1369
1424
|
secondary_stackvars=self.secondary_stackvars,
|
|
1425
|
+
avoid_vvar_ids=preserve_vvar_ids,
|
|
1370
1426
|
)
|
|
1371
1427
|
# cache the simplifier's RDA analysis
|
|
1372
1428
|
self.reaching_definitions = simp._reaching_definitions
|
|
@@ -1381,6 +1437,7 @@ class Clinic(Analysis):
|
|
|
1381
1437
|
stage: OptimizationPassStage = OptimizationPassStage.AFTER_GLOBAL_SIMPLIFICATION,
|
|
1382
1438
|
variable_kb=None,
|
|
1383
1439
|
stack_items: dict[int, StackItem] | None = None,
|
|
1440
|
+
stack_pointer_tracker=None,
|
|
1384
1441
|
**kwargs,
|
|
1385
1442
|
):
|
|
1386
1443
|
addr_and_idx_to_blocks: dict[tuple[int, int | None], ailment.Block] = {}
|
|
@@ -1415,6 +1472,7 @@ class Clinic(Analysis):
|
|
|
1415
1472
|
scratch=self.optimization_scratch,
|
|
1416
1473
|
force_loop_single_exit=self._force_loop_single_exit,
|
|
1417
1474
|
complete_successors=self._complete_successors,
|
|
1475
|
+
stack_pointer_tracker=stack_pointer_tracker,
|
|
1418
1476
|
**kwargs,
|
|
1419
1477
|
)
|
|
1420
1478
|
if a.out_graph:
|
|
@@ -1550,7 +1608,13 @@ class Clinic(Analysis):
|
|
|
1550
1608
|
return []
|
|
1551
1609
|
|
|
1552
1610
|
@timethis
|
|
1553
|
-
def _make_callsites(
|
|
1611
|
+
def _make_callsites(
|
|
1612
|
+
self,
|
|
1613
|
+
ail_graph,
|
|
1614
|
+
func_args: set[ailment.Expr.VirtualVariable],
|
|
1615
|
+
stack_pointer_tracker=None,
|
|
1616
|
+
preserve_vvar_ids: set[int] | None = None,
|
|
1617
|
+
):
|
|
1554
1618
|
"""
|
|
1555
1619
|
Simplify all function call statements.
|
|
1556
1620
|
"""
|
|
@@ -1588,6 +1652,7 @@ class Clinic(Analysis):
|
|
|
1588
1652
|
fail_fast=self._fail_fast,
|
|
1589
1653
|
stack_pointer_tracker=stack_pointer_tracker,
|
|
1590
1654
|
peephole_optimizations=self.peephole_optimizations,
|
|
1655
|
+
preserve_vvar_ids=preserve_vvar_ids,
|
|
1591
1656
|
)
|
|
1592
1657
|
return simp.result_block
|
|
1593
1658
|
return None
|
|
@@ -1663,6 +1728,7 @@ class Clinic(Analysis):
|
|
|
1663
1728
|
arg_list: list,
|
|
1664
1729
|
arg_vvars: dict[int, tuple[ailment.Expr.VirtualVariable, SimVariable]],
|
|
1665
1730
|
vvar2vvar: dict[int, int],
|
|
1731
|
+
type_hints: list[tuple[atoms.VirtualVariable | atoms.MemoryLocation, str]],
|
|
1666
1732
|
):
|
|
1667
1733
|
# variable recovery
|
|
1668
1734
|
tmp_kb = KnowledgeBase(self.project) if self.variable_kb is None else self.variable_kb
|
|
@@ -1677,6 +1743,7 @@ class Clinic(Analysis):
|
|
|
1677
1743
|
unify_variables=False,
|
|
1678
1744
|
func_arg_vvars=arg_vvars,
|
|
1679
1745
|
vvar_to_vvar=vvar2vvar,
|
|
1746
|
+
type_hints=type_hints,
|
|
1680
1747
|
)
|
|
1681
1748
|
# get ground-truth types
|
|
1682
1749
|
var_manager = tmp_kb.variables[self.function.addr]
|
|
@@ -1710,7 +1777,7 @@ class Clinic(Analysis):
|
|
|
1710
1777
|
must_struct = None
|
|
1711
1778
|
total_type_constraints = sum(len(tc) for tc in vr.type_constraints.values()) if vr.type_constraints else 0
|
|
1712
1779
|
if total_type_constraints > self._max_type_constraints:
|
|
1713
|
-
l.
|
|
1780
|
+
l.warning(
|
|
1714
1781
|
"The number of type constraints (%d) is greater than the threshold (%d). Skipping type inference.",
|
|
1715
1782
|
total_type_constraints,
|
|
1716
1783
|
self._max_type_constraints,
|
|
@@ -1821,7 +1888,7 @@ class Clinic(Analysis):
|
|
|
1821
1888
|
if off in variable_manager.stack_offset_to_struct_member_info:
|
|
1822
1889
|
stmt.tags["struct_member_info"] = variable_manager.stack_offset_to_struct_member_info[off]
|
|
1823
1890
|
|
|
1824
|
-
elif stmt_type is ailment.Stmt.Assignment:
|
|
1891
|
+
elif stmt_type is ailment.Stmt.Assignment or stmt_type is ailment.Stmt.WeakAssignment:
|
|
1825
1892
|
self._link_variables_on_expr(variable_manager, global_variables, block, stmt_idx, stmt, stmt.dst)
|
|
1826
1893
|
self._link_variables_on_expr(variable_manager, global_variables, block, stmt_idx, stmt, stmt.src)
|
|
1827
1894
|
|
|
@@ -2937,7 +3004,7 @@ class Clinic(Analysis):
|
|
|
2937
3004
|
break
|
|
2938
3005
|
return ail_graph
|
|
2939
3006
|
|
|
2940
|
-
def
|
|
3007
|
+
def _rewrite_windows_chkstk_call(self, ail_graph) -> networkx.DiGraph:
|
|
2941
3008
|
if not (self.project.simos is not None and self.project.simos.name == "Win32"):
|
|
2942
3009
|
return ail_graph
|
|
2943
3010
|
|
|
@@ -3,7 +3,16 @@ from __future__ import annotations
|
|
|
3
3
|
import logging
|
|
4
4
|
|
|
5
5
|
from ailment.block import Block
|
|
6
|
-
from ailment.statement import
|
|
6
|
+
from ailment.statement import (
|
|
7
|
+
Statement,
|
|
8
|
+
Assignment,
|
|
9
|
+
Store,
|
|
10
|
+
Call,
|
|
11
|
+
Return,
|
|
12
|
+
ConditionalJump,
|
|
13
|
+
DirtyStatement,
|
|
14
|
+
WeakAssignment,
|
|
15
|
+
)
|
|
7
16
|
from ailment.expression import (
|
|
8
17
|
Expression,
|
|
9
18
|
VirtualVariable,
|
|
@@ -99,6 +108,19 @@ class SimEngineDephiRewriting(SimEngineNostmtAIL[None, Expression | None, Statem
|
|
|
99
108
|
return Assignment(stmt.idx, dst, src, **stmt.tags)
|
|
100
109
|
return None
|
|
101
110
|
|
|
111
|
+
def _handle_stmt_WeakAssignment(self, stmt) -> WeakAssignment | None:
    """
    Rewrite both sides of a weak assignment.

    The source and the destination are each passed through ``self._expr``; a ``None`` result is
    treated as "this side was not rewritten".

    :param stmt:    The WeakAssignment statement to process.
    :return:        A new WeakAssignment carrying the rewritten side(s) (the untouched side is reused
                    from ``stmt``), or None when neither side was rewritten.
    """
    rewritten_src = self._expr(stmt.src)
    rewritten_dst = self._expr(stmt.dst)

    # nothing changed on either side: signal "no replacement" to the caller
    if rewritten_dst is None and rewritten_src is None:
        return None

    dst = rewritten_dst if rewritten_dst is not None else stmt.dst
    src = rewritten_src if rewritten_src is not None else stmt.src
    return WeakAssignment(
        stmt.idx,
        dst,  # type: ignore
        src,
        **stmt.tags,
    )
|
|
123
|
+
|
|
102
124
|
def _handle_stmt_Store(self, stmt):
|
|
103
125
|
new_addr = self._expr(stmt.addr)
|
|
104
126
|
new_data = self._expr(stmt.data)
|
|
@@ -299,7 +321,7 @@ class SimEngineDephiRewriting(SimEngineNostmtAIL[None, Expression | None, Statem
|
|
|
299
321
|
return VEXCCallExpression(
|
|
300
322
|
expr.idx,
|
|
301
323
|
expr.callee,
|
|
302
|
-
new_operands,
|
|
324
|
+
tuple(new_operands),
|
|
303
325
|
bits=expr.bits,
|
|
304
326
|
**expr.tags,
|
|
305
327
|
)
|
|
@@ -33,6 +33,8 @@ from .call_stmt_rewriter import CallStatementRewriter
|
|
|
33
33
|
from .duplication_reverter import DuplicationReverter
|
|
34
34
|
from .switch_reused_entry_rewriter import SwitchReusedEntryRewriter
|
|
35
35
|
from .condition_constprop import ConditionConstantPropagation
|
|
36
|
+
from .determine_load_sizes import DetermineLoadSizes
|
|
37
|
+
from .eager_std_string_concatenation import EagerStdStringConcatenationPass
|
|
36
38
|
|
|
37
39
|
if TYPE_CHECKING:
|
|
38
40
|
from angr.analyses.decompiler.presets import DecompilationPreset
|
|
@@ -68,6 +70,8 @@ ALL_OPTIMIZATION_PASSES = [
|
|
|
68
70
|
CallStatementRewriter,
|
|
69
71
|
TagSlicer,
|
|
70
72
|
ConditionConstantPropagation,
|
|
73
|
+
DetermineLoadSizes,
|
|
74
|
+
EagerStdStringConcatenationPass,
|
|
71
75
|
]
|
|
72
76
|
|
|
73
77
|
# these passes may duplicate code to remove gotos or improve the structure of the graph
|
|
@@ -122,6 +126,7 @@ __all__ = (
|
|
|
122
126
|
"DeadblockRemover",
|
|
123
127
|
"DivSimplifier",
|
|
124
128
|
"DuplicationReverter",
|
|
129
|
+
"EagerStdStringConcatenationPass",
|
|
125
130
|
"ExprOpSwapper",
|
|
126
131
|
"FlipBooleanCmp",
|
|
127
132
|
"ITEExprConverter",
|
|
@@ -98,20 +98,22 @@ class BasePointerSaveSimplifier(OptimizationPass):
|
|
|
98
98
|
and isinstance(stmt.dst, ailment.Expr.VirtualVariable)
|
|
99
99
|
and stmt.dst.was_stack
|
|
100
100
|
and stmt.dst.stack_offset < 0
|
|
101
|
-
and isinstance(stmt.src, ailment.Expr.VirtualVariable)
|
|
102
|
-
and stmt.src.was_reg
|
|
103
|
-
and stmt.src.reg_offset == self.project.arch.bp_offset
|
|
104
101
|
):
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
102
|
+
if (
|
|
103
|
+
isinstance(stmt.src, ailment.Expr.VirtualVariable)
|
|
104
|
+
and stmt.src.was_reg
|
|
105
|
+
and stmt.src.reg_offset == self.project.arch.bp_offset
|
|
106
|
+
):
|
|
107
|
+
return first_block, idx, stmt.dst
|
|
108
|
+
if isinstance(stmt.src, ailment.Expr.StackBaseOffset) and stmt.src.offset == 0:
|
|
109
|
+
return first_block, idx, stmt.dst
|
|
110
|
+
if (
|
|
111
|
+
isinstance(stmt.src, ailment.Expr.UnaryOp)
|
|
112
|
+
and isinstance(stmt.src.operand, ailment.Expr.VirtualVariable)
|
|
113
|
+
and stmt.src.operand.was_stack
|
|
114
|
+
and stmt.src.operand.stack_offset == 0
|
|
115
|
+
):
|
|
116
|
+
return first_block, idx, stmt.dst
|
|
115
117
|
|
|
116
118
|
# Not found
|
|
117
119
|
return None
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
import logging
|
|
3
|
+
|
|
4
|
+
from ailment.constant import UNDETERMINED_SIZE
|
|
5
|
+
from ailment.expression import BinaryOp, Load, Const
|
|
6
|
+
from ailment.statement import Assignment, WeakAssignment
|
|
7
|
+
|
|
8
|
+
from .optimization_pass import OptimizationPass, OptimizationPassStage
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
_l = logging.getLogger(name=__name__)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class DetermineLoadSizes(OptimizationPass):
    """
    Determine the sizes of Load expressions whose sizes are undetermined.
    """

    # NOTE: the class docstring above is reused verbatim as DESCRIPTION below; editing it changes a
    # runtime string.

    ARCHES = None
    PLATFORMS = None
    STAGE = OptimizationPassStage.AFTER_GLOBAL_SIMPLIFICATION
    NAME = "Determine sizes of loads whose sizes are undetermined"
    DESCRIPTION = __doc__.strip()  # type: ignore

    def __init__(self, func, **kwargs):
        super().__init__(func, **kwargs)

        # the pass runs as part of construction
        self.analyze()

    def _check(self):
        # always applicable; no cached data is handed over to _analyze()
        return True, None

    def _analyze(self, cache=None):
        """
        Walk every (weak) assignment in the graph and give a concrete size to constant-address Loads
        whose size is still UNDETERMINED_SIZE.

        Only two source shapes are inspected: a bare Load, or the direct operands of an "Add"
        BinaryOp. Matching Load expressions are updated in place; ``self.out_graph`` is only set when
        at least one Load was updated.
        """
        changed = False

        for block in self._graph.nodes:
            for stmt in block.statements:
                if not isinstance(stmt, (Assignment, WeakAssignment)):
                    continue

                src = stmt.src
                if isinstance(src, BinaryOp) and src.op == "Add" and src.operands:
                    candidates = src.operands
                elif isinstance(src, Load):
                    candidates = [src]
                else:
                    continue

                for expr in candidates:
                    needs_size = (
                        isinstance(expr, Load)
                        and isinstance(expr.addr, Const)
                        and expr.size == UNDETERMINED_SIZE
                    )
                    if not needs_size:
                        continue

                    # probably a string!
                    data = self.project.loader.memory.load_null_terminated_bytes(
                        expr.addr.value, max_size=4096
                    )
                    if data is None:
                        continue

                    # NOTE(review): whether len(data) counts the terminating NUL depends on the
                    # loader API - confirm before relying on the exact size.
                    length = len(data)
                    expr.size = length
                    expr.bits = length * 8
                    changed = True

        if changed:
            self.out_graph = self._graph
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
# pylint:disable=too-many-boolean-expressions,unused-argument
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
from typing import TYPE_CHECKING
|
|
4
|
+
import logging
|
|
5
|
+
import re
|
|
6
|
+
|
|
7
|
+
from archinfo import Endness
|
|
8
|
+
|
|
9
|
+
from ailment.constant import UNDETERMINED_SIZE
|
|
10
|
+
from ailment.statement import Assignment, WeakAssignment
|
|
11
|
+
from ailment.expression import VirtualVariable, BinaryOp, Const, Load
|
|
12
|
+
|
|
13
|
+
from .optimization_pass import OptimizationPass, OptimizationPassStage
|
|
14
|
+
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
from angr.analyses.s_reaching_definitions import SRDAModel
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
_l = logging.getLogger(name=__name__)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class EagerStdStringConcatenationPass(OptimizationPass):
    """
    TODO: Unfinished
    """

    # NOTE: the class docstring above is reused verbatim as DESCRIPTION below, so it is left as-is.
    #
    # What the code does: for an Assignment ``vvar = a + b`` whose recorded "dst" and "src" types
    # are both std::string, where one operand is a VirtualVariable and the other is a Load from a
    # constant address that the CFG marks as a "string", the statement is replaced by a
    # WeakAssignment that loads one pre-concatenated custom string. The pass is currently disabled:
    # _check() always returns False.

    ARCHES = None
    PLATFORMS = None
    STAGE = OptimizationPassStage.BEFORE_VARIABLE_RECOVERY
    NAME = "Condense multiple constant std::string creation calls into one when possible"
    DESCRIPTION = __doc__.strip()  # type: ignore

    def __init__(self, func, **kwargs):
        super().__init__(func, **kwargs)
        self.analyze()

    def _check(self):
        # TODO: ensure func calls std::string::operator+ and std::string::operator=
        return False, {}

    def _analyze(self, cache=None):
        """
        Visit every statement of every block and replace foldable std::string concatenations.

        A block is copied lazily (only once the first replacement is found) and handed to
        ``self._update_block`` after all of its statements were inspected.
        """
        rd = self.project.analyses.SReachingDefinitions(subject=self._func, func_graph=self._graph).model
        cfg = self.kb.cfgs.get_most_accurate()
        assert cfg is not None

        # snapshot the keys: _update_block() is called while we iterate
        for key in list(self.blocks_by_addr_and_idx):
            block = self.blocks_by_addr_and_idx[key]
            new_block = None
            for idx, stmt in enumerate(block.statements):
                new_stmt = self._fold_concatenation(stmt, rd, cfg, block)
                if new_stmt is None:
                    continue
                if new_block is None:
                    new_block = block.copy()
                new_block.statements[idx] = new_stmt
            if new_block is not None:
                self._update_block(block, new_block)

    def _fold_concatenation(self, stmt, rd: SRDAModel, cfg, block):
        """
        Build the replacement for a single foldable concatenation statement.

        :param stmt:    The statement to inspect.
        :param rd:      The reaching-definition model (forwarded to _get_vvar_def_string).
        :param cfg:     The CFG model whose ``memory_data`` identifies constant strings.
        :param block:   The block that contains ``stmt``.
        :return:        A WeakAssignment loading the concatenated custom string, or None when ``stmt``
                        does not match the pattern or the variable's string value is unknown.
        """
        if not (
            isinstance(stmt, Assignment)
            and hasattr(stmt, "type")
            and "dst" in stmt.type
            and "src" in stmt.type
            and isinstance(stmt.dst, VirtualVariable)
            and isinstance(stmt.src, BinaryOp)
            and stmt.src.op == "Add"
        ):
            return None

        dst_ty, src_ty = stmt.type["dst"], stmt.type["src"]
        if not (self._is_std_string_type(dst_ty.c_repr()) and self._is_std_string_type(src_ty.c_repr())):
            return None

        op0, op1 = stmt.src.operands
        # normalize to (variable, literal) for matching, but remember the original order: string
        # concatenation is not commutative
        literal_first = isinstance(op1, VirtualVariable) and isinstance(op0, Load)
        if literal_first:
            op0, op1 = op1, op0

        if not (
            isinstance(op0, VirtualVariable)
            and isinstance(op1, Load)
            and isinstance(op1.addr, Const)
            and isinstance(op1.addr.value, int)
            # is op1 a constant string?
            and op1.addr.value in cfg.memory_data
            and cfg.memory_data[op1.addr.value].sort == "string"
        ):
            return None

        literal_str = cfg.memory_data[op1.addr.value].content
        # is op0 also an std::string?
        var_str = self._get_vvar_def_string(op0.varid, rd, cfg, block.addr, block.idx)
        if var_str is None or literal_str is None:
            return None

        # let's create a new string, honoring the operand order of the original expression
        final_str = literal_str + var_str if literal_first else var_str + literal_str
        str_id = self.kb.custom_strings.allocate(final_str)

        # replace the assignment with a new (weak) assignment that loads the custom string
        return WeakAssignment(
            stmt.idx,
            stmt.dst,
            Load(
                None,
                Const(None, None, str_id, self.project.arch.bits, custom_string=True),
                UNDETERMINED_SIZE,
                Endness.BE,
            ),
            **stmt.tags,
        )

    def _get_vvar_def_string(self, vvar_id: int, rd: SRDAModel, cfg, block_addr, block_idx) -> bytes | None:
        """
        Search backwards for a weak definition of a variable and return the string it was given.

        Starting at the block identified by (block_addr, block_idx), the search follows
        predecessors for as long as a block has exactly one predecessor. The starting block's own
        statements are not inspected.

        :param vvar_id:     ID of the virtual variable to look up.
        :param rd:          Unused for now.
        :param cfg:         The CFG model whose ``memory_data`` identifies constant strings.
        :param block_addr:  Address of the block where the search starts.
        :param block_idx:   Index of the block where the search starts.
        :return:            The string content, or None if no suitable definition is found.
        """
        # search for the closest weak definition of the specified variable
        # TODO: Optimize this logic in the future
        # NOTE(review): _fold_concatenation() emits Load(Const(custom_string=True)) sources, while
        # the custom-string branch below only accepts a bare Const source - confirm that chained
        # concatenations are meant to be resolvable here.

        block = self.blocks_by_addr_and_idx[(block_addr, block_idx)]
        visited = set()
        while block is not None and block not in visited:
            visited.add(block)

            if not (block.addr == block_addr and block.idx == block_idx):
                for stmt in block.statements:
                    if not (
                        isinstance(stmt, WeakAssignment)
                        and isinstance(stmt.dst, VirtualVariable)
                        and stmt.dst.varid == vvar_id
                    ):
                        continue
                    if (
                        isinstance(stmt.src, Load)
                        and isinstance(stmt.src.addr, Const)
                        and stmt.src.addr.value in cfg.memory_data
                    ):
                        if cfg.memory_data[stmt.src.addr.value].sort == "string":
                            return cfg.memory_data[stmt.src.addr.value].content
                    elif (
                        isinstance(stmt.src, Const)
                        and hasattr(stmt.src, "custom_string")
                        and stmt.src.custom_string
                    ):
                        return self.kb.custom_strings.get(stmt.src.value)

            # only keep walking while the path backwards is unambiguous
            preds = list(self._graph.predecessors(block))
            block = preds[0] if len(preds) == 1 else None

        return None

    @staticmethod
    def _is_std_string_type(type_str: str) -> bool:
        """
        Test whether a C type representation starts with the std::basic_string<char, ...> pattern.

        A leading "const " is ignored. ``re.match`` anchors only at the start, so any trailing text
        after the pattern is accepted.
        """
        type_str = type_str.removeprefix("const ")
        return (
            re.match(
                r"class std::basic_string<char a\d+, struct std::char_traits<char> a\d+, class std::allocator<char>>",
                type_str,
            )
            is not None
        )
|
|
148
|
+
|
|
149
|
+
# pcreg_offset = self.project.arch.registers[getpc_reg][0]
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
#
|
|
153
|
+
# old_block = self.blocks_by_addr_and_idx[block_key]
|
|
154
|
+
# block = old_block.copy()
|
|
155
|
+
# old_stmt = block.statements[stmt_idx]
|
|
156
|
+
# block.statements[stmt_idx] = ailment.Stmt.Assignment(
|
|
157
|
+
# old_stmt.idx,
|
|
158
|
+
# ailment.Expr.Register(None, None, pcreg_offset, 32, reg_name=getpc_reg),
|
|
159
|
+
# ailment.Expr.Const(None, None, getpc_reg_value, 32),
|
|
160
|
+
# **old_stmt.tags,
|
|
161
|
+
# )
|
|
162
|
+
# # remove the statement that pushes return address onto the stack
|
|
163
|
+
# if stmt_idx > 0 and isinstance(block.statements[stmt_idx - 1], ailment.Stmt.Store):
|
|
164
|
+
# block.statements = block.statements[: stmt_idx - 1] + block.statements[stmt_idx:]
|
|
165
|
+
# self._update_block(old_block, block)
|
|
@@ -73,7 +73,16 @@ class SimplifierAILEngine(
|
|
|
73
73
|
self.state.store_variable(dst, src)
|
|
74
74
|
|
|
75
75
|
if (src, dst) != (stmt.src, stmt.dst):
|
|
76
|
-
return ailment.statement.Assignment(stmt.idx, dst, src, **stmt.tags)
|
|
76
|
+
return ailment.statement.Assignment(stmt.idx, dst, src, **stmt.tags) # type:ignore
|
|
77
|
+
|
|
78
|
+
return stmt
|
|
79
|
+
|
|
80
|
+
def _handle_stmt_WeakAssignment(self, stmt: ailment.statement.WeakAssignment):
    """
    Simplify both sides of a weak assignment.

    :param stmt:    The WeakAssignment statement to process.
    :return:        A new WeakAssignment built from the simplified source and destination when either
                    differs from the original, otherwise ``stmt`` itself.
    """
    new_src = self._expr(stmt.src)
    new_dst = self._expr(stmt.dst)

    before = (stmt.src, stmt.dst)
    after = (new_src, new_dst)
    if after != before:
        return ailment.statement.WeakAssignment(stmt.idx, new_dst, new_src, **stmt.tags)  # type:ignore

    # neither side changed: hand back the original statement object
    return stmt
|
|
79
88
|
|
|
@@ -150,7 +159,7 @@ class SimplifierAILEngine(
|
|
|
150
159
|
def _handle_stmt_DirtyStatement(self, stmt):
    """
    Simplify the dirty expression wrapped by a DirtyStatement.

    :param stmt:    The DirtyStatement to process.
    :return:        A new DirtyStatement around the simplified expression when it differs from the
                    original one, otherwise ``stmt`` itself.
    """
    simplified = self._expr(stmt.dirty)
    if simplified != stmt.dirty:
        return ailment.statement.DirtyStatement(stmt.idx, simplified, **stmt.tags)  # type:ignore
    return stmt
|
|
155
164
|
|
|
156
165
|
def _handle_stmt_Label(self, stmt):
|
|
@@ -8,6 +8,7 @@ from ailment.expression import Op
|
|
|
8
8
|
from angr.analyses.decompiler.structuring.structurer_nodes import ConditionNode
|
|
9
9
|
from angr.analyses.decompiler.utils import (
|
|
10
10
|
structured_node_is_simple_return,
|
|
11
|
+
structured_node_is_simple_return_strict,
|
|
11
12
|
sequence_to_statements,
|
|
12
13
|
structured_node_has_multi_predecessors,
|
|
13
14
|
)
|
|
@@ -44,7 +45,7 @@ class FlipBooleanWalker(SequenceWalker):
|
|
|
44
45
|
and node.true_node is not None
|
|
45
46
|
and node.false_node is None
|
|
46
47
|
and idx < len(seq_node.nodes) - 1
|
|
47
|
-
and
|
|
48
|
+
and structured_node_is_simple_return_strict(seq_node.nodes[idx + 1])
|
|
48
49
|
and node not in type1_condition_nodes
|
|
49
50
|
):
|
|
50
51
|
# Type 2: Special Filter:
|