angr 9.2.140__py3-none-manylinux2014_x86_64.whl → 9.2.141__py3-none-manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of angr might be problematic. Click here for more details.
- angr/__init__.py +1 -1
- angr/analyses/calling_convention/calling_convention.py +88 -32
- angr/analyses/calling_convention/fact_collector.py +44 -18
- angr/analyses/calling_convention/utils.py +3 -1
- angr/analyses/cfg/indirect_jump_resolvers/jumptable.py +9 -8
- angr/analyses/decompiler/ail_simplifier.py +48 -20
- angr/analyses/decompiler/callsite_maker.py +24 -11
- angr/analyses/decompiler/clinic.py +10 -0
- angr/analyses/decompiler/decompiler.py +1 -0
- angr/analyses/decompiler/optimization_passes/duplication_reverter/duplication_reverter.py +3 -1
- angr/analyses/decompiler/optimization_passes/flip_boolean_cmp.py +21 -2
- angr/analyses/decompiler/optimization_passes/lowered_switch_simplifier.py +84 -15
- angr/analyses/decompiler/optimization_passes/optimization_pass.py +76 -1
- angr/analyses/decompiler/optimization_passes/return_duplicator_base.py +51 -7
- angr/analyses/decompiler/peephole_optimizations/eager_eval.py +44 -7
- angr/analyses/decompiler/region_identifier.py +6 -4
- angr/analyses/decompiler/region_simplifiers/expr_folding.py +32 -18
- angr/analyses/decompiler/region_simplifiers/region_simplifier.py +4 -1
- angr/analyses/decompiler/ssailification/rewriting.py +23 -15
- angr/analyses/decompiler/ssailification/rewriting_engine.py +105 -24
- angr/analyses/decompiler/ssailification/ssailification.py +22 -14
- angr/analyses/decompiler/structured_codegen/c.py +73 -137
- angr/analyses/decompiler/structuring/dream.py +1 -1
- angr/analyses/decompiler/structuring/phoenix.py +6 -1
- angr/analyses/decompiler/structuring/structurer_base.py +2 -1
- angr/analyses/decompiler/utils.py +46 -20
- angr/analyses/s_reaching_definitions/s_rda_view.py +43 -25
- angr/analyses/variable_recovery/engine_ail.py +1 -1
- angr/analyses/variable_recovery/engine_vex.py +20 -4
- angr/calling_conventions.py +15 -10
- angr/factory.py +8 -3
- angr/knowledge_plugins/variables/variable_manager.py +7 -5
- angr/simos/simos.py +3 -1
- angr/utils/types.py +48 -0
- {angr-9.2.140.dist-info → angr-9.2.141.dist-info}/METADATA +6 -6
- {angr-9.2.140.dist-info → angr-9.2.141.dist-info}/RECORD +40 -39
- {angr-9.2.140.dist-info → angr-9.2.141.dist-info}/LICENSE +0 -0
- {angr-9.2.140.dist-info → angr-9.2.141.dist-info}/WHEEL +0 -0
- {angr-9.2.140.dist-info → angr-9.2.141.dist-info}/entry_points.txt +0 -0
- {angr-9.2.140.dist-info → angr-9.2.141.dist-info}/top_level.txt +0 -0
angr/__init__.py
CHANGED
|
@@ -33,6 +33,7 @@ from angr.knowledge_plugins.key_definitions.rd_model import ReachingDefinitionsM
|
|
|
33
33
|
from angr.knowledge_plugins.variables.variable_access import VariableAccessSort
|
|
34
34
|
from angr.knowledge_plugins.functions import Function
|
|
35
35
|
from angr.utils.constants import DEFAULT_STATEMENT
|
|
36
|
+
from angr.utils.ssa import get_reg_offset_base_and_size, get_reg_offset_base
|
|
36
37
|
from angr import SIM_PROCEDURES
|
|
37
38
|
from angr.analyses import Analysis, register_analysis, ReachingDefinitionsAnalysis
|
|
38
39
|
from angr.analyses.reaching_definitions import get_all_definitions
|
|
@@ -264,7 +265,7 @@ class CallingConventionAnalysis(Analysis):
|
|
|
264
265
|
self.cc = cc
|
|
265
266
|
self.prototype = prototype
|
|
266
267
|
|
|
267
|
-
def _analyze_plt(self) -> tuple[SimCC, SimTypeFunction] | None:
|
|
268
|
+
def _analyze_plt(self) -> tuple[SimCC, SimTypeFunction | None] | None:
|
|
268
269
|
"""
|
|
269
270
|
Get the calling convention for a PLT stub.
|
|
270
271
|
|
|
@@ -296,6 +297,14 @@ class CallingConventionAnalysis(Analysis):
|
|
|
296
297
|
real_func = None
|
|
297
298
|
|
|
298
299
|
if real_func is not None:
|
|
300
|
+
if real_func.calling_convention is None:
|
|
301
|
+
cc_cls = default_cc(self.project.arch.name)
|
|
302
|
+
if cc_cls is None:
|
|
303
|
+
# can't determine the default calling convention for this architecture
|
|
304
|
+
return None
|
|
305
|
+
cc = cc_cls(self.project.arch)
|
|
306
|
+
else:
|
|
307
|
+
cc = real_func.calling_convention
|
|
299
308
|
if real_func.is_simprocedure:
|
|
300
309
|
if self.project.is_hooked(real_func.addr):
|
|
301
310
|
# prioritize the hooker
|
|
@@ -303,17 +312,20 @@ class CallingConventionAnalysis(Analysis):
|
|
|
303
312
|
if hooker is not None and (
|
|
304
313
|
not hooker.is_stub or (hooker.is_function and not hooker.guessed_prototype)
|
|
305
314
|
):
|
|
306
|
-
return
|
|
307
|
-
if real_func.
|
|
308
|
-
return
|
|
315
|
+
return cc, hooker.prototype
|
|
316
|
+
if real_func.prototype is not None:
|
|
317
|
+
return cc, real_func.prototype
|
|
309
318
|
else:
|
|
310
|
-
return
|
|
319
|
+
return cc, real_func.prototype
|
|
311
320
|
|
|
312
321
|
if self.analyze_callsites:
|
|
313
322
|
# determine the calling convention by analyzing its callsites
|
|
314
323
|
callsite_facts = self._extract_and_analyze_callsites(max_analyzing_callsites=1)
|
|
315
324
|
cc_cls = default_cc(self.project.arch.name)
|
|
316
|
-
|
|
325
|
+
if cc_cls is None:
|
|
326
|
+
# can't determine the default calling convention for this architecture
|
|
327
|
+
return None
|
|
328
|
+
cc = cc_cls(self.project.arch)
|
|
317
329
|
prototype = SimTypeFunction([], None)
|
|
318
330
|
prototype = self._adjust_prototype(
|
|
319
331
|
prototype, callsite_facts, update_arguments=UpdateArgumentsOption.AlwaysUpdate
|
|
@@ -342,7 +354,7 @@ class CallingConventionAnalysis(Analysis):
|
|
|
342
354
|
input_variables = vm.input_variables()
|
|
343
355
|
input_args = self._args_from_vars(input_variables, vm)
|
|
344
356
|
else:
|
|
345
|
-
input_args = self._input_args
|
|
357
|
+
input_args = set(self._input_args)
|
|
346
358
|
retval_size = self._retval_size
|
|
347
359
|
|
|
348
360
|
# check if this function is a variadic function
|
|
@@ -355,8 +367,14 @@ class CallingConventionAnalysis(Analysis):
|
|
|
355
367
|
# TODO: properly determine sp_delta
|
|
356
368
|
sp_delta = self.project.arch.bytes if self.project.arch.call_pushes_ret else 0
|
|
357
369
|
|
|
358
|
-
|
|
359
|
-
|
|
370
|
+
full_input_args = self._consolidate_input_args(input_args)
|
|
371
|
+
full_input_args_copy = list(full_input_args) # input_args might be modified by find_cc()
|
|
372
|
+
cc = SimCC.find_cc(self.project.arch, full_input_args_copy, sp_delta, platform=self.project.simos.name)
|
|
373
|
+
|
|
374
|
+
# update input_args according to the difference between full_input_args and full_input_args_copy
|
|
375
|
+
for a in full_input_args:
|
|
376
|
+
if a not in full_input_args_copy and a in input_args:
|
|
377
|
+
input_args.remove(a)
|
|
360
378
|
|
|
361
379
|
if cc is None:
|
|
362
380
|
l.warning(
|
|
@@ -657,12 +675,6 @@ class CallingConventionAnalysis(Analysis):
|
|
|
657
675
|
else:
|
|
658
676
|
break
|
|
659
677
|
|
|
660
|
-
if None in temp_args:
|
|
661
|
-
first_none_idx = temp_args.index(None)
|
|
662
|
-
# test if there is at least one argument set after None; if so, we ignore the first None
|
|
663
|
-
if any(arg is not None for arg in temp_args[first_none_idx:]):
|
|
664
|
-
temp_args[first_none_idx] = expected_args[first_none_idx]
|
|
665
|
-
|
|
666
678
|
if None in temp_args:
|
|
667
679
|
# we be very conservative here and ignore all arguments starting from the first missing one
|
|
668
680
|
first_none_idx = temp_args.index(None)
|
|
@@ -681,17 +693,18 @@ class CallingConventionAnalysis(Analysis):
|
|
|
681
693
|
if all(fact.return_value_used is False for fact in facts):
|
|
682
694
|
proto.returnty = SimTypeBottom(label="void")
|
|
683
695
|
else:
|
|
684
|
-
proto.returnty
|
|
696
|
+
if proto.returnty is None or isinstance(proto.returnty, SimTypeBottom):
|
|
697
|
+
proto.returnty = SimTypeInt().with_arch(self.project.arch)
|
|
685
698
|
|
|
686
699
|
if (
|
|
687
700
|
update_arguments == UpdateArgumentsOption.AlwaysUpdate
|
|
688
701
|
or (update_arguments == UpdateArgumentsOption.UpdateWhenCCHasNoArgs and not proto.args)
|
|
689
702
|
) and len({len(fact.args) for fact in facts}) == 1:
|
|
690
703
|
fact = next(iter(facts))
|
|
691
|
-
proto.args =
|
|
704
|
+
proto.args = tuple(
|
|
692
705
|
self._guess_arg_type(arg) if arg is not None else SimTypeInt().with_arch(self.project.arch)
|
|
693
706
|
for arg in fact.args
|
|
694
|
-
|
|
707
|
+
)
|
|
695
708
|
|
|
696
709
|
return proto
|
|
697
710
|
|
|
@@ -730,13 +743,8 @@ class CallingConventionAnalysis(Analysis):
|
|
|
730
743
|
# a register variable, convert it to a register argument
|
|
731
744
|
if not is_sane_register_variable(self.project.arch, variable.reg, variable.size, def_cc=def_cc):
|
|
732
745
|
continue
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
reg_name = self.project.arch.translate_register_name(variable.reg, size=self.project.arch.bytes)
|
|
736
|
-
arg = SimRegArg(reg_name, self.project.arch.bytes)
|
|
737
|
-
else:
|
|
738
|
-
reg_name = self.project.arch.translate_register_name(variable.reg, size=variable.size)
|
|
739
|
-
arg = SimRegArg(reg_name, variable.size)
|
|
746
|
+
reg_name = self.project.arch.translate_register_name(variable.reg, size=variable.size)
|
|
747
|
+
arg = SimRegArg(reg_name, variable.size)
|
|
740
748
|
args.add(arg)
|
|
741
749
|
|
|
742
750
|
accesses = var_manager.get_variable_accesses(variable)
|
|
@@ -778,15 +786,58 @@ class CallingConventionAnalysis(Analysis):
|
|
|
778
786
|
|
|
779
787
|
return args.difference(restored_reg_vars)
|
|
780
788
|
|
|
781
|
-
def
|
|
789
|
+
def _consolidate_input_args(self, input_args: set[SimRegArg | SimStackArg]) -> set[SimRegArg | SimStackArg]:
|
|
790
|
+
"""
|
|
791
|
+
Consolidate register arguments by converting partial registers to full registers on certain architectures.
|
|
792
|
+
|
|
793
|
+
:param input_args: A set of input arguments.
|
|
794
|
+
:return: A set of consolidated input args.
|
|
795
|
+
"""
|
|
796
|
+
|
|
797
|
+
if self.project.arch.name in {"AMD64", "X86"}:
|
|
798
|
+
new_input_args = set()
|
|
799
|
+
for a in input_args:
|
|
800
|
+
if isinstance(a, SimRegArg) and a.size < self.project.arch.bytes:
|
|
801
|
+
# use complete registers on AMD64 and X86
|
|
802
|
+
reg_offset, reg_size = self.project.arch.registers[a.reg_name]
|
|
803
|
+
full_reg_offset, full_reg_size = get_reg_offset_base_and_size(
|
|
804
|
+
reg_offset, self.project.arch, size=reg_size
|
|
805
|
+
)
|
|
806
|
+
full_reg_name = self.project.arch.translate_register_name(full_reg_offset, size=full_reg_size)
|
|
807
|
+
arg = SimRegArg(full_reg_name, full_reg_size)
|
|
808
|
+
if arg not in new_input_args:
|
|
809
|
+
new_input_args.add(arg)
|
|
810
|
+
else:
|
|
811
|
+
new_input_args.add(a)
|
|
812
|
+
return new_input_args
|
|
813
|
+
|
|
814
|
+
return input_args
|
|
815
|
+
|
|
816
|
+
def _reorder_args(self, args: set[SimRegArg | SimStackArg], cc: SimCC) -> list[SimRegArg | SimStackArg]:
|
|
782
817
|
"""
|
|
783
818
|
Reorder arguments according to the calling convention identified.
|
|
784
819
|
|
|
785
|
-
:param args: A
|
|
820
|
+
:param args: A set of arguments that haven't been ordered.
|
|
786
821
|
:param cc: The identified calling convention.
|
|
787
822
|
:return: A reordered list of args.
|
|
788
823
|
"""
|
|
789
824
|
|
|
825
|
+
def _is_same_reg(rn0: str, rn1: str) -> bool:
|
|
826
|
+
"""
|
|
827
|
+
Check if rn0 and rn1 belong to the same base register.
|
|
828
|
+
|
|
829
|
+
:param rn0: Register name of the first register.
|
|
830
|
+
:param rn1: Register name of the second register.
|
|
831
|
+
:return: True if they belong to the same base register; False otherwise.
|
|
832
|
+
"""
|
|
833
|
+
if rn0 == rn1:
|
|
834
|
+
return True
|
|
835
|
+
off0, sz0 = self.project.arch.registers[rn0]
|
|
836
|
+
full_off0 = get_reg_offset_base(off0, self.project.arch, sz0)
|
|
837
|
+
off1, sz1 = self.project.arch.registers[rn1]
|
|
838
|
+
full_off1 = get_reg_offset_base(off1, self.project.arch, sz1)
|
|
839
|
+
return full_off0 == full_off1
|
|
840
|
+
|
|
790
841
|
reg_args = []
|
|
791
842
|
|
|
792
843
|
# split args into two lists
|
|
@@ -805,7 +856,7 @@ class CallingConventionAnalysis(Analysis):
|
|
|
805
856
|
# match int args first
|
|
806
857
|
for reg_name in cc.ARG_REGS:
|
|
807
858
|
try:
|
|
808
|
-
arg = next(iter(a for a in int_args if isinstance(a, SimRegArg) and a.reg_name
|
|
859
|
+
arg = next(iter(a for a in int_args if isinstance(a, SimRegArg) and _is_same_reg(a.reg_name, reg_name)))
|
|
809
860
|
except StopIteration:
|
|
810
861
|
# have we reached the end of the args list?
|
|
811
862
|
if [a for a in int_args if isinstance(a, SimRegArg)] or len(stack_int_args) > 0:
|
|
@@ -821,7 +872,9 @@ class CallingConventionAnalysis(Analysis):
|
|
|
821
872
|
if fp_args:
|
|
822
873
|
for reg_name in cc.FP_ARG_REGS:
|
|
823
874
|
try:
|
|
824
|
-
arg = next(
|
|
875
|
+
arg = next(
|
|
876
|
+
iter(a for a in fp_args if isinstance(a, SimRegArg) and _is_same_reg(a.reg_name, reg_name))
|
|
877
|
+
)
|
|
825
878
|
except StopIteration:
|
|
826
879
|
# have we reached the end of the args list?
|
|
827
880
|
if [a for a in fp_args if isinstance(a, SimRegArg)] or len(stack_fp_args) > 0:
|
|
@@ -886,12 +939,15 @@ class CallingConventionAnalysis(Analysis):
|
|
|
886
939
|
if 5 <= ret_val_size <= 8:
|
|
887
940
|
return SimTypeLongLong()
|
|
888
941
|
|
|
889
|
-
|
|
890
|
-
return SimTypeInt() if cc.arch.bits == 32 else SimTypeLongLong()
|
|
942
|
+
return SimTypeBottom(label="void")
|
|
891
943
|
|
|
892
944
|
@staticmethod
|
|
893
945
|
def _likely_saving_temp_reg(ail_block: ailment.Block, d: Definition, all_reg_defs: set[Definition]) -> bool:
|
|
894
|
-
if
|
|
946
|
+
if (
|
|
947
|
+
d.codeloc.block_addr == ail_block.addr
|
|
948
|
+
and d.codeloc.stmt_idx is not None
|
|
949
|
+
and d.codeloc.stmt_idx < len(ail_block.statements)
|
|
950
|
+
):
|
|
895
951
|
stmt = ail_block.statements[d.codeloc.stmt_idx]
|
|
896
952
|
if isinstance(stmt, ailment.Stmt.Assignment) and isinstance(stmt.src, ailment.Expr.Register):
|
|
897
953
|
src_offset = stmt.src.reg_offset
|
|
@@ -90,7 +90,7 @@ binop_handler = SimEngineNostmtVEX[FactCollectorState, claripy.ast.BV, FactColle
|
|
|
90
90
|
|
|
91
91
|
class SimEngineFactCollectorVEX(
|
|
92
92
|
SimEngineNostmtVEX[FactCollectorState, SpOffset | RegisterOffset | int, None],
|
|
93
|
-
SimEngineLight[
|
|
93
|
+
SimEngineLight[FactCollectorState, SpOffset | RegisterOffset | int, Block, None],
|
|
94
94
|
):
|
|
95
95
|
"""
|
|
96
96
|
THe engine for FactCollector.
|
|
@@ -101,7 +101,7 @@ class SimEngineFactCollectorVEX(
|
|
|
101
101
|
super().__init__(project)
|
|
102
102
|
|
|
103
103
|
def _process_block_end(self, stmt_result: list, whitelist: set[int] | None) -> None:
|
|
104
|
-
if self.block.vex.jumpkind == "Ijk_Call":
|
|
104
|
+
if self.block.vex.jumpkind == "Ijk_Call" and self.arch.ret_offset is not None:
|
|
105
105
|
self.state.register_written(self.arch.ret_offset, self.arch.bytes)
|
|
106
106
|
|
|
107
107
|
def _top(self, bits: int):
|
|
@@ -110,7 +110,7 @@ class SimEngineFactCollectorVEX(
|
|
|
110
110
|
def _is_top(self, expr: Any) -> bool:
|
|
111
111
|
raise NotImplementedError
|
|
112
112
|
|
|
113
|
-
def _handle_conversion(self, from_size: int, to_size: int, signed: bool, operand: pyvex.IRExpr) -> Any:
|
|
113
|
+
def _handle_conversion(self, from_size: int, to_size: int, signed: bool, operand: pyvex.expr.IRExpr) -> Any:
|
|
114
114
|
return None
|
|
115
115
|
|
|
116
116
|
def _handle_stmt_Put(self, stmt):
|
|
@@ -142,9 +142,9 @@ class SimEngineFactCollectorVEX(
|
|
|
142
142
|
return expr.con.value
|
|
143
143
|
|
|
144
144
|
def _handle_expr_GSPTR(self, expr):
|
|
145
|
-
return
|
|
145
|
+
return 0
|
|
146
146
|
|
|
147
|
-
def _handle_expr_Get(self, expr) -> SpOffset |
|
|
147
|
+
def _handle_expr_Get(self, expr) -> SpOffset | RegisterOffset:
|
|
148
148
|
if expr.offset == self.arch.sp_offset:
|
|
149
149
|
return SpOffset(self.arch.bits, self.state.sp_value, is_base=False)
|
|
150
150
|
if expr.offset == self.arch.bp_offset and not self.bp_as_gpr:
|
|
@@ -304,7 +304,10 @@ class FactCollector(Analysis):
|
|
|
304
304
|
|
|
305
305
|
def _handle_function(self, state: FactCollectorState, func: Function) -> None:
|
|
306
306
|
try:
|
|
307
|
-
|
|
307
|
+
if func.calling_convention is not None and func.prototype is not None:
|
|
308
|
+
arg_locs = func.calling_convention.arg_locs(func.prototype)
|
|
309
|
+
else:
|
|
310
|
+
return
|
|
308
311
|
except (TypeError, ValueError):
|
|
309
312
|
return
|
|
310
313
|
|
|
@@ -355,6 +358,7 @@ class FactCollector(Analysis):
|
|
|
355
358
|
|
|
356
359
|
if isinstance(node, BlockNode) and node.size == 0:
|
|
357
360
|
continue
|
|
361
|
+
|
|
358
362
|
if isinstance(node, HookNode):
|
|
359
363
|
# attempt to convert it into a function
|
|
360
364
|
if self.kb.functions.contains_addr(node.addr):
|
|
@@ -369,17 +373,43 @@ class FactCollector(Analysis):
|
|
|
369
373
|
and not isinstance(node.prototype.returnty, SimTypeBottom)
|
|
370
374
|
):
|
|
371
375
|
# assume the function overwrites the return variable
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
376
|
+
returnty_size = node.prototype.returnty.with_arch(self.project.arch).size
|
|
377
|
+
assert returnty_size is not None
|
|
378
|
+
retval_size = returnty_size // self.project.arch.byte_width
|
|
375
379
|
retval_sizes.append(retval_size)
|
|
376
380
|
continue
|
|
377
381
|
|
|
382
|
+
# if this block ends with a call to a function, we process the function first
|
|
383
|
+
func_succs = [
|
|
384
|
+
succ
|
|
385
|
+
for succ in func_graph.successors(node)
|
|
386
|
+
if isinstance(succ, (Function, HookNode)) or self.kb.functions.contains_addr(succ.addr)
|
|
387
|
+
]
|
|
388
|
+
if len(func_succs) == 1:
|
|
389
|
+
func_succ = func_succs[0]
|
|
390
|
+
if isinstance(func_succ, (BlockNode, HookNode)) and self.kb.functions.contains_addr(func_succ.addr):
|
|
391
|
+
# attempt to convert it into a function
|
|
392
|
+
func_succ = self.kb.functions.get_by_addr(func_succ.addr)
|
|
393
|
+
if isinstance(func_succ, Function):
|
|
394
|
+
if (
|
|
395
|
+
func_succ.calling_convention is not None
|
|
396
|
+
and func_succ.prototype is not None
|
|
397
|
+
and func_succ.prototype.returnty is not None
|
|
398
|
+
and not isinstance(func_succ.prototype.returnty, SimTypeBottom)
|
|
399
|
+
):
|
|
400
|
+
# assume the function overwrites the return variable
|
|
401
|
+
returnty_size = func_succ.prototype.returnty.with_arch(self.project.arch).size
|
|
402
|
+
assert returnty_size is not None
|
|
403
|
+
retval_size = returnty_size // self.project.arch.byte_width
|
|
404
|
+
retval_sizes.append(retval_size)
|
|
405
|
+
continue
|
|
406
|
+
|
|
378
407
|
block = self.project.factory.block(node.addr, size=node.size)
|
|
379
408
|
# scan the block statements backwards to find writes to the return value register
|
|
380
409
|
retval_size = None
|
|
381
410
|
for stmt in reversed(block.vex.statements):
|
|
382
411
|
if isinstance(stmt, pyvex.IRStmt.Put):
|
|
412
|
+
assert block.vex.tyenv is not None
|
|
383
413
|
size = stmt.data.result_size(block.vex.tyenv) // self.project.arch.byte_width
|
|
384
414
|
if stmt.offset == retreg_offset:
|
|
385
415
|
retval_size = max(size, 1)
|
|
@@ -391,9 +421,9 @@ class FactCollector(Analysis):
|
|
|
391
421
|
for pred, _, data in func_graph.in_edges(node, data=True):
|
|
392
422
|
edge_type = data.get("type")
|
|
393
423
|
if pred not in traversed and depth + 1 <= self._max_depth:
|
|
394
|
-
if edge_type == "
|
|
424
|
+
if edge_type == "call":
|
|
395
425
|
continue
|
|
396
|
-
if edge_type in {"transition", "
|
|
426
|
+
if edge_type in {"transition", "fake_return"}:
|
|
397
427
|
queue.append((depth + 1, pred))
|
|
398
428
|
|
|
399
429
|
self.retval_size = max(retval_sizes) if retval_sizes else None
|
|
@@ -472,6 +502,7 @@ class FactCollector(Analysis):
|
|
|
472
502
|
):
|
|
473
503
|
tmps[stmt.tmp] = "sp"
|
|
474
504
|
if isinstance(stmt, pyvex.IRStmt.Put):
|
|
505
|
+
assert block.vex.tyenv is not None
|
|
475
506
|
size = stmt.data.result_size(block.vex.tyenv) // self.project.arch.byte_width
|
|
476
507
|
# is the data loaded from the stack?
|
|
477
508
|
if (
|
|
@@ -532,13 +563,8 @@ class FactCollector(Analysis):
|
|
|
532
563
|
):
|
|
533
564
|
continue
|
|
534
565
|
reg_offset_created.add(offset)
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
reg_name = self.project.arch.translate_register_name(offset, size=self.project.arch.bytes)
|
|
538
|
-
arg = SimRegArg(reg_name, self.project.arch.bytes)
|
|
539
|
-
else:
|
|
540
|
-
reg_name = self.project.arch.translate_register_name(offset, size=size)
|
|
541
|
-
arg = SimRegArg(reg_name, size)
|
|
566
|
+
reg_name = self.project.arch.translate_register_name(offset, size=size)
|
|
567
|
+
arg = SimRegArg(reg_name, size)
|
|
542
568
|
self.input_args.append(arg)
|
|
543
569
|
|
|
544
570
|
stack_offset_created = set()
|
|
@@ -9,7 +9,9 @@ from angr.calling_conventions import SimCC
|
|
|
9
9
|
l = logging.getLogger(__name__)
|
|
10
10
|
|
|
11
11
|
|
|
12
|
-
def is_sane_register_variable(
|
|
12
|
+
def is_sane_register_variable(
|
|
13
|
+
arch: archinfo.Arch, reg_offset: int, reg_size: int, def_cc: SimCC | type[SimCC] | None = None
|
|
14
|
+
) -> bool:
|
|
13
15
|
"""
|
|
14
16
|
Filters all registers that are surly not members of function arguments.
|
|
15
17
|
This can be seen as a workaround, since VariableRecoveryFast sometimes gives input variables of cc_ndep (which
|
|
@@ -182,23 +182,24 @@ class ConstantValueManager:
|
|
|
182
182
|
|
|
183
183
|
# determine blocks to run FCP on
|
|
184
184
|
|
|
185
|
-
# - include at most three levels of successors from the entrypoint
|
|
185
|
+
# - include at most three levels of superblock successors from the entrypoint
|
|
186
186
|
startpoint = self.func.startpoint
|
|
187
187
|
blocks = set()
|
|
188
|
-
|
|
189
|
-
|
|
188
|
+
succ_and_levels = [(startpoint, 0)]
|
|
189
|
+
while succ_and_levels:
|
|
190
190
|
new_succs = []
|
|
191
|
-
for node in
|
|
191
|
+
for node, level in succ_and_levels:
|
|
192
192
|
if node in blocks:
|
|
193
193
|
continue
|
|
194
194
|
blocks.add(node)
|
|
195
195
|
if node.addr == self.indirect_jump_addr:
|
|
196
196
|
# stop at the indirect jump block
|
|
197
197
|
continue
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
198
|
+
for _, succ, data in self.func.graph.out_edges(node, data=True):
|
|
199
|
+
new_level = level if data.get("type") == "fake_return" else level + 1
|
|
200
|
+
if new_level <= 3:
|
|
201
|
+
new_succs.append((succ, new_level))
|
|
202
|
+
succ_and_levels = new_succs
|
|
202
203
|
|
|
203
204
|
# - include at most six levels of predecessors from the indirect jump block
|
|
204
205
|
ij_block = self.func.get_node(self.indirect_jump_addr)
|
|
@@ -99,6 +99,7 @@ class AILSimplifier(Analysis):
|
|
|
99
99
|
removed_vvar_ids: set[int] | None = None,
|
|
100
100
|
arg_vvars: dict[int, tuple[VirtualVariable, SimVariable]] | None = None,
|
|
101
101
|
avoid_vvar_ids: set[int] | None = None,
|
|
102
|
+
secondary_stackvars: set[int] | None = None,
|
|
102
103
|
):
|
|
103
104
|
self.func = func
|
|
104
105
|
self.func_graph = func_graph if func_graph is not None else func.graph
|
|
@@ -119,6 +120,7 @@ class AILSimplifier(Analysis):
|
|
|
119
120
|
self._arg_vvars = arg_vvars
|
|
120
121
|
self._avoid_vvar_ids = avoid_vvar_ids
|
|
121
122
|
self._propagator_dead_vvar_ids: set[int] = set()
|
|
123
|
+
self._secondary_stackvars: set[int] = secondary_stackvars if secondary_stackvars is not None else set()
|
|
122
124
|
|
|
123
125
|
self._calls_to_remove: set[CodeLocation] = set()
|
|
124
126
|
self._assignments_to_remove: set[CodeLocation] = set()
|
|
@@ -1348,6 +1350,9 @@ class AILSimplifier(Analysis):
|
|
|
1348
1350
|
if rd.is_phi_vvar_id(def_.atom.varid):
|
|
1349
1351
|
# we always remove unused phi variables
|
|
1350
1352
|
pass
|
|
1353
|
+
elif def_.atom.varid in self._secondary_stackvars:
|
|
1354
|
+
# secondary stack variables are potentially removable
|
|
1355
|
+
pass
|
|
1351
1356
|
elif stackarg_offsets is not None:
|
|
1352
1357
|
# we always remove definitions for stack arguments
|
|
1353
1358
|
assert def_.atom.stack_offset is not None
|
|
@@ -1380,7 +1385,9 @@ class AILSimplifier(Analysis):
|
|
|
1380
1385
|
if not isinstance(def_.codeloc, ExternalCodeLocation):
|
|
1381
1386
|
assert def_.codeloc.block_addr is not None
|
|
1382
1387
|
assert def_.codeloc.stmt_idx is not None
|
|
1383
|
-
|
|
1388
|
+
stmts_to_keep_per_block[(def_.codeloc.block_addr, def_.codeloc.block_idx)].add(
|
|
1389
|
+
def_.codeloc.stmt_idx
|
|
1390
|
+
)
|
|
1384
1391
|
|
|
1385
1392
|
# find all phi variables that rely on variables that no longer exist
|
|
1386
1393
|
all_removed_var_ids = self._removed_vvar_ids.copy()
|
|
@@ -1503,8 +1510,36 @@ class AILSimplifier(Analysis):
|
|
|
1503
1510
|
|
|
1504
1511
|
return simplified
|
|
1505
1512
|
|
|
1513
|
+
@staticmethod
|
|
1514
|
+
def _get_vvar_used_by(
|
|
1515
|
+
vvar_id: int, rd: SRDAModel, blocks_dict: dict[tuple[int, int | None], Block]
|
|
1516
|
+
) -> set[int | None]:
|
|
1517
|
+
"""
|
|
1518
|
+
Get all atoms that use a specified virtual variable. The atoms are in the form of virtual variable ID or None
|
|
1519
|
+
(indicating the virtual variable is used by another statement like Store).
|
|
1520
|
+
|
|
1521
|
+
:param vvar_id: ID of the virtual variable.
|
|
1522
|
+
:param rd: The SRDA model.
|
|
1523
|
+
:return: The set of vvar use atoms.
|
|
1524
|
+
"""
|
|
1525
|
+
|
|
1526
|
+
vvar = rd.varid_to_vvar[vvar_id]
|
|
1527
|
+
used_by: set[int | None] = set()
|
|
1528
|
+
for used_vvar, loc in rd.all_vvar_uses[vvar]:
|
|
1529
|
+
if used_vvar is None:
|
|
1530
|
+
# no explicit reference
|
|
1531
|
+
used_by.add(None)
|
|
1532
|
+
elif loc.block_addr is not None:
|
|
1533
|
+
assert loc.stmt_idx is not None
|
|
1534
|
+
stmt = blocks_dict[(loc.block_addr, loc.block_idx)].statements[loc.stmt_idx]
|
|
1535
|
+
if isinstance(stmt, Assignment) and isinstance(stmt.dst, VirtualVariable):
|
|
1536
|
+
used_by.add(stmt.dst.varid)
|
|
1537
|
+
else:
|
|
1538
|
+
used_by.add(None)
|
|
1539
|
+
return used_by
|
|
1540
|
+
|
|
1506
1541
|
def _find_cyclic_dependent_phis_and_dirty_vvars(self, rd: SRDAModel) -> set[int]:
|
|
1507
|
-
blocks_dict = {(bb.addr, bb.idx): bb for bb in self.func_graph}
|
|
1542
|
+
blocks_dict: dict[tuple[int, int | None], Block] = {(bb.addr, bb.idx): bb for bb in self.func_graph}
|
|
1508
1543
|
|
|
1509
1544
|
# find dirty vvars and vexccall vvars
|
|
1510
1545
|
dirty_vvar_ids = set()
|
|
@@ -1520,25 +1555,14 @@ class AILSimplifier(Analysis):
|
|
|
1520
1555
|
|
|
1521
1556
|
phi_and_dirty_vvar_ids = rd.phi_vvar_ids | dirty_vvar_ids
|
|
1522
1557
|
|
|
1523
|
-
vvar_used_by: dict[int, set[int]] = defaultdict(set)
|
|
1558
|
+
vvar_used_by: dict[int, set[int | None]] = defaultdict(set)
|
|
1524
1559
|
for var_id in phi_and_dirty_vvar_ids:
|
|
1525
1560
|
if var_id in rd.phivarid_to_varids:
|
|
1526
1561
|
for used_by_varid in rd.phivarid_to_varids[var_id]:
|
|
1527
|
-
vvar_used_by
|
|
1528
|
-
|
|
1529
|
-
|
|
1530
|
-
|
|
1531
|
-
for used_vvar, loc in rd.all_vvar_uses[vvar]:
|
|
1532
|
-
if used_vvar is None:
|
|
1533
|
-
# no explicit reference
|
|
1534
|
-
used_by.add(None)
|
|
1535
|
-
else:
|
|
1536
|
-
stmt = blocks_dict[loc.block_addr, loc.block_idx].statements[loc.stmt_idx]
|
|
1537
|
-
if isinstance(stmt, Assignment) and isinstance(stmt.dst, VirtualVariable):
|
|
1538
|
-
used_by.add(stmt.dst.varid)
|
|
1539
|
-
else:
|
|
1540
|
-
used_by.add(None)
|
|
1541
|
-
vvar_used_by[var_id] |= used_by
|
|
1562
|
+
if used_by_varid not in vvar_used_by:
|
|
1563
|
+
vvar_used_by[used_by_varid] |= self._get_vvar_used_by(used_by_varid, rd, blocks_dict)
|
|
1564
|
+
vvar_used_by[used_by_varid].add(var_id) # probably unnecessary
|
|
1565
|
+
vvar_used_by[var_id] |= self._get_vvar_used_by(var_id, rd, blocks_dict)
|
|
1542
1566
|
|
|
1543
1567
|
g = networkx.DiGraph()
|
|
1544
1568
|
dummy_vvar_id = -1
|
|
@@ -1557,8 +1581,12 @@ class AILSimplifier(Analysis):
|
|
|
1557
1581
|
|
|
1558
1582
|
bail = False
|
|
1559
1583
|
for varid in scc:
|
|
1560
|
-
#
|
|
1561
|
-
|
|
1584
|
+
# ensure this vvar is not used by anything else outside the scc (regardless of whether this vvar is a
|
|
1585
|
+
# phi variable or not)
|
|
1586
|
+
if varid in vvar_used_by and None in vvar_used_by[varid]:
|
|
1587
|
+
bail = True
|
|
1588
|
+
break
|
|
1589
|
+
if bail is False:
|
|
1562
1590
|
succs = list(g.successors(varid))
|
|
1563
1591
|
if any(succ_varid not in scc for succ_varid in succs):
|
|
1564
1592
|
bail = True
|
|
@@ -45,7 +45,7 @@ class CallSiteMaker(Analysis):
|
|
|
45
45
|
self._ail_manager = ail_manager
|
|
46
46
|
|
|
47
47
|
self.result_block = None
|
|
48
|
-
self.stack_arg_offsets: set[tuple[int, int]] | None = None #
|
|
48
|
+
self.stack_arg_offsets: set[tuple[int, int]] | None = None # call ins addr, stack_offset
|
|
49
49
|
self.removed_vvar_ids: set[int] = set()
|
|
50
50
|
|
|
51
51
|
self._analyze()
|
|
@@ -372,7 +372,9 @@ class CallSiteMaker(Analysis):
|
|
|
372
372
|
|
|
373
373
|
return None
|
|
374
374
|
|
|
375
|
-
def _resolve_stack_argument(
|
|
375
|
+
def _resolve_stack_argument(
|
|
376
|
+
self, call_stmt: Stmt.Call, arg_loc
|
|
377
|
+
) -> tuple[Any, Any]: # pylint:disable=unused-argument
|
|
376
378
|
assert self._stack_pointer_tracker is not None
|
|
377
379
|
|
|
378
380
|
size = arg_loc.size
|
|
@@ -399,15 +401,26 @@ class CallSiteMaker(Analysis):
|
|
|
399
401
|
# FIXME: vvar may be larger than that we ask; we may need to chop the correct value of vvar
|
|
400
402
|
value = view.get_vvar_value(vvar)
|
|
401
403
|
if value is not None and not isinstance(value, Expr.Phi):
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
404
|
+
v: Expr.Expression = value
|
|
405
|
+
else:
|
|
406
|
+
v: Expr.Expression = Expr.VirtualVariable(
|
|
407
|
+
self._atom_idx(),
|
|
408
|
+
vvar.varid,
|
|
409
|
+
vvar.bits,
|
|
410
|
+
vvar.category,
|
|
411
|
+
oident=vvar.oident,
|
|
412
|
+
ins_addr=call_stmt.ins_addr,
|
|
413
|
+
)
|
|
414
|
+
if v.size > size:
|
|
415
|
+
v = Expr.Convert(
|
|
416
|
+
self._atom_idx(),
|
|
417
|
+
v.bits,
|
|
418
|
+
size * self.project.arch.byte_width,
|
|
419
|
+
False,
|
|
420
|
+
v,
|
|
421
|
+
ins_addr=call_stmt.ins_addr,
|
|
422
|
+
)
|
|
423
|
+
return None, v
|
|
411
424
|
|
|
412
425
|
return None, Expr.Load(
|
|
413
426
|
self._atom_idx(),
|
|
@@ -154,6 +154,9 @@ class Clinic(Analysis):
|
|
|
154
154
|
self._mode = mode
|
|
155
155
|
self.vvar_id_start = vvar_id_start
|
|
156
156
|
self.vvar_to_vvar: dict[int, int] | None = None
|
|
157
|
+
# during SSA conversion, we create secondary stack variables because they overlap and are larger than the
|
|
158
|
+
# actual stack variables. these secondary stack variables can be safely eliminated if not used by anything.
|
|
159
|
+
self.secondary_stackvars: set[int] = set()
|
|
157
160
|
|
|
158
161
|
# inlining help
|
|
159
162
|
self._sp_shift = sp_shift
|
|
@@ -1239,6 +1242,7 @@ class Clinic(Analysis):
|
|
|
1239
1242
|
rewrite_ccalls=rewrite_ccalls,
|
|
1240
1243
|
removed_vvar_ids=removed_vvar_ids,
|
|
1241
1244
|
arg_vvars=arg_vvars,
|
|
1245
|
+
secondary_stackvars=self.secondary_stackvars,
|
|
1242
1246
|
)
|
|
1243
1247
|
# cache the simplifier's RDA analysis
|
|
1244
1248
|
self.reaching_definitions = simp._reaching_definitions
|
|
@@ -1364,6 +1368,7 @@ class Clinic(Analysis):
|
|
|
1364
1368
|
vvar_id_start=self.vvar_id_start,
|
|
1365
1369
|
)
|
|
1366
1370
|
self.vvar_id_start = ssailification.max_vvar_id + 1
|
|
1371
|
+
self.secondary_stackvars = ssailification.secondary_stackvars
|
|
1367
1372
|
return ssailification.out_graph
|
|
1368
1373
|
|
|
1369
1374
|
@timethis
|
|
@@ -1864,6 +1869,11 @@ class Clinic(Analysis):
|
|
|
1864
1869
|
if expr.guard:
|
|
1865
1870
|
self._link_variables_on_expr(variable_manager, global_variables, block, stmt_idx, stmt, expr.guard)
|
|
1866
1871
|
|
|
1872
|
+
elif isinstance(expr, ailment.Expr.Phi):
|
|
1873
|
+
for _, vvar in expr.src_and_vvars:
|
|
1874
|
+
if vvar is not None:
|
|
1875
|
+
self._link_variables_on_expr(variable_manager, global_variables, block, stmt_idx, stmt, vvar)
|
|
1876
|
+
|
|
1867
1877
|
def _function_graph_to_ail_graph(self, func_graph, blocks_by_addr_and_size=None):
|
|
1868
1878
|
if blocks_by_addr_and_size is None:
|
|
1869
1879
|
blocks_by_addr_and_size = self._blocks_by_addr_and_size
|
|
@@ -500,6 +500,7 @@ class Decompiler(Analysis):
|
|
|
500
500
|
scratch=self._optimization_scratch,
|
|
501
501
|
force_loop_single_exit=self._force_loop_single_exit,
|
|
502
502
|
complete_successors=self._complete_successors,
|
|
503
|
+
peephole_optimizations=self._peephole_optimizations,
|
|
503
504
|
**kwargs,
|
|
504
505
|
)
|
|
505
506
|
|
|
@@ -950,7 +950,9 @@ class DuplicationReverter(StructuringOptimizationPass):
|
|
|
950
950
|
#
|
|
951
951
|
|
|
952
952
|
def _share_subregion(self, blocks: list[Block]) -> bool:
|
|
953
|
-
return any(
|
|
953
|
+
return any(
|
|
954
|
+
all((block.addr, block.idx) in region for block in blocks) for region in self._ri.regions_by_block_addrs
|
|
955
|
+
)
|
|
954
956
|
|
|
955
957
|
def _is_valid_candidate(self, b0, b1):
|
|
956
958
|
# blocks must have statements
|