angr 9.2.140__py3-none-manylinux2014_x86_64.whl → 9.2.142__py3-none-manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of angr might be problematic. Click here for more details.
- angr/__init__.py +1 -1
- angr/analyses/calling_convention/calling_convention.py +105 -35
- angr/analyses/calling_convention/fact_collector.py +44 -18
- angr/analyses/calling_convention/utils.py +3 -1
- angr/analyses/cfg/cfg_base.py +38 -4
- angr/analyses/cfg/cfg_fast.py +23 -7
- angr/analyses/cfg/indirect_jump_resolvers/jumptable.py +13 -8
- angr/analyses/class_identifier.py +8 -7
- angr/analyses/complete_calling_conventions.py +1 -1
- angr/analyses/decompiler/ail_simplifier.py +105 -62
- angr/analyses/decompiler/callsite_maker.py +24 -11
- angr/analyses/decompiler/clinic.py +83 -5
- angr/analyses/decompiler/condition_processor.py +7 -7
- angr/analyses/decompiler/decompilation_cache.py +2 -1
- angr/analyses/decompiler/decompiler.py +11 -2
- angr/analyses/decompiler/dephication/graph_vvar_mapping.py +4 -6
- angr/analyses/decompiler/optimization_passes/base_ptr_save_simplifier.py +8 -2
- angr/analyses/decompiler/optimization_passes/condition_constprop.py +63 -34
- angr/analyses/decompiler/optimization_passes/duplication_reverter/duplication_reverter.py +3 -1
- angr/analyses/decompiler/optimization_passes/flip_boolean_cmp.py +21 -2
- angr/analyses/decompiler/optimization_passes/lowered_switch_simplifier.py +85 -16
- angr/analyses/decompiler/optimization_passes/optimization_pass.py +78 -1
- angr/analyses/decompiler/optimization_passes/register_save_area_simplifier.py +29 -7
- angr/analyses/decompiler/optimization_passes/return_duplicator_base.py +51 -7
- angr/analyses/decompiler/optimization_passes/stack_canary_simplifier.py +6 -0
- angr/analyses/decompiler/optimization_passes/win_stack_canary_simplifier.py +9 -1
- angr/analyses/decompiler/peephole_optimizations/eager_eval.py +44 -7
- angr/analyses/decompiler/region_identifier.py +76 -51
- angr/analyses/decompiler/region_simplifiers/expr_folding.py +32 -18
- angr/analyses/decompiler/region_simplifiers/region_simplifier.py +4 -1
- angr/analyses/decompiler/ssailification/rewriting.py +70 -32
- angr/analyses/decompiler/ssailification/rewriting_engine.py +118 -24
- angr/analyses/decompiler/ssailification/ssailification.py +22 -14
- angr/analyses/decompiler/stack_item.py +36 -0
- angr/analyses/decompiler/structured_codegen/c.py +86 -145
- angr/analyses/decompiler/structuring/dream.py +1 -1
- angr/analyses/decompiler/structuring/phoenix.py +9 -4
- angr/analyses/decompiler/structuring/structurer_base.py +2 -1
- angr/analyses/decompiler/utils.py +46 -20
- angr/analyses/find_objects_static.py +2 -1
- angr/analyses/reaching_definitions/engine_vex.py +13 -0
- angr/analyses/reaching_definitions/function_handler.py +24 -10
- angr/analyses/reaching_definitions/function_handler_library/stdio.py +1 -0
- angr/analyses/reaching_definitions/function_handler_library/stdlib.py +45 -12
- angr/analyses/reaching_definitions/function_handler_library/string.py +77 -21
- angr/analyses/reaching_definitions/function_handler_library/unistd.py +21 -1
- angr/analyses/reaching_definitions/rd_state.py +11 -7
- angr/analyses/s_liveness.py +44 -6
- angr/analyses/s_reaching_definitions/s_rda_model.py +4 -2
- angr/analyses/s_reaching_definitions/s_rda_view.py +43 -25
- angr/analyses/typehoon/simple_solver.py +35 -8
- angr/analyses/typehoon/typehoon.py +3 -1
- angr/analyses/variable_recovery/engine_ail.py +1 -1
- angr/analyses/variable_recovery/engine_vex.py +20 -4
- angr/calling_conventions.py +17 -12
- angr/factory.py +8 -3
- angr/knowledge_plugins/functions/function.py +5 -10
- angr/knowledge_plugins/variables/variable_manager.py +34 -5
- angr/procedures/definitions/__init__.py +3 -10
- angr/procedures/definitions/wdk_ntoskrnl.py +2 -0
- angr/procedures/win32_kernel/__fastfail.py +15 -0
- angr/sim_procedure.py +2 -2
- angr/simos/simos.py +17 -11
- angr/simos/windows.py +42 -1
- angr/utils/ail.py +41 -1
- angr/utils/cpp.py +17 -0
- angr/utils/doms.py +142 -0
- angr/utils/library.py +1 -1
- angr/utils/types.py +59 -0
- {angr-9.2.140.dist-info → angr-9.2.142.dist-info}/METADATA +7 -7
- {angr-9.2.140.dist-info → angr-9.2.142.dist-info}/RECORD +75 -70
- {angr-9.2.140.dist-info → angr-9.2.142.dist-info}/LICENSE +0 -0
- {angr-9.2.140.dist-info → angr-9.2.142.dist-info}/WHEEL +0 -0
- {angr-9.2.140.dist-info → angr-9.2.142.dist-info}/entry_points.txt +0 -0
- {angr-9.2.140.dist-info → angr-9.2.142.dist-info}/top_level.txt +0 -0
angr/__init__.py
CHANGED
|
@@ -33,6 +33,7 @@ from angr.knowledge_plugins.key_definitions.rd_model import ReachingDefinitionsM
|
|
|
33
33
|
from angr.knowledge_plugins.variables.variable_access import VariableAccessSort
|
|
34
34
|
from angr.knowledge_plugins.functions import Function
|
|
35
35
|
from angr.utils.constants import DEFAULT_STATEMENT
|
|
36
|
+
from angr.utils.ssa import get_reg_offset_base_and_size, get_reg_offset_base
|
|
36
37
|
from angr import SIM_PROCEDURES
|
|
37
38
|
from angr.analyses import Analysis, register_analysis, ReachingDefinitionsAnalysis
|
|
38
39
|
from angr.analyses.reaching_definitions import get_all_definitions
|
|
@@ -164,6 +165,19 @@ class CallingConventionAnalysis(Analysis):
|
|
|
164
165
|
):
|
|
165
166
|
return
|
|
166
167
|
|
|
168
|
+
if (
|
|
169
|
+
hooker is not None
|
|
170
|
+
and hooker.cc is not None
|
|
171
|
+
and hooker.is_function
|
|
172
|
+
and not hooker.guessed_prototype
|
|
173
|
+
and hooker.prototype is not None
|
|
174
|
+
):
|
|
175
|
+
# copy the calling convention and prototype from the SimProcedure instance
|
|
176
|
+
self.cc = hooker.cc
|
|
177
|
+
self.prototype = hooker.prototype
|
|
178
|
+
self.prototype_libname = hooker.library_name
|
|
179
|
+
return
|
|
180
|
+
|
|
167
181
|
if self._function.prototype is None:
|
|
168
182
|
# try our luck
|
|
169
183
|
# we set ignore_binary_name to True because the binary name SimProcedures is "cle##externs" and does not
|
|
@@ -264,7 +278,7 @@ class CallingConventionAnalysis(Analysis):
|
|
|
264
278
|
self.cc = cc
|
|
265
279
|
self.prototype = prototype
|
|
266
280
|
|
|
267
|
-
def _analyze_plt(self) -> tuple[SimCC, SimTypeFunction] | None:
|
|
281
|
+
def _analyze_plt(self) -> tuple[SimCC, SimTypeFunction | None] | None:
|
|
268
282
|
"""
|
|
269
283
|
Get the calling convention for a PLT stub.
|
|
270
284
|
|
|
@@ -296,24 +310,36 @@ class CallingConventionAnalysis(Analysis):
|
|
|
296
310
|
real_func = None
|
|
297
311
|
|
|
298
312
|
if real_func is not None:
|
|
313
|
+
if real_func.calling_convention is None:
|
|
314
|
+
cc_cls = default_cc(self.project.arch.name)
|
|
315
|
+
if cc_cls is None:
|
|
316
|
+
# can't determine the default calling convention for this architecture
|
|
317
|
+
return None
|
|
318
|
+
cc = cc_cls(self.project.arch)
|
|
319
|
+
else:
|
|
320
|
+
cc = real_func.calling_convention
|
|
299
321
|
if real_func.is_simprocedure:
|
|
300
322
|
if self.project.is_hooked(real_func.addr):
|
|
301
323
|
# prioritize the hooker
|
|
302
324
|
hooker = self.project.hooked_by(real_func.addr)
|
|
303
|
-
if hooker is not None and
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
325
|
+
if hooker is not None and hooker.is_function and not hooker.guessed_prototype:
|
|
326
|
+
# we only take the prototype from the SimProcedure if
|
|
327
|
+
# - the SimProcedure is a function
|
|
328
|
+
# - the prototype of the SimProcedure is not guessed
|
|
329
|
+
return cc, hooker.prototype
|
|
330
|
+
if real_func.prototype is not None:
|
|
331
|
+
return cc, real_func.prototype
|
|
309
332
|
else:
|
|
310
|
-
return
|
|
333
|
+
return cc, real_func.prototype
|
|
311
334
|
|
|
312
335
|
if self.analyze_callsites:
|
|
313
336
|
# determine the calling convention by analyzing its callsites
|
|
314
337
|
callsite_facts = self._extract_and_analyze_callsites(max_analyzing_callsites=1)
|
|
315
338
|
cc_cls = default_cc(self.project.arch.name)
|
|
316
|
-
|
|
339
|
+
if cc_cls is None:
|
|
340
|
+
# can't determine the default calling convention for this architecture
|
|
341
|
+
return None
|
|
342
|
+
cc = cc_cls(self.project.arch)
|
|
317
343
|
prototype = SimTypeFunction([], None)
|
|
318
344
|
prototype = self._adjust_prototype(
|
|
319
345
|
prototype, callsite_facts, update_arguments=UpdateArgumentsOption.AlwaysUpdate
|
|
@@ -342,7 +368,7 @@ class CallingConventionAnalysis(Analysis):
|
|
|
342
368
|
input_variables = vm.input_variables()
|
|
343
369
|
input_args = self._args_from_vars(input_variables, vm)
|
|
344
370
|
else:
|
|
345
|
-
input_args = self._input_args
|
|
371
|
+
input_args = set(self._input_args)
|
|
346
372
|
retval_size = self._retval_size
|
|
347
373
|
|
|
348
374
|
# check if this function is a variadic function
|
|
@@ -355,8 +381,14 @@ class CallingConventionAnalysis(Analysis):
|
|
|
355
381
|
# TODO: properly determine sp_delta
|
|
356
382
|
sp_delta = self.project.arch.bytes if self.project.arch.call_pushes_ret else 0
|
|
357
383
|
|
|
358
|
-
|
|
359
|
-
|
|
384
|
+
full_input_args = self._consolidate_input_args(input_args)
|
|
385
|
+
full_input_args_copy = list(full_input_args) # input_args might be modified by find_cc()
|
|
386
|
+
cc = SimCC.find_cc(self.project.arch, full_input_args_copy, sp_delta, platform=self.project.simos.name)
|
|
387
|
+
|
|
388
|
+
# update input_args according to the difference between full_input_args and full_input_args_copy
|
|
389
|
+
for a in full_input_args:
|
|
390
|
+
if a not in full_input_args_copy and a in input_args:
|
|
391
|
+
input_args.remove(a)
|
|
360
392
|
|
|
361
393
|
if cc is None:
|
|
362
394
|
l.warning(
|
|
@@ -657,12 +689,6 @@ class CallingConventionAnalysis(Analysis):
|
|
|
657
689
|
else:
|
|
658
690
|
break
|
|
659
691
|
|
|
660
|
-
if None in temp_args:
|
|
661
|
-
first_none_idx = temp_args.index(None)
|
|
662
|
-
# test if there is at least one argument set after None; if so, we ignore the first None
|
|
663
|
-
if any(arg is not None for arg in temp_args[first_none_idx:]):
|
|
664
|
-
temp_args[first_none_idx] = expected_args[first_none_idx]
|
|
665
|
-
|
|
666
692
|
if None in temp_args:
|
|
667
693
|
# we be very conservative here and ignore all arguments starting from the first missing one
|
|
668
694
|
first_none_idx = temp_args.index(None)
|
|
@@ -681,17 +707,18 @@ class CallingConventionAnalysis(Analysis):
|
|
|
681
707
|
if all(fact.return_value_used is False for fact in facts):
|
|
682
708
|
proto.returnty = SimTypeBottom(label="void")
|
|
683
709
|
else:
|
|
684
|
-
proto.returnty
|
|
710
|
+
if proto.returnty is None or isinstance(proto.returnty, SimTypeBottom):
|
|
711
|
+
proto.returnty = SimTypeInt().with_arch(self.project.arch)
|
|
685
712
|
|
|
686
713
|
if (
|
|
687
714
|
update_arguments == UpdateArgumentsOption.AlwaysUpdate
|
|
688
715
|
or (update_arguments == UpdateArgumentsOption.UpdateWhenCCHasNoArgs and not proto.args)
|
|
689
716
|
) and len({len(fact.args) for fact in facts}) == 1:
|
|
690
717
|
fact = next(iter(facts))
|
|
691
|
-
proto.args =
|
|
718
|
+
proto.args = tuple(
|
|
692
719
|
self._guess_arg_type(arg) if arg is not None else SimTypeInt().with_arch(self.project.arch)
|
|
693
720
|
for arg in fact.args
|
|
694
|
-
|
|
721
|
+
)
|
|
695
722
|
|
|
696
723
|
return proto
|
|
697
724
|
|
|
@@ -730,13 +757,8 @@ class CallingConventionAnalysis(Analysis):
|
|
|
730
757
|
# a register variable, convert it to a register argument
|
|
731
758
|
if not is_sane_register_variable(self.project.arch, variable.reg, variable.size, def_cc=def_cc):
|
|
732
759
|
continue
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
reg_name = self.project.arch.translate_register_name(variable.reg, size=self.project.arch.bytes)
|
|
736
|
-
arg = SimRegArg(reg_name, self.project.arch.bytes)
|
|
737
|
-
else:
|
|
738
|
-
reg_name = self.project.arch.translate_register_name(variable.reg, size=variable.size)
|
|
739
|
-
arg = SimRegArg(reg_name, variable.size)
|
|
760
|
+
reg_name = self.project.arch.translate_register_name(variable.reg, size=variable.size)
|
|
761
|
+
arg = SimRegArg(reg_name, variable.size)
|
|
740
762
|
args.add(arg)
|
|
741
763
|
|
|
742
764
|
accesses = var_manager.get_variable_accesses(variable)
|
|
@@ -778,15 +800,58 @@ class CallingConventionAnalysis(Analysis):
|
|
|
778
800
|
|
|
779
801
|
return args.difference(restored_reg_vars)
|
|
780
802
|
|
|
781
|
-
def
|
|
803
|
+
def _consolidate_input_args(self, input_args: set[SimRegArg | SimStackArg]) -> set[SimRegArg | SimStackArg]:
|
|
804
|
+
"""
|
|
805
|
+
Consolidate register arguments by converting partial registers to full registers on certain architectures.
|
|
806
|
+
|
|
807
|
+
:param input_args: A set of input arguments.
|
|
808
|
+
:return: A set of consolidated input args.
|
|
809
|
+
"""
|
|
810
|
+
|
|
811
|
+
if self.project.arch.name in {"AMD64", "X86"}:
|
|
812
|
+
new_input_args = set()
|
|
813
|
+
for a in input_args:
|
|
814
|
+
if isinstance(a, SimRegArg) and a.size < self.project.arch.bytes:
|
|
815
|
+
# use complete registers on AMD64 and X86
|
|
816
|
+
reg_offset, reg_size = self.project.arch.registers[a.reg_name]
|
|
817
|
+
full_reg_offset, full_reg_size = get_reg_offset_base_and_size(
|
|
818
|
+
reg_offset, self.project.arch, size=reg_size
|
|
819
|
+
)
|
|
820
|
+
full_reg_name = self.project.arch.translate_register_name(full_reg_offset, size=full_reg_size)
|
|
821
|
+
arg = SimRegArg(full_reg_name, full_reg_size)
|
|
822
|
+
if arg not in new_input_args:
|
|
823
|
+
new_input_args.add(arg)
|
|
824
|
+
else:
|
|
825
|
+
new_input_args.add(a)
|
|
826
|
+
return new_input_args
|
|
827
|
+
|
|
828
|
+
return input_args
|
|
829
|
+
|
|
830
|
+
def _reorder_args(self, args: set[SimRegArg | SimStackArg], cc: SimCC) -> list[SimRegArg | SimStackArg]:
|
|
782
831
|
"""
|
|
783
832
|
Reorder arguments according to the calling convention identified.
|
|
784
833
|
|
|
785
|
-
:param args: A
|
|
834
|
+
:param args: A set of arguments that haven't been ordered.
|
|
786
835
|
:param cc: The identified calling convention.
|
|
787
836
|
:return: A reordered list of args.
|
|
788
837
|
"""
|
|
789
838
|
|
|
839
|
+
def _is_same_reg(rn0: str, rn1: str) -> bool:
|
|
840
|
+
"""
|
|
841
|
+
Check if rn0 and rn1 belong to the same base register.
|
|
842
|
+
|
|
843
|
+
:param rn0: Register name of the first register.
|
|
844
|
+
:param rn1: Register name of the second register.
|
|
845
|
+
:return: True if they belong to the same base register; False otherwise.
|
|
846
|
+
"""
|
|
847
|
+
if rn0 == rn1:
|
|
848
|
+
return True
|
|
849
|
+
off0, sz0 = self.project.arch.registers[rn0]
|
|
850
|
+
full_off0 = get_reg_offset_base(off0, self.project.arch, sz0)
|
|
851
|
+
off1, sz1 = self.project.arch.registers[rn1]
|
|
852
|
+
full_off1 = get_reg_offset_base(off1, self.project.arch, sz1)
|
|
853
|
+
return full_off0 == full_off1
|
|
854
|
+
|
|
790
855
|
reg_args = []
|
|
791
856
|
|
|
792
857
|
# split args into two lists
|
|
@@ -805,7 +870,7 @@ class CallingConventionAnalysis(Analysis):
|
|
|
805
870
|
# match int args first
|
|
806
871
|
for reg_name in cc.ARG_REGS:
|
|
807
872
|
try:
|
|
808
|
-
arg = next(iter(a for a in int_args if isinstance(a, SimRegArg) and a.reg_name
|
|
873
|
+
arg = next(iter(a for a in int_args if isinstance(a, SimRegArg) and _is_same_reg(a.reg_name, reg_name)))
|
|
809
874
|
except StopIteration:
|
|
810
875
|
# have we reached the end of the args list?
|
|
811
876
|
if [a for a in int_args if isinstance(a, SimRegArg)] or len(stack_int_args) > 0:
|
|
@@ -821,7 +886,9 @@ class CallingConventionAnalysis(Analysis):
|
|
|
821
886
|
if fp_args:
|
|
822
887
|
for reg_name in cc.FP_ARG_REGS:
|
|
823
888
|
try:
|
|
824
|
-
arg = next(
|
|
889
|
+
arg = next(
|
|
890
|
+
iter(a for a in fp_args if isinstance(a, SimRegArg) and _is_same_reg(a.reg_name, reg_name))
|
|
891
|
+
)
|
|
825
892
|
except StopIteration:
|
|
826
893
|
# have we reached the end of the args list?
|
|
827
894
|
if [a for a in fp_args if isinstance(a, SimRegArg)] or len(stack_fp_args) > 0:
|
|
@@ -886,12 +953,15 @@ class CallingConventionAnalysis(Analysis):
|
|
|
886
953
|
if 5 <= ret_val_size <= 8:
|
|
887
954
|
return SimTypeLongLong()
|
|
888
955
|
|
|
889
|
-
|
|
890
|
-
return SimTypeInt() if cc.arch.bits == 32 else SimTypeLongLong()
|
|
956
|
+
return SimTypeBottom(label="void")
|
|
891
957
|
|
|
892
958
|
@staticmethod
|
|
893
959
|
def _likely_saving_temp_reg(ail_block: ailment.Block, d: Definition, all_reg_defs: set[Definition]) -> bool:
|
|
894
|
-
if
|
|
960
|
+
if (
|
|
961
|
+
d.codeloc.block_addr == ail_block.addr
|
|
962
|
+
and d.codeloc.stmt_idx is not None
|
|
963
|
+
and d.codeloc.stmt_idx < len(ail_block.statements)
|
|
964
|
+
):
|
|
895
965
|
stmt = ail_block.statements[d.codeloc.stmt_idx]
|
|
896
966
|
if isinstance(stmt, ailment.Stmt.Assignment) and isinstance(stmt.src, ailment.Expr.Register):
|
|
897
967
|
src_offset = stmt.src.reg_offset
|
|
@@ -90,7 +90,7 @@ binop_handler = SimEngineNostmtVEX[FactCollectorState, claripy.ast.BV, FactColle
|
|
|
90
90
|
|
|
91
91
|
class SimEngineFactCollectorVEX(
|
|
92
92
|
SimEngineNostmtVEX[FactCollectorState, SpOffset | RegisterOffset | int, None],
|
|
93
|
-
SimEngineLight[
|
|
93
|
+
SimEngineLight[FactCollectorState, SpOffset | RegisterOffset | int, Block, None],
|
|
94
94
|
):
|
|
95
95
|
"""
|
|
96
96
|
THe engine for FactCollector.
|
|
@@ -101,7 +101,7 @@ class SimEngineFactCollectorVEX(
|
|
|
101
101
|
super().__init__(project)
|
|
102
102
|
|
|
103
103
|
def _process_block_end(self, stmt_result: list, whitelist: set[int] | None) -> None:
|
|
104
|
-
if self.block.vex.jumpkind == "Ijk_Call":
|
|
104
|
+
if self.block.vex.jumpkind == "Ijk_Call" and self.arch.ret_offset is not None:
|
|
105
105
|
self.state.register_written(self.arch.ret_offset, self.arch.bytes)
|
|
106
106
|
|
|
107
107
|
def _top(self, bits: int):
|
|
@@ -110,7 +110,7 @@ class SimEngineFactCollectorVEX(
|
|
|
110
110
|
def _is_top(self, expr: Any) -> bool:
|
|
111
111
|
raise NotImplementedError
|
|
112
112
|
|
|
113
|
-
def _handle_conversion(self, from_size: int, to_size: int, signed: bool, operand: pyvex.IRExpr) -> Any:
|
|
113
|
+
def _handle_conversion(self, from_size: int, to_size: int, signed: bool, operand: pyvex.expr.IRExpr) -> Any:
|
|
114
114
|
return None
|
|
115
115
|
|
|
116
116
|
def _handle_stmt_Put(self, stmt):
|
|
@@ -142,9 +142,9 @@ class SimEngineFactCollectorVEX(
|
|
|
142
142
|
return expr.con.value
|
|
143
143
|
|
|
144
144
|
def _handle_expr_GSPTR(self, expr):
|
|
145
|
-
return
|
|
145
|
+
return 0
|
|
146
146
|
|
|
147
|
-
def _handle_expr_Get(self, expr) -> SpOffset |
|
|
147
|
+
def _handle_expr_Get(self, expr) -> SpOffset | RegisterOffset:
|
|
148
148
|
if expr.offset == self.arch.sp_offset:
|
|
149
149
|
return SpOffset(self.arch.bits, self.state.sp_value, is_base=False)
|
|
150
150
|
if expr.offset == self.arch.bp_offset and not self.bp_as_gpr:
|
|
@@ -304,7 +304,10 @@ class FactCollector(Analysis):
|
|
|
304
304
|
|
|
305
305
|
def _handle_function(self, state: FactCollectorState, func: Function) -> None:
|
|
306
306
|
try:
|
|
307
|
-
|
|
307
|
+
if func.calling_convention is not None and func.prototype is not None:
|
|
308
|
+
arg_locs = func.calling_convention.arg_locs(func.prototype)
|
|
309
|
+
else:
|
|
310
|
+
return
|
|
308
311
|
except (TypeError, ValueError):
|
|
309
312
|
return
|
|
310
313
|
|
|
@@ -355,6 +358,7 @@ class FactCollector(Analysis):
|
|
|
355
358
|
|
|
356
359
|
if isinstance(node, BlockNode) and node.size == 0:
|
|
357
360
|
continue
|
|
361
|
+
|
|
358
362
|
if isinstance(node, HookNode):
|
|
359
363
|
# attempt to convert it into a function
|
|
360
364
|
if self.kb.functions.contains_addr(node.addr):
|
|
@@ -369,17 +373,43 @@ class FactCollector(Analysis):
|
|
|
369
373
|
and not isinstance(node.prototype.returnty, SimTypeBottom)
|
|
370
374
|
):
|
|
371
375
|
# assume the function overwrites the return variable
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
376
|
+
returnty_size = node.prototype.returnty.with_arch(self.project.arch).size
|
|
377
|
+
assert returnty_size is not None
|
|
378
|
+
retval_size = returnty_size // self.project.arch.byte_width
|
|
375
379
|
retval_sizes.append(retval_size)
|
|
376
380
|
continue
|
|
377
381
|
|
|
382
|
+
# if this block ends with a call to a function, we process the function first
|
|
383
|
+
func_succs = [
|
|
384
|
+
succ
|
|
385
|
+
for succ in func_graph.successors(node)
|
|
386
|
+
if isinstance(succ, (Function, HookNode)) or self.kb.functions.contains_addr(succ.addr)
|
|
387
|
+
]
|
|
388
|
+
if len(func_succs) == 1:
|
|
389
|
+
func_succ = func_succs[0]
|
|
390
|
+
if isinstance(func_succ, (BlockNode, HookNode)) and self.kb.functions.contains_addr(func_succ.addr):
|
|
391
|
+
# attempt to convert it into a function
|
|
392
|
+
func_succ = self.kb.functions.get_by_addr(func_succ.addr)
|
|
393
|
+
if isinstance(func_succ, Function):
|
|
394
|
+
if (
|
|
395
|
+
func_succ.calling_convention is not None
|
|
396
|
+
and func_succ.prototype is not None
|
|
397
|
+
and func_succ.prototype.returnty is not None
|
|
398
|
+
and not isinstance(func_succ.prototype.returnty, SimTypeBottom)
|
|
399
|
+
):
|
|
400
|
+
# assume the function overwrites the return variable
|
|
401
|
+
returnty_size = func_succ.prototype.returnty.with_arch(self.project.arch).size
|
|
402
|
+
assert returnty_size is not None
|
|
403
|
+
retval_size = returnty_size // self.project.arch.byte_width
|
|
404
|
+
retval_sizes.append(retval_size)
|
|
405
|
+
continue
|
|
406
|
+
|
|
378
407
|
block = self.project.factory.block(node.addr, size=node.size)
|
|
379
408
|
# scan the block statements backwards to find writes to the return value register
|
|
380
409
|
retval_size = None
|
|
381
410
|
for stmt in reversed(block.vex.statements):
|
|
382
411
|
if isinstance(stmt, pyvex.IRStmt.Put):
|
|
412
|
+
assert block.vex.tyenv is not None
|
|
383
413
|
size = stmt.data.result_size(block.vex.tyenv) // self.project.arch.byte_width
|
|
384
414
|
if stmt.offset == retreg_offset:
|
|
385
415
|
retval_size = max(size, 1)
|
|
@@ -391,9 +421,9 @@ class FactCollector(Analysis):
|
|
|
391
421
|
for pred, _, data in func_graph.in_edges(node, data=True):
|
|
392
422
|
edge_type = data.get("type")
|
|
393
423
|
if pred not in traversed and depth + 1 <= self._max_depth:
|
|
394
|
-
if edge_type == "
|
|
424
|
+
if edge_type == "call":
|
|
395
425
|
continue
|
|
396
|
-
if edge_type in {"transition", "
|
|
426
|
+
if edge_type in {"transition", "fake_return"}:
|
|
397
427
|
queue.append((depth + 1, pred))
|
|
398
428
|
|
|
399
429
|
self.retval_size = max(retval_sizes) if retval_sizes else None
|
|
@@ -472,6 +502,7 @@ class FactCollector(Analysis):
|
|
|
472
502
|
):
|
|
473
503
|
tmps[stmt.tmp] = "sp"
|
|
474
504
|
if isinstance(stmt, pyvex.IRStmt.Put):
|
|
505
|
+
assert block.vex.tyenv is not None
|
|
475
506
|
size = stmt.data.result_size(block.vex.tyenv) // self.project.arch.byte_width
|
|
476
507
|
# is the data loaded from the stack?
|
|
477
508
|
if (
|
|
@@ -532,13 +563,8 @@ class FactCollector(Analysis):
|
|
|
532
563
|
):
|
|
533
564
|
continue
|
|
534
565
|
reg_offset_created.add(offset)
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
reg_name = self.project.arch.translate_register_name(offset, size=self.project.arch.bytes)
|
|
538
|
-
arg = SimRegArg(reg_name, self.project.arch.bytes)
|
|
539
|
-
else:
|
|
540
|
-
reg_name = self.project.arch.translate_register_name(offset, size=size)
|
|
541
|
-
arg = SimRegArg(reg_name, size)
|
|
566
|
+
reg_name = self.project.arch.translate_register_name(offset, size=size)
|
|
567
|
+
arg = SimRegArg(reg_name, size)
|
|
542
568
|
self.input_args.append(arg)
|
|
543
569
|
|
|
544
570
|
stack_offset_created = set()
|
|
@@ -9,7 +9,9 @@ from angr.calling_conventions import SimCC
|
|
|
9
9
|
l = logging.getLogger(__name__)
|
|
10
10
|
|
|
11
11
|
|
|
12
|
-
def is_sane_register_variable(
|
|
12
|
+
def is_sane_register_variable(
|
|
13
|
+
arch: archinfo.Arch, reg_offset: int, reg_size: int, def_cc: SimCC | type[SimCC] | None = None
|
|
14
|
+
) -> bool:
|
|
13
15
|
"""
|
|
14
16
|
Filters all registers that are surly not members of function arguments.
|
|
15
17
|
This can be seen as a workaround, since VariableRecoveryFast sometimes gives input variables of cc_ndep (which
|
angr/analyses/cfg/cfg_base.py
CHANGED
|
@@ -11,7 +11,7 @@ import pyvex
|
|
|
11
11
|
from cle import ELF, PE, Blob, TLSObject, MachO, ExternObject, KernelObject, FunctionHintSource, Hex, Coff, SRec, XBE
|
|
12
12
|
from cle.backends import NamedRegion
|
|
13
13
|
import archinfo
|
|
14
|
-
from archinfo.arch_soot import SootAddressDescriptor
|
|
14
|
+
from archinfo.arch_soot import SootAddressDescriptor, SootMethodDescriptor
|
|
15
15
|
from archinfo.arch_arm import is_arm_arch, get_real_address_if_arm
|
|
16
16
|
|
|
17
17
|
from angr.knowledge_plugins.functions.function_manager import FunctionManager
|
|
@@ -129,7 +129,7 @@ class CFGBase(Analysis):
|
|
|
129
129
|
|
|
130
130
|
# Store all the functions analyzed before the set is cleared
|
|
131
131
|
# Used for performance optimization
|
|
132
|
-
self._updated_nonreturning_functions: set[int] | None = None
|
|
132
|
+
self._updated_nonreturning_functions: set[int | SootMethodDescriptor] | None = None
|
|
133
133
|
|
|
134
134
|
self._normalize = normalize
|
|
135
135
|
|
|
@@ -246,7 +246,7 @@ class CFGBase(Analysis):
|
|
|
246
246
|
)
|
|
247
247
|
|
|
248
248
|
self._regions_size = sum((end - start) for start, end in regions)
|
|
249
|
-
self._regions:
|
|
249
|
+
self._regions: SortedDict = SortedDict(regions)
|
|
250
250
|
|
|
251
251
|
l.debug("CFG recovery covers %d regions:", len(self._regions))
|
|
252
252
|
for start, end in self._regions.items():
|
|
@@ -1556,6 +1556,7 @@ class CFGBase(Analysis):
|
|
|
1556
1556
|
self.kb.functions[func_addr].alignment = True
|
|
1557
1557
|
continue
|
|
1558
1558
|
node = function.get_node(block.addr)
|
|
1559
|
+
assert node is not None
|
|
1559
1560
|
successors = list(function.graph.successors(node))
|
|
1560
1561
|
if len(successors) == 1 and successors[0].addr == node.addr:
|
|
1561
1562
|
# self loop. mark this function as a function alignment
|
|
@@ -2151,6 +2152,11 @@ class CFGBase(Analysis):
|
|
|
2151
2152
|
f = self.kb.functions.function(addr=addr)
|
|
2152
2153
|
assert f is not None
|
|
2153
2154
|
|
|
2155
|
+
# copy over existing metadata
|
|
2156
|
+
if known_functions.contains_addr(addr):
|
|
2157
|
+
kf = known_functions.get_by_addr(addr)
|
|
2158
|
+
f.is_plt = kf.is_plt
|
|
2159
|
+
|
|
2154
2160
|
blockaddr_to_function[addr] = f
|
|
2155
2161
|
|
|
2156
2162
|
function_is_returning = False
|
|
@@ -2532,6 +2538,34 @@ class CFGBase(Analysis):
|
|
|
2532
2538
|
# Other functions
|
|
2533
2539
|
#
|
|
2534
2540
|
|
|
2541
|
+
@staticmethod
|
|
2542
|
+
def _is_noop_jump_block(block) -> bool:
|
|
2543
|
+
"""
|
|
2544
|
+
Check if the block does nothing but jumping to a constant address.
|
|
2545
|
+
|
|
2546
|
+
:param block: The block instance. We assume the block is already optimized.
|
|
2547
|
+
:return: True if the entire block is a jump to a constant address, False otherwise.
|
|
2548
|
+
"""
|
|
2549
|
+
|
|
2550
|
+
vex = block.vex
|
|
2551
|
+
if vex.jumpkind != "Ijk_Boring":
|
|
2552
|
+
return False
|
|
2553
|
+
if isinstance(vex.next, pyvex.expr.Const):
|
|
2554
|
+
return all(isinstance(stmt, pyvex.stmt.IMark) for stmt in vex.statements)
|
|
2555
|
+
if isinstance(vex.next, pyvex.expr.RdTmp):
|
|
2556
|
+
next_tmp = vex.next.tmp
|
|
2557
|
+
return all(
|
|
2558
|
+
isinstance(stmt, pyvex.stmt.IMark)
|
|
2559
|
+
or (
|
|
2560
|
+
isinstance(stmt, pyvex.stmt.WrTmp)
|
|
2561
|
+
and stmt.tmp == next_tmp
|
|
2562
|
+
and isinstance(stmt.data, pyvex.expr.Load)
|
|
2563
|
+
and isinstance(stmt.data.addr, pyvex.expr.Const)
|
|
2564
|
+
)
|
|
2565
|
+
for stmt in vex.statements
|
|
2566
|
+
)
|
|
2567
|
+
return False
|
|
2568
|
+
|
|
2535
2569
|
@staticmethod
|
|
2536
2570
|
def _is_noop_block(arch: archinfo.Arch, block) -> bool:
|
|
2537
2571
|
"""
|
|
@@ -2755,7 +2789,7 @@ class CFGBase(Analysis):
|
|
|
2755
2789
|
cfg_node: CFGNode,
|
|
2756
2790
|
irsb: pyvex.IRSB,
|
|
2757
2791
|
func_addr: int,
|
|
2758
|
-
stmt_idx: int
|
|
2792
|
+
stmt_idx: int = DEFAULT_STATEMENT,
|
|
2759
2793
|
) -> tuple[bool, set[int], IndirectJump | None]:
|
|
2760
2794
|
"""
|
|
2761
2795
|
Called when we encounter an indirect jump. We will try to resolve this indirect jump using timeless (fast)
|
angr/analyses/cfg/cfg_fast.py
CHANGED
|
@@ -1782,7 +1782,7 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
|
|
|
1782
1782
|
self.project.loader.discard_ro_memview()
|
|
1783
1783
|
|
|
1784
1784
|
# Clean up
|
|
1785
|
-
self._traced_addresses = None
|
|
1785
|
+
self._traced_addresses = None # type: ignore
|
|
1786
1786
|
self._lifter_deregister_readonly_regions()
|
|
1787
1787
|
self._function_returns = None
|
|
1788
1788
|
|
|
@@ -1838,6 +1838,7 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
|
|
|
1838
1838
|
xrefs = self.kb.xrefs.get_xrefs_by_dst(security_cookie_addr)
|
|
1839
1839
|
tested_func_addrs = set()
|
|
1840
1840
|
for xref in xrefs:
|
|
1841
|
+
assert xref.block_addr is not None
|
|
1841
1842
|
cfg_node = self.model.get_any_node(xref.block_addr)
|
|
1842
1843
|
if cfg_node is None:
|
|
1843
1844
|
continue
|
|
@@ -2081,13 +2082,20 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
|
|
|
2081
2082
|
|
|
2082
2083
|
if (
|
|
2083
2084
|
cfg_job.src_node is not None
|
|
2084
|
-
and self.functions.contains_addr(cfg_job.src_node.addr)
|
|
2085
|
-
and self.functions[cfg_job.src_node.addr].is_default_name
|
|
2086
2085
|
and cfg_job.src_node.addr not in self.kb.labels
|
|
2087
2086
|
and cfg_job.jumpkind == "Ijk_Boring"
|
|
2087
|
+
and self._is_noop_jump_block(cfg_job.src_node.block)
|
|
2088
2088
|
):
|
|
2089
|
-
#
|
|
2090
|
-
self.functions
|
|
2089
|
+
# the caller node is very likely to be a PLT stub
|
|
2090
|
+
if not self.functions.contains_addr(cfg_job.src_node.addr):
|
|
2091
|
+
src_func = self.functions.function(addr=cfg_job.src_node.addr, create=True)
|
|
2092
|
+
else:
|
|
2093
|
+
src_func = self.functions.get_by_addr(cfg_job.src_node.addr)
|
|
2094
|
+
if len(src_func.block_addrs_set) <= 1 and src_func.is_default_name:
|
|
2095
|
+
# assign a name to the caller function that jumps to this procedure
|
|
2096
|
+
src_func.name = procedure.display_name
|
|
2097
|
+
# mark it as PLT
|
|
2098
|
+
src_func.is_plt = True
|
|
2091
2099
|
|
|
2092
2100
|
if procedure.ADDS_EXITS:
|
|
2093
2101
|
# Get two blocks ahead
|
|
@@ -3714,7 +3722,12 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
|
|
|
3714
3722
|
#
|
|
3715
3723
|
|
|
3716
3724
|
def _graph_add_edge(
|
|
3717
|
-
self,
|
|
3725
|
+
self,
|
|
3726
|
+
cfg_node: CFGNode,
|
|
3727
|
+
src_node: CFGNode | None,
|
|
3728
|
+
src_jumpkind: str,
|
|
3729
|
+
src_ins_addr: int | None,
|
|
3730
|
+
src_stmt_idx: int | None,
|
|
3718
3731
|
):
|
|
3719
3732
|
"""
|
|
3720
3733
|
Add edge between nodes, or add node if entry point
|
|
@@ -4584,6 +4597,7 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
|
|
|
4584
4597
|
elif (
|
|
4585
4598
|
lifted_block is not None
|
|
4586
4599
|
and is_x86_x64_arch
|
|
4600
|
+
and lifted_block.bytes is not None
|
|
4587
4601
|
and len(lifted_block.bytes) - irsb_size > 2
|
|
4588
4602
|
and lifted_block.bytes[irsb_size : irsb_size + 2]
|
|
4589
4603
|
in {
|
|
@@ -4659,7 +4673,7 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
|
|
|
4659
4673
|
self._seg_list.occupy(real_addr + irsb_size, nodecode_size, "nodecode")
|
|
4660
4674
|
|
|
4661
4675
|
# Occupy the block in segment list
|
|
4662
|
-
if irsb.size > 0:
|
|
4676
|
+
if irsb is not None and irsb.size > 0:
|
|
4663
4677
|
self._seg_list.occupy(real_addr, irsb.size, "code")
|
|
4664
4678
|
|
|
4665
4679
|
# Create a CFG node, and add it to the graph
|
|
@@ -4969,6 +4983,7 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
|
|
|
4969
4983
|
|
|
4970
4984
|
for assumption_addr in to_remove:
|
|
4971
4985
|
# remove this assumption from the graph (since we may have new relationships formed later)
|
|
4986
|
+
assert self._decoding_assumption_relations is not None
|
|
4972
4987
|
if assumption_addr in self._decoding_assumption_relations:
|
|
4973
4988
|
self._decoding_assumption_relations.remove_node(assumption_addr)
|
|
4974
4989
|
|
|
@@ -5159,6 +5174,7 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
|
|
|
5159
5174
|
target_func = edges[0][1]
|
|
5160
5175
|
if isinstance(target_func, (HookNode, Function)) and self.project.is_hooked(target_func.addr):
|
|
5161
5176
|
hooker = self.project.hooked_by(target_func.addr)
|
|
5177
|
+
assert hooker is not None
|
|
5162
5178
|
if hooker.DYNAMIC_RET:
|
|
5163
5179
|
return self._is_call_returning(callsite_cfgnode, target_func.addr)
|
|
5164
5180
|
|
|
@@ -182,23 +182,28 @@ class ConstantValueManager:
|
|
|
182
182
|
|
|
183
183
|
# determine blocks to run FCP on
|
|
184
184
|
|
|
185
|
-
# - include at most three levels of successors from the entrypoint
|
|
185
|
+
# - include at most three levels of superblock successors from the entrypoint
|
|
186
|
+
self.mapping = {}
|
|
186
187
|
startpoint = self.func.startpoint
|
|
188
|
+
if startpoint is None:
|
|
189
|
+
return
|
|
190
|
+
|
|
187
191
|
blocks = set()
|
|
188
|
-
|
|
189
|
-
|
|
192
|
+
succ_and_levels = [(startpoint, 0)]
|
|
193
|
+
while succ_and_levels:
|
|
190
194
|
new_succs = []
|
|
191
|
-
for node in
|
|
195
|
+
for node, level in succ_and_levels:
|
|
192
196
|
if node in blocks:
|
|
193
197
|
continue
|
|
194
198
|
blocks.add(node)
|
|
195
199
|
if node.addr == self.indirect_jump_addr:
|
|
196
200
|
# stop at the indirect jump block
|
|
197
201
|
continue
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
+
for _, succ, data in self.func.graph.out_edges(node, data=True):
|
|
203
|
+
new_level = level if data.get("type") == "fake_return" else level + 1
|
|
204
|
+
if new_level <= 3:
|
|
205
|
+
new_succs.append((succ, new_level))
|
|
206
|
+
succ_and_levels = new_succs
|
|
202
207
|
|
|
203
208
|
# - include at most six levels of predecessors from the indirect jump block
|
|
204
209
|
ij_block = self.func.get_node(self.indirect_jump_addr)
|