angr 9.2.141__py3-none-win_amd64.whl → 9.2.142__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (59)
  1. angr/__init__.py +1 -1
  2. angr/analyses/calling_convention/calling_convention.py +17 -3
  3. angr/analyses/cfg/cfg_base.py +38 -4
  4. angr/analyses/cfg/cfg_fast.py +23 -7
  5. angr/analyses/cfg/indirect_jump_resolvers/jumptable.py +4 -0
  6. angr/analyses/class_identifier.py +8 -7
  7. angr/analyses/complete_calling_conventions.py +1 -1
  8. angr/analyses/decompiler/ail_simplifier.py +61 -46
  9. angr/analyses/decompiler/clinic.py +73 -5
  10. angr/analyses/decompiler/condition_processor.py +7 -7
  11. angr/analyses/decompiler/decompilation_cache.py +2 -1
  12. angr/analyses/decompiler/decompiler.py +10 -2
  13. angr/analyses/decompiler/dephication/graph_vvar_mapping.py +4 -6
  14. angr/analyses/decompiler/optimization_passes/base_ptr_save_simplifier.py +8 -2
  15. angr/analyses/decompiler/optimization_passes/condition_constprop.py +63 -34
  16. angr/analyses/decompiler/optimization_passes/lowered_switch_simplifier.py +1 -1
  17. angr/analyses/decompiler/optimization_passes/optimization_pass.py +2 -0
  18. angr/analyses/decompiler/optimization_passes/register_save_area_simplifier.py +29 -7
  19. angr/analyses/decompiler/optimization_passes/stack_canary_simplifier.py +6 -0
  20. angr/analyses/decompiler/optimization_passes/win_stack_canary_simplifier.py +9 -1
  21. angr/analyses/decompiler/region_identifier.py +70 -47
  22. angr/analyses/decompiler/ssailification/rewriting.py +47 -17
  23. angr/analyses/decompiler/ssailification/rewriting_engine.py +13 -0
  24. angr/analyses/decompiler/stack_item.py +36 -0
  25. angr/analyses/decompiler/structured_codegen/c.py +14 -9
  26. angr/analyses/decompiler/structuring/phoenix.py +3 -3
  27. angr/analyses/find_objects_static.py +2 -1
  28. angr/analyses/reaching_definitions/engine_vex.py +13 -0
  29. angr/analyses/reaching_definitions/function_handler.py +24 -10
  30. angr/analyses/reaching_definitions/function_handler_library/stdio.py +1 -0
  31. angr/analyses/reaching_definitions/function_handler_library/stdlib.py +45 -12
  32. angr/analyses/reaching_definitions/function_handler_library/string.py +77 -21
  33. angr/analyses/reaching_definitions/function_handler_library/unistd.py +21 -1
  34. angr/analyses/reaching_definitions/rd_state.py +11 -7
  35. angr/analyses/s_liveness.py +44 -6
  36. angr/analyses/s_reaching_definitions/s_rda_model.py +4 -2
  37. angr/analyses/typehoon/simple_solver.py +35 -8
  38. angr/analyses/typehoon/typehoon.py +3 -1
  39. angr/calling_conventions.py +2 -2
  40. angr/knowledge_plugins/functions/function.py +5 -10
  41. angr/knowledge_plugins/variables/variable_manager.py +27 -0
  42. angr/lib/angr_native.dll +0 -0
  43. angr/procedures/definitions/__init__.py +3 -10
  44. angr/procedures/definitions/wdk_ntoskrnl.py +2 -0
  45. angr/procedures/win32_kernel/__fastfail.py +15 -0
  46. angr/sim_procedure.py +2 -2
  47. angr/simos/simos.py +14 -10
  48. angr/simos/windows.py +42 -1
  49. angr/utils/ail.py +41 -1
  50. angr/utils/cpp.py +17 -0
  51. angr/utils/doms.py +142 -0
  52. angr/utils/library.py +1 -1
  53. angr/utils/types.py +12 -1
  54. {angr-9.2.141.dist-info → angr-9.2.142.dist-info}/METADATA +7 -7
  55. {angr-9.2.141.dist-info → angr-9.2.142.dist-info}/RECORD +59 -55
  56. {angr-9.2.141.dist-info → angr-9.2.142.dist-info}/LICENSE +0 -0
  57. {angr-9.2.141.dist-info → angr-9.2.142.dist-info}/WHEEL +0 -0
  58. {angr-9.2.141.dist-info → angr-9.2.142.dist-info}/entry_points.txt +0 -0
  59. {angr-9.2.141.dist-info → angr-9.2.142.dist-info}/top_level.txt +0 -0
@@ -7,7 +7,7 @@ import claripy
7
7
  from angr.analyses.reaching_definitions.function_handler import FunctionCallDataUnwrapped, FunctionHandler
8
8
  from angr.knowledge_plugins.key_definitions.atoms import Atom
9
9
  from angr.knowledge_plugins.key_definitions.live_definitions import DerefSize
10
-
10
+ from angr.knowledge_plugins.key_definitions.definition import Definition
11
11
 
12
12
  if TYPE_CHECKING:
13
13
  from angr.analyses.reaching_definitions.rd_state import ReachingDefinitionsState
@@ -75,7 +75,7 @@ class LibcStdlibHandlers(FunctionHandler):
75
75
  @FunctionCallDataUnwrapped.decorate
76
76
  def handle_impl_calloc(self, state: ReachingDefinitionsState, data: FunctionCallDataUnwrapped):
77
77
  nmemb = state.get_concrete_value(data.args_atoms[0]) or 48
78
- size = state.get_concrete_value(data.args_atoms[0]) or 1
78
+ size = state.get_concrete_value(data.args_atoms[1]) or 1
79
79
  heap_ptr = state.heap_address(state.heap_allocator.allocate(nmemb * size))
80
80
  data.depends(state.deref(heap_ptr, nmemb * size), value=0)
81
81
  data.depends(data.ret_atoms, value=heap_ptr)
@@ -84,18 +84,51 @@ class LibcStdlibHandlers(FunctionHandler):
84
84
  def handle_impl_getenv(self, state: ReachingDefinitionsState, data: FunctionCallDataUnwrapped):
85
85
  name_atom = state.deref(data.args_atoms[0], DerefSize.NULL_TERMINATE)
86
86
  name_value = state.get_concrete_value(name_atom, cast_to=bytes)
87
- if name_value is not None:
88
- name_value = name_value.strip(b"\0").decode()
87
+ length = 2
88
+ heap_value = None
89
+
89
90
  data.depends(None, name_atom)
90
91
 
91
92
  # store a buffer, registering it as an output of this function
92
93
  # we store this two-byte mixed value because we don't want the value to be picked up by get_concrete_value()
93
94
  # but also it should be able to be picked up by NULL_TERMINATE reads
94
- heap_ptr = state.heap_allocator.allocate(2)
95
- heap_atom = state.deref(heap_ptr, 2)
96
- heap_value = claripy.BVS("weh", 8).concat(claripy.BVV(0, 8))
97
- data.depends(heap_atom, EnvironAtom(2, name_value), value=heap_value)
98
- data.depends(data.ret_atoms, value=state.heap_address(heap_ptr))
95
+ heap_atom = None
96
+ env_atom = None
97
+ heap_ptr = None
98
+ sources = []
99
+ if name_value is not None:
100
+ name_value = name_value.strip(b"\0").decode()
101
+ for env_atom, env_value in state.others.items():
102
+ if not isinstance(env_atom, EnvironAtom) or env_atom.name != name_value:
103
+ continue
104
+
105
+ # There exists an environment variable with this name
106
+ heap_value = env_value
107
+ length = env_atom.size
108
+ heap_ptr = state.heap_allocator.allocate(length)
109
+ heap_atom = state.deref(heap_ptr, length)
110
+ break
111
+
112
+ else:
113
+ heap_value = None
114
+
115
+ if name_value is None or heap_value is None or heap_atom is None or env_atom is None:
116
+ heap_ptr = state.heap_allocator.allocate(length)
117
+ heap_atom = state.deref(heap_ptr, length)
118
+ heap_value = claripy.BVS("weh", 8)
119
+ env_atom = EnvironAtom(length, name_value)
120
+ if heap_atom is not None:
121
+ heap_value = state.annotate_with_def(heap_value, Definition(heap_atom, state.codeloc))
122
+ heap_value = heap_value.concat(claripy.BVV(0, 8))
123
+ data.depends(env_atom, value=heap_value) # Puts the env_atom in the others dict
124
+
125
+ data.depends(heap_atom, env_atom, value=heap_value)
126
+ sources = [heap_atom, env_atom]
127
+ if name_atom is not None:
128
+ sources.append(name_atom)
129
+
130
+ value = state.heap_address(heap_ptr) if heap_ptr is not None else state.top(state.arch.bits)
131
+ data.depends(data.ret_atoms, *sources, value=value)
99
132
 
100
133
  @FunctionCallDataUnwrapped.decorate
101
134
  def handle_impl_setenv(self, state: ReachingDefinitionsState, data: FunctionCallDataUnwrapped):
@@ -107,9 +140,9 @@ class LibcStdlibHandlers(FunctionHandler):
107
140
 
108
141
  src_atom = state.deref(data.args_atoms[1], DerefSize.NULL_TERMINATE)
109
142
  src_value = state.get_values(src_atom)
110
- data.depends(
111
- EnvironAtom(len(src_value) // 8 if src_value is not None else 1, name_value), src_atom, value=src_value
112
- )
143
+
144
+ env_atom = EnvironAtom(len(src_value) // 8 if src_value is not None else 1, name_value)
145
+ data.depends(env_atom, src_atom, value=src_value)
113
146
 
114
147
  @FunctionCallDataUnwrapped.decorate
115
148
  def handle_impl_system(self, state: ReachingDefinitionsState, data: FunctionCallDataUnwrapped):
@@ -1,8 +1,10 @@
1
1
  from __future__ import annotations
2
2
  import archinfo
3
+ import claripy
3
4
  from angr.analyses.reaching_definitions.function_handler import FunctionCallDataUnwrapped, FunctionHandler
4
5
  from angr.analyses.reaching_definitions.rd_state import ReachingDefinitionsState
5
6
  from angr.knowledge_plugins.key_definitions.live_definitions import DerefSize
7
+ from angr.knowledge_plugins.key_definitions.live_definitions import MultiValues
6
8
 
7
9
  # pylint: disable=no-self-use,missing-class-docstring,unused-argument
8
10
 
@@ -12,16 +14,26 @@ class LibcStringHandlers(FunctionHandler):
12
14
  def handle_impl_strcat(self, state: ReachingDefinitionsState, data: FunctionCallDataUnwrapped):
13
15
  src0_atom = state.deref(data.args_atoms[0], DerefSize.NULL_TERMINATE)
14
16
  src1_atom = state.deref(data.args_atoms[1], DerefSize.NULL_TERMINATE)
15
- src0_value = state.get_values(src0_atom)
16
- src1_value = state.get_values(src1_atom)
17
+ src0_value = state.get_values(src0_atom) if src0_atom is not None else None
18
+ src1_value = state.get_values(src1_atom) if src1_atom is not None else None
19
+
17
20
  if src0_value is not None and src1_value is not None:
18
21
  src0_value = src0_value.extract(0, len(src0_value) // 8 - 1, archinfo.Endness.BE)
19
22
  dest_value = src0_value.concat(src1_value)
20
23
  dest_atom = state.deref(data.args_atoms[0], len(dest_value) // 8, endness=archinfo.Endness.BE)
24
+ elif src0_value is not None:
25
+ src0_value = src0_value.extract(0, len(src0_value) // 8 - 1, archinfo.Endness.BE)
26
+ top_val = state.top(state.arch.bits)
27
+ if src1_atom is not None:
28
+ for defn in state.get_definitions(src1_atom):
29
+ top_val = state.annotate_with_def(top_val, defn)
30
+ dest_value = src0_value.concat(MultiValues(top_val))
31
+ dest_atom = state.deref(data.args_atoms[0], len(dest_value) // 8, endness=archinfo.Endness.BE)
21
32
  else:
22
33
  dest_value = None
23
34
  dest_atom = src0_atom
24
- data.depends(dest_atom, src0_atom, src1_atom, value=dest_value)
35
+ if src0_atom is not None and src1_atom is not None:
36
+ data.depends(dest_atom, src0_atom, src1_atom, value=dest_value)
25
37
  data.depends(data.ret_atoms, data.args_atoms[0], value=src0_value)
26
38
 
27
39
  handle_impl_strncat = handle_impl_strcat
@@ -29,39 +41,76 @@ class LibcStringHandlers(FunctionHandler):
29
41
  @FunctionCallDataUnwrapped.decorate
30
42
  def handle_impl_strlen(self, state: ReachingDefinitionsState, data: FunctionCallDataUnwrapped):
31
43
  src_atom = state.deref(data.args_atoms[0], DerefSize.NULL_TERMINATE)
32
- src_str = state.get_values(src_atom)
33
- if src_str is not None:
34
- data.depends(data.ret_atoms, src_atom, value=len(src_str) // 8 - 1)
44
+ if src_atom is not None:
45
+ src_str = state.get_values(src_atom) if src_atom is not None else None
46
+ if src_str is not None:
47
+ data.depends(data.ret_atoms, src_atom, value=len(src_str) // 8 - 1)
48
+ else:
49
+ data.depends(data.ret_atoms, src_atom)
35
50
  else:
36
- data.depends(data.ret_atoms, src_atom)
51
+ data.depends(data.ret_atoms, data.args_atoms[0])
37
52
 
38
53
  @FunctionCallDataUnwrapped.decorate
39
54
  def handle_impl_strcpy(self, state: ReachingDefinitionsState, data: FunctionCallDataUnwrapped):
40
55
  src_atom = state.deref(data.args_atoms[1], DerefSize.NULL_TERMINATE)
41
- src_str = state.get_values(src_atom)
42
- if src_str is not None:
43
- dst_atom = state.deref(data.args_atoms[0], len(src_str) // 8)
56
+ src_str = state.get_values(src_atom) if src_atom is not None else None
57
+ if src_str is None:
58
+ src_str = state.top(state.arch.bits)
59
+ if src_atom is not None:
60
+ for defn in state.get_definitions(src_atom):
61
+ src_str = state.annotate_with_def(src_str, defn)
62
+ src_str = MultiValues(src_str)
63
+
64
+ dst_atom = state.deref(data.args_atoms[0], len(src_str) // 8)
65
+ if src_atom is not None:
44
66
  data.depends(dst_atom, src_atom, value=src_str)
45
67
  data.depends(data.ret_atoms, data.args_atoms[0], value=state.get_values(data.args_atoms[0]))
46
68
 
47
69
  @FunctionCallDataUnwrapped.decorate
48
70
  def handle_impl_strncpy(self, state: ReachingDefinitionsState, data: FunctionCallDataUnwrapped):
49
71
  n = state.get_concrete_value(data.args_atoms[2])
50
- src_atom = state.deref(data.args_atoms[1], DerefSize.NULL_TERMINATE if n is None else n)
51
- src_str = state.get_values(src_atom)
52
- if src_str is not None:
72
+ src_atom = state.deref(data.args_atoms[1], DerefSize.NULL_TERMINATE)
73
+ src_str = state.get_values(src_atom) if src_atom is not None else None
74
+ if src_str is None and src_atom is not None:
75
+ tmp_atom = state.deref(data.args_atoms[1], 1)
76
+ if tmp_atom is not None:
77
+ tmp_str = state.get_values(tmp_atom)
78
+ val_defns = None if tmp_str is None else state.get_definitions(tmp_str)
79
+ if tmp_str is None or not val_defns: # There's no data at all or no valid definitions
80
+ src_str = state.top(state.arch.bits if n is None or n > state.arch.bytes else n * 8)
81
+ defns = state.get_definitions(src_atom) if src_atom is not None else []
82
+ for defn in defns:
83
+ src_str = state.annotate_with_def(src_str, defn)
84
+ src_str = MultiValues(src_str)
85
+ else: # We found some data, but it's not NULL_TERIMINATED or of size n
86
+ src_atoms = set()
87
+ for defn in val_defns:
88
+ a = defn.atom
89
+ a.size = a.size if n is None or a.size < n else n
90
+ src_atoms.add(a)
91
+ src_str = state.get_values(src_atoms)
92
+
93
+ elif n is not None and src_str is not None and n < len(src_str) // 8:
94
+ # We have a src_str, but need to truncate it if n is not None and less than the size of src_str
95
+ src_atom = state.deref(data.args_atoms[1], n)
96
+ if src_atom is not None:
97
+ src_str = state.get_values(src_atom)
98
+
99
+ if src_str is not None and src_atom is not None:
53
100
  dst_atom = state.deref(data.args_atoms[0], len(src_str) // 8)
54
101
  data.depends(dst_atom, src_atom, value=src_str)
102
+
55
103
  data.depends(data.ret_atoms, data.args_atoms[0], value=state.get_values(data.args_atoms[0]))
56
104
 
57
105
  @FunctionCallDataUnwrapped.decorate
58
106
  def handle_impl_strdup(self, state: ReachingDefinitionsState, data: FunctionCallDataUnwrapped):
59
- src_atom = state.deref(data.args_atoms[1], DerefSize.NULL_TERMINATE)
60
- src_str = state.get_values(src_atom)
61
- malloc_size = len(src_str) // 8 if src_str is not None else 1
62
- heap_ptr = state.heap_allocator.allocate(malloc_size)
63
- dst_atom = state.deref(heap_ptr, malloc_size)
64
- data.depends(dst_atom, src_atom, value=src_str)
107
+ src_atom = state.deref(data.args_atoms[0], DerefSize.NULL_TERMINATE)
108
+ if src_atom is not None:
109
+ src_str = state.get_values(src_atom)
110
+ malloc_size = len(src_str) // 8 if src_str is not None else 1
111
+ heap_ptr = state.heap_allocator.allocate(malloc_size)
112
+ dst_atom = state.deref(heap_ptr, malloc_size)
113
+ data.depends(dst_atom, src_atom, value=src_str)
65
114
  data.depends(data.ret_atoms, data.args_atoms[0], value=state.get_values(data.args_atoms[0]))
66
115
 
67
116
  @FunctionCallDataUnwrapped.decorate
@@ -70,15 +119,22 @@ class LibcStringHandlers(FunctionHandler):
70
119
  if size is not None:
71
120
  src_atom = state.deref(data.args_atoms[1], size)
72
121
  dst_atom = state.deref(data.args_atoms[0], size)
73
- data.depends(dst_atom, src_atom, value=state.get_values(src_atom))
122
+ if src_atom is not None:
123
+ data.depends(dst_atom, src_atom, value=state.get_values(src_atom))
74
124
  data.depends(data.ret_atoms, data.args_atoms[0], value=state.get_values(data.args_atoms[0]))
75
125
 
76
126
  @FunctionCallDataUnwrapped.decorate
77
127
  def handle_impl_memset(self, state: ReachingDefinitionsState, data: FunctionCallDataUnwrapped):
78
128
  size = state.get_concrete_value(data.args_atoms[2])
129
+ c = state.get_concrete_value(data.args_atoms[1])
79
130
  if size is not None:
80
131
  dst_atom = state.deref(data.args_atoms[0], size)
81
- data.depends(dst_atom, data.args_atoms[1])
132
+ if c is not None:
133
+ value = MultiValues(claripy.BVV(chr(c) * size, size * 8))
134
+ data.depends(dst_atom, data.args_atoms[1], value=value)
135
+ else:
136
+ data.depends(dst_atom, data.args_atoms[1], value=state.get_values(data.args_atoms[1]))
137
+
82
138
  data.depends(data.ret_atoms, data.args_atoms[0], value=state.get_values(data.args_atoms[0]))
83
139
 
84
140
  @FunctionCallDataUnwrapped.decorate
@@ -1,17 +1,37 @@
1
1
  from __future__ import annotations
2
+ import random
2
3
  from angr.analyses.reaching_definitions.function_handler import FunctionCallDataUnwrapped, FunctionHandler
3
4
  from angr.analyses.reaching_definitions.function_handler_library.stdio import StdinAtom, StdoutAtom
4
5
  from angr.analyses.reaching_definitions.rd_state import ReachingDefinitionsState
6
+ from angr.knowledge_plugins.key_definitions.atoms import Atom
5
7
 
6
8
  # pylint: disable=no-self-use,missing-class-docstring,unused-argument
7
9
 
8
10
 
11
+ class FDAtom(Atom):
12
+ def __init__(self, fd: int | None, source: str, size: int = 1):
13
+ self.source = source
14
+ self.fd = fd
15
+ self.nonce = random.randint(0, 999999999999)
16
+ super().__init__(size)
17
+
18
+ def _identity(self):
19
+ if self.fd is not None:
20
+ return (self.fd,)
21
+ return (self.nonce,)
22
+
23
+
9
24
  class LibcUnistdHandlers(FunctionHandler):
10
25
  @FunctionCallDataUnwrapped.decorate
11
26
  def handle_impl_read(self, state: ReachingDefinitionsState, data: FunctionCallDataUnwrapped):
12
27
  size = state.get_concrete_value(data.args_atoms[2]) or 1
13
28
  dst_atom = state.deref(data.args_atoms[1], size)
14
- data.depends(dst_atom, StdinAtom(data.function.name, size))
29
+ real_fd = state.get_concrete_value(data.args_atoms[0])
30
+
31
+ fd_atom = StdinAtom(data.function.name, size) if real_fd == 0 else FDAtom(real_fd, data.function.name, size)
32
+ buf_data = state.top(size * 8) if size is not None else state.top(state.arch.bits)
33
+
34
+ data.depends(dst_atom, fd_atom, value=buf_data)
15
35
 
16
36
  handle_impl_recv = handle_impl_recvfrom = handle_impl_read
17
37
 
@@ -215,14 +215,14 @@ class ReachingDefinitionsState:
215
215
  def tmp_uses(self):
216
216
  return self.live_definitions.tmp_uses
217
217
 
218
- @property
219
- def register_uses(self):
220
- return self.live_definitions.register_uses
221
-
222
218
  @property
223
219
  def registers(self) -> MultiValuedMemory:
224
220
  return self.live_definitions.registers
225
221
 
222
+ @property
223
+ def register_uses(self):
224
+ return self.live_definitions.register_uses
225
+
226
226
  @property
227
227
  def stack(self) -> MultiValuedMemory:
228
228
  return self.live_definitions.stack
@@ -239,13 +239,17 @@ class ReachingDefinitionsState:
239
239
  def heap_uses(self):
240
240
  return self.live_definitions.heap_uses
241
241
 
242
+ @property
243
+ def memory(self) -> MultiValuedMemory:
244
+ return self.live_definitions.memory
245
+
242
246
  @property
243
247
  def memory_uses(self):
244
248
  return self.live_definitions.memory_uses
245
249
 
246
250
  @property
247
- def memory(self) -> MultiValuedMemory:
248
- return self.live_definitions.memory
251
+ def others(self) -> dict[Atom, MultiValues]:
252
+ return self.live_definitions.others
249
253
 
250
254
  @property
251
255
  def uses_by_codeloc(self):
@@ -493,7 +497,7 @@ class ReachingDefinitionsState:
493
497
  self.live_definitions.add_memory_use_by_def(definition, self.codeloc, expr=expr)
494
498
 
495
499
  def get_definitions(
496
- self, atom: Atom | Definition[Atom] | Iterable[Atom] | Iterable[Definition[Atom]]
500
+ self, atom: Atom | Definition[Atom] | Iterable[Atom] | Iterable[Definition[Atom]] | MultiValues
497
501
  ) -> set[Definition[Atom]]:
498
502
  return self.live_definitions.get_definitions(atom)
499
503
 
@@ -2,9 +2,10 @@ from __future__ import annotations
2
2
 
3
3
  import networkx
4
4
  from ailment.expression import VirtualVariable
5
- from ailment.statement import Assignment, Call
5
+ from ailment.statement import Assignment, Call, ConditionalJump
6
6
 
7
7
  from angr.analyses import Analysis, register_analysis
8
+ from angr.utils.ail import is_head_controlled_loop_block, is_phi_assignment
8
9
  from angr.utils.ssa import VVarUsesCollector, phi_assignment_get_src
9
10
 
10
11
 
@@ -69,8 +70,14 @@ class SLivenessAnalysis(Analysis):
69
70
  block_key = block.addr, block.idx
70
71
  changed = False
71
72
 
73
+ head_controlled_loop = is_head_controlled_loop_block(block)
74
+
72
75
  live = set()
73
76
  for succ in graph.successors(block):
77
+ if head_controlled_loop and (block.addr, block.idx) == (succ.addr, succ.idx):
78
+ # this is a head-controlled loop block; we ignore the self-loop edge because all variables defined
79
+ # in the block after the conditional jump will be dead after leaving the current block
80
+ continue
74
81
  edge = (block.addr, block.idx), (succ.addr, succ.idx)
75
82
  if edge in live_on_edges:
76
83
  live |= live_on_edges[edge]
@@ -81,8 +88,18 @@ class SLivenessAnalysis(Analysis):
81
88
  changed = True
82
89
  live_outs[block_key] = live.copy()
83
90
 
91
+ if head_controlled_loop:
92
+ # this is a head-controlled loop block; we start scanning from the first condition jump backwards
93
+ condjump_idx = next(
94
+ iter(i for i, stmt in enumerate(block.statements) if isinstance(stmt, ConditionalJump)), None
95
+ )
96
+ assert condjump_idx is not None
97
+ stmts = block.statements[: condjump_idx + 1]
98
+ else:
99
+ stmts = block.statements
100
+
84
101
  live_in_by_pred = {}
85
- for stmt in reversed(block.statements):
102
+ for stmt in reversed(stmts):
86
103
  # handle assignments: a defined vvar is not live before the assignment
87
104
  if isinstance(stmt, Assignment) and isinstance(stmt.dst, VirtualVariable):
88
105
  live.discard(stmt.dst.varid)
@@ -92,6 +109,10 @@ class SLivenessAnalysis(Analysis):
92
109
  phi_expr = phi_assignment_get_src(stmt)
93
110
  if phi_expr is not None:
94
111
  for src, vvar in phi_expr.src_and_vvars:
112
+ if head_controlled_loop and src == (block.addr, block.idx):
113
+ # this is a head-controlled loop block; we ignore the self-loop edge
114
+ continue
115
+
95
116
  if src not in live_in_by_pred:
96
117
  live_in_by_pred[src] = live.copy()
97
118
  if vvar is not None:
@@ -99,9 +120,15 @@ class SLivenessAnalysis(Analysis):
99
120
  live_in_by_pred[src].discard(stmt.dst.varid)
100
121
 
101
122
  # handle the statement: add used vvars to the live set
102
- vvar_use_collector = VVarUsesCollector()
103
- vvar_use_collector.walk_statement(stmt)
104
- live |= vvar_use_collector.vvars
123
+ if head_controlled_loop and is_phi_assignment(stmt):
124
+ for src, vvar in stmt.src.src_and_vvars:
125
+ # this is a head-controlled loop block; we ignore the self-loop edge
126
+ if src != (block.addr, block.idx) and vvar is not None:
127
+ live |= {vvar.varid}
128
+ else:
129
+ vvar_use_collector = VVarUsesCollector()
130
+ vvar_use_collector.walk_statement(stmt)
131
+ live |= vvar_use_collector.vvars
105
132
 
106
133
  if live_ins[block_key] != live:
107
134
  live_ins[block_key] = live
@@ -135,7 +162,18 @@ class SLivenessAnalysis(Analysis):
135
162
 
136
163
  for block in self.func_graph.nodes():
137
164
  live = self.model.live_outs[(block.addr, block.idx)].copy()
138
- for stmt in reversed(block.statements):
165
+
166
+ if is_head_controlled_loop_block(block):
167
+ # this is a head-controlled loop block; we start scanning from the first condition jump backwards
168
+ condjump_idx = next(
169
+ iter(i for i, stmt in enumerate(block.statements) if isinstance(stmt, ConditionalJump)), None
170
+ )
171
+ assert condjump_idx is not None
172
+ stmts = block.statements[: condjump_idx + 1]
173
+ else:
174
+ stmts = block.statements
175
+
176
+ for stmt in reversed(stmts):
139
177
  if isinstance(stmt, Assignment) and isinstance(stmt.dst, VirtualVariable):
140
178
  def_vvar = stmt.dst.varid
141
179
  elif isinstance(stmt, Call) and isinstance(stmt.ret_expr, VirtualVariable):
@@ -91,13 +91,15 @@ class SRDAModel:
91
91
  )
92
92
  return defs
93
93
 
94
- def get_vvar_uses(self, obj: atoms.VirtualVariable) -> set[CodeLocation]:
94
+ def get_vvar_uses(self, obj: VirtualVariable | atoms.VirtualVariable) -> set[CodeLocation]:
95
95
  the_vvar = self.varid_to_vvar.get(obj.varid, None)
96
96
  if the_vvar is not None:
97
97
  return {loc for _, loc in self.all_vvar_uses[the_vvar]}
98
98
  return set()
99
99
 
100
- def get_vvar_uses_with_expr(self, obj: atoms.VirtualVariable) -> set[tuple[CodeLocation, VirtualVariable]]:
100
+ def get_vvar_uses_with_expr(
101
+ self, obj: VirtualVariable | atoms.VirtualVariable
102
+ ) -> set[tuple[CodeLocation, VirtualVariable]]:
101
103
  the_vvar = self.varid_to_vvar.get(obj.varid, None)
102
104
  if the_vvar is not None:
103
105
  return {(loc, expr) for expr, loc in self.all_vvar_uses[the_vvar]}
@@ -185,7 +185,9 @@ class Sketch:
185
185
  return self.node_mapping[typevar]
186
186
  node: SketchNodeBase | None = None
187
187
  if isinstance(typevar, DerivedTypeVariable):
188
- node = self.node_mapping[SimpleSolver._to_typevar_or_typeconst(typevar.type_var)]
188
+ t = SimpleSolver._to_typevar_or_typeconst(typevar.type_var)
189
+ assert isinstance(t, TypeVariable)
190
+ node = self.node_mapping[t]
189
191
  for label in typevar.labels:
190
192
  succs = []
191
193
  for _, dst, data in self.graph.out_edges(node, data=True):
@@ -210,11 +212,26 @@ class Sketch:
210
212
  # sub <: super
211
213
  if not isinstance(constraint, Subtype):
212
214
  return
213
- subtype = self.flatten_typevar(constraint.sub_type)
214
- supertype = self.flatten_typevar(constraint.super_type)
215
+ subtype, _ = self.flatten_typevar(constraint.sub_type)
216
+ supertype, try_maxsize = self.flatten_typevar(constraint.super_type)
217
+
218
+ if (
219
+ try_maxsize
220
+ and isinstance(subtype, TypeVariable)
221
+ and subtype in self.solver.stackvar_max_sizes
222
+ and isinstance(supertype, TypeConstant)
223
+ and not isinstance(supertype, BottomType)
224
+ ):
225
+ basetype = supertype
226
+ assert basetype.size is not None
227
+ max_size = self.solver.stackvar_max_sizes.get(subtype, None)
228
+ if max_size not in {0, None} and max_size // basetype.size > 0: # type: ignore
229
+ supertype = Array(element=basetype, count=max_size // basetype.size) # type: ignore
230
+
215
231
  if SimpleSolver._typevar_inside_set(subtype, PRIMITIVE_TYPES) and not SimpleSolver._typevar_inside_set(
216
232
  supertype, PRIMITIVE_TYPES
217
233
  ):
234
+ assert isinstance(supertype, (TypeVariable, DerivedTypeVariable))
218
235
  super_node = self.lookup(supertype)
219
236
  assert super_node is None or isinstance(super_node, SketchNode)
220
237
  if super_node is not None:
@@ -222,6 +239,7 @@ class Sketch:
222
239
  elif SimpleSolver._typevar_inside_set(supertype, PRIMITIVE_TYPES) and not SimpleSolver._typevar_inside_set(
223
240
  subtype, PRIMITIVE_TYPES
224
241
  ):
242
+ assert isinstance(subtype, (TypeVariable, DerivedTypeVariable))
225
243
  sub_node = self.lookup(subtype)
226
244
  assert sub_node is None or isinstance(sub_node, SketchNode)
227
245
  # assert sub_node is not None
@@ -231,7 +249,7 @@ class Sketch:
231
249
  @staticmethod
232
250
  def flatten_typevar(
233
251
  derived_typevar: TypeVariable | TypeConstant | DerivedTypeVariable,
234
- ) -> DerivedTypeVariable | TypeVariable | TypeConstant:
252
+ ) -> tuple[DerivedTypeVariable | TypeVariable | TypeConstant, bool]:
235
253
  # pylint:disable=too-many-boolean-expressions
236
254
  if (
237
255
  isinstance(derived_typevar, DerivedTypeVariable)
@@ -243,8 +261,10 @@ class Sketch:
243
261
  and derived_typevar.labels[1].offset == 0
244
262
  and derived_typevar.labels[1].bits == MAX_POINTSTO_BITS
245
263
  ):
246
- return derived_typevar.type_var.basetype
247
- return derived_typevar
264
+ bt = derived_typevar.type_var.basetype
265
+ assert bt is not None
266
+ return bt, True
267
+ return derived_typevar, False
248
268
 
249
269
 
250
270
  #
@@ -313,6 +333,11 @@ class ConstraintGraphNode:
313
333
  else:
314
334
  prefix = DerivedTypeVariable(self.typevar.type_var, None, labels=self.typevar.labels[:-1])
315
335
  variance = Variance.COVARIANT if self.variance == last_label.variance else Variance.CONTRAVARIANT
336
+ if not isinstance(prefix, (TypeVariable, DerivedTypeVariable)):
337
+ # we may see incorrectly generated type constraints that attempt to load from an int:
338
+ # int64.load
339
+ # we don't want to entertain such constraints
340
+ return None
316
341
  return (
317
342
  ConstraintGraphNode(prefix, variance, self.tag, FORGOTTEN.PRE_FORGOTTEN),
318
343
  self.typevar.labels[-1],
@@ -330,6 +355,7 @@ class ConstraintGraphNode:
330
355
  raise TypeError(f"Unsupported type {type(self.typevar)}")
331
356
  variance = Variance.COVARIANT if self.variance == label.variance else Variance.CONTRAVARIANT
332
357
  var = typevar if not labels else DerivedTypeVariable(typevar, None, labels=labels)
358
+ assert isinstance(var, (TypeVariable, DerivedTypeVariable))
333
359
  return ConstraintGraphNode(var, variance, self.tag, FORGOTTEN.PRE_FORGOTTEN)
334
360
 
335
361
  def inverse(self) -> ConstraintGraphNode:
@@ -366,13 +392,14 @@ class SimpleSolver:
366
392
  improvements.
367
393
  """
368
394
 
369
- def __init__(self, bits: int, constraints, typevars):
395
+ def __init__(self, bits: int, constraints, typevars, stackvar_max_sizes: dict[TypeVariable, int] | None = None):
370
396
  if bits not in (32, 64):
371
397
  raise ValueError(f"Pointer size {bits} is not supported. Expect 32 or 64.")
372
398
 
373
399
  self.bits = bits
374
400
  self._constraints: dict[TypeVariable, set[TypeConstraint]] = constraints
375
401
  self._typevars: set[TypeVariable] = typevars
402
+ self.stackvar_max_sizes = stackvar_max_sizes if stackvar_max_sizes is not None else {}
376
403
  self._base_lattice = BASE_LATTICES[bits]
377
404
  self._base_lattice_inverted = networkx.DiGraph()
378
405
  for src, dst in self._base_lattice.edges:
@@ -1289,7 +1316,7 @@ class SimpleSolver:
1289
1316
  for _, succ, data in out_edges:
1290
1317
  if isinstance(succ, RecursiveRefNode):
1291
1318
  ref = succ
1292
- succ: SketchNode | None = sketch.lookup(succ.target)
1319
+ succ: SketchNode | None = sketch.lookup(succ.target) # type: ignore
1293
1320
  if succ is None:
1294
1321
  # failed to resolve...
1295
1322
  _l.warning(
@@ -37,6 +37,7 @@ class Typehoon(Analysis):
37
37
  ground_truth=None,
38
38
  var_mapping: dict[SimVariable, set[TypeVariable]] | None = None,
39
39
  must_struct: set[TypeVariable] | None = None,
40
+ stackvar_max_sizes: dict[TypeVariable, int] | None = None,
40
41
  ):
41
42
  """
42
43
 
@@ -52,6 +53,7 @@ class Typehoon(Analysis):
52
53
  self._ground_truth: dict[TypeVariable, SimType] | None = ground_truth
53
54
  self._var_mapping = var_mapping
54
55
  self._must_struct = must_struct
56
+ self._stackvar_max_sizes = stackvar_max_sizes if stackvar_max_sizes is not None else {}
55
57
 
56
58
  self.bits = self.project.arch.bits
57
59
  self.solution = None
@@ -163,7 +165,7 @@ class Typehoon(Analysis):
163
165
  typevars.add(constraint.sub_type)
164
166
  if isinstance(constraint.super_type, TypeVariable):
165
167
  typevars.add(constraint.super_type)
166
- solver = SimpleSolver(self.bits, self._constraints, typevars)
168
+ solver = SimpleSolver(self.bits, self._constraints, typevars, stackvar_max_sizes=self._stackvar_max_sizes)
167
169
  self.solution = solver.solution
168
170
 
169
171
  def _specialize(self):
@@ -1433,7 +1433,7 @@ class SimCCX86LinuxSyscall(SimCCSyscall):
1433
1433
 
1434
1434
  class SimCCX86WindowsSyscall(SimCCSyscall):
1435
1435
  # TODO: Make sure the information is correct
1436
- ARG_REGS = []
1436
+ ARG_REGS = ["ecx"]
1437
1437
  FP_ARG_REGS = []
1438
1438
  RETURN_VAL = SimRegArg("eax", 4)
1439
1439
  RETURN_ADDR = SimRegArg("ip_at_syscall", 4)
@@ -1673,7 +1673,7 @@ class SimCCAMD64LinuxSyscall(SimCCSyscall):
1673
1673
 
1674
1674
  class SimCCAMD64WindowsSyscall(SimCCSyscall):
1675
1675
  # TODO: Make sure the information is correct
1676
- ARG_REGS = []
1676
+ ARG_REGS = ["rcx"]
1677
1677
  FP_ARG_REGS = []
1678
1678
  RETURN_VAL = SimRegArg("rax", 8)
1679
1679
  RETURN_ADDR = SimRegArg("ip_at_syscall", 8)
@@ -9,7 +9,7 @@ import contextlib
9
9
  from typing import overload
10
10
 
11
11
  import networkx
12
- from itanium_demangler import parse
12
+ import pydemumble
13
13
 
14
14
  from cle.backends.symbol import Symbol
15
15
  from archinfo.arch_arm import get_real_address_if_arm
@@ -202,7 +202,8 @@ class Function(Serializable):
202
202
  if is_plt is not None:
203
203
  self.is_plt = is_plt
204
204
  else:
205
- # Whether this function is a PLT entry or not is fully relying on the PLT detection in CLE
205
+ # Whether this function is a PLT entry or not is primarily relying on the PLT detection in CLE; it may also
206
+ # be updated (to True) during CFG recovery.
206
207
  if self.project is None:
207
208
  raise ValueError(
208
209
  "'is_plt' must be specified if you do not specify a function manager for this new function."
@@ -1568,14 +1569,8 @@ class Function(Serializable):
1568
1569
 
1569
1570
  @property
1570
1571
  def demangled_name(self):
1571
- if self.name[0:2] == "_Z":
1572
- try:
1573
- ast = parse(self.name)
1574
- except (NotImplementedError, KeyError): # itanium demangler is not the most robust package in the world
1575
- return self.name
1576
- if ast:
1577
- return ast.__str__()
1578
- return self.name
1572
+ ast = pydemumble.demangle(self.name)
1573
+ return ast if ast else self.name
1579
1574
 
1580
1575
  def get_unambiguous_name(self, display_name: str | None = None) -> str:
1581
1576
  """