smallworld-re 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166) hide show
  1. smallworld/__init__.py +35 -0
  2. smallworld/analyses/__init__.py +14 -0
  3. smallworld/analyses/analysis.py +88 -0
  4. smallworld/analyses/code_coverage.py +31 -0
  5. smallworld/analyses/colorizer.py +682 -0
  6. smallworld/analyses/colorizer_summary.py +100 -0
  7. smallworld/analyses/field_detection/__init__.py +14 -0
  8. smallworld/analyses/field_detection/field_analysis.py +536 -0
  9. smallworld/analyses/field_detection/guards.py +26 -0
  10. smallworld/analyses/field_detection/hints.py +133 -0
  11. smallworld/analyses/field_detection/malloc.py +211 -0
  12. smallworld/analyses/forced_exec/__init__.py +3 -0
  13. smallworld/analyses/forced_exec/forced_exec.py +87 -0
  14. smallworld/analyses/underlays/__init__.py +4 -0
  15. smallworld/analyses/underlays/basic.py +13 -0
  16. smallworld/analyses/underlays/underlay.py +31 -0
  17. smallworld/analyses/unstable/__init__.py +4 -0
  18. smallworld/analyses/unstable/angr/__init__.py +0 -0
  19. smallworld/analyses/unstable/angr/base.py +12 -0
  20. smallworld/analyses/unstable/angr/divergence.py +274 -0
  21. smallworld/analyses/unstable/angr/model.py +383 -0
  22. smallworld/analyses/unstable/angr/nwbt.py +63 -0
  23. smallworld/analyses/unstable/angr/typedefs.py +170 -0
  24. smallworld/analyses/unstable/angr/utils.py +25 -0
  25. smallworld/analyses/unstable/angr/visitor.py +315 -0
  26. smallworld/analyses/unstable/angr_nwbt.py +106 -0
  27. smallworld/analyses/unstable/code_coverage.py +54 -0
  28. smallworld/analyses/unstable/code_reachable.py +44 -0
  29. smallworld/analyses/unstable/control_flow_tracer.py +71 -0
  30. smallworld/analyses/unstable/pointer_finder.py +90 -0
  31. smallworld/arch/__init__.py +0 -0
  32. smallworld/arch/aarch64_arch.py +286 -0
  33. smallworld/arch/amd64_arch.py +86 -0
  34. smallworld/arch/i386_arch.py +44 -0
  35. smallworld/emulators/__init__.py +14 -0
  36. smallworld/emulators/angr/__init__.py +7 -0
  37. smallworld/emulators/angr/angr.py +1652 -0
  38. smallworld/emulators/angr/default.py +15 -0
  39. smallworld/emulators/angr/exceptions.py +7 -0
  40. smallworld/emulators/angr/exploration/__init__.py +9 -0
  41. smallworld/emulators/angr/exploration/bounds.py +27 -0
  42. smallworld/emulators/angr/exploration/default.py +17 -0
  43. smallworld/emulators/angr/exploration/terminate.py +22 -0
  44. smallworld/emulators/angr/factory.py +55 -0
  45. smallworld/emulators/angr/machdefs/__init__.py +35 -0
  46. smallworld/emulators/angr/machdefs/aarch64.py +292 -0
  47. smallworld/emulators/angr/machdefs/amd64.py +192 -0
  48. smallworld/emulators/angr/machdefs/arm.py +387 -0
  49. smallworld/emulators/angr/machdefs/i386.py +221 -0
  50. smallworld/emulators/angr/machdefs/machdef.py +138 -0
  51. smallworld/emulators/angr/machdefs/mips.py +184 -0
  52. smallworld/emulators/angr/machdefs/mips64.py +189 -0
  53. smallworld/emulators/angr/machdefs/ppc.py +101 -0
  54. smallworld/emulators/angr/machdefs/riscv.py +261 -0
  55. smallworld/emulators/angr/machdefs/xtensa.py +255 -0
  56. smallworld/emulators/angr/memory/__init__.py +7 -0
  57. smallworld/emulators/angr/memory/default.py +10 -0
  58. smallworld/emulators/angr/memory/fixups.py +43 -0
  59. smallworld/emulators/angr/memory/memtrack.py +105 -0
  60. smallworld/emulators/angr/scratch.py +43 -0
  61. smallworld/emulators/angr/simos.py +53 -0
  62. smallworld/emulators/angr/utils.py +70 -0
  63. smallworld/emulators/emulator.py +1013 -0
  64. smallworld/emulators/hookable.py +252 -0
  65. smallworld/emulators/panda/__init__.py +5 -0
  66. smallworld/emulators/panda/machdefs/__init__.py +28 -0
  67. smallworld/emulators/panda/machdefs/aarch64.py +93 -0
  68. smallworld/emulators/panda/machdefs/amd64.py +71 -0
  69. smallworld/emulators/panda/machdefs/arm.py +89 -0
  70. smallworld/emulators/panda/machdefs/i386.py +36 -0
  71. smallworld/emulators/panda/machdefs/machdef.py +86 -0
  72. smallworld/emulators/panda/machdefs/mips.py +94 -0
  73. smallworld/emulators/panda/machdefs/mips64.py +91 -0
  74. smallworld/emulators/panda/machdefs/ppc.py +79 -0
  75. smallworld/emulators/panda/panda.py +575 -0
  76. smallworld/emulators/unicorn/__init__.py +13 -0
  77. smallworld/emulators/unicorn/machdefs/__init__.py +28 -0
  78. smallworld/emulators/unicorn/machdefs/aarch64.py +310 -0
  79. smallworld/emulators/unicorn/machdefs/amd64.py +326 -0
  80. smallworld/emulators/unicorn/machdefs/arm.py +321 -0
  81. smallworld/emulators/unicorn/machdefs/i386.py +137 -0
  82. smallworld/emulators/unicorn/machdefs/machdef.py +117 -0
  83. smallworld/emulators/unicorn/machdefs/mips.py +202 -0
  84. smallworld/emulators/unicorn/unicorn.py +684 -0
  85. smallworld/exceptions/__init__.py +5 -0
  86. smallworld/exceptions/exceptions.py +85 -0
  87. smallworld/exceptions/unstable/__init__.py +1 -0
  88. smallworld/exceptions/unstable/exceptions.py +25 -0
  89. smallworld/extern/__init__.py +4 -0
  90. smallworld/extern/ctypes.py +94 -0
  91. smallworld/extern/unstable/__init__.py +1 -0
  92. smallworld/extern/unstable/ghidra.py +129 -0
  93. smallworld/helpers.py +107 -0
  94. smallworld/hinting/__init__.py +8 -0
  95. smallworld/hinting/hinting.py +214 -0
  96. smallworld/hinting/hints.py +427 -0
  97. smallworld/hinting/unstable/__init__.py +2 -0
  98. smallworld/hinting/utils.py +19 -0
  99. smallworld/instructions/__init__.py +18 -0
  100. smallworld/instructions/aarch64.py +20 -0
  101. smallworld/instructions/arm.py +18 -0
  102. smallworld/instructions/bsid.py +67 -0
  103. smallworld/instructions/instructions.py +258 -0
  104. smallworld/instructions/mips.py +21 -0
  105. smallworld/instructions/x86.py +100 -0
  106. smallworld/logging.py +90 -0
  107. smallworld/platforms.py +95 -0
  108. smallworld/py.typed +0 -0
  109. smallworld/state/__init__.py +6 -0
  110. smallworld/state/cpus/__init__.py +32 -0
  111. smallworld/state/cpus/aarch64.py +563 -0
  112. smallworld/state/cpus/amd64.py +676 -0
  113. smallworld/state/cpus/arm.py +630 -0
  114. smallworld/state/cpus/cpu.py +71 -0
  115. smallworld/state/cpus/i386.py +239 -0
  116. smallworld/state/cpus/mips.py +374 -0
  117. smallworld/state/cpus/mips64.py +372 -0
  118. smallworld/state/cpus/powerpc.py +229 -0
  119. smallworld/state/cpus/riscv.py +357 -0
  120. smallworld/state/cpus/xtensa.py +80 -0
  121. smallworld/state/memory/__init__.py +7 -0
  122. smallworld/state/memory/code.py +70 -0
  123. smallworld/state/memory/elf/__init__.py +3 -0
  124. smallworld/state/memory/elf/elf.py +564 -0
  125. smallworld/state/memory/elf/rela/__init__.py +32 -0
  126. smallworld/state/memory/elf/rela/aarch64.py +27 -0
  127. smallworld/state/memory/elf/rela/amd64.py +32 -0
  128. smallworld/state/memory/elf/rela/arm.py +51 -0
  129. smallworld/state/memory/elf/rela/i386.py +32 -0
  130. smallworld/state/memory/elf/rela/mips.py +45 -0
  131. smallworld/state/memory/elf/rela/ppc.py +45 -0
  132. smallworld/state/memory/elf/rela/rela.py +63 -0
  133. smallworld/state/memory/elf/rela/riscv64.py +27 -0
  134. smallworld/state/memory/elf/rela/xtensa.py +15 -0
  135. smallworld/state/memory/elf/structs.py +55 -0
  136. smallworld/state/memory/heap.py +85 -0
  137. smallworld/state/memory/memory.py +181 -0
  138. smallworld/state/memory/stack/__init__.py +31 -0
  139. smallworld/state/memory/stack/aarch64.py +22 -0
  140. smallworld/state/memory/stack/amd64.py +42 -0
  141. smallworld/state/memory/stack/arm.py +66 -0
  142. smallworld/state/memory/stack/i386.py +22 -0
  143. smallworld/state/memory/stack/mips.py +34 -0
  144. smallworld/state/memory/stack/mips64.py +34 -0
  145. smallworld/state/memory/stack/ppc.py +34 -0
  146. smallworld/state/memory/stack/riscv.py +22 -0
  147. smallworld/state/memory/stack/stack.py +127 -0
  148. smallworld/state/memory/stack/xtensa.py +34 -0
  149. smallworld/state/models/__init__.py +6 -0
  150. smallworld/state/models/mmio.py +186 -0
  151. smallworld/state/models/model.py +163 -0
  152. smallworld/state/models/posix.py +455 -0
  153. smallworld/state/models/x86/__init__.py +2 -0
  154. smallworld/state/models/x86/microsoftcdecl.py +35 -0
  155. smallworld/state/models/x86/systemv.py +240 -0
  156. smallworld/state/state.py +962 -0
  157. smallworld/state/unstable/__init__.py +0 -0
  158. smallworld/state/unstable/elf.py +393 -0
  159. smallworld/state/x86_registers.py +30 -0
  160. smallworld/utils.py +935 -0
  161. smallworld_re-1.0.0.dist-info/LICENSE.txt +21 -0
  162. smallworld_re-1.0.0.dist-info/METADATA +189 -0
  163. smallworld_re-1.0.0.dist-info/RECORD +166 -0
  164. smallworld_re-1.0.0.dist-info/WHEEL +5 -0
  165. smallworld_re-1.0.0.dist-info/entry_points.txt +2 -0
  166. smallworld_re-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,100 @@
1
+ # mypy: ignore-errors
2
+ import logging
3
+
4
+ import networkx as nx
5
+
6
+ from .. import hinting
7
+ from . import analysis
8
+
9
+ hinter = hinting.get_hinter(__name__)
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
class DefUseGraph(nx.MultiDiGraph):
    """Directed multigraph whose edges record definition-to-use flows."""

    def add_def_use(self, def_node, use_node, def_info, use_info, color):
        """Add one edge from ``def_node`` to ``use_node``.

        The edge carries three attributes: ``def_info``, ``use_info`` and
        ``color``.  Because this is a multigraph, repeated calls with the
        same endpoints add parallel edges rather than overwriting.
        """
        edge_attrs = {
            "def_info": def_info,
            "use_info": use_info,
            "color": color,
        }
        self.add_edge(def_node, use_node, **edge_attrs)
24
+
25
+
26
# Module-level def-use graph; ColorizerSummary.dynamic_value_summary adds
# nodes and edges to it, and ColorizerSummary.deactivate dumps it to disk.
du_graph = DefUseGraph()
# Maps a color to the (node, dynamic-value info) pair recorded when that
# color was first defined.
color2nodeDv = {}
28
+
29
+
30
class ColorizerSummary(analysis.Filter):
    """Filter that folds colorizer hints into the module-level def-use graph.

    Every dynamic register/memory value hint becomes a node keyed by its
    ``pc`` in ``du_graph``; edges connect the node that defined a color to
    each node that later observes it.  On deactivation the graph is written
    to ``dugraph.json`` in the current working directory.
    """

    name = "colorizer_summary"
    description = "summarized version of colorizer"
    version = "0.0.1"

    @staticmethod
    def dynamic_value_summary(hint: hinting.Hint):
        """Record a single dynamic-value hint in ``du_graph``.

        Arguments:
            hint: A ``DynamicRegisterValueProbHint`` or
                ``DynamicMemoryValueProbHint`` (exact types only).

        Raises:
            AssertionError: If the hint is of any other type, or if the
                hint's ``message`` / ``color`` contradict its ``new`` and
                ``use`` flags.
            KeyError: If a non-new hint refers to a color never defined.
        """
        du_graph.add_node(hint.pc)
        if type(hint) is hinting.DynamicRegisterValueProbHint:
            dv_info = [
                ("prob", hint.prob),
                ("type", "reg"),
                ("reg_name", hint.reg_name),
            ]
        elif type(hint) is hinting.DynamicMemoryValueProbHint:
            dv_info = [
                ("prob", hint.prob),
                ("type", "mem"),
                ("base", hint.base),
                ("index", hint.index),
                ("scale", hint.scale),
                ("offset", hint.offset),
            ]
        else:
            # Explicit raise (not ``assert``) so the check survives ``-O``
            # and ``dv_info`` can never be used unbound below.
            raise AssertionError(f"unexpected hint type {type(hint).__name__}")

        if hint.new:
            # A new color is either an input or a write of a computed value.
            assert "def" in hint.message
            assert hint.color not in color2nodeDv
            if hint.use:
                assert "read" in hint.message
                # A read of a never-before-seen color: hallucinate a node
                # representing the creation of that color ...
                color_node = f"input color-{hint.color}"
                du_graph.add_node(color_node)
                color2nodeDv[hint.color] = (color_node, None)
                # ... and an edge between that node and this instruction.
                du_graph.add_def_use(
                    color_node, hint.pc, color_node, dv_info, hint.color
                )
            else:
                assert "write" in hint.message
                color2nodeDv[hint.color] = (hint.pc, dv_info)
        else:
            # Not a new color, so this is a flow from its definition.
            assert "def" not in hint.message
            (def_node, def_dv_info) = color2nodeDv[hint.color]
            du_graph.add_def_use(def_node, hint.pc, def_dv_info, dv_info, hint.color)

    def activate(self):
        """Subscribe ``dynamic_value_summary`` to both dynamic-value hint types."""
        print("activating colorizer_summary")
        self.listen(hinting.DynamicRegisterValueProbHint, self.dynamic_value_summary)
        self.listen(hinting.DynamicMemoryValueProbHint, self.dynamic_value_summary)

    def deactivate(self):
        """Write the accumulated def-use graph to ``dugraph.json``.

        The graph is converted with ``networkx.node_link_data`` and
        serialised as real JSON (the file previously held a Python ``repr``,
        which JSON parsers reject).
        """
        import json

        jsg = nx.node_link_data(du_graph)
        with open("dugraph.json", "w", encoding="utf-8") as j:
            # ``default=str`` keeps this best-effort: any hint field that is
            # not natively JSON-serialisable is stringified, as before.
            json.dump(jsg, j, default=str)
@@ -0,0 +1,14 @@
1
+ from .field_analysis import (
2
+ FDAState,
3
+ FieldDetectionAnalysis,
4
+ ForcedFieldDetectionAnalysis,
5
+ )
6
+ from .malloc import FreeModel, MallocModel
7
+
8
+ __all__ = [
9
+ "FDAState",
10
+ "FieldDetectionAnalysis",
11
+ "ForcedFieldDetectionAnalysis",
12
+ "FreeModel",
13
+ "MallocModel",
14
+ ]
@@ -0,0 +1,536 @@
1
+ import logging
2
+ import typing
3
+
4
+ import angr
5
+ import claripy
6
+
7
+ from ... import analyses, emulators, exceptions, hinting, platforms, state
8
+ from ...emulators.angr.exceptions import PathTerminationSignal
9
+ from .. import forced_exec, underlays
10
+ from .guards import GuardTrackingScratchPlugin
11
+ from .hints import (
12
+ ClaripySerializable,
13
+ FieldEventHint,
14
+ PartialBitFieldAccessHint,
15
+ PartialByteFieldAccessHint,
16
+ PartialByteFieldWriteHint,
17
+ TrackedFieldHint,
18
+ UnknownFieldHint,
19
+ )
20
+
21
+ log = logging.getLogger(__name__)
22
+ hinter = hinting.get_hinter(__name__)
23
+
24
+ # Tell angr to be quiet please
25
+ logging.getLogger("angr").setLevel(logging.WARNING)
26
+
27
+
28
class FDAState:
    """Mutable bookkeeping attached to an emulator for field detection.

    All containers start empty and are distinct per instance.
    """

    def __init__(self):
        # Labels of every tracked field.
        self.fda_labels = set()
        # range(start, end) -> label of the tracked field covering it.
        self.fda_addr_to_label = {}
        # label -> range(start, end) of the tracked field.
        self.fda_label_to_addr = {}
        # (start, end) address pairs of tracked fields.
        self.fda_mem_ranges = set()
        # symbol name -> expression bound to that symbol.
        self.fda_bindings = {}
        self.fda_byte_labels = set()
        # range(start, end) -> "UNK.<addr>" label for hinted unknown fields.
        self.fda_addr_to_unk_label = {}
37
+
38
+
39
class FieldDetectionMixin(underlays.AnalysisUnderlay):
    """Analysis comparing state labels to field accesses.

    This assumes that memory labels correspond to fields
    of a structure, or a struct-coded buffer.

    The hypothesis is that any memory accesses that don't correspond
    to a defined field mean that your understanding of
    the as-implemented program is incorrect.

    This is an initial experiment,
    but the goal is to either help lift data formats,
    or compare a program against a known format.

    This approach works best against fixed-size binary data formats.
    It's got a limited ability to handle run-length-encoded formats
    by concretizing the run lengths.
    Attempting to handle a delimited or text-based format
    will likely lead to insanity.
    """

    name = "Field Detection Analysis"
    version = "0.0"
    description = "Detects discrepancies between labels and field accesses"

    # When True, a path is terminated as soon as it produces a hint;
    # when False, the access is given an "UNK" label and execution continues.
    halt_on_hint = True

    @staticmethod
    def _serialize_guards(emu):
        """Return the current state's non-constant guards for use in a hint.

        Each entry of ``emu.state.scratch.guards`` is an ``(ip, guard)`` pair;
        pairs whose guard op is ``"BoolV"`` are dropped and the remaining
        guards are wrapped in ``ClaripySerializable``.
        """
        return [
            (ip, ClaripySerializable(guard))
            for ip, guard in emu.state.scratch.guards
            if guard.op != "BoolV"
        ]

    def translate_global_addr(self, fda, addr):
        """Return the label of the first tracked range containing ``addr``.

        Returns the string ``"UNKNOWN"`` when no tracked range contains it.
        """
        for r, label in fda.fda_addr_to_label.items():
            if addr in r:
                return label
        return "UNKNOWN"

    def add_unk_label(self, emu, fda, addr, expr):
        """Bind ``expr`` to a fresh ``UNK.<addr>`` symbol and remember its range.

        A symbol of ``len(expr) // 8`` bytes is created, constrained equal to
        ``expr`` on the emulator, and the covered address range is recorded in
        ``fda.fda_addr_to_unk_label``.

        Returns:
            The newly created symbol.
        """
        size = len(expr) // 8
        r = range(addr, addr + size)
        label = f"UNK.{hex(addr)}"
        sym = claripy.BVS(label, size * 8)
        emu.add_constraint(sym == expr)

        fda.fda_addr_to_unk_label[r] = label
        return sym

    def mem_read_hook(self, emu, addr, size, expr):
        """Classify a symbolic memory read and emit a hint if it is suspicious.

        Reads of concrete values, of whole tracked fields, and of previously
        hinted ``UNK`` ranges are accepted silently.  Anything else produces
        an ``UnknownFieldHint``, ``PartialBitFieldAccessHint`` or
        ``PartialByteFieldAccessHint``.

        Arguments:
            emu: The emulator performing the read.
            addr: Start address of the read.
            size: Size of the read in bytes.
            expr: The expression being read.

        Returns:
            Always ``None``.

        Raises:
            PathTerminationSignal: After emitting a hint, if ``halt_on_hint``.
        """
        fda = emu.get_extension("fda")
        cheat = self.translate_global_addr(fda, addr)
        log.warning(
            f"{hex(emu.state._ip.concrete_value)}: READING {hex(addr)} - {hex(addr + size)} ({cheat})"
        )
        log.warning(f" {expr}")

        r = range(addr, addr + size)
        if r in fda.fda_addr_to_unk_label:
            log.warning(" Read from already-hinted field")
            return None

        if expr.op == "Reverse":
            # Bytes are reversed.
            # angr's memory is big-endian,
            # and the program interpreted it as a little-endian value.
            expr = expr.args[0]

        if expr.op == "BVV":
            # This is a concrete value. These are safe
            return None

        if expr.op == "BVS":
            # This is a symbol. It's okay if it's a field, or an UNK field
            label = expr.args[0].split("_")[0]
            if label in fda.fda_labels or label.startswith("UNK."):
                return None

            hint = UnknownFieldHint(
                pc=emu.read_register("pc"),
                guards=self._serialize_guards(emu),
                address=addr,
                size=size,
                # Tagged as a read, like every other read-path hint below.
                access="read",
                expr=str(expr),
            )
            hinter.info(hint)
            if self.halt_on_hint:
                raise PathTerminationSignal()
            self.add_unk_label(emu, fda, addr, expr)
            return None

        if expr.op == "Extract" and expr.args[2].op == "BVS":
            # This is a slice of a symbol, which means
            # the program read part of a field we labeled.
            # Dollars to ducats this means our labeling isn't precise enough.
            var = expr.args[2]
            # NOTE(review): the whole-symbol branch above matches on the name
            # up to the first "_", while this branch matches the full symbol
            # name; confirm which is intended for re-defined field symbols.
            var_name = var.args[0]

            if var_name not in fda.fda_labels:
                # This is not a label we know
                hint = UnknownFieldHint(
                    pc=emu.read_register("pc"),
                    guards=self._serialize_guards(emu),
                    address=addr,
                    size=size,
                    access="read",
                    expr=str(var),
                )
            else:
                # This is a partial read from a known field
                start = expr.args[0]
                end = expr.args[1]

                # Angr's bit numbering is annoying.
                field_range = fda.fda_label_to_addr[var_name]
                field_size = field_range.stop - field_range.start
                start = field_size * 8 - start - 1
                end = field_size * 8 - end

                if start % 8 != 0 or end % 8 != 0:
                    hint = PartialBitFieldAccessHint(
                        pc=emu.read_register("pc"),
                        guards=self._serialize_guards(emu),
                        address=addr,
                        size=size,
                        access="read",
                        label=var_name,
                        start=start,
                        end=end,
                    )
                else:
                    hint = PartialByteFieldAccessHint(
                        pc=emu.read_register("pc"),
                        guards=self._serialize_guards(emu),
                        address=addr,
                        size=size,
                        access="read",
                        label=var_name,
                        start=start // 8,
                        end=end // 8,
                    )
            hinter.info(hint)
            if self.halt_on_hint:
                raise PathTerminationSignal()
            self.add_unk_label(emu, fda, addr, expr)
            return None

        # This is a complex expression.
        # We're accessing the results of a computation
        hint = UnknownFieldHint(
            pc=emu.read_register("pc"),
            guards=self._serialize_guards(emu),
            address=addr,
            size=size,
            access="read",
            expr=str(expr),
        )
        hinter.info(hint)
        if self.halt_on_hint:
            raise PathTerminationSignal()
        self.add_unk_label(emu, fda, addr, expr)
        return None

    def mem_write_hook(self, emu, addr, size, expr):
        """Classify a symbolic memory write and emit a hint if it is suspicious.

        A write that exactly covers a tracked field (or a previously hinted
        ``UNK`` range) has its value replaced, via
        ``emu.state.inspect.mem_write_expr``, by a fresh symbol constrained
        equal to ``expr``.  A write that starts inside a tracked field but
        does not line up with it yields a ``PartialByteFieldWriteHint``; a
        write that starts in no tracked field yields an ``UnknownFieldHint``.

        Arguments:
            emu: The emulator performing the write.
            addr: Start address of the write.
            size: Size of the write in bytes.
            expr: The expression being written.

        Raises:
            PathTerminationSignal: After emitting a hint, if ``halt_on_hint``.
            exceptions.ConfigurationError: If the write is both a complete
                and a partial write, i.e. the labels overlap.
        """
        fda = emu.get_extension("fda")
        cheat = self.translate_global_addr(fda, addr)
        log.warning(
            f"{hex(emu.state._ip.concrete_value)}: WRITING {hex(addr)} - {hex(addr + size)} ({cheat})"
        )
        log.warning(f" {expr}")

        good = False
        bad = False

        r = range(addr, addr + size)
        if r in fda.fda_addr_to_unk_label:
            label = fda.fda_addr_to_unk_label[r]
            log.warning(f" Write to already-hinted field {label}")
            sym = claripy.BVS(label, size * 8)
            emu.add_constraint(sym == expr)
            emu.state.inspect.mem_write_expr = sym

            return

        for r, label in fda.fda_addr_to_label.items():
            if addr not in r:
                continue
            # Write is within an existing field
            if r.start == addr and r.stop == addr + size:
                # Write lines up with the existing field.
                # Create a new symbol for the new def of this field.
                log.warning(f" Write to entire field {cheat}")
                sym = claripy.BVS(label, size * 8)
                emu.add_constraint(sym == expr)
                emu.state.inspect.mem_write_expr = sym
                fda.fda_bindings[sym.args[0]] = expr
                good = True
            else:
                # Write does not line up with the existing field
                start = addr - r.start
                end = addr + size - r.start
                hint = PartialByteFieldWriteHint(
                    pc=emu.read_register("pc"),
                    guards=self._serialize_guards(emu),
                    address=addr,
                    size=size,
                    label=label,
                    start=start,
                    end=end,
                    expr=str(expr),
                )
                hinter.info(hint)
                bad = True

        if bad and good:
            log.error("Write was complete and partial; your labels overlap.")
            raise exceptions.ConfigurationError("Overlapping labels")
        elif bad:
            if self.halt_on_hint:
                raise PathTerminationSignal()
            sym = self.add_unk_label(emu, fda, addr, expr)
            emu.state.inspect.mem_write_expr = sym
            return
        elif good:
            return
        else:
            # Write doesn't overlap any known fields
            hint = UnknownFieldHint(
                pc=emu.read_register("pc"),
                guards=self._serialize_guards(emu),
                address=addr,
                size=size,
                access="write",
                expr=str(expr),
            )
            hinter.info(hint)
            if self.halt_on_hint:
                raise PathTerminationSignal()
            sym = self.add_unk_label(emu, fda, addr, expr)
            emu.state.inspect.mem_write_expr = sym
            return

    def angr_preinit(self, emu):
        """Install a plugin preset that uses ``GuardTrackingScratchPlugin``.

        Copies angr's ``"default"`` ``SimState`` preset, registers the
        guard-tracking plugin as its ``"scratch"`` plugin, and stores the
        result on ``emu._plugin_preset``.
        """
        preset = angr.SimState._presets["default"].copy()
        preset.add_default_plugin("scratch", GuardTrackingScratchPlugin)
        emu._plugin_preset = preset

    def run(self, machine):
        """Run field detection over ``machine``.

        Applies the machine to ``self.emulator``, registers every labeled
        memory value as a tracked field, hooks symbolic reads and writes over
        those fields, executes, and finally logs the guards and field values
        of every state in the ``"deadended"`` stash.

        Raises:
            exceptions.ConfigurationError: If two values share a label.
        """
        # Set up the filter analysis
        fda = FDAState()
        filt = FieldDetectionFilter()
        filt.activate()

        # Set up the emulator

        machine.apply(self.emulator)
        self.emulator.add_extension("fda", fda)

        # Capture the labeled memory ranges
        for s in machine:
            if not isinstance(s, state.memory.Memory):
                continue
            start = s.address
            end = start + s.get_capacity()
            log.warning(f"{hex(start)} - {hex(end)}")
            for off, val in s.items():
                label = val.get_label()
                if label is None:
                    continue
                # This is a labeled value. It represents a field we want to track.
                val_start = start + off
                val_end = val_start + val.get_size()
                log.warning(
                    f" {hex(val_start)} - {hex(val_end)}: {val} := {label}"
                )
                hint = TrackedFieldHint(
                    message="Tracking new field",
                    address=val_start,
                    size=val.get_size(),
                    label=label,
                )
                hinter.info(hint)

                if label in fda.fda_labels:
                    log.error(
                        f"You reused label {label}; please give it a unique name"
                    )
                    raise exceptions.ConfigurationError("Duplicate field name")

                r = range(val_start, val_end)
                fda.fda_labels.add(label)
                fda.fda_addr_to_label[r] = label
                fda.fda_label_to_addr[label] = r
                fda.fda_bindings[label] = claripy.BVS(
                    label, val.get_size() * 8, explicit_name=True
                )
                fda.fda_mem_ranges.add((val_start, val_end))

        for start, end in fda.fda_mem_ranges:
            self.emulator.hook_memory_read_symbolic(start, end, self.mem_read_hook)
            self.emulator.hook_memory_write_symbolic(start, end, self.mem_write_hook)

        self.execute()

        log.warning(self.emulator.mgr)
        log.warning(self.emulator.mgr.errored)

        def state_visitor(emu: emulators.Emulator) -> None:
            """Log the guards and tracked-field values of one final state."""
            if not isinstance(emu, emulators.AngrEmulator):
                raise TypeError(type(emu))
            fda = emu.get_extension("fda")
            # Remove the hooks first so the loads below do not emit hints.
            for start, end in fda.fda_mem_ranges:
                emu.unhook_memory_read(start, end)
                emu.unhook_memory_write(start, end)

            log.warning(f"State at {emu.state._ip}:")
            log.warning(" Guards:")
            for ip, guard in emu.state.scratch.guards:
                if guard.op == "BoolV":
                    continue
                log.warning(f" {hex(ip)}: {guard}")
            log.warning(" Fields:")
            for r, field_label in fda.fda_addr_to_label.items():
                val = emu.state.memory.load(r.start, r.stop - r.start)
                # Default to the field's own label so the summary line never
                # uses an unbound name or one left over from a previous field.
                label = field_label
                if len(val.variables) == 1:
                    # val is extremely likely a bound symbol.
                    # Fetch the binding
                    (label,) = val.variables
                    if label in fda.fda_bindings:
                        val = fda.fda_bindings[label]
                    else:
                        log.error(f" Unknown variable {label}")

                log.warning(f" {hex(r.start)} - {hex(r.stop)}: {label} = {val}")

        self.emulator.visit_states(state_visitor, stash="deadended")
        filt.deactivate()
421
+
422
+
423
class FieldDetectionFilter(analyses.Filter):
    """Secondary field definition analysis.

    Collects partial byte-field accesses as they are hinted, then, on
    deactivation, reports labels where several differently-sized accesses
    begin at the same offset, together with the variables whose guards
    distinguish those accesses.
    """

    name = "field-detection-filter"
    version = "0.0"
    description = ""

    def __init__(self):
        super().__init__()
        # Guards against running the post-processing more than once.
        self.active = True
        # label -> {(start, end) -> [hints]}
        self.partial_ranges = {}

    def analyze(self, hint: hinting.Hint):
        """Pretty-print a field event hint and remember partial byte accesses."""
        if not isinstance(hint, FieldEventHint):
            return
        # The raw hint logging is unreadable; print it in a sane format.
        hint.pp(log.error)

        if not isinstance(hint, PartialByteFieldAccessHint):
            return
        # Remember this field access for the post-processing pass.
        by_range = self.partial_ranges.setdefault(hint.label, {})
        by_range.setdefault((hint.start, hint.end), []).append(hint)

    def activate(self):
        """Start listening for ``FieldEventHint`` hints."""
        self.listen(FieldEventHint, self.analyze)

    def deactivate(self):
        """Stop listening and, the first time only, report cross-hint trends."""
        super().deactivate()
        if not self.active:
            return
        self.active = False

        solver = claripy.solvers.Solver()

        for label, fields in self.partial_ranges.items():
            # Collate, per start offset, the set of guard expressions seen
            # for each end offset (unified across all hints).
            fields_by_start = {}
            for (start, end), hints in fields.items():
                guards = fields_by_start.setdefault(start, {}).setdefault(end, set())
                for hint in hints:
                    guards.update(entry[1].expr for entry in hint.guards)

            for start, rest in fields_by_start.items():
                # Only starts with more than one distinct end are overlaps.
                if len(rest) <= 1:
                    continue

                log.error(f"Multiple field defs starting at {label}[{start}]:")
                for end in rest:
                    log.error(f" {label}[{start}:{end}]")

                # Look for pairs of guards that can never be equal
                # (guard vs. !guard); their variables are the candidate
                # controlling fields.
                control_vars = set()
                for a_guards in rest.values():
                    for b_guards in rest.values():
                        for a_guard in a_guards:
                            for b_guard in b_guards:
                                if solver.satisfiable([a_guard == b_guard]):
                                    continue
                                control_vars.update(a_guard.variables)
                                control_vars.update(b_guard.variables)

                if control_vars:
                    log.error(
                        " One or more of the following fields may control the format,"
                    )
                    log.error(" either as a flag, type code or length:")
                    for var in control_vars:
                        log.error(f" {var}")
507
+
508
+
509
class FieldDetectionAnalysis(FieldDetectionMixin, underlays.BasicAnalysisUnderlay):
    """Field detection driven by full path exploration."""

    name = "Field Detection Analysis"
    version = "0.0"
    description = "Detects discrepancies between labels and field accesses"

    def __init__(self, platform: platforms.Platform):
        """Create an angr emulator for ``platform`` with guard tracking installed."""
        self.platform = platform
        emulator = emulators.AngrEmulator(self.platform, preinit=self.angr_preinit)
        self.emulator = emulator
519
+
520
+
521
class ForcedFieldDetectionAnalysis(
    FieldDetectionMixin, forced_exec.ForcedExecutionUnderlay
):
    """Field detection driven by a forced execution trace.

    Hints do not terminate the path here (``halt_on_hint`` is False).
    """

    name = "Field Detection Analysis - Forced"
    version = "0.0"
    description = "Detects discrepancies between labels and field accesses"

    halt_on_hint = False

    def __init__(
        self, platform: platforms.Platform, trace: typing.List[typing.Dict[str, int]]
    ):
        """Create a linear-mode angr emulator and hand ``trace`` to the underlay."""
        self.platform = platform
        emulator = emulators.AngrEmulator(self.platform, preinit=self.angr_preinit)
        self.emulator = emulator
        self.emulator.enable_linear()
        super().__init__(trace)
@@ -0,0 +1,26 @@
1
+ from ...emulators.angr.scratch import ExpandedScratchPlugin
2
+
3
+
4
class GuardTrackingScratchPlugin(ExpandedScratchPlugin):
    """Scratch plugin that keeps a history of the guards assigned to it.

    ``guards`` is a list of ``(instruction pointer, guard expression)``
    pairs, holding at most one consecutive entry per instruction pointer.
    """

    def __init__(self, scratch=None):
        # Both attributes are set before the base initialiser runs.
        self._guard = None
        self.guards = []
        super().__init__(scratch=scratch)
        if scratch is not None:
            # Carry over the history of the plugin being copied.
            self.guards.extend(scratch.guards)

    @property
    def guard(self):
        """The most recently assigned guard expression."""
        return self._guard

    @guard.setter
    def guard(self, expr):
        self._guard = expr
        if self.state is None:
            return
        entry = (self.state._ip.concrete_value, expr)
        # Same instruction pointer as the latest entry: replace it.
        # Otherwise (or when the history is empty): append.
        if self.guards and self.guards[-1][0] == entry[0]:
            self.guards[-1] = entry
        else:
            self.guards.append(entry)