smallworld-re 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166) hide show
  1. smallworld/__init__.py +35 -0
  2. smallworld/analyses/__init__.py +14 -0
  3. smallworld/analyses/analysis.py +88 -0
  4. smallworld/analyses/code_coverage.py +31 -0
  5. smallworld/analyses/colorizer.py +682 -0
  6. smallworld/analyses/colorizer_summary.py +100 -0
  7. smallworld/analyses/field_detection/__init__.py +14 -0
  8. smallworld/analyses/field_detection/field_analysis.py +536 -0
  9. smallworld/analyses/field_detection/guards.py +26 -0
  10. smallworld/analyses/field_detection/hints.py +133 -0
  11. smallworld/analyses/field_detection/malloc.py +211 -0
  12. smallworld/analyses/forced_exec/__init__.py +3 -0
  13. smallworld/analyses/forced_exec/forced_exec.py +87 -0
  14. smallworld/analyses/underlays/__init__.py +4 -0
  15. smallworld/analyses/underlays/basic.py +13 -0
  16. smallworld/analyses/underlays/underlay.py +31 -0
  17. smallworld/analyses/unstable/__init__.py +4 -0
  18. smallworld/analyses/unstable/angr/__init__.py +0 -0
  19. smallworld/analyses/unstable/angr/base.py +12 -0
  20. smallworld/analyses/unstable/angr/divergence.py +274 -0
  21. smallworld/analyses/unstable/angr/model.py +383 -0
  22. smallworld/analyses/unstable/angr/nwbt.py +63 -0
  23. smallworld/analyses/unstable/angr/typedefs.py +170 -0
  24. smallworld/analyses/unstable/angr/utils.py +25 -0
  25. smallworld/analyses/unstable/angr/visitor.py +315 -0
  26. smallworld/analyses/unstable/angr_nwbt.py +106 -0
  27. smallworld/analyses/unstable/code_coverage.py +54 -0
  28. smallworld/analyses/unstable/code_reachable.py +44 -0
  29. smallworld/analyses/unstable/control_flow_tracer.py +71 -0
  30. smallworld/analyses/unstable/pointer_finder.py +90 -0
  31. smallworld/arch/__init__.py +0 -0
  32. smallworld/arch/aarch64_arch.py +286 -0
  33. smallworld/arch/amd64_arch.py +86 -0
  34. smallworld/arch/i386_arch.py +44 -0
  35. smallworld/emulators/__init__.py +14 -0
  36. smallworld/emulators/angr/__init__.py +7 -0
  37. smallworld/emulators/angr/angr.py +1652 -0
  38. smallworld/emulators/angr/default.py +15 -0
  39. smallworld/emulators/angr/exceptions.py +7 -0
  40. smallworld/emulators/angr/exploration/__init__.py +9 -0
  41. smallworld/emulators/angr/exploration/bounds.py +27 -0
  42. smallworld/emulators/angr/exploration/default.py +17 -0
  43. smallworld/emulators/angr/exploration/terminate.py +22 -0
  44. smallworld/emulators/angr/factory.py +55 -0
  45. smallworld/emulators/angr/machdefs/__init__.py +35 -0
  46. smallworld/emulators/angr/machdefs/aarch64.py +292 -0
  47. smallworld/emulators/angr/machdefs/amd64.py +192 -0
  48. smallworld/emulators/angr/machdefs/arm.py +387 -0
  49. smallworld/emulators/angr/machdefs/i386.py +221 -0
  50. smallworld/emulators/angr/machdefs/machdef.py +138 -0
  51. smallworld/emulators/angr/machdefs/mips.py +184 -0
  52. smallworld/emulators/angr/machdefs/mips64.py +189 -0
  53. smallworld/emulators/angr/machdefs/ppc.py +101 -0
  54. smallworld/emulators/angr/machdefs/riscv.py +261 -0
  55. smallworld/emulators/angr/machdefs/xtensa.py +255 -0
  56. smallworld/emulators/angr/memory/__init__.py +7 -0
  57. smallworld/emulators/angr/memory/default.py +10 -0
  58. smallworld/emulators/angr/memory/fixups.py +43 -0
  59. smallworld/emulators/angr/memory/memtrack.py +105 -0
  60. smallworld/emulators/angr/scratch.py +43 -0
  61. smallworld/emulators/angr/simos.py +53 -0
  62. smallworld/emulators/angr/utils.py +70 -0
  63. smallworld/emulators/emulator.py +1013 -0
  64. smallworld/emulators/hookable.py +252 -0
  65. smallworld/emulators/panda/__init__.py +5 -0
  66. smallworld/emulators/panda/machdefs/__init__.py +28 -0
  67. smallworld/emulators/panda/machdefs/aarch64.py +93 -0
  68. smallworld/emulators/panda/machdefs/amd64.py +71 -0
  69. smallworld/emulators/panda/machdefs/arm.py +89 -0
  70. smallworld/emulators/panda/machdefs/i386.py +36 -0
  71. smallworld/emulators/panda/machdefs/machdef.py +86 -0
  72. smallworld/emulators/panda/machdefs/mips.py +94 -0
  73. smallworld/emulators/panda/machdefs/mips64.py +91 -0
  74. smallworld/emulators/panda/machdefs/ppc.py +79 -0
  75. smallworld/emulators/panda/panda.py +575 -0
  76. smallworld/emulators/unicorn/__init__.py +13 -0
  77. smallworld/emulators/unicorn/machdefs/__init__.py +28 -0
  78. smallworld/emulators/unicorn/machdefs/aarch64.py +310 -0
  79. smallworld/emulators/unicorn/machdefs/amd64.py +326 -0
  80. smallworld/emulators/unicorn/machdefs/arm.py +321 -0
  81. smallworld/emulators/unicorn/machdefs/i386.py +137 -0
  82. smallworld/emulators/unicorn/machdefs/machdef.py +117 -0
  83. smallworld/emulators/unicorn/machdefs/mips.py +202 -0
  84. smallworld/emulators/unicorn/unicorn.py +684 -0
  85. smallworld/exceptions/__init__.py +5 -0
  86. smallworld/exceptions/exceptions.py +85 -0
  87. smallworld/exceptions/unstable/__init__.py +1 -0
  88. smallworld/exceptions/unstable/exceptions.py +25 -0
  89. smallworld/extern/__init__.py +4 -0
  90. smallworld/extern/ctypes.py +94 -0
  91. smallworld/extern/unstable/__init__.py +1 -0
  92. smallworld/extern/unstable/ghidra.py +129 -0
  93. smallworld/helpers.py +107 -0
  94. smallworld/hinting/__init__.py +8 -0
  95. smallworld/hinting/hinting.py +214 -0
  96. smallworld/hinting/hints.py +427 -0
  97. smallworld/hinting/unstable/__init__.py +2 -0
  98. smallworld/hinting/utils.py +19 -0
  99. smallworld/instructions/__init__.py +18 -0
  100. smallworld/instructions/aarch64.py +20 -0
  101. smallworld/instructions/arm.py +18 -0
  102. smallworld/instructions/bsid.py +67 -0
  103. smallworld/instructions/instructions.py +258 -0
  104. smallworld/instructions/mips.py +21 -0
  105. smallworld/instructions/x86.py +100 -0
  106. smallworld/logging.py +90 -0
  107. smallworld/platforms.py +95 -0
  108. smallworld/py.typed +0 -0
  109. smallworld/state/__init__.py +6 -0
  110. smallworld/state/cpus/__init__.py +32 -0
  111. smallworld/state/cpus/aarch64.py +563 -0
  112. smallworld/state/cpus/amd64.py +676 -0
  113. smallworld/state/cpus/arm.py +630 -0
  114. smallworld/state/cpus/cpu.py +71 -0
  115. smallworld/state/cpus/i386.py +239 -0
  116. smallworld/state/cpus/mips.py +374 -0
  117. smallworld/state/cpus/mips64.py +372 -0
  118. smallworld/state/cpus/powerpc.py +229 -0
  119. smallworld/state/cpus/riscv.py +357 -0
  120. smallworld/state/cpus/xtensa.py +80 -0
  121. smallworld/state/memory/__init__.py +7 -0
  122. smallworld/state/memory/code.py +70 -0
  123. smallworld/state/memory/elf/__init__.py +3 -0
  124. smallworld/state/memory/elf/elf.py +564 -0
  125. smallworld/state/memory/elf/rela/__init__.py +32 -0
  126. smallworld/state/memory/elf/rela/aarch64.py +27 -0
  127. smallworld/state/memory/elf/rela/amd64.py +32 -0
  128. smallworld/state/memory/elf/rela/arm.py +51 -0
  129. smallworld/state/memory/elf/rela/i386.py +32 -0
  130. smallworld/state/memory/elf/rela/mips.py +45 -0
  131. smallworld/state/memory/elf/rela/ppc.py +45 -0
  132. smallworld/state/memory/elf/rela/rela.py +63 -0
  133. smallworld/state/memory/elf/rela/riscv64.py +27 -0
  134. smallworld/state/memory/elf/rela/xtensa.py +15 -0
  135. smallworld/state/memory/elf/structs.py +55 -0
  136. smallworld/state/memory/heap.py +85 -0
  137. smallworld/state/memory/memory.py +181 -0
  138. smallworld/state/memory/stack/__init__.py +31 -0
  139. smallworld/state/memory/stack/aarch64.py +22 -0
  140. smallworld/state/memory/stack/amd64.py +42 -0
  141. smallworld/state/memory/stack/arm.py +66 -0
  142. smallworld/state/memory/stack/i386.py +22 -0
  143. smallworld/state/memory/stack/mips.py +34 -0
  144. smallworld/state/memory/stack/mips64.py +34 -0
  145. smallworld/state/memory/stack/ppc.py +34 -0
  146. smallworld/state/memory/stack/riscv.py +22 -0
  147. smallworld/state/memory/stack/stack.py +127 -0
  148. smallworld/state/memory/stack/xtensa.py +34 -0
  149. smallworld/state/models/__init__.py +6 -0
  150. smallworld/state/models/mmio.py +186 -0
  151. smallworld/state/models/model.py +163 -0
  152. smallworld/state/models/posix.py +455 -0
  153. smallworld/state/models/x86/__init__.py +2 -0
  154. smallworld/state/models/x86/microsoftcdecl.py +35 -0
  155. smallworld/state/models/x86/systemv.py +240 -0
  156. smallworld/state/state.py +962 -0
  157. smallworld/state/unstable/__init__.py +0 -0
  158. smallworld/state/unstable/elf.py +393 -0
  159. smallworld/state/x86_registers.py +30 -0
  160. smallworld/utils.py +935 -0
  161. smallworld_re-1.0.0.dist-info/LICENSE.txt +21 -0
  162. smallworld_re-1.0.0.dist-info/METADATA +189 -0
  163. smallworld_re-1.0.0.dist-info/RECORD +166 -0
  164. smallworld_re-1.0.0.dist-info/WHEEL +5 -0
  165. smallworld_re-1.0.0.dist-info/entry_points.txt +2 -0
  166. smallworld_re-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,682 @@
1
+ import base64
2
+ import copy
3
+ import logging
4
+ import random
5
+ import typing
6
+
7
+ import capstone
8
+
9
+ from .. import hinting, state
10
+ from ..emulators import (
11
+ UnicornEmulationMemoryReadError,
12
+ UnicornEmulationMemoryWriteError,
13
+ UnicornEmulator,
14
+ )
15
+ from ..exceptions import AnalysisRunError, EmulationBounds
16
+ from ..instructions import (
17
+ BSIDMemoryReferenceOperand,
18
+ Instruction,
19
+ Operand,
20
+ RegisterOperand,
21
+ )
22
+ from . import analysis
23
+
24
# Module-level logger for diagnostic output from this analysis.
logger = logging.getLogger(__name__)
# Hinter through which this analysis emits its hints.
hinter = hinting.get_hinter(__name__)

# Integer values below this threshold are rejected as colors
# (see Colorizer._concrete_val_to_color).
MIN_ACCEPTABLE_COLOR_INT = 20
# Sentinel returned in place of a color string for rejected values.
BAD_COLOR = "BAD_COLOR"

# Map from a color (base64 text of a value's bytes) to the tuple
# (operand, micro-execution number, instruction number, instruction,
# color number) recorded when that color was first observed.
Colors = typing.Dict[str, typing.Tuple[Operand, int, int, Instruction, int]]
31
+
32
+
33
class Colorizer(analysis.Analysis):
    """A simple kind of data flow analysis via tracking distinct values (colors)
    and employing instruction use/def analysis.

    We run multiple micro-executions of the code starting from the same entry.
    At the start of each, we randomize the bytes of general-purpose registers
    that have not already been initialized. We maintain a "colors" map from
    values to when we first observed them. This map is initially empty. Before
    emulating an instruction, we examine the values (registers and memory) it
    will read. If any are NOT in the colors map, that is the initial sighting
    of that value and we emit a hint to that effect. If any color IS already in
    the map, then that is a flow from the time at which that value was first
    observed to this instruction. Similarly, after emulating an instruction, we
    examine every value (register and memory) written. If a value is not in
    the colors map, it is a new, computed result and we hint about its
    creation. If it is in the colors map, it is just a copy of a value seen
    earlier, and we hint about it as a copy.

    Whilst looking at reads and writes for instructions, we hint if any
    correspond to unavailable memory.

    Arguments:
        num_micro_executions: The number of micro-executions to run.
        num_insns: The number of instructions to micro-execute.
        seed: Random seed for test stability, or None.

    """

    name = "colorizer"
    description = "it's almost taint"
    version = "0.0.1"
63
+
64
def __init__(
    self,
    *args,
    num_micro_executions: int = 5,
    num_insns: int = 200,
    seed: typing.Optional[int] = 99,
    **kwargs
):
    """Set up the analysis and remember its micro-execution parameters.

    Arguments:
        num_micro_executions: How many micro-executions ``run`` performs.
        num_insns: Upper bound on instructions stepped per micro-execution.
        seed: Seed for this analysis's private random generator, or None.
    """
    super().__init__(*args, **kwargs)
    self.num_micro_executions = num_micro_executions
    self.num_insns = num_insns
    self.seed = seed
    # A private generator avoids contention over the module-level
    # random state.
    self.random = random.Random()
79
+
80
def _get_instr_at_pc(self, pc: int) -> capstone.CsInsn:
    """Disassemble and return the instruction located at ``pc``.

    Arguments:
        pc: Address of the instruction to decode.

    Returns:
        The first instruction the emulator's disassembler produces for
        the bytes read at ``pc``.

    Raises:
        AnalysisRunError: If the emulator returns no bytes for ``pc``.
    """
    # 15 bytes: longest possible instruction
    raw = self.emu.read_memory(pc, 15)
    if raw is None:
        raise AnalysisRunError(
            "Unable to read next instruction out of emulator memory"
        )
    # The second element of the result is not needed here.
    instructions, _ = self.emu._disassemble(raw, pc, 2)
    return instructions[0]
89
+
90
def _operand_size(self, operand: Operand) -> int:
    """Return the size of ``operand``, or 0 for an unrecognized operand kind.

    Arguments:
        operand: The instruction operand to measure.

    Returns:
        The operand's own size for a memory reference, the size of the
        same-named register on ``self.cpu`` for a register operand, and
        0 for anything else.
    """
    kind = type(operand)
    if kind is BSIDMemoryReferenceOperand:
        # a memory operand carries its own size
        return operand.size
    if kind is RegisterOperand:
        # look the register up on the cpu by name to get its size
        return getattr(self.cpu, operand.name).size
    return 0
98
+
99
def run(self, machine: state.Machine) -> None:
    """Micro-execute ``machine`` several times and emit data-flow hints.

    Each micro-execution starts from a fresh copy of ``machine`` in a new
    UnicornEmulator, randomizes registers, and then single-steps up to
    ``self.num_insns`` instructions. Before each step the instruction's
    read operands are colored and checked; after each step its write
    operands are. Hints from all micro-executions are then grouped into
    equivalence classes, and one probability hint is emitted per class,
    where the probability is the fraction of micro-executions in which
    the class was seen at least once.

    Arguments:
        machine: The machine state to analyze. It is deep-copied and not
            modified. The start pc comes from its cpu state.
    """
    # collect hints for each micro-execution, in a list of lists
    hint_list_list: typing.List[typing.List[hinting.Hint]] = []

    self.orig_machine = copy.deepcopy(machine)
    self.orig_cpu = self.orig_machine.get_cpu()
    self.platform = self.orig_cpu.platform

    # Seed once per run. Reseeding with the same value at the top of every
    # micro-execution would restart the generator's sequence each time.
    if self.seed is not None:
        self.random.seed(a=self.seed)

    for i in range(self.num_micro_executions):
        logger.info("-------------------------")
        logger.info(f"micro exec #{i}")

        self.machine = copy.deepcopy(self.orig_machine)
        self.cpu = self.machine.get_cpu()
        self.emu = UnicornEmulator(self.platform)
        self.machine.apply(self.emu)

        # initialize registers with random values
        self._randomize_registers()

        # map from color values to first use / def
        self.colors: Colors = {}

        hint_list: typing.List[hinting.Hint] = []
        for j in range(self.num_insns):
            logger.info(f"instr_count = {j}")
            # obtain instr about to be emulated
            pc = self.emu.read_register("pc")
            if pc in self.emu.get_exit_points():
                break
            cs_insn = self._get_instr_at_pc(pc)
            sw_insn = Instruction.from_capstone(cs_insn)

            logger.debug(sw_insn)

            # pull state back out of the emulator for inspection
            m = copy.deepcopy(self.machine)
            m.extract(self.emu)
            self.cpu = m.get_cpu()

            reads: typing.List[typing.Tuple[Operand, str, int]] = []
            for read_operand in sw_insn.reads:
                logger.debug(f"pc={pc:x} read_operand={read_operand}")

                if (
                    type(read_operand) is RegisterOperand
                    and read_operand.name == "rflags"
                ):
                    continue

                sz = self._operand_size(read_operand)
                if type(read_operand) is BSIDMemoryReferenceOperand:
                    a = read_operand.address(self.emu)
                    ar = (a, a + sz)
                    if not self.emu._is_address_range_mapped(ar):
                        # at least one byte in this range is not mapped
                        # so don't add this read to the list; the step
                        # below reports the unmapped read
                        continue
                read_operand_color = self._concrete_val_to_color(
                    read_operand.concretize(self.emu), sz
                )
                # discard bad colors
                if read_operand_color == BAD_COLOR:
                    continue
                reads.append((read_operand, read_operand_color, sz))
            # sort by operand repr so hints come out in a stable order
            reads.sort(key=lambda entry: repr(entry[0]))
            self._check_colors_instruction_reads(reads, sw_insn, i, j, hint_list)

            try:
                self.emu.step()
            except EmulationBounds:
                logger.info(
                    "emulation complete. encountered exit point or went out of bounds"
                )
                break
            except UnicornEmulationMemoryWriteError as e:
                for write_operand, conc_val in e.details["writes"]:
                    if type(write_operand) is BSIDMemoryReferenceOperand:
                        if conc_val is None:
                            h = self._mem_unavailable_hint(
                                write_operand, e.pc, i, j, False
                            )
                            hint_list.append(h)
                # the failed step ends this micro-execution
                break
            except UnicornEmulationMemoryReadError as e:
                for read_operand in e.details["unmapped_reads"]:
                    if type(read_operand) is BSIDMemoryReferenceOperand:
                        h = self._mem_unavailable_hint(
                            read_operand, e.pc, i, j, True
                        )
                        hint_list.append(h)
                # the failed step ends this micro-execution
                break
            except Exception as e:
                # Emulating this instruction failed for some other reason.
                # Record it as a hint and end this micro-execution; never
                # drop into an interactive debugger here.
                exhint = hinting.EmulationException(
                    message=f"In analysis, single step raised an exception {e}",
                    pc=pc,
                    instruction_num=j,
                    exception=str(e),
                )
                hint_list.append(exhint)
                hinter.debug(exhint)
                logger.info(e)
                break

            writes: typing.List[typing.Tuple[Operand, str, int]] = []
            for write_operand in sw_insn.writes:
                logger.debug(f"pc={pc:x} write_operand={write_operand}")

                if (
                    type(write_operand) is RegisterOperand
                    and write_operand.name == "rflags"
                ):
                    continue

                sz = self._operand_size(write_operand)
                try:
                    write_operand_color = self._concrete_val_to_color(
                        write_operand.concretize(self.emu), sz
                    )
                    # discard bad colors
                    if write_operand_color == BAD_COLOR:
                        continue
                except Exception as e:
                    # Reading the written value back failed; report the
                    # operand as unavailable memory.
                    logger.debug(e)
                    h = self._mem_unavailable_hint(write_operand, pc, i, j, False)
                    hint_list.append(h)
                    continue
                writes.append((write_operand, write_operand_color, sz))
            writes.sort(key=lambda entry: repr(entry[0]))
            self._check_colors_instruction_writes(writes, sw_insn, i, j, hint_list)

        hint_list_list.append(hint_list)

    logger.info("-------------------------")

    # if two hints map to the same key then they are in same equivalence class
    def hint_key(hint):
        if type(hint) is hinting.DynamicRegisterValueHint:
            return (
                "dynamic_register_value",
                hint.pc,
                not hint.use,
                hint.color,
                hint.new,
                hint.message,
                hint.reg_name,
            )
        if type(hint) is hinting.DynamicMemoryValueHint:
            return (
                "dynamic_memory_value",
                hint.pc,
                not hint.use,
                hint.color,
                hint.new,
                hint.message,
                hint.base,
                hint.index,
                hint.scale,
                hint.offset,
            )
        if type(hint) is hinting.MemoryUnavailableHint:
            return (
                "memory_unavailable",
                hint.pc,
                hint.size,
                hint.message,
                hint.base_reg_name,
                hint.index_reg_name,
                hint.offset,
                hint.scale,
            )
        if type(hint) is hinting.EmulationException:
            return (
                "emulation_exception",
                hint.pc,
                hint.instruction_num,
                hint.exception,
            )

    # Keep the first hint seen for each equivalence class as its exemplar,
    # and record which classes each micro-execution observed.
    hk_exemplar: typing.Dict[typing.Any, hinting.Hint] = {}
    hk_observed: typing.List[typing.Set[typing.Any]] = []
    for hint_list in hint_list_list:
        observed = set()
        for hint in hint_list:
            hk = hint_key(hint)
            observed.add(hk)
            hk_exemplar.setdefault(hk, hint)
        hk_observed.append(observed)

    # estimate "probability" of observing a hint in an equiv class as
    # fraction of micro executions in which it was observed at least once
    for hk in sorted(hk_exemplar):
        seen_in = sum(1 for observed in hk_observed if hk in observed)
        prob = float(seen_in) / self.num_micro_executions
        hint = hk_exemplar[hk]

        # EmulationException hints have no probability variant and so
        # produce no summary hint here.
        if type(hint) is hinting.DynamicRegisterValueHint:
            hinter.info(
                hinting.DynamicRegisterValueProbHint(
                    pc=hint.pc,
                    reg_name=hint.reg_name,
                    color=hint.color,
                    size=hint.size,
                    use=hint.use,
                    new=hint.new,
                    prob=prob,
                    message=hint.message + "-prob",
                )
            )
        if type(hint) is hinting.DynamicMemoryValueHint:
            hinter.info(
                hinting.DynamicMemoryValueProbHint(
                    pc=hint.pc,
                    size=hint.size,
                    base=hint.base,
                    index=hint.index,
                    scale=hint.scale,
                    offset=hint.offset,
                    color=hint.color,
                    use=hint.use,
                    new=hint.new,
                    prob=prob,
                    message=hint.message + "-prob",
                )
            )
        if type(hint) is hinting.MemoryUnavailableHint:
            hinter.info(
                hinting.MemoryUnavailableProbHint(
                    is_read=hint.is_read,
                    size=hint.size,
                    base_reg_name=hint.base_reg_name,
                    index_reg_name=hint.index_reg_name,
                    offset=hint.offset,
                    scale=hint.scale,
                    pc=hint.pc,
                    prob=prob,
                    message=hint.message + "-prob",
                )
            )
414
+
415
def _concrete_val_to_color(
    self, concrete_value: typing.Union[int, bytes, bytearray], size: int
) -> str:
    """Convert a concrete operand value into its color string.

    The value can be an int (if it came from a register) or bytes (if it
    came from a memory read). Both are reduced to a common form, the
    base64 text of the value's bytes, so they can be compared as colors.

    Arguments:
        concrete_value: The value to convert.
        size: Number of bytes used to encode an int value (little-endian).
            Ignored for bytes-like values.

    Returns:
        The base64 color string, or BAD_COLOR when the value, read as an
        integer, is below MIN_ACCEPTABLE_COLOR_INT.

    Raises:
        TypeError: If ``concrete_value`` is neither an int nor bytes-like.
    """
    if isinstance(concrete_value, int):
        if concrete_value < MIN_ACCEPTABLE_COLOR_INT:
            return BAD_COLOR
        the_bytes = concrete_value.to_bytes(size, byteorder="little")
    elif isinstance(concrete_value, (bytes, bytearray)):
        # assuming little-endian
        if (
            int.from_bytes(concrete_value, byteorder="little")
            < MIN_ACCEPTABLE_COLOR_INT
        ):
            return BAD_COLOR
        the_bytes = concrete_value
    else:
        # An explicit raise, not an assert: asserts are stripped under -O,
        # which would turn an unsupported value into the color of b"".
        raise TypeError(
            f"cannot derive a color from {type(concrete_value).__name__}"
        )
    return base64.b64encode(the_bytes).decode()
437
+
438
def _randomize_registers(self) -> None:
    """Fill uninitialized bytes of general-purpose registers with random data.

    For every general-purpose register of the original cpu, a new value is
    built byte by byte: bytes the emulator reports as initialized are
    copied from the current value, all others are drawn from this
    analysis's own generator ``self.random``, so that ``seed`` actually
    controls the values. Registers with no uninitialized bytes are left
    untouched.
    """
    # Looked up once rather than per register.
    # NOTE(review): assumes this accessor has no side effects.
    general_purpose = self.orig_cpu.get_general_purpose_registers()
    for reg in self.orig_cpu:
        # only colorize the "regular" registers
        if (type(reg) is not state.Register) or (reg.name not in general_purpose):
            continue
        orig_val = self.emu.read_register(reg.name)
        logger.debug(f"_randomize_registers {reg.name} orig_val={orig_val:x}")
        if reg.name in self.emu.initialized_registers:
            initialized = self.emu.initialized_registers[reg.name]
        else:
            initialized = ()
        new_val = 0
        bc = 0  # number of bytes that were randomized
        for i in range(0, reg.size):
            new_val = new_val << 8
            if i in initialized:
                # NOTE(review): index i is checked for initialization but the
                # byte copied is the one at shift 8 * (size - i - 1); confirm
                # this matches the emulator's byte-index convention.
                bs = 8 * (reg.size - i - 1)
                new_val |= (orig_val >> bs) & 0xFF
            else:
                new_val |= self.random.randint(0, 255)
                bc += 1
        if bc == 0:
            logger.debug(
                f"Not colorizing register {reg.name} since it is already fully initialized with {orig_val:x}"
            )
        else:
            # make sure to update cpu as well as emu not sure why
            self.emu.write_register(reg.name, new_val)
            setattr(self.cpu, reg.name, new_val)
            logger.debug(
                f"Colorized {bc} bytes in register {reg.name}, old value was {orig_val:x} new is {new_val:x}"
            )
476
+
477
+ # helper for read/write unavailable hint
478
def _mem_unavailable_hint(
    self,
    operand: typing.Optional[BSIDMemoryReferenceOperand],
    pc: int,
    exec_num: int,
    insn_num: int,
    is_read: bool,
) -> hinting.Hint:
    """Build, emit at debug level, and return a memory-unavailable hint.

    Arguments:
        operand: The memory operand that could not be accessed. When it is
            falsy, the hint carries zeros and "None" register names.
        pc: Program counter of the offending instruction.
        exec_num: Index of the current micro-execution.
        insn_num: Index of the instruction within the micro-execution.
        is_read: True for a read access, False for a write.

    Returns:
        The MemoryUnavailableHint that was created.
    """
    # placeholder values used when there is no operand / no register
    details = {
        "size": 0,
        "scale": 0,
        "offset": 0,
        "address": 0,
        "base_reg_name": "None",
        "base_reg_val": 0,
        "index_reg_name": "None",
        "index_reg_val": 0,
    }
    if operand:
        details["size"] = operand.size
        details["scale"] = operand.scale
        details["offset"] = operand.offset
        details["address"] = operand.address(self.emu)
        if operand.base is not None:
            details["base_reg_val"] = self.emu.read_register(operand.base)
            details["base_reg_name"] = operand.base
        if operand.index is not None:
            details["index_reg_val"] = self.emu.read_register(operand.index)
            details["index_reg_name"] = operand.index
    hint = hinting.MemoryUnavailableHint(
        is_read=is_read,
        pc=pc,
        micro_exec_num=exec_num,
        instruction_num=insn_num,
        message="mem_unavailable",
        **details,
    )
    hinter.debug(hint)
    return hint
517
+
518
+ def _get_color_num(self, color: str) -> int:
519
+ (_, _, _, _, color_num) = self.colors[color]
520
+ return color_num
521
+
522
def _add_color(
    self,
    color: str,
    operand: Operand,
    insn: Instruction,
    exec_num: int,
    insn_num: int,
) -> None:
    """Record ``color`` in ``self.colors`` with where it was observed.

    Arguments:
        color: The color string to record.
        operand: The operand holding the value.
        insn: The instruction being examined.
        exec_num: Index of the current micro-execution.
        insn_num: Index of the instruction within the micro-execution.
    """
    # color numbers start at 1 and grow with the size of the map
    next_color_num = len(self.colors) + 1
    self.colors[color] = (operand, exec_num, insn_num, insn, next_color_num)
531
+
532
def _check_colors_instruction_reads(
    self,
    reads: typing.List[typing.Tuple[Operand, str, int]],
    insn: Instruction,
    exec_num: int,
    insn_num: int,
    hint_list: typing.List[hinting.Hint],
):
    """Emit one hint per value read by ``insn`` and append it to ``hint_list``.

    Arguments:
        reads: (operand, color, operand size) for each value read.
        insn: The instruction doing the reading.
        exec_num: Index of the current micro-execution.
        insn_num: Index of the instruction within the micro-execution.
        hint_list: List that collects this micro-execution's hints.
    """
    for operand, color, operand_size in reads:
        first_sighting = color not in self.colors
        if first_sighting:
            # read-def: use of a NOT previously recorded color value,
            # so record it as a new color
            self._add_color(color, operand, insn, exec_num, insn_num)
            label = "read-def"
        else:
            # read-flow: use of a previously recorded color value
            label = "read-flow"
        hint = self._dynamic_value_hint(
            operand,
            operand_size,
            color,
            insn,
            True,
            first_sighting,
            exec_num,
            insn_num,
            label,
        )
        hinter.debug(hint)
        hint_list.append(hint)
579
+
580
def _check_colors_instruction_writes(
    self,
    writes: typing.List[typing.Tuple[Operand, str, int]],
    insn: Instruction,
    exec_num: int,
    insn_num: int,
    hint_list: typing.List[hinting.Hint],
):
    """Emit one hint per value written by ``insn`` and append it to ``hint_list``.

    NB: This should be called *AFTER* the instruction emulates!

    Arguments:
        writes: (operand, color, operand size) for each value written.
        insn: The instruction doing the writing.
        exec_num: Index of the current micro-execution.
        insn_num: Index of the instruction within the micro-execution.
        hint_list: List that collects this micro-execution's hints.
    """
    for operand, color, operand_size in writes:
        first_sighting = color not in self.colors
        if first_sighting:
            # write-def: write of a NOT previously recorded color value,
            # so record it as a new color
            self._add_color(color, operand, insn, exec_num, insn_num)
            label = "write-def"
        else:
            # write-copy: write of a previously seen value
            label = "write-copy"
        hint = self._dynamic_value_hint(
            operand,
            operand_size,
            color,
            insn,
            False,
            first_sighting,
            exec_num,
            insn_num,
            label,
        )
        hinter.debug(hint)
        hint_list.append(hint)
628
+
629
def _dynamic_value_hint(
    self,
    operand: Operand,
    size: int,
    color: str,
    insn: Instruction,
    is_use: bool,
    is_new: bool,
    exec_num: int,
    insn_num: int,
    message: str,
):
    """Build the dynamic-value hint describing ``operand`` and its color.

    Arguments:
        operand: The register or memory operand holding the value.
        size: Operand size. Used for register hints; memory hints take
            the size from the operand itself.
        color: The color string; it must already be in ``self.colors``.
        insn: The instruction being examined; its address becomes the pc.
        is_use: True when the value is read, False when it is written.
        is_new: True when this is the first sighting of the color.
        exec_num: Index of the current micro-execution.
        insn_num: Index of the instruction within the micro-execution.
        message: Label stored in the hint.

    Returns:
        A DynamicRegisterValueHint or DynamicMemoryValueHint.

    Raises:
        KeyError: If ``color`` has not been recorded.
        TypeError: If ``operand`` is neither a RegisterOperand nor a
            BSIDMemoryReferenceOperand.
    """
    pc = insn.address
    color_num = self._get_color_num(color)
    if type(operand) is RegisterOperand:
        return hinting.DynamicRegisterValueHint(
            reg_name=operand.name,
            size=size,
            color=color_num,
            dynamic_value=color,
            use=is_use,
            new=is_new,
            pc=pc,
            micro_exec_num=exec_num,
            instruction_num=insn_num,
            message=message,
        )
    if type(operand) is BSIDMemoryReferenceOperand:
        # absent base/index registers are reported as the string "None"
        base_name = "None" if operand.base is None else operand.base
        index_name = "None" if operand.index is None else operand.index
        return hinting.DynamicMemoryValueHint(
            address=operand.address(self.emu),
            base=base_name,
            index=index_name,
            scale=operand.scale,
            offset=operand.offset,
            color=color_num,
            dynamic_value=color,
            size=operand.size,
            use=is_use,
            new=is_new,
            pc=pc,
            micro_exec_num=exec_num,
            instruction_num=insn_num,
            message=message,
        )
    # An explicit raise, not an assert: asserts are stripped under -O,
    # which would make this method silently return None.
    raise TypeError(
        f"cannot build a dynamic value hint for {type(operand).__name__}"
    )