smallworld-re 1.0.3__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (306)
  1. smallworld/analyses/__init__.py +8 -0
  2. smallworld/analyses/analysis.py +8 -67
  3. smallworld/analyses/code_coverage.py +1 -2
  4. smallworld/analyses/colorizer.py +301 -534
  5. smallworld/analyses/colorizer_def_use.py +217 -0
  6. smallworld/analyses/colorizer_summary.py +173 -83
  7. smallworld/analyses/field_detection/field_analysis.py +7 -8
  8. smallworld/analyses/field_detection/hints.py +1 -1
  9. smallworld/analyses/field_detection/malloc.py +2 -2
  10. smallworld/analyses/trace_execution.py +160 -0
  11. smallworld/analyses/trace_execution_types.py +42 -0
  12. smallworld/analyses/unstable/angr/divergence.py +1 -2
  13. smallworld/analyses/unstable/angr/model.py +5 -6
  14. smallworld/analyses/unstable/angr_nwbt.py +3 -4
  15. smallworld/analyses/unstable/code_coverage.py +2 -3
  16. smallworld/analyses/unstable/code_reachable.py +2 -3
  17. smallworld/analyses/unstable/control_flow_tracer.py +2 -3
  18. smallworld/analyses/unstable/pointer_finder.py +2 -3
  19. smallworld/analyses/unstable/utils/tui.py +71 -0
  20. smallworld/emulators/__init__.py +3 -1
  21. smallworld/emulators/angr/angr.py +30 -9
  22. smallworld/emulators/angr/machdefs/__init__.py +2 -0
  23. smallworld/emulators/angr/machdefs/aarch64.py +1 -1
  24. smallworld/emulators/angr/machdefs/amd64.py +0 -4
  25. smallworld/emulators/angr/machdefs/arm.py +0 -2
  26. smallworld/emulators/angr/machdefs/i386.py +0 -2
  27. smallworld/emulators/angr/machdefs/loongarch.py +340 -0
  28. smallworld/emulators/angr/machdefs/machdef.py +1 -8
  29. smallworld/emulators/angr/machdefs/mips.py +0 -2
  30. smallworld/emulators/angr/machdefs/mips64.py +0 -2
  31. smallworld/emulators/angr/machdefs/ppc.py +1 -2
  32. smallworld/emulators/angr/machdefs/riscv.py +8 -10
  33. smallworld/emulators/angr/machdefs/xtensa.py +7 -4
  34. smallworld/emulators/emulator.py +22 -0
  35. smallworld/emulators/ghidra/__init__.py +37 -0
  36. smallworld/emulators/ghidra/ghidra.py +513 -0
  37. smallworld/emulators/ghidra/machdefs/__init__.py +31 -0
  38. smallworld/emulators/ghidra/machdefs/aarch64.py +289 -0
  39. smallworld/emulators/ghidra/machdefs/amd64.py +185 -0
  40. smallworld/emulators/ghidra/machdefs/arm.py +370 -0
  41. smallworld/emulators/ghidra/machdefs/i386.py +109 -0
  42. smallworld/emulators/ghidra/machdefs/loongarch.py +162 -0
  43. smallworld/emulators/ghidra/machdefs/machdef.py +81 -0
  44. smallworld/emulators/ghidra/machdefs/mips.py +163 -0
  45. smallworld/emulators/ghidra/machdefs/mips64.py +186 -0
  46. smallworld/emulators/ghidra/machdefs/ppc.py +98 -0
  47. smallworld/emulators/ghidra/machdefs/riscv.py +208 -0
  48. smallworld/emulators/ghidra/machdefs/xtensa.py +21 -0
  49. smallworld/emulators/ghidra/typing.py +28 -0
  50. smallworld/emulators/hookable.py +18 -4
  51. smallworld/emulators/panda/machdefs/__init__.py +2 -2
  52. smallworld/emulators/panda/machdefs/aarch64.py +186 -11
  53. smallworld/emulators/panda/machdefs/amd64.py +103 -11
  54. smallworld/emulators/panda/machdefs/arm.py +216 -20
  55. smallworld/emulators/panda/machdefs/i386.py +30 -7
  56. smallworld/emulators/panda/machdefs/machdef.py +9 -16
  57. smallworld/emulators/panda/machdefs/mips.py +49 -5
  58. smallworld/emulators/panda/machdefs/mips64.py +57 -5
  59. smallworld/emulators/panda/machdefs/ppc.py +38 -13
  60. smallworld/emulators/panda/panda.py +146 -44
  61. smallworld/emulators/unicorn/__init__.py +2 -0
  62. smallworld/emulators/unicorn/machdefs/aarch64.py +253 -264
  63. smallworld/emulators/unicorn/machdefs/amd64.py +254 -259
  64. smallworld/emulators/unicorn/machdefs/arm.py +200 -212
  65. smallworld/emulators/unicorn/machdefs/i386.py +84 -90
  66. smallworld/emulators/unicorn/machdefs/machdef.py +2 -23
  67. smallworld/emulators/unicorn/machdefs/mips.py +127 -135
  68. smallworld/emulators/unicorn/unicorn.py +52 -13
  69. smallworld/helpers.py +4 -19
  70. smallworld/hinting/hinting.py +22 -192
  71. smallworld/hinting/hints.py +50 -18
  72. smallworld/instructions/bsid.py +8 -8
  73. smallworld/logging.py +4 -2
  74. smallworld/platforms/__init__.py +12 -0
  75. smallworld/platforms/defs/__init__.py +36 -0
  76. smallworld/platforms/defs/aarch64.py +450 -0
  77. smallworld/platforms/defs/amd64.py +463 -0
  78. smallworld/platforms/defs/arm.py +519 -0
  79. smallworld/platforms/defs/i386.py +258 -0
  80. smallworld/platforms/defs/loongarch.py +270 -0
  81. smallworld/platforms/defs/mips.py +321 -0
  82. smallworld/platforms/defs/mips64.py +313 -0
  83. smallworld/platforms/defs/platformdef.py +97 -0
  84. smallworld/platforms/defs/powerpc.py +259 -0
  85. smallworld/platforms/defs/riscv.py +257 -0
  86. smallworld/platforms/defs/xtensa.py +96 -0
  87. smallworld/{platforms.py → platforms/platforms.py} +3 -0
  88. smallworld/state/cpus/__init__.py +2 -0
  89. smallworld/state/cpus/aarch64.py +0 -9
  90. smallworld/state/cpus/amd64.py +6 -28
  91. smallworld/state/cpus/arm.py +0 -11
  92. smallworld/state/cpus/cpu.py +0 -11
  93. smallworld/state/cpus/i386.py +0 -7
  94. smallworld/state/cpus/loongarch.py +299 -0
  95. smallworld/state/cpus/mips.py +4 -47
  96. smallworld/state/cpus/mips64.py +18 -58
  97. smallworld/state/cpus/powerpc.py +2 -9
  98. smallworld/state/cpus/riscv.py +1 -11
  99. smallworld/state/cpus/xtensa.py +0 -5
  100. smallworld/state/memory/code.py +38 -2
  101. smallworld/state/memory/elf/__init__.py +5 -1
  102. smallworld/state/memory/elf/coredump/__init__.py +3 -0
  103. smallworld/state/memory/elf/coredump/coredump.py +46 -0
  104. smallworld/state/memory/elf/coredump/prstatus/__init__.py +27 -0
  105. smallworld/state/memory/elf/coredump/prstatus/aarch64.py +46 -0
  106. smallworld/state/memory/elf/coredump/prstatus/amd64.py +40 -0
  107. smallworld/state/memory/elf/coredump/prstatus/arm.py +53 -0
  108. smallworld/state/memory/elf/coredump/prstatus/i386.py +30 -0
  109. smallworld/state/memory/elf/coredump/prstatus/mips.py +55 -0
  110. smallworld/state/memory/elf/coredump/prstatus/mips64.py +57 -0
  111. smallworld/state/memory/elf/coredump/prstatus/ppc.py +82 -0
  112. smallworld/state/memory/elf/coredump/prstatus/prstatus.py +129 -0
  113. smallworld/state/memory/elf/elf.py +211 -57
  114. smallworld/state/memory/elf/register_state.py +36 -0
  115. smallworld/state/memory/elf/rela/__init__.py +2 -0
  116. smallworld/state/memory/elf/rela/aarch64.py +3 -1
  117. smallworld/state/memory/elf/rela/amd64.py +4 -2
  118. smallworld/state/memory/elf/rela/arm.py +4 -2
  119. smallworld/state/memory/elf/rela/i386.py +4 -2
  120. smallworld/state/memory/elf/rela/loongarch.py +32 -0
  121. smallworld/state/memory/elf/rela/mips.py +39 -18
  122. smallworld/state/memory/elf/rela/ppc.py +31 -14
  123. smallworld/state/memory/elf/structs.py +3 -0
  124. smallworld/state/memory/heap.py +2 -2
  125. smallworld/state/memory/memory.py +18 -0
  126. smallworld/state/memory/pe/__init__.py +3 -0
  127. smallworld/state/memory/pe/pe.py +361 -0
  128. smallworld/state/memory/pe/structs.py +60 -0
  129. smallworld/state/memory/stack/__init__.py +2 -0
  130. smallworld/state/memory/stack/loongarch.py +26 -0
  131. smallworld/state/models/__init__.py +29 -2
  132. smallworld/state/models/aarch64/__init__.py +1 -0
  133. smallworld/state/models/aarch64/systemv/__init__.py +6 -0
  134. smallworld/state/models/aarch64/systemv/c99/__init__.py +12 -0
  135. smallworld/state/models/aarch64/systemv/c99/signal.py +16 -0
  136. smallworld/state/models/aarch64/systemv/c99/stdio.py +265 -0
  137. smallworld/state/models/aarch64/systemv/c99/stdlib.py +169 -0
  138. smallworld/state/models/aarch64/systemv/c99/string.py +139 -0
  139. smallworld/state/models/aarch64/systemv/c99/time.py +61 -0
  140. smallworld/state/models/aarch64/systemv/posix/__init__.py +6 -0
  141. smallworld/state/models/aarch64/systemv/posix/libgen.py +16 -0
  142. smallworld/state/models/aarch64/systemv/posix/signal.py +157 -0
  143. smallworld/state/models/aarch64/systemv/systemv.py +80 -0
  144. smallworld/state/models/amd64/__init__.py +1 -0
  145. smallworld/state/models/amd64/systemv/__init__.py +6 -0
  146. smallworld/state/models/amd64/systemv/c99/__init__.py +12 -0
  147. smallworld/state/models/amd64/systemv/c99/signal.py +16 -0
  148. smallworld/state/models/amd64/systemv/c99/stdio.py +265 -0
  149. smallworld/state/models/amd64/systemv/c99/stdlib.py +169 -0
  150. smallworld/state/models/amd64/systemv/c99/string.py +139 -0
  151. smallworld/state/models/amd64/systemv/c99/time.py +61 -0
  152. smallworld/state/models/amd64/systemv/posix/__init__.py +6 -0
  153. smallworld/state/models/amd64/systemv/posix/libgen.py +16 -0
  154. smallworld/state/models/amd64/systemv/posix/signal.py +157 -0
  155. smallworld/state/models/amd64/systemv/systemv.py +78 -0
  156. smallworld/state/models/armel/__init__.py +1 -0
  157. smallworld/state/models/armel/systemv/__init__.py +6 -0
  158. smallworld/state/models/armel/systemv/c99/__init__.py +12 -0
  159. smallworld/state/models/armel/systemv/c99/signal.py +16 -0
  160. smallworld/state/models/armel/systemv/c99/stdio.py +265 -0
  161. smallworld/state/models/armel/systemv/c99/stdlib.py +169 -0
  162. smallworld/state/models/armel/systemv/c99/string.py +139 -0
  163. smallworld/state/models/armel/systemv/c99/time.py +61 -0
  164. smallworld/state/models/armel/systemv/posix/__init__.py +6 -0
  165. smallworld/state/models/armel/systemv/posix/libgen.py +16 -0
  166. smallworld/state/models/armel/systemv/posix/signal.py +157 -0
  167. smallworld/state/models/armel/systemv/systemv.py +82 -0
  168. smallworld/state/models/armhf/__init__.py +1 -0
  169. smallworld/state/models/armhf/systemv/__init__.py +6 -0
  170. smallworld/state/models/armhf/systemv/c99/__init__.py +12 -0
  171. smallworld/state/models/armhf/systemv/c99/signal.py +16 -0
  172. smallworld/state/models/armhf/systemv/c99/stdio.py +265 -0
  173. smallworld/state/models/armhf/systemv/c99/stdlib.py +169 -0
  174. smallworld/state/models/armhf/systemv/c99/string.py +139 -0
  175. smallworld/state/models/armhf/systemv/c99/time.py +61 -0
  176. smallworld/state/models/armhf/systemv/posix/__init__.py +6 -0
  177. smallworld/state/models/armhf/systemv/posix/libgen.py +16 -0
  178. smallworld/state/models/armhf/systemv/posix/signal.py +157 -0
  179. smallworld/state/models/armhf/systemv/systemv.py +77 -0
  180. smallworld/state/models/c99/__init__.py +12 -0
  181. smallworld/state/models/c99/fmt_print.py +915 -0
  182. smallworld/state/models/c99/fmt_scan.py +864 -0
  183. smallworld/state/models/c99/math.py +362 -0
  184. smallworld/state/models/c99/signal.py +71 -0
  185. smallworld/state/models/c99/stdio.py +1305 -0
  186. smallworld/state/models/c99/stdlib.py +595 -0
  187. smallworld/state/models/c99/string.py +674 -0
  188. smallworld/state/models/c99/time.py +340 -0
  189. smallworld/state/models/c99/utils.py +89 -0
  190. smallworld/state/models/cstd.py +759 -0
  191. smallworld/state/models/errno.py +581 -0
  192. smallworld/state/models/filedesc.py +515 -0
  193. smallworld/state/models/i386/__init__.py +1 -0
  194. smallworld/state/models/i386/systemv/__init__.py +6 -0
  195. smallworld/state/models/i386/systemv/c99/__init__.py +12 -0
  196. smallworld/state/models/i386/systemv/c99/signal.py +16 -0
  197. smallworld/state/models/i386/systemv/c99/stdio.py +265 -0
  198. smallworld/state/models/i386/systemv/c99/stdlib.py +169 -0
  199. smallworld/state/models/i386/systemv/c99/string.py +139 -0
  200. smallworld/state/models/i386/systemv/c99/time.py +61 -0
  201. smallworld/state/models/i386/systemv/posix/__init__.py +6 -0
  202. smallworld/state/models/i386/systemv/posix/libgen.py +16 -0
  203. smallworld/state/models/i386/systemv/posix/signal.py +157 -0
  204. smallworld/state/models/i386/systemv/systemv.py +71 -0
  205. smallworld/state/models/loongarch64/__init__.py +1 -0
  206. smallworld/state/models/loongarch64/systemv/__init__.py +6 -0
  207. smallworld/state/models/loongarch64/systemv/c99/__init__.py +12 -0
  208. smallworld/state/models/loongarch64/systemv/c99/signal.py +16 -0
  209. smallworld/state/models/loongarch64/systemv/c99/stdio.py +265 -0
  210. smallworld/state/models/loongarch64/systemv/c99/stdlib.py +169 -0
  211. smallworld/state/models/loongarch64/systemv/c99/string.py +139 -0
  212. smallworld/state/models/loongarch64/systemv/c99/time.py +61 -0
  213. smallworld/state/models/loongarch64/systemv/posix/__init__.py +6 -0
  214. smallworld/state/models/loongarch64/systemv/posix/libgen.py +16 -0
  215. smallworld/state/models/loongarch64/systemv/posix/signal.py +157 -0
  216. smallworld/state/models/loongarch64/systemv/systemv.py +83 -0
  217. smallworld/state/models/mips/__init__.py +1 -0
  218. smallworld/state/models/mips/systemv/__init__.py +6 -0
  219. smallworld/state/models/mips/systemv/c99/__init__.py +12 -0
  220. smallworld/state/models/mips/systemv/c99/signal.py +16 -0
  221. smallworld/state/models/mips/systemv/c99/stdio.py +265 -0
  222. smallworld/state/models/mips/systemv/c99/stdlib.py +169 -0
  223. smallworld/state/models/mips/systemv/c99/string.py +139 -0
  224. smallworld/state/models/mips/systemv/c99/time.py +61 -0
  225. smallworld/state/models/mips/systemv/posix/__init__.py +6 -0
  226. smallworld/state/models/mips/systemv/posix/libgen.py +16 -0
  227. smallworld/state/models/mips/systemv/posix/signal.py +157 -0
  228. smallworld/state/models/mips/systemv/systemv.py +78 -0
  229. smallworld/state/models/mips64/__init__.py +1 -0
  230. smallworld/state/models/mips64/systemv/__init__.py +6 -0
  231. smallworld/state/models/mips64/systemv/c99/__init__.py +12 -0
  232. smallworld/state/models/mips64/systemv/c99/signal.py +16 -0
  233. smallworld/state/models/mips64/systemv/c99/stdio.py +265 -0
  234. smallworld/state/models/mips64/systemv/c99/stdlib.py +169 -0
  235. smallworld/state/models/mips64/systemv/c99/string.py +139 -0
  236. smallworld/state/models/mips64/systemv/c99/time.py +61 -0
  237. smallworld/state/models/mips64/systemv/posix/__init__.py +6 -0
  238. smallworld/state/models/mips64/systemv/posix/libgen.py +16 -0
  239. smallworld/state/models/mips64/systemv/posix/signal.py +157 -0
  240. smallworld/state/models/mips64/systemv/systemv.py +98 -0
  241. smallworld/state/models/mips64el/__init__.py +1 -0
  242. smallworld/state/models/mips64el/systemv/__init__.py +6 -0
  243. smallworld/state/models/mips64el/systemv/c99/__init__.py +12 -0
  244. smallworld/state/models/mips64el/systemv/c99/signal.py +16 -0
  245. smallworld/state/models/mips64el/systemv/c99/stdio.py +265 -0
  246. smallworld/state/models/mips64el/systemv/c99/stdlib.py +169 -0
  247. smallworld/state/models/mips64el/systemv/c99/string.py +139 -0
  248. smallworld/state/models/mips64el/systemv/c99/time.py +61 -0
  249. smallworld/state/models/mips64el/systemv/posix/__init__.py +6 -0
  250. smallworld/state/models/mips64el/systemv/posix/libgen.py +16 -0
  251. smallworld/state/models/mips64el/systemv/posix/signal.py +157 -0
  252. smallworld/state/models/mips64el/systemv/systemv.py +96 -0
  253. smallworld/state/models/mipsel/__init__.py +1 -0
  254. smallworld/state/models/mipsel/systemv/__init__.py +6 -0
  255. smallworld/state/models/mipsel/systemv/c99/__init__.py +12 -0
  256. smallworld/state/models/mipsel/systemv/c99/signal.py +16 -0
  257. smallworld/state/models/mipsel/systemv/c99/stdio.py +265 -0
  258. smallworld/state/models/mipsel/systemv/c99/stdlib.py +169 -0
  259. smallworld/state/models/mipsel/systemv/c99/string.py +139 -0
  260. smallworld/state/models/mipsel/systemv/c99/time.py +61 -0
  261. smallworld/state/models/mipsel/systemv/posix/__init__.py +6 -0
  262. smallworld/state/models/mipsel/systemv/posix/libgen.py +16 -0
  263. smallworld/state/models/mipsel/systemv/posix/signal.py +157 -0
  264. smallworld/state/models/mipsel/systemv/systemv.py +78 -0
  265. smallworld/state/models/model.py +27 -2
  266. smallworld/state/models/posix/__init__.py +6 -0
  267. smallworld/state/models/posix/libgen.py +123 -0
  268. smallworld/state/models/posix/signal.py +690 -0
  269. smallworld/state/models/powerpc/__init__.py +1 -0
  270. smallworld/state/models/powerpc/systemv/__init__.py +6 -0
  271. smallworld/state/models/powerpc/systemv/c99/__init__.py +12 -0
  272. smallworld/state/models/powerpc/systemv/c99/signal.py +16 -0
  273. smallworld/state/models/powerpc/systemv/c99/stdio.py +265 -0
  274. smallworld/state/models/powerpc/systemv/c99/stdlib.py +169 -0
  275. smallworld/state/models/powerpc/systemv/c99/string.py +139 -0
  276. smallworld/state/models/powerpc/systemv/c99/time.py +61 -0
  277. smallworld/state/models/powerpc/systemv/posix/__init__.py +6 -0
  278. smallworld/state/models/powerpc/systemv/posix/libgen.py +16 -0
  279. smallworld/state/models/powerpc/systemv/posix/signal.py +157 -0
  280. smallworld/state/models/powerpc/systemv/systemv.py +93 -0
  281. smallworld/state/models/riscv64/__init__.py +1 -0
  282. smallworld/state/models/riscv64/systemv/__init__.py +6 -0
  283. smallworld/state/models/riscv64/systemv/c99/__init__.py +12 -0
  284. smallworld/state/models/riscv64/systemv/c99/signal.py +16 -0
  285. smallworld/state/models/riscv64/systemv/c99/stdio.py +265 -0
  286. smallworld/state/models/riscv64/systemv/c99/stdlib.py +169 -0
  287. smallworld/state/models/riscv64/systemv/c99/string.py +139 -0
  288. smallworld/state/models/riscv64/systemv/c99/time.py +61 -0
  289. smallworld/state/models/riscv64/systemv/posix/__init__.py +6 -0
  290. smallworld/state/models/riscv64/systemv/posix/libgen.py +16 -0
  291. smallworld/state/models/riscv64/systemv/posix/signal.py +157 -0
  292. smallworld/state/models/riscv64/systemv/systemv.py +85 -0
  293. smallworld/state/state.py +65 -24
  294. smallworld/state/unstable/elf.py +16 -31
  295. smallworld/utils.py +6 -1
  296. {smallworld_re-1.0.3.dist-info → smallworld_re-2.0.0.dist-info}/METADATA +74 -42
  297. smallworld_re-2.0.0.dist-info/RECORD +374 -0
  298. {smallworld_re-1.0.3.dist-info → smallworld_re-2.0.0.dist-info}/WHEEL +1 -1
  299. smallworld/state/models/x86/__init__.py +0 -2
  300. smallworld/state/models/x86/microsoftcdecl.py +0 -35
  301. smallworld/state/models/x86/systemv.py +0 -240
  302. smallworld_re-1.0.3.dist-info/RECORD +0 -166
  303. /smallworld/state/models/{posix.py → _posix.py} +0 -0
  304. {smallworld_re-1.0.3.dist-info → smallworld_re-2.0.0.dist-info}/entry_points.txt +0 -0
  305. {smallworld_re-1.0.3.dist-info → smallworld_re-2.0.0.dist-info}/licenses/LICENSE.txt +0 -0
  306. {smallworld_re-1.0.3.dist-info → smallworld_re-2.0.0.dist-info}/top_level.txt +0 -0
@@ -1,18 +1,14 @@
1
- import base64
2
1
  import copy
2
+ import hashlib
3
3
  import logging
4
4
  import random
5
+ import struct
5
6
  import typing
6
7
 
7
8
  import capstone
8
9
 
9
- from .. import hinting, state
10
- from ..emulators import (
11
- UnicornEmulationMemoryReadError,
12
- UnicornEmulationMemoryWriteError,
13
- UnicornEmulator,
14
- )
15
- from ..exceptions import AnalysisRunError, EmulationBounds
10
+ from .. import hinting, platforms, state
11
+ from ..exceptions import AnalysisRunError # , EmulationBounds
16
12
  from ..instructions import (
17
13
  BSIDMemoryReferenceOperand,
18
14
  Instruction,
@@ -20,40 +16,112 @@ from ..instructions import (
20
16
  RegisterOperand,
21
17
  )
22
18
  from . import analysis
19
+ from .trace_execution import TraceExecution, TraceExecutionCBPoint
23
20
 
24
21
  logger = logging.getLogger(__name__)
25
- hinter = hinting.get_hinter(__name__)
26
22
 
27
- MIN_ACCEPTABLE_COLOR_INT = 20
28
- BAD_COLOR = "BAD_COLOR"
29
23
 
30
- Colors = typing.Dict[str, typing.Tuple[Operand, int, int, Instruction, int]]
24
+ MIN_ACCEPTABLE_COLOR_INT = 0x20
25
+ BAD_COLOR = (2**64) - 1
26
+
27
+ Colors = typing.Dict[int, typing.Tuple[Operand, int, Instruction, int]]
28
+
29
+ Shad = typing.Dict[int, typing.Tuple[int, bool, int]]
30
+
31
+
32
+ def randomize_uninitialized(
33
+ machine: state.Machine, seed: int = 123456, extra_regs: typing.List[str] = []
34
+ ) -> state.Machine:
35
+ """Consider all parts of the machine that can be written to (registers
36
+ + memory regions). Write random values to any bytes in those machine
37
+ parts which are currently uninitialized. So this only works if we
38
+ have a way to tell if registers or memory have not been initialized.
39
+
40
+ Randomize all general purpose regs (plus regs in list of
41
+ extra_regs arg) that have not already been set. Also, for any
42
+ Heap, Stack, RawMemory or Memory regions, randomize any bytes that
43
+ have not been set. This last part is to come since we don't
44
+ currently have a way to tell which parts of memory have been
45
+ written and which parts have not. Further, there are kinds of
46
+ memory (Stack) which currently will break if we randomize all
47
+ bytes.
48
+
49
+ Note that, for a register, it is either set or not. We can't tell
50
+ if, say edx part of rdx has been set.
51
+
52
+ """
53
+ random.seed(seed)
54
+ logger.setLevel(logging.DEBUG)
55
+
56
+ # if logger.getEffectiveLevel() >= logging.DEBUG:
57
+ m = hashlib.md5()
58
+ platform = machine.get_platform()
59
+ machine_copy = copy.deepcopy(machine)
60
+ pdefs = platforms.defs.PlatformDef.for_platform(platform)
61
+ reg_names = list(pdefs.registers.keys())
62
+ reg_names.sort()
63
+
64
+ def get_reg(machine, reg_name):
65
+ cpu = machine.get_cpu()
66
+ for x in cpu:
67
+ if isinstance(x, state.Register):
68
+ if x.name == reg_name:
69
+ return x
70
+ return None
71
+
72
+ for name in reg_names:
73
+ if (name in pdefs.general_purpose_registers) or (name in extra_regs):
74
+ reg = get_reg(machine_copy, name)
75
+ if reg.get_content() is None:
76
+ # this means reg is uninitialized; ok to randomize
77
+ v = random.randint(0, (1 << (8 * reg.size)) - 1)
78
+ reg.set(v)
79
+ # logger.info(f"randomize_uninitialized setting {reg}")
80
+ # if logger.getEffectiveLevel() >= logging.DEBUG:
81
+ m.update(str(v).encode("utf-8"))
82
+
83
+ for el in machine_copy:
84
+ if isinstance(m, state.memory.Memory):
85
+ if isinstance(m, state.memory.Executable):
86
+ # nothing to randomize here
87
+ continue
88
+ # To come, I guess
89
+ # we'd like to randomize anything uninitialized...
90
+ pass
91
+ # if logger.getEffectiveLevel() >= logging.DEBUG:
92
+ logger.info(f"digest of changes made to machine: {m.hexdigest()}")
93
+
94
+ logger.setLevel(logging.INFO)
95
+
96
+ return machine_copy
31
97
 
32
98
 
33
99
  class Colorizer(analysis.Analysis):
34
- """A simple kind of data flow analysis via tracking distinct values (colors)
35
- and employing instruction use/def analysis
36
-
37
- We run multiple micro-executions of the code starting from same entry. At
38
- the start of each, we randomize register values that have not already been
39
- initialized. We maintain a "colors" map from values to when we first
40
- observed them. This map is initially empty. Before emulating an instruction,
41
- we examine the values (registers and memory) it will read. If any are NOT in
42
- the colors map, that is the initial sighting of that value and we emit a
43
- hint to that effect. If any color IS already in the map, then that is a flow
44
- from the time at which that value was first observed to this
45
- instruction. Similarly, after emulating an instruction, we examine every
46
- value (register and memory) written. If a value is not in the colors map, it
47
- is a new, computed result and we hint about its creation. If it is in the
48
- colors map, we do nothing since it just a copy.
100
+ """
101
+ A simple kind of data flow analysis via tracking distinct values
102
+ (colors) and employing instruction use/def analysis
103
+
104
+ We run multiple micro-executions of the code starting from same
105
+ entry. At the start of each, we randomize register values that
106
+ have not already been initialized. We maintain a "colors" map from
107
+ dynamic values to when/where we first observed them. This map is
108
+ initially empty. Before emulating an instruction, we examine the
109
+ values (registers and memory) it will read. If any are not in the
110
+ colors map, that is the initial sighting of that value and we emit
111
+ a hint to that effect and add a color to the map. If any color is
112
+ already in the map, then that is a def-use flow from the time or
113
+ place at which that value was first observed to this instruction.
114
+ Similarly, after emulating an instruction, we examine every value
115
+ written to a register or memory. If a value is not in the colors
116
+ map, it is a new, computed result and we hint about its creation
117
+ and add it to the map. If it is in the colors map, we do nothing
118
+ since it just a copy.
49
119
 
50
120
  Whilst looking at reads and writes for instructions, we hint if any
51
121
  correspond to unavailable memory.
52
122
 
53
123
  Arguments:
54
- num_micro_executions: The number of micro-executions to run.
55
124
  num_insns: The number of instructions to micro-execute.
56
- seed: Random seed for test stability, or None.
57
125
 
58
126
  """
59
127
 
@@ -64,357 +132,134 @@ class Colorizer(analysis.Analysis):
64
132
  def __init__(
65
133
  self,
66
134
  *args,
67
- num_micro_executions: int = 5,
135
+ exec_id: int,
68
136
  num_insns: int = 200,
69
- seed: typing.Optional[int] = 99,
70
- **kwargs
71
- # self, *args, num_micro_executions: int = 1, num_insns: int = 10, **kwargs
137
+ **kwargs,
72
138
  ):
73
139
  super().__init__(*args, **kwargs)
74
- # Create our own random so we can avoid contention.
75
- self.random = random.Random()
76
- self.seed = seed
77
- self.num_micro_executions = num_micro_executions
140
+ self.exec_id = exec_id
78
141
  self.num_insns = num_insns
142
+ self.colors: Colors = {}
143
+ self.shadow_register: typing.Dict[str, Shad] = {}
144
+ self.shadow_memory: Shad = {}
145
+ # self.edge: typing.Dict[int, typing.Dict[int, typing.Tuple[str, int, int]]] = {}
79
146
 
80
- def _get_instr_at_pc(self, pc: int) -> capstone.CsInsn:
81
- code = self.emu.read_memory(pc, 15) # longest possible instruction
147
+ def _get_instr_at_pc(self, emu, pc: int) -> capstone.CsInsn:
148
+ code = emu.read_memory(pc, 15) # longest possible instruction
82
149
  if code is None:
83
150
  raise AnalysisRunError(
84
151
  "Unable to read next instruction out of emulator memory"
85
152
  )
86
- (insns, disas) = self.emu._disassemble(code, pc, 2)
153
+ (insns, disas) = emu._disassemble(code, pc, 2)
87
154
  insn = insns[0]
88
155
  return insn
89
156
 
90
157
  def _operand_size(self, operand: Operand) -> int:
91
158
  if type(operand) is RegisterOperand:
92
159
  # return size of a reg based on its name
93
- return getattr(self.cpu, operand.name).size
160
+ return self.pdef.registers[operand.name].size
94
161
  elif type(operand) is BSIDMemoryReferenceOperand:
95
162
  # memory operand knows its size
96
163
  return operand.size
97
164
  return 0
98
165
 
99
166
  def run(self, machine: state.Machine) -> None:
100
- # note that start pc is in start_cpustate
101
-
102
167
  # collect hints for each microexecution, in a list of lists
103
- hint_list_list: typing.List[typing.List[hinting.Hint]] = []
104
168
 
105
169
  self.orig_machine = copy.deepcopy(machine)
106
170
  self.orig_cpu = self.orig_machine.get_cpu()
107
171
  self.platform = self.orig_cpu.platform
108
-
109
- for i in range(self.num_micro_executions):
110
- logger.info("-------------------------")
111
- logger.info(f"micro exec #{i}")
112
-
113
- if self.seed is not None:
114
- self.random.seed(a=self.seed)
115
-
116
- self.machine = copy.deepcopy(self.orig_machine)
117
- self.cpu = self.machine.get_cpu()
118
- self.emu = UnicornEmulator(self.platform)
119
- self.machine.apply(self.emu)
120
-
121
- # initialize registers with random values
122
- self._randomize_registers()
123
-
124
- # map from color values to first use / def
125
- self.colors: Colors = {}
126
-
127
- hint_list: typing.List[hinting.Hint] = []
128
- for j in range(self.num_insns):
129
- logger.info(f"instr_count = {j}")
130
- # obtain instr about to be emulated
131
- pc = self.emu.read_register("pc")
132
- if pc in self.emu.get_exit_points():
133
- break
134
- cs_insn = self._get_instr_at_pc(pc)
135
- sw_insn = Instruction.from_capstone(cs_insn)
136
-
137
- logger.debug(sw_insn)
138
-
139
- # pull state back out of the emulator for inspection
140
- m = copy.deepcopy(self.machine)
141
- m.extract(self.emu)
142
- self.cpu = m.get_cpu()
143
- # self.cpu = copy.deepcopy(self.machine).extract(self.emu).get_cpu()
144
- # curr_machine = copy.deepcopy(self.machine)
145
- # curr_machine.extract(self.emu)
146
- # curr_machine = self.eself.cpu.load(self.emu)
147
- # self.cpu = curr_machien.get_cpu()
148
-
149
- # print(f"pc={pc:x} {sw_insn}")
150
- # import pdb
151
- # pdb.set_trace()
152
-
153
- reads: typing.List[typing.Tuple[Operand, str, int]] = []
154
- for read_operand in sw_insn.reads:
155
- logger.debug(f"pc={pc:x} read_operand={read_operand}")
156
-
157
- if (
158
- type(read_operand) is RegisterOperand
159
- and read_operand.name == "rflags"
160
- ):
161
- continue
162
-
163
- sz = self._operand_size(read_operand)
164
- if type(read_operand) is BSIDMemoryReferenceOperand:
165
- a = read_operand.address(self.emu)
166
- ar = (a, a + sz)
167
- if not self.emu._is_address_range_mapped(ar):
168
- # at least one byte in this range is not mapped
169
- # so dont add this read to the list
170
- continue
171
- read_operand_color = self._concrete_val_to_color(
172
- read_operand.concretize(self.emu), sz
173
- )
174
- # discard bad colors
175
- if read_operand_color == BAD_COLOR:
176
- continue
177
- # except UnicornEmulationMemoryReadError as e:
178
- # # ignore bc self.emu.step() will also raise
179
- # # same error, which will generate a hint
180
- # pass
181
- # except Exception as e:
182
- # import pdb
183
- # pdb.set_trace()
184
- # print(e)
185
- tup = (read_operand, read_operand_color, sz)
186
- reads.append(tup)
187
- reads.sort(key=lambda e: e[0].__repr__())
188
- # logger.info(f"reads: {reads}")
189
- self._check_colors_instruction_reads(reads, sw_insn, i, j, hint_list)
190
-
191
- try:
192
- # print(f"pc={pc:x} {sw_insn}")
193
- # import pdb
194
- # pdb.set_trace()
195
-
196
- self.emu.step()
197
-
198
- except EmulationBounds:
199
- # import pdb
200
- # pdb.set_trace()
201
- logger.info(
202
- "emulation complete. encountered exit point or went out of bounds"
203
- )
204
- break
205
- except UnicornEmulationMemoryWriteError as e:
206
- # import pdb
207
- # pdb.set_trace()
208
- for write_operand, conc_val in e.details["writes"]:
209
- if type(write_operand) is BSIDMemoryReferenceOperand:
210
- if conc_val is None:
211
- h = self._mem_unavailable_hint(
212
- write_operand, e.pc, i, j, False
213
- )
214
- hint_list.append(h)
215
- break
216
-
217
- except UnicornEmulationMemoryReadError as e:
218
- # import pdb
219
- # pdb.set_trace()
220
- for read_operand in e.details["unmapped_reads"]:
221
- if type(read_operand) is BSIDMemoryReferenceOperand:
222
- h = self._mem_unavailable_hint(
223
- read_operand, e.pc, i, j, True
224
- )
225
- hint_list.append(h)
226
- break
227
- except Exception as e:
228
- # emulating this instruction failed
229
- # import pdb
230
- # pdb.set_trace()
231
- import pdb
232
-
233
- pdb.set_trace()
234
- exhint = hinting.EmulationException(
235
- message=f"In analysis, single step raised an exception {e}",
236
- pc=pc,
237
- # instruction=sw_insn,
238
- instruction_num=j,
239
- exception=str(e),
240
- )
241
- hint_list.append(exhint)
242
- hinter.debug(exhint)
243
- logger.info(e)
244
- break
245
-
246
- writes: typing.List[typing.Tuple[Operand, str, int]] = []
247
-
248
- # print(sw_insn.writes)
249
- # import pdb
250
- # pdb.set_trace()
251
-
252
- for write_operand in sw_insn.writes:
253
- logger.debug(f"pc={pc:x} write_operand={write_operand}")
254
-
255
- if (
256
- type(write_operand) is RegisterOperand
257
- and write_operand.name == "rflags"
258
- ):
259
- continue
260
-
261
- sz = self._operand_size(write_operand)
262
- try:
263
- write_operand_color = self._concrete_val_to_color(
264
- write_operand.concretize(self.emu), sz
265
- )
266
- # discard bad colors
267
- if write_operand_color == BAD_COLOR:
268
- continue
269
- except Exception as e:
270
- print(e)
271
- h = self._mem_unavailable_hint(write_operand, pc, i, j, False)
272
- hint_list.append(h)
172
+ self.pdef = platforms.PlatformDef.for_platform(self.platform)
173
+
174
+ def check_rws(emu, pc, te, is_read):
175
+ cs_insn = self._get_instr_at_pc(emu, pc)
176
+ sw_insn = Instruction.from_capstone(cs_insn)
177
+ if is_read:
178
+ operand_list = sw_insn.reads
179
+ lab = "read"
180
+ else:
181
+ operand_list = sw_insn.writes
182
+ lab = "write"
183
+ rws = []
184
+ for operand in operand_list:
185
+ logger.debug(f"pc={pc:x} {lab}_operand={operand}")
186
+ if type(operand) is RegisterOperand and operand.name == "rflags":
187
+ continue
188
+ sz = self._operand_size(operand)
189
+ # print(f"operand={operand} sz={sz}")
190
+ if type(operand) is BSIDMemoryReferenceOperand:
191
+ # if addr not mapped, discard this operand
192
+ a = operand.address(emu)
193
+ ar = (a, a + sz)
194
+ if not emu._is_address_range_mapped(ar):
273
195
  continue
274
- tup = (write_operand, write_operand_color, sz)
275
- writes.append(tup)
276
- writes.sort(key=lambda e: e[0].__repr__())
277
- # import pdb
278
- # pdb.set_trace()
279
- self._check_colors_instruction_writes(writes, sw_insn, i, j, hint_list)
280
-
281
- hint_list_list.append(hint_list)
282
-
283
- logger.info("-------------------------")
284
-
285
- # if two hints map to the same key then they are in same equivalence class
286
- def hint_key(hint):
287
- if type(hint) is hinting.DynamicRegisterValueHint:
288
- return (
289
- "dynamic_register_value",
290
- hint.pc,
291
- not hint.use,
292
- hint.color,
293
- hint.new,
294
- hint.message,
295
- hint.reg_name,
296
- )
297
- if type(hint) is hinting.DynamicMemoryValueHint:
298
- return (
299
- "dynamic_memory_value",
300
- hint.pc,
301
- not hint.use,
302
- hint.color,
303
- hint.new,
304
- hint.message,
305
- hint.base,
306
- hint.index,
307
- hint.scale,
308
- hint.offset,
309
- )
310
- if type(hint) is hinting.MemoryUnavailableHint:
311
- return (
312
- "memory_unavailable",
313
- hint.pc,
314
- hint.size,
315
- hint.message,
316
- hint.base_reg_name,
317
- hint.index_reg_name,
318
- hint.offset,
319
- hint.scale,
320
- )
321
- if type(hint) is hinting.EmulationException:
322
- return (
323
- "emulation_exception",
324
- hint.pc,
325
- hint.instruction_num,
326
- hint.exception,
327
- )
328
-
329
- all_hint_keys = set([])
330
- hk_exemplar = {}
331
- for hint_list in hint_list_list:
332
- for hint in hint_list:
333
- hk = hint_key(hint)
334
- all_hint_keys.add(hk)
335
- # keep one exemplar
336
- if hk not in hk_exemplar:
337
- hk_exemplar[hk] = hint
338
-
339
- # import pdb
340
- # pdb.set_trace()
341
- hint_keys_sorted = sorted(list(all_hint_keys))
342
-
343
- # given the equivalence classes established by `hint_key`, determine
344
- # which of those were observed in each micro-execution
345
- hk_observed: typing.Dict[
346
- int, typing.Set[typing.Tuple[int, bool, str, bool, str, str, str, int, int]]
347
- ] = {}
348
- for me in range(self.num_micro_executions):
349
- hk_observed[me] = set([])
350
- for hint in hint_list_list[me]:
351
- # this hint key was observed in micro execution me
352
- hk_observed[me].add(hint_key(hint))
353
-
354
- # estimate "probability" of observing a hint in an equiv class as
355
- # fraction of micro executions in which it was observed at least once
356
- hk_c = {}
357
- for hk in hint_keys_sorted:
358
- hk_c[hk] = 0
359
- for me in range(self.num_micro_executions):
360
- for hk2 in hk_observed[me]:
361
- if hk == hk2:
362
- hk_c[hk] += 1
363
-
364
- for hk in hint_keys_sorted:
365
- prob = (float(hk_c[hk])) / self.num_micro_executions
366
- assert prob <= 1.0
367
- hint = hk_exemplar[hk]
368
-
369
- if type(hint) is hinting.DynamicRegisterValueHint:
370
- hinter.info(
371
- hinting.DynamicRegisterValueProbHint(
372
- # instruction=hint.instruction,
373
- pc=hint.pc,
374
- reg_name=hint.reg_name,
375
- color=hint.color,
376
- size=hint.size,
377
- use=hint.use,
378
- new=hint.new,
379
- prob=prob,
380
- message=hint.message + "-prob",
381
- )
382
- )
383
- if type(hint) is hinting.DynamicMemoryValueHint:
384
- hinter.info(
385
- hinting.DynamicMemoryValueProbHint(
386
- # instruction=hint.instruction,
387
- pc=hint.pc,
388
- size=hint.size,
389
- base=hint.base,
390
- index=hint.index,
391
- scale=hint.scale,
392
- offset=hint.offset,
393
- color=hint.color,
394
- use=hint.use,
395
- new=hint.new,
396
- prob=prob,
397
- message=hint.message + "-prob",
398
- )
399
- )
400
- if type(hint) is hinting.MemoryUnavailableHint:
401
- hinter.info(
402
- hinting.MemoryUnavailableProbHint(
403
- is_read=hint.is_read,
404
- size=hint.size,
405
- base_reg_name=hint.base_reg_name,
406
- index_reg_name=hint.index_reg_name,
407
- offset=hint.offset,
408
- scale=hint.scale,
409
- pc=hint.pc,
410
- prob=prob,
411
- message=hint.message + "-prob",
412
- )
413
- )
196
+ conc = operand.concretize(emu)
197
+ color = self._concrete_val_to_color(conc, sz)
198
+ tup = (operand, conc, color, sz)
199
+ rws.append(tup)
200
+ rws.sort(key=lambda e: e[0].__repr__())
201
+ if len(rws) == 0:
202
+ return
203
+ for rw in rws:
204
+ (operand, conc, color, sz) = rw
205
+ if color == BAD_COLOR:
206
+ pass
207
+ else:
208
+ self._check_color(emu, is_read, rw, sw_insn, te.ic)
209
+ # self.update_shadow(emu, pc, is_read, rw)
210
+ if is_read:
211
+ self.reads = rws
212
+
213
+ def before_instruction_cb(emu, pc, te):
214
+ check_rws(emu, pc, te, True)
215
+
216
+ def after_instruction_cb(emu, pc, te):
217
+ check_rws(emu, pc, te, False)
218
+
219
+ self.colors = {}
220
+ self.shadow_register = {}
221
+ self.shadow_memory = {}
222
+ traceA = TraceExecution(self.hinter, num_insns=self.num_insns)
223
+ traceA.register_cb(
224
+ TraceExecutionCBPoint.BEFORE_INSTRUCTION, before_instruction_cb
225
+ )
226
+ traceA.register_cb(
227
+ TraceExecutionCBPoint.AFTER_INSTRUCTION, after_instruction_cb
228
+ )
229
+ traceA.run(machine)
230
+
231
+ # NOTE: Please keep this code
232
+ # if False:
233
+ # print("digraph{")
234
+ # print(" rankdir=LR")
235
+ # pc2nodeid = {}
236
+ # nodeid2pc = {}
237
+ # nodeids = set([])
238
+
239
+ # def add_pc(pc):
240
+ # if pc not in pc2nodeid:
241
+ # nodeid = f"node_{len(nodeids)}"
242
+ # nodeids.add(nodeid)
243
+ # pc2nodeid[pc] = nodeid
244
+ # nodeid2pc[nodeid] = pc
245
+
246
+ # for pc1 in self.edge.keys():
247
+ # add_pc(pc1)
248
+ # for pc2 in self.edge[pc1].keys():
249
+ # add_pc(pc2)
250
+ # for nodeid in nodeids:
251
+ # print(f'{nodeid} [label="0x{nodeid2pc[nodeid]:x}"]')
252
+ # for pc1 in self.edge.keys():
253
+ # for pc2 in self.edge[pc1].keys():
254
+ # (lab, conc, color) = self.edge[pc1][pc2]
255
+ # n1 = pc2nodeid[pc1]
256
+ # n2 = pc2nodeid[pc2]
257
+ # print(f'{n1} -> {n2} [label="{lab}"]')
258
+ # print("}")
414
259
 
415
260
  def _concrete_val_to_color(
416
261
  self, concrete_value: typing.Union[int, bytes, bytearray], size: int
417
- ) -> str:
262
+ ) -> int:
418
263
  # this concrete value can be an int (if it came from a register)
419
264
  # or bytes (if it came from memory read)
420
265
  # we want these in a common format so that we can see them as colors
@@ -433,225 +278,102 @@ class Colorizer(analysis.Analysis):
433
278
  the_bytes = concrete_value
434
279
  else:
435
280
  assert 1 == 0
436
- return base64.b64encode(the_bytes).decode()
437
-
438
- def _randomize_registers(self) -> None:
439
- for reg in self.orig_cpu:
440
- # only colorize the "regular" registers
441
- if (type(reg) is not state.Register) or (
442
- reg.name not in self.orig_cpu.get_general_purpose_registers()
443
- ):
444
- continue
445
- orig_val = self.emu.read_register(reg.name)
446
- logger.debug(f"_randomize_registers {reg.name} orig_val={orig_val:x}")
447
- # if reg.name == "rip" or reg.name == "rsp":
448
- # import pdb
449
- # pdb.set_trace()
450
- new_val = 0
451
- bc = 0
452
- for i in range(0, reg.size):
453
- new_val = new_val << 8
454
- if (
455
- reg.name in self.emu.initialized_registers
456
- and i in self.emu.initialized_registers[reg.name]
457
- ):
458
- bs = 8 * (reg.size - i - 1)
459
- b = (orig_val >> bs) & 0xFF
460
- # b = (orig_val >> (i * 8)) & 0xFF
461
- new_val |= b
462
- else:
463
- new_val |= random.randint(0, 255)
464
- bc += 1
465
- if bc == 0:
466
- logger.debug(
467
- f"Not colorizing register {reg.name} since it is already fully initialized with {orig_val:x}"
468
- )
469
- else:
470
- # make sure to update cpu as well as emu not sure why
471
- self.emu.write_register(reg.name, new_val)
472
- setattr(self.cpu, reg.name, new_val)
473
- logger.debug(
474
- f"Colorized {bc} bytes in register {reg.name}, old value was {orig_val:x} new is {new_val:x}"
475
- )
476
-
477
- # helper for read/write unavailable hint
478
- def _mem_unavailable_hint(
479
- self,
480
- operand: typing.Optional[BSIDMemoryReferenceOperand],
481
- pc: int,
482
- exec_num: int,
483
- insn_num: int,
484
- is_read: bool,
485
- ) -> hinting.Hint:
486
- (base_name, base_val) = ("None", 0)
487
- (index_name, index_val) = ("None", 0)
488
- (operand_size, operand_scale, operand_offset, operand_address) = (0, 0, 0, 0)
489
- if operand:
490
- operand_size = operand.size
491
- operand_scale = operand.scale
492
- operand_offset = operand.offset
493
- operand_address = operand.address(self.emu)
494
- if operand.base is not None:
495
- base_val = self.emu.read_register(operand.base)
496
- base_name = operand.base
497
- if operand.index is not None:
498
- index_val = self.emu.read_register(operand.index)
499
- index_name = operand.index
500
- hint = hinting.MemoryUnavailableHint(
501
- is_read=is_read,
502
- size=operand_size,
503
- base_reg_name=base_name,
504
- base_reg_val=base_val,
505
- index_reg_name=index_name,
506
- index_reg_val=index_val,
507
- offset=operand_offset,
508
- scale=operand_scale,
509
- address=operand_address,
510
- pc=pc,
511
- micro_exec_num=exec_num,
512
- instruction_num=insn_num,
513
- message="mem_unavailable",
514
- )
515
- hinter.debug(hint)
516
- return hint
517
-
518
- def _get_color_num(self, color: str) -> int:
519
- (_, _, _, _, color_num) = self.colors[color]
520
- return color_num
281
+ # let's make color a number
282
+ if size == 8:
283
+ return struct.unpack("<Q", the_bytes)[0]
284
+ if size == 4:
285
+ return struct.unpack("<L", the_bytes)[0]
286
+ if size == 2:
287
+ return struct.unpack("<H", the_bytes)[0]
288
+ assert size == 1
289
+ return struct.unpack("<B", the_bytes)[0]
521
290
 
522
291
  def _add_color(
523
292
  self,
524
- color: str,
293
+ color: int,
525
294
  operand: Operand,
526
295
  insn: Instruction,
527
- exec_num: int,
528
296
  insn_num: int,
529
297
  ) -> None:
530
- self.colors[color] = (operand, exec_num, insn_num, insn, 1 + len(self.colors))
298
+ self.colors[color] = (operand, insn_num, insn, 1 + len(self.colors))
531
299
 
532
- def _check_colors_instruction_reads(
300
+ def _check_color(
533
301
  self,
534
- reads: typing.List[typing.Tuple[Operand, str, int]],
302
+ emu,
303
+ is_read: bool,
304
+ rw, #: typing.Union[Operand, int, int],
535
305
  insn: Instruction,
536
- exec_num: int,
537
306
  insn_num: int,
538
- hint_list: typing.List[hinting.Hint],
539
307
  ):
540
- # import pdb
541
- # pdb.set_trace()
542
- for operand, color, operand_size in reads:
543
- if color in self.colors.keys():
308
+ (operand, conc, color, operand_size) = rw
309
+ if color in self.colors.keys():
310
+ # previously observed color
311
+ if is_read:
544
312
  # read-flow: use of a previously recorded color value
545
- hint = self._dynamic_value_hint(
546
- operand,
547
- operand_size,
548
- color,
549
- insn,
550
- True,
551
- False,
552
- exec_num,
553
- insn_num,
554
- "read-flow",
555
- )
556
- hinter.debug(hint)
557
- hint_list.append(hint)
313
+ msg = "read-flow"
558
314
  else:
559
- # red-def: use of a NOT previously recorded color value. As
560
- # long as the value is something reasonable, we'll record it as
561
- # a new color
562
- self._add_color(color, operand, insn, exec_num, insn_num)
563
- # logger.info(
564
- # f"new color {color} color_num {self._get_color_num(color)} instruction [{insn}] operand {operand}"
565
- # )
566
- hint = self._dynamic_value_hint(
567
- operand,
568
- operand_size,
569
- color,
570
- insn,
571
- True,
572
- True,
573
- exec_num,
574
- insn_num,
575
- "read-def",
576
- )
577
- hinter.debug(hint)
578
- hint_list.append(hint)
579
-
580
- def _check_colors_instruction_writes(
581
- self,
582
- writes: typing.List[typing.Tuple[Operand, str, int]],
583
- insn: Instruction,
584
- exec_num: int,
585
- insn_num: int,
586
- hint_list: typing.List[hinting.Hint],
587
- ):
588
- # NB: This should be called *AFTER the instruction emulates!
589
- for operand, color, operand_size in writes:
590
- if color in self.colors.keys():
591
315
  # write of a previously seen value
592
316
  # ... its just a copy so no hint, right?
593
- hint = self._dynamic_value_hint(
594
- operand,
595
- operand_size,
596
- color,
597
- insn,
598
- False,
599
- False,
600
- exec_num,
601
- insn_num,
602
- "write-copy",
603
- )
604
- hinter.debug(hint)
605
- hint_list.append(hint)
606
- pass
607
- else:
608
- # write-def: write of a NOT previously recorded color value as
317
+ msg = "write-copy"
318
+ hint = self._dynamic_value_hint(
319
+ emu,
320
+ operand,
321
+ operand_size,
322
+ color,
323
+ insn,
324
+ is_read,
325
+ False,
326
+ insn_num,
327
+ msg,
328
+ )
329
+ self.hinter.send(hint)
330
+ else:
331
+ # new color
332
+ self._add_color(color, operand, insn, insn_num)
333
+ if is_read:
334
+ # read-def: use of a NOT previously recorded color value. As
609
335
  # long as the value is something reasonable, we'll record it as
610
336
  # a new color
611
- self._add_color(color, operand, insn, exec_num, insn_num)
612
- # logger.info(
613
- # f"new color {color} color_num {self._get_color_num(color)} instruction [{insn}] operand {operand}"
614
- # )
615
- hint = self._dynamic_value_hint(
616
- operand,
617
- operand_size,
618
- color,
619
- insn,
620
- False,
621
- True,
622
- exec_num,
623
- insn_num,
624
- "write-def",
625
- )
626
- hinter.debug(hint)
627
- hint_list.append(hint)
337
+ msg = "read-def"
338
+ else:
339
+ msg = "write-def"
340
+ hint = self._dynamic_value_hint(
341
+ emu,
342
+ operand,
343
+ operand_size,
344
+ color,
345
+ insn,
346
+ is_read,
347
+ True,
348
+ insn_num,
349
+ msg,
350
+ )
351
+ self.hinter.send(hint)
628
352
 
629
353
  def _dynamic_value_hint(
630
354
  self,
355
+ emu,
631
356
  operand: Operand,
632
357
  size: int,
633
- color: str,
358
+ color: int,
634
359
  insn: Instruction,
635
360
  is_use: bool,
636
361
  is_new: bool,
637
- exec_num: int,
638
362
  insn_num: int,
639
363
  message: str,
640
364
  ):
641
365
  pc = insn.address
642
- color_num = self._get_color_num(color)
643
366
  if type(operand) is RegisterOperand:
644
367
  return hinting.DynamicRegisterValueHint(
645
368
  reg_name=operand.name,
646
369
  size=size,
647
- color=color_num,
370
+ color=color,
648
371
  dynamic_value=color,
649
372
  use=is_use,
650
373
  new=is_new,
651
- # instruction=insn,
652
374
  pc=pc,
653
- micro_exec_num=exec_num,
654
375
  instruction_num=insn_num,
376
+ exec_id=self.exec_id,
655
377
  message=message,
656
378
  )
657
379
  elif type(operand) is BSIDMemoryReferenceOperand:
@@ -662,21 +384,66 @@ class Colorizer(analysis.Analysis):
662
384
  if operand.index is not None:
663
385
  index_name = operand.index
664
386
  return hinting.DynamicMemoryValueHint(
665
- address=operand.address(self.emu),
387
+ address=operand.address(emu),
666
388
  base=base_name,
667
389
  index=index_name,
668
390
  scale=operand.scale,
669
391
  offset=operand.offset,
670
- color=color_num,
392
+ color=color,
671
393
  dynamic_value=color,
672
394
  size=operand.size,
673
395
  use=is_use,
674
396
  new=is_new,
675
- # instruction=insn,
676
397
  pc=pc,
677
- micro_exec_num=exec_num,
678
398
  instruction_num=insn_num,
399
+ exec_id=self.exec_id,
679
400
  message=message,
680
401
  )
681
402
  else:
682
403
  assert 1 == 0
404
+
405
+ # def update_shadow(self, emu, pc, is_read, rw):
406
+ # (operand, conc, color, operand_size) = rw
407
+
408
+ # if type(operand) is RegisterOperand:
409
+ # r = self.pdef.registers[operand.name]
410
+ # if type(r) is platforms.RegisterAliasDef:
411
+ # base_reg = r.parent
412
+ # start = r.offset
413
+ # else:
414
+ # base_reg = r.name
415
+ # start = 0
416
+ # if base_reg not in self.shadow_register:
417
+ # self.shadow_register[base_reg] = {}
418
+ # shad = self.shadow_register[base_reg]
419
+ # end = start + r.size
420
+ # lab = f"reg({r.name})"
421
+ # else:
422
+ # start = operand.address(emu)
423
+ # shad = self.shadow_memory
424
+ # end = start + operand.size
425
+ # lab = f"mem({start:x},{operand.size})"
426
+
427
+ # if is_read:
428
+ # # read. check labels on things we are reading to deduce flows
429
+ # fs = set([])
430
+ # for o in range(start, end):
431
+ # if o in shad:
432
+ # (pc_from, is_read, conc_from) = shad[o]
433
+ # if is_read:
434
+ # f = f"{lab} r->r"
435
+ # else:
436
+ # f = f"{lab} w->r"
437
+ # f += f" flow from pc={pc_from:x} to pc={pc:x} conc={conc} conc_from={conc_from} color={color}"
438
+ # if f not in fs:
439
+ # logger.info(f)
440
+ # if pc_from not in self.edge:
441
+ # self.edge[pc_from] = {}
442
+ # if pc not in self.edge[pc_from]:
443
+ # self.edge[pc_from][pc] = (lab, conc, color)
444
+ # # self.edge.add((pc_from, lab, pc, conc, color))
445
+ # fs.add(f)
446
+ # else:
447
+ # # write. we are overwriting things so no reason to check on bytes before doing so
448
+ # for o in range(start, end):
449
+ # shad[o] = (pc, is_read, conc)