metasm 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (192) hide show
  1. data/BUGS +11 -0
  2. data/CREDITS +17 -0
  3. data/README +270 -0
  4. data/TODO +114 -0
  5. data/doc/code_organisation.txt +146 -0
  6. data/doc/const_missing.txt +16 -0
  7. data/doc/core_classes.txt +75 -0
  8. data/doc/feature_list.txt +53 -0
  9. data/doc/index.txt +59 -0
  10. data/doc/install_notes.txt +170 -0
  11. data/doc/style.css +3 -0
  12. data/doc/use_cases.txt +18 -0
  13. data/lib/metasm.rb +80 -0
  14. data/lib/metasm/arm.rb +12 -0
  15. data/lib/metasm/arm/debug.rb +39 -0
  16. data/lib/metasm/arm/decode.rb +167 -0
  17. data/lib/metasm/arm/encode.rb +77 -0
  18. data/lib/metasm/arm/main.rb +75 -0
  19. data/lib/metasm/arm/opcodes.rb +177 -0
  20. data/lib/metasm/arm/parse.rb +130 -0
  21. data/lib/metasm/arm/render.rb +55 -0
  22. data/lib/metasm/compile_c.rb +1457 -0
  23. data/lib/metasm/dalvik.rb +8 -0
  24. data/lib/metasm/dalvik/decode.rb +196 -0
  25. data/lib/metasm/dalvik/main.rb +60 -0
  26. data/lib/metasm/dalvik/opcodes.rb +366 -0
  27. data/lib/metasm/decode.rb +213 -0
  28. data/lib/metasm/decompile.rb +2659 -0
  29. data/lib/metasm/disassemble.rb +2068 -0
  30. data/lib/metasm/disassemble_api.rb +1280 -0
  31. data/lib/metasm/dynldr.rb +1329 -0
  32. data/lib/metasm/encode.rb +333 -0
  33. data/lib/metasm/exe_format/a_out.rb +194 -0
  34. data/lib/metasm/exe_format/autoexe.rb +82 -0
  35. data/lib/metasm/exe_format/bflt.rb +189 -0
  36. data/lib/metasm/exe_format/coff.rb +455 -0
  37. data/lib/metasm/exe_format/coff_decode.rb +901 -0
  38. data/lib/metasm/exe_format/coff_encode.rb +1078 -0
  39. data/lib/metasm/exe_format/dex.rb +457 -0
  40. data/lib/metasm/exe_format/dol.rb +145 -0
  41. data/lib/metasm/exe_format/elf.rb +923 -0
  42. data/lib/metasm/exe_format/elf_decode.rb +979 -0
  43. data/lib/metasm/exe_format/elf_encode.rb +1375 -0
  44. data/lib/metasm/exe_format/macho.rb +827 -0
  45. data/lib/metasm/exe_format/main.rb +228 -0
  46. data/lib/metasm/exe_format/mz.rb +164 -0
  47. data/lib/metasm/exe_format/nds.rb +172 -0
  48. data/lib/metasm/exe_format/pe.rb +437 -0
  49. data/lib/metasm/exe_format/serialstruct.rb +246 -0
  50. data/lib/metasm/exe_format/shellcode.rb +114 -0
  51. data/lib/metasm/exe_format/xcoff.rb +167 -0
  52. data/lib/metasm/gui.rb +23 -0
  53. data/lib/metasm/gui/cstruct.rb +373 -0
  54. data/lib/metasm/gui/dasm_coverage.rb +199 -0
  55. data/lib/metasm/gui/dasm_decomp.rb +369 -0
  56. data/lib/metasm/gui/dasm_funcgraph.rb +103 -0
  57. data/lib/metasm/gui/dasm_graph.rb +1354 -0
  58. data/lib/metasm/gui/dasm_hex.rb +543 -0
  59. data/lib/metasm/gui/dasm_listing.rb +599 -0
  60. data/lib/metasm/gui/dasm_main.rb +906 -0
  61. data/lib/metasm/gui/dasm_opcodes.rb +291 -0
  62. data/lib/metasm/gui/debug.rb +1228 -0
  63. data/lib/metasm/gui/gtk.rb +884 -0
  64. data/lib/metasm/gui/qt.rb +495 -0
  65. data/lib/metasm/gui/win32.rb +3004 -0
  66. data/lib/metasm/gui/x11.rb +621 -0
  67. data/lib/metasm/ia32.rb +14 -0
  68. data/lib/metasm/ia32/compile_c.rb +1523 -0
  69. data/lib/metasm/ia32/debug.rb +193 -0
  70. data/lib/metasm/ia32/decode.rb +1167 -0
  71. data/lib/metasm/ia32/decompile.rb +564 -0
  72. data/lib/metasm/ia32/encode.rb +314 -0
  73. data/lib/metasm/ia32/main.rb +233 -0
  74. data/lib/metasm/ia32/opcodes.rb +872 -0
  75. data/lib/metasm/ia32/parse.rb +327 -0
  76. data/lib/metasm/ia32/render.rb +91 -0
  77. data/lib/metasm/main.rb +1193 -0
  78. data/lib/metasm/mips.rb +11 -0
  79. data/lib/metasm/mips/compile_c.rb +7 -0
  80. data/lib/metasm/mips/decode.rb +253 -0
  81. data/lib/metasm/mips/encode.rb +51 -0
  82. data/lib/metasm/mips/main.rb +72 -0
  83. data/lib/metasm/mips/opcodes.rb +443 -0
  84. data/lib/metasm/mips/parse.rb +51 -0
  85. data/lib/metasm/mips/render.rb +43 -0
  86. data/lib/metasm/os/gnu_exports.rb +270 -0
  87. data/lib/metasm/os/linux.rb +1112 -0
  88. data/lib/metasm/os/main.rb +1686 -0
  89. data/lib/metasm/os/remote.rb +527 -0
  90. data/lib/metasm/os/windows.rb +2027 -0
  91. data/lib/metasm/os/windows_exports.rb +745 -0
  92. data/lib/metasm/parse.rb +876 -0
  93. data/lib/metasm/parse_c.rb +3938 -0
  94. data/lib/metasm/pic16c/decode.rb +42 -0
  95. data/lib/metasm/pic16c/main.rb +17 -0
  96. data/lib/metasm/pic16c/opcodes.rb +68 -0
  97. data/lib/metasm/ppc.rb +11 -0
  98. data/lib/metasm/ppc/decode.rb +264 -0
  99. data/lib/metasm/ppc/decompile.rb +251 -0
  100. data/lib/metasm/ppc/encode.rb +51 -0
  101. data/lib/metasm/ppc/main.rb +129 -0
  102. data/lib/metasm/ppc/opcodes.rb +410 -0
  103. data/lib/metasm/ppc/parse.rb +52 -0
  104. data/lib/metasm/preprocessor.rb +1277 -0
  105. data/lib/metasm/render.rb +130 -0
  106. data/lib/metasm/sh4.rb +8 -0
  107. data/lib/metasm/sh4/decode.rb +336 -0
  108. data/lib/metasm/sh4/main.rb +292 -0
  109. data/lib/metasm/sh4/opcodes.rb +381 -0
  110. data/lib/metasm/x86_64.rb +12 -0
  111. data/lib/metasm/x86_64/compile_c.rb +1025 -0
  112. data/lib/metasm/x86_64/debug.rb +59 -0
  113. data/lib/metasm/x86_64/decode.rb +268 -0
  114. data/lib/metasm/x86_64/encode.rb +264 -0
  115. data/lib/metasm/x86_64/main.rb +135 -0
  116. data/lib/metasm/x86_64/opcodes.rb +118 -0
  117. data/lib/metasm/x86_64/parse.rb +68 -0
  118. data/misc/bottleneck.rb +61 -0
  119. data/misc/cheader-findpppath.rb +58 -0
  120. data/misc/hexdiff.rb +74 -0
  121. data/misc/hexdump.rb +55 -0
  122. data/misc/metasm-all.rb +13 -0
  123. data/misc/objdiff.rb +47 -0
  124. data/misc/objscan.rb +40 -0
  125. data/misc/pdfparse.rb +661 -0
  126. data/misc/ppc_pdf2oplist.rb +192 -0
  127. data/misc/tcp_proxy_hex.rb +84 -0
  128. data/misc/txt2html.rb +440 -0
  129. data/samples/a.out.rb +31 -0
  130. data/samples/asmsyntax.rb +77 -0
  131. data/samples/bindiff.rb +555 -0
  132. data/samples/compilation-steps.rb +49 -0
  133. data/samples/cparser_makestackoffset.rb +55 -0
  134. data/samples/dasm-backtrack.rb +38 -0
  135. data/samples/dasmnavig.rb +318 -0
  136. data/samples/dbg-apihook.rb +228 -0
  137. data/samples/dbghelp.rb +143 -0
  138. data/samples/disassemble-gui.rb +102 -0
  139. data/samples/disassemble.rb +133 -0
  140. data/samples/dump_upx.rb +95 -0
  141. data/samples/dynamic_ruby.rb +1929 -0
  142. data/samples/elf_list_needed.rb +46 -0
  143. data/samples/elf_listexports.rb +33 -0
  144. data/samples/elfencode.rb +25 -0
  145. data/samples/exeencode.rb +128 -0
  146. data/samples/factorize-headers-elfimports.rb +77 -0
  147. data/samples/factorize-headers-peimports.rb +109 -0
  148. data/samples/factorize-headers.rb +43 -0
  149. data/samples/gdbclient.rb +583 -0
  150. data/samples/generate_libsigs.rb +102 -0
  151. data/samples/hotfix_gtk_dbg.rb +59 -0
  152. data/samples/install_win_env.rb +78 -0
  153. data/samples/lindebug.rb +924 -0
  154. data/samples/linux_injectsyscall.rb +95 -0
  155. data/samples/machoencode.rb +31 -0
  156. data/samples/metasm-shell.rb +91 -0
  157. data/samples/pe-hook.rb +69 -0
  158. data/samples/pe-ia32-cpuid.rb +203 -0
  159. data/samples/pe-mips.rb +35 -0
  160. data/samples/pe-shutdown.rb +78 -0
  161. data/samples/pe-testrelocs.rb +51 -0
  162. data/samples/pe-testrsrc.rb +24 -0
  163. data/samples/pe_listexports.rb +31 -0
  164. data/samples/peencode.rb +19 -0
  165. data/samples/peldr.rb +494 -0
  166. data/samples/preprocess-flatten.rb +19 -0
  167. data/samples/r0trace.rb +308 -0
  168. data/samples/rubstop.rb +399 -0
  169. data/samples/scan_pt_gnu_stack.rb +54 -0
  170. data/samples/scanpeexports.rb +62 -0
  171. data/samples/shellcode-c.rb +40 -0
  172. data/samples/shellcode-dynlink.rb +146 -0
  173. data/samples/source.asm +34 -0
  174. data/samples/struct_offset.rb +47 -0
  175. data/samples/testpe.rb +32 -0
  176. data/samples/testraw.rb +45 -0
  177. data/samples/win32genloader.rb +132 -0
  178. data/samples/win32hooker-advanced.rb +169 -0
  179. data/samples/win32hooker.rb +96 -0
  180. data/samples/win32livedasm.rb +33 -0
  181. data/samples/win32remotescan.rb +133 -0
  182. data/samples/wintrace.rb +92 -0
  183. data/tests/all.rb +8 -0
  184. data/tests/dasm.rb +39 -0
  185. data/tests/dynldr.rb +35 -0
  186. data/tests/encodeddata.rb +132 -0
  187. data/tests/ia32.rb +82 -0
  188. data/tests/mips.rb +116 -0
  189. data/tests/parse_c.rb +239 -0
  190. data/tests/preprocessor.rb +269 -0
  191. data/tests/x86_64.rb +62 -0
  192. metadata +255 -0
@@ -0,0 +1,31 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+ #
7
+ # here we build a simple a.out executable
8
+ #
9
+
10
+ require 'metasm'
11
+
12
# Assembly listing for the sample program: two `int 80h` calls, annotated in
# the listing itself as linux sys_write (eax=4) and sys_exit (eax=1, ebx=42).
asm_source = <<EOS
.text
.entrypoint
mov eax, 4
mov ebx, 1

.data
str db "kikoo\\n"
strend:

.text
mov ecx, str
mov edx, strend - str
int 80h // linux sys_write

mov eax, 1
mov ebx, 42
int 80h // linux sys_exit
ret
EOS

# Assemble the listing for an Ia32 cpu as an a.out image and write it to 'm-a.out'.
cpu = Metasm::Ia32.new
Metasm::AOut.assemble(cpu, asm_source).encode_file('m-a.out')
@@ -0,0 +1,77 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+ # this script shows the assembler syntax understood by the framework
7
+
8
+ require 'metasm'
9
+
10
# Assembler source demonstrating the syntax accepted by the parser; the
# explanations are the comments embedded in the listing itself.
syntax_demo = <<EOS
#line 12 // preprocessor directive (useful in case of syntax error)

// data specification
db 42h ; a single byte
db 'a' ; same thing
dd 0x48, 0x7, 4 dup(0x19) ; 6 double words
dd toto + 4*pre_pad ; data may refer to labels in arbitrary expressions
db "foo", 0 ; null-terminated string
dw "foo", 0 ; null-terminated wide string
dd 0b010111101100 ; binary constant
dd someexternalvar+12 ; an external variable to be fixed up later

// code
inc ebx
jmp toto
mov eax, [fs:ebx + ((kikoo<<1) - 4*lol)] ; all immediate values can be an arbitrary arithmetic/logic expression
push.i16 0x1234 ; specific opcode forms are defined using this kind of syntax

// labels
pre_pad:
// parser instructions
.pad 90h ; this statement will be replaced by the right number of 0x90 to honor the next .offset directive
post_pad:

toto:
.offset 74 + (12-48>>4)*0 ; we are now at 74 bytes from the beginning of the shellcode (db 42h)
; .offset accepts an arbitrary expression

.padto toto+38, db 3 dup(0b0110_0110) ; fill space with the specified data structure until 38 bytes after toto (same as .pad + .offset)

inc eax

.align 16, dw foobar + 42

local1:
1: // a local label (any integer allowed)
jmp 1b // 1b => last '1' label (same as local1)
jmp 1f // 1f => next '1' label (same as local2)

local2:
1: // local labels can be redefined as often as needed
mov eax, 1b // same as local2

ret

#ifdef BLABLA
you can also use any preprocessor directive (gcc-like syntax)
#pragma include_dir "/some/directory"
#include <foobar>
# elif defined(HOHOHO) && 42
#error 'infamous error message'
#else
#define test(ic) ((ic) - \
4)
#endif

EOS

# Assemble the listing as a raw Ia32 shellcode and keep its encoded data.
edata = Metasm::Shellcode.assemble(Metasm::Ia32.new, syntax_demo).encoded

# Give 'foobar' (used by the .align filler) its value.
edata.fixup('foobar' => 1)

# Overwrite the start of the pre_pad..post_pad segment with a string.
# Patching with a string longer than the segment raises an error, e.g.:
#edata.patch 'pre_pad', 'post_pad', 'waaaaaaaaaay tooooooooooooooooooooooooooooooooooooooooo big !!!!'
newdata = 'somestring'
edata.patch('pre_pad', 'post_pad', newdata)

# Resolve the symbols used in immediate values, then the external data reference.
edata.fixup('kikoo' => 8, 'lol' => 42)
edata.fixup('someexternalvar' => 0x30303030)

# Show the resulting raw string.
p(edata.data)
@@ -0,0 +1,555 @@
1
+ #!/usr/bin/env ruby
2
+ # This file is part of Metasm, the Ruby assembly manipulation suite
3
+ # Copyright (C) 2006-2009 Yoann GUILLOT
4
+ #
5
+ # Licence is LGPL, see LICENCE in the top-level directory
6
+
7
+
8
+ # This sample implements a trivial binary diffing algorithm between two programs
9
+ # the programs first have to be disassembled, and then the diff algorithm will
10
+ # (try to) identify identical functions in both dasm graphs
11
+ # Currently there is NO fuzzy matching whatsoever, so the function graphs have to
12
+ # be exactly the same in both programs to be recognized.
13
+ # You can still force a comparison between two functions, but the results will be bad.
14
+ #
15
+ # This file can be run as a standalone application (eg 'ruby bindiff file1 file2')
16
+ # or as a disassembler plugin (see dasm-plugin/bindiff)
17
+
18
+ require 'metasm'
19
+
20
+ module ::Metasm
21
+ class BinDiffWidget < Gui::DrawableWidget
22
+ attr_accessor :status
23
+
24
+ COLORS = { :same => '8f8', :similar => 'cfc', :badarg => 'fcc', :badop => 'faa', :default => '888' }
25
+
26
# Widget constructor hook: registers whichever disassemblers were supplied
# (through the dasm1=/dasm2= writers) and clears the status line.
def initialize_widget(d1=nil, d2=nil)
  [[:dasm1=, d1], [:dasm2=, d2]].each do |writer, dasm|
    send(writer, dasm) if dasm
  end
  @status = nil
end
31
+
32
# Disassembler shown in window 1.
def dasm1
  @dasm1
end

# Replace disassembler 1 and drop everything that was derived from the old one:
# the function graphs, their statistics, the per-address colours and the
# function matches.
def dasm1=(d)
  @dasm1 = d
  @func1 = nil
  @funcstat1 = nil
  @dasmcol1 = {}
  # background colour of an address: the colour recorded for it, else the :default entry
  @dasm1.gui.bg_color_callback = lambda { |a1| COLORS[@dasmcol1[a1] || :default] }
  # the match cache is @match_funcs (the original cleared an unused @match_func)
  @match_funcs = nil
end
41
+
42
# Disassembler shown in window 2.
def dasm2
  @dasm2
end

# Replace disassembler 2 and drop everything that was derived from the old one:
# the function graphs, their statistics, the per-address colours and the
# function matches.
def dasm2=(d)
  @dasm2 = d
  @func2 = nil
  # the statistics cache of *this* side (the original reset @funcstat1 here)
  @funcstat2 = nil
  @dasmcol2 = {}
  # background colour of an address: the colour recorded for it, else the :default entry
  @dasm2.gui.bg_color_callback = lambda { |a2| COLORS[@dasmcol2[a2] || :default] }
  # the match cache is @match_funcs (the original cleared an unused @match_func)
  @match_funcs = nil
end
51
+
52
# Address currently selected in the gui of disassembler 1.
def curaddr1
  @dasm1.gui.curaddr
end

# Address currently selected in the gui of disassembler 2.
def curaddr2
  @dasm2.gui.curaddr
end

# Start of the function containing the current address of disassembler 1.
def curfunc1
  @dasm1.find_function_start(curaddr1)
end

# Start of the function containing the current address of disassembler 2.
def curfunc2
  @dasm2.find_function_start(curaddr2)
end

# Function graphs of disassembler 1, built on first use and then cached.
def func1
  return @func1 if @func1
  @func1 = set_status('funcs 1') { create_funcs(@dasm1) }
end

# Function graphs of disassembler 2, built on first use and then cached.
def func2
  return @func2 if @func2
  @func2 = set_status('funcs 2') { create_funcs(@dasm2) }
end

# Per-function statistics of disassembler 1, built on first use and then cached.
def funcstat1
  return @funcstat1 if @funcstat1
  @funcstat1 = set_status('func stats 1') { create_funcs_stats(func1, @dasm1) }
end

# Per-function statistics of disassembler 2, built on first use and then cached.
def funcstat2
  return @funcstat2 if @funcstat2
  @funcstat2 = set_status('func stats 2') { create_funcs_stats(func2, @dasm2) }
end
60
+
61
# Draw the status line in black ('idle' when no status is set), one font
# width from the left and three font heights from the top.
def paint
  label = @status || 'idle'
  draw_string_color(:black, @font_width, 3 * @font_height, label)
end
64
+
65
# Refresh both disassembler guis, then this widget.
# A failure while refreshing either gui (for instance a disassembler that is
# not loaded yet) is deliberately ignored.
def gui_update
  [@dasm1, @dasm2].each do |dasm|
    begin
      dasm.gui.gui_update
    rescue StandardError
      nil
    end
  end
  redraw
end
70
+
71
# Set the status line and redraw.
# With a block: run it (wrapped in protect) while the status is displayed,
# restore the previous status afterwards and return the block's result.
# Without a block: return nil.
def set_status(st=nil)
  previous = @status
  @status = st
  redraw
  return unless block_given?

  result = protect { yield }
  set_status(previous)
  result
end
81
+
82
# Keyboard shortcuts of the widget; the value of the last action is returned
# (nil for an unhandled key).
#   A  run D, f and i in sequence   D  disassemble the .text section of both programs
#   c  disassemble at the cursor    C  same, using disassemble_fast
#   f  compute function statistics  g  ask for an address and focus both guis on it
#   M  list all function matches    m  compare the two current functions
#   r  reload this source file      Q  quit
def keypress(key)
  case key
  when ?A
    [?D, ?f].each { |k| keypress(k) }
    # NOTE(review): there is no handler for ?i in this method, so this call does nothing
    keypress(?i)
  when ?D then disassemble_all
  when ?c then disassemble
  when ?C then disassemble(:disassemble_fast)
  when ?f
    funcstat1
    funcstat2
  when ?g
    inputbox('address to go', :text => Expression[@dasm1.gui.curaddr]) do |v|
      @dasm1.gui.focus_addr_autocomplete(v)
      @dasm2.gui.focus_addr_autocomplete(v)
    end
  when ?M then show_match_funcs
  when ?m then match_one_func(curfunc1, curfunc2)
  when ?r
    puts 'reload'
    load __FILE__
    gui_update
  when ?Q then Gui.main_quit
  end
end
116
+
117
# Ctrl-key shortcuts:
#   ^C  disassemble at the cursor using disassemble_fast_deep
#   ^r  ask for ruby code, eval it and show the (truncated) inspected result
def keypress_ctrl(key)
  case key
  when ?C then disassemble(:disassemble_fast_deep)
  when ?r
    # NOTE: evaluates whatever the user typed, in the context of this widget
    inputbox('code to eval') do |c|
      messagebox(eval(c).inspect[0, 512], 'eval')
    end
  end
end
125
+
126
# Disassemble the whole '.text' section of both programs with the 'dasm_all'
# plugin. The cached function graphs and statistics are dropped first.
def disassemble_all
  @func1 = nil
  @func2 = nil
  @funcstat1 = nil
  @funcstat2 = nil

  targets = [['dasm_all 1', @dasm1], ['dasm_all 2', @dasm2]]
  targets.each { |_label, dasm| dasm.load_plugin('dasm_all') }
  targets.each do |label, dasm|
    set_status(label) { dasm.dasm_all_section('.text') }
  end
  gui_update
end
134
+
135
# Disassemble from the current address of each program, calling the given
# disassembler method (:disassemble by default), then focus each gui on that
# address in graph view. The cached function graphs and statistics are dropped.
def disassemble(method=:disassemble)
  @func1 = nil
  @func2 = nil
  @funcstat1 = nil
  @funcstat2 = nil

  [['dasm 1', @dasm1, :curaddr1], ['dasm 2', @dasm2, :curaddr2]].each do |label, dasm, cursor|
    set_status(label) do
      dasm.send(method, send(cursor))
      dasm.gui.focus_addr(send(cursor), :graph)
    end
  end
  gui_update
end
147
+
148
+
149
# Match the functions of both programs and show the result in a list window:
# one row per matched pair (with its score), then the functions of program 1
# and of program 2 that were left unmatched. Selecting a row focuses both guis
# on the addresses of that row.
def show_match_funcs
  # run the matching once and reuse the result (the original called
  # match_funcs a second time below, repeating the whole pass)
  matches = match_funcs

  gui_update
  Gui.main_iter
  list = [['addr 1', 'addr 2', 'score']]
  f1 = func1.keys
  f2 = func2.keys
  matches.each { |a1, (a2, s)|
    list << [(@dasm1.get_label_at(a1) || Expression[a1]), (@dasm2.get_label_at(a2) || Expression[a2]), '%.4f' % s]
    f1.delete a1
    f2.delete a2
  }
  # whatever is left on either side has no counterpart
  f1.each { |a1| list << [(@dasm1.get_label_at(a1) || Expression[a1]), '?', 'nomatch'] }
  f2.each { |a2| list << ['?', (@dasm2.get_label_at(a2) || Expression[a2]), 'nomatch'] }
  listwindow("matches", list) { |i| @dasm1.gui.focus_addr i[0], nil, true ; @dasm2.gui.focus_addr i[1], nil, true }
end
166
+
167
# Build the block graph of every function known to the disassembler.
# Returns { func addr => { block addr => list of successor block addrs } }.
# Entrypoints that are not registered as functions yet are registered first;
# function addresses with no decoded instruction are skipped.
def create_funcs(dasm)
  funcs = {}
  dasm.entrypoints.to_a.each do |entry|
    dasm.function[entry] = DecodedFunction.new unless dasm.function[entry]
  end
  dasm.function.each_key do |addr|
    if dasm.di_at(addr)
      funcs[addr] = create_func(dasm, addr)
      Gui.main_iter	# keep the gui responsive during long runs
    end
  end
  funcs
end
178
+
179
# Walk the blocks of the function starting at +a+ and return its graph as
# { block addr => list of same-function successor addrs }.
# Successors with no decoded instruction are ignored.
def create_func(dasm, a)
  graph = {}
  pending = [a]
  while (addr = pending.pop)
    next if graph[addr]	# block already visited
    successors = graph[addr] = []
    dasm.decoded[addr].block.each_to_samefunc(dasm) do |target|
      if dasm.di_at(target)
        pending << target
        successors << target
      end
    end
  end
  graph
end
193
+
194
# Compute the statistics of every function graph in +f+.
# Returns { func addr => stats hash as built by create_func_stats }.
def create_funcs_stats(f, dasm)
  stats = {}
  f.each_pair do |addr, graph|
    stats[addr] = create_func_stats(dasm, addr, graph)
    Gui.main_iter	# keep the gui responsive during long runs
  end
  stats
end
202
+
203
# Compute layout statistics for one function.
# +a+ is the function entry, +g+ its graph { block addr => successor addrs }.
# Returns a hash with the number of :blocks, :edges, :leaves (blocks without
# successor), :ext_calls (jumps leaving the function) and :loops.
def create_func_stats(dasm, a, g)
  s = {}
  s[:blocks] = g.length

  s[:edges] = 0		# nr of edges
  s[:leaves] = 0	# nr of nodes with no successor
  s[:ext_calls] = 0	# nr of jumps out_of_func
  s[:loops] = 0		# nr of jump back

  todo = [a]
  done = {}	# visited blocks; a hash keeps the membership test O(1)
  while aa = todo.pop
    next if done[aa]
    done[aa] = true
    todo.concat g[aa]

    s[:edges] += g[aa].length
    s[:leaves] += 1 if g[aa].empty?
    dasm.decoded[aa].block.each_to_otherfunc(dasm) { s[:ext_calls] += 1 }
  end

  # loop detection: cut one loop at a time until none is left, counting the cuts.
  # Works on a copy of the hash; eliminate_one_loop does not modify the edge
  # lists of the original graph.
  g = g.dup
  while eliminate_one_loop(a, g)
    s[:loops] += 1
  end

  s
end
233
+
234
# Find one loop reachable from +a+ in graph +g+ and remove the edge that
# closes it. Returns true if a loop was cut, false if none was found.
# +g+ itself gets a new edge list for the patched node; the existing edge
# arrays are never modified in place.
def eliminate_one_loop(a, g)
  stack = []
  index = {}	# node => order in which it was first visited
  lowlink = {}	# node => smallest index known to be reachable from it
  cut = false
  counter = 0

  # depth-first walk; stops everywhere as soon as one loop has been cut
  visit = lambda { |node|
    index[node] = lowlink[node] = counter
    counter += 1
    stack << node
    g[node].each { |succ|
      if index[succ]
        lowlink[node] = [lowlink[node], lowlink[succ]].min if stack.include?(succ)
      else
        visit[succ]
        break if cut
        lowlink[node] = [lowlink[node], lowlink[succ]].min
      end
    }
    next if cut
    next unless index[node] == lowlink[node]

    if node == stack.last and not g[node].include?(node)
      # node is alone on top of the stack and has no self-edge: not part of a loop
      stack.pop
      next
    end
    # node is the entry of a loop: cut the edge coming back to it
    tail = lowlink.keys.find { |cand| lowlink[cand] == index[node] and g[cand].include?(node) }
    g[tail] -= [node]	# builds a new array, leaving the original edge list intact
    cut = true		# one loop found & removed, the caller tries again
  }

  visit[a]
  cut
end
272
+
273
# Forget every recorded match, then run the matching again from scratch.
# Returns the new match table.
def rematch_funcs
  @match_funcs = nil
  match_funcs()
end
277
+
278
# Match the functions of program 1 against those of program 2.
# Functions already present in @match_funcs are kept as they are; for the
# others, candidates are the functions of program 2 with identical layout
# statistics, and the candidate with the lowest match_func score wins.
# Returns @match_funcs: { func addr 1 => [func addr 2, score] }.
def match_funcs
  @match_funcs ||= {}

  layout_match = {}

  set_status('match func layout') {
    funcstat1.each { |a, s|
      next if @match_funcs[a]
      layout_match[a] = []
      funcstat2.each { |aa, ss|
        layout_match[a] << aa if s == ss
      }
      Gui.main_iter
    }
  }

  set_status('match funcs') {
    # refine the layout matching with actual function matching
    # targets of existing matches are already taken: without this seed, a
    # second run could hand the same program 2 function to another function
    already_matched = @match_funcs.values.map { |a2, _score| a2 }
    layout_match.each { |f1, list|
      puts "matching #{Expression[f1]}" if $VERBOSE
      begin
        f2 = (list - already_matched).sort_by { |f| match_func(f1, f, false, false) }.first
        if f2
          already_matched << f2
          score = match_func(f1, f2)
          @match_funcs[f1] = [f2, score]
        end
      rescue Interrupt
        puts 'abort this one'
        sleep 0.2	# allow a 2nd ^c to escalate
      end
      Gui.main_iter
    }
  }

  puts "matched #{@match_funcs.length} - unmatched #{func1.length - @match_funcs.length}"
  @match_funcs
end
318
+
319
# Compare the function at +a1+ (program 1) with the one at +a2+ (program 2),
# print the score, record the pair in the match table and refresh the guis.
def match_one_func(a1, a2)
  score = match_func(a1, a2)
  puts "match score: #{score}"
  (@match_funcs ||= {})[a1] = [a2, score]
  gui_update
end
326
+
327
+ # return how much match a func in d1 and a func in d2
328
+ def match_func(a1, a2, do_colorize=true, verb=true)
329
+ f1 = func1[a1]
330
+ f2 = func2[a2]
331
+ raise "dasm1 has no function at #{Expression[a1]}" if not f1
332
+ raise "dasm2 has no function at #{Expression[a2]}" if not f2
333
+ todo1 = [a1]
334
+ todo2 = [a2]
335
+ done1 = []
336
+ done2 = []
337
+ score = 0.0 # average of the (local best) match_block scores
338
+ score += 0.01 if @dasm1.get_label_at(a1) != @dasm2.get_label_at(a2) # for thunks
339
+ score_div = [f1.length, f2.length].max.to_f
340
+ # XXX this is stupid and only good for perfect matches (and even then it may fail)
341
+ # TODO handle block split etc (eg instr-level diff VS block-level)
342
+ while a1 = todo1.shift
343
+ next if done1.include? a1
344
+ t = todo2.map { |a| [a, match_block(@dasm1.decoded[a1].block, @dasm2.decoded[a].block)] }
345
+ a2 = t.sort_by { |a, s| s }.first
346
+ if not a2
347
+ break
348
+ end
349
+ score += a2[1] / score_div
350
+ a2 = a2[0]
351
+ done1 << a1
352
+ done2 << a2
353
+ todo1.concat f1[a1]
354
+ todo2.concat f2[a2]
355
+ todo2 -= done2
356
+ colorize_blocks(a1, a2) if do_colorize
357
+ end
358
+
359
+ score += (f1.length - f2.length).abs * 3 / score_div # block count difference -> +3 per block
360
+
361
+ score
362
+ end
363
+
364
# Score how well two instruction blocks match, as the average of the
# per-instruction match_instr scores plus a penalty for the length difference:
#  0 = perfect match (same opcodes, same args)
#  1 = same opcodes, same arg type
#  2 = same opcodes, diff argtypes
#  3 = some opcode difference
#  4 = full block difference
# When a block is given, it receives the number of instructions matched from
# the start and from the end of the blocks (used by colorize_blocks).
def match_block(b1, b2)
  list1 = b1.list
  list2 = b2.list
  score = 0
  score_div = [list1.length, list2.length].max.to_f
  shortest = [list1.length, list2.length].min
  common_start = 0
  common_end = 0

  # basic diff-style: compare the start while it is good, then the end, then what is left
  # should handle most simple cases well
  while common_start < shortest
    s = match_instr(list1[common_start], list2[common_start])
    break if s > 1
    score += s / score_div
    common_start += 1
  end

  while common_start + common_end < shortest
    s = match_instr(list1[-1 - common_end], list2[-1 - common_end])
    break if s > 1
    score += s / score_div
    common_end += 1
  end

  # TODO improve the middle part matching (allow insertions/suppressions/swapping)
  middle1 = list1[common_start..-1 - common_end]
  middle2 = list2[common_start..-1 - common_end]
  middle1.zip(middle2).each do |di1, di2|
    score += match_instr(di1, di2) / score_div
  end

  yield(common_start, common_end) if block_given?	# used by colorize_blocks

  # instr count difference -> +3 per instr
  score += (list1.length - list2.length).abs * 3 / score_div

  score
end
399
+
400
# Record a background colour for every instruction of the two paired blocks
# (+a1+ in program 1, +a2+ in program 2), according to how well the paired
# instructions match: :same, :similar, :badarg or :badop.
# The instructions matched from the end of the blocks are paired with each
# other; the rest is paired by position from the start.
def colorize_blocks(a1, a2)
  b1 = @dasm1.decoded[a1].block
  b2 = @dasm2.decoded[a2].block

  # only the size of the common tail is needed here
  common_end = 0
  match_block(b1, b2) { |_common_start, tail| common_end = tail }

  palette = [:same, :similar, :badarg, :badop]	# indexed by the match_instr score
  colorize = lambda { |list1, list2|
    list1.zip(list2).each { |di1, di2|
      next if not di1 or not di2
      @dasmcol1[di1.address] = @dasmcol2[di2.address] = palette[match_instr(di1, di2)]
    }
  }

  colorize[b1.list[0..-1-common_end], b2.list[0..-1-common_end]]
  # with an empty tail, list[-0..-1] would be the whole list again: skip it
  colorize[b1.list[-common_end..-1], b2.list[-common_end..-1]] if common_end > 0
end
416
+
417
# Compare two decoded instructions and return
#  3 if either is missing or the opcode names differ,
#  2 if the classes of their arguments differ,
#  1 if their text differs once local labels (loc_xxx) are neutralised,
#  0 otherwise.
def match_instr(di1, di2)
  return 3 unless di1 && di2 && di1.opcode.name == di2.opcode.name

  arg_types = lambda { |di| di.instruction.args.map { |arg| arg.class } }
  return 2 unless arg_types[di1] == arg_types[di2]

  # local labels TODO compare blocks targeted
  text = lambda { |di| di.instruction.to_s.gsub(/loc_\w+/, 'loc_') }
  text[di1] == text[di2] ? 0 : 1
end
428
+
429
# Focus window 1 on the function matched with the current function of
# window 2; does nothing (and returns nil) when there is no such match.
def sync1
  wanted = curfunc2
  hit = match_funcs.find { |_a1, (a2, _score)| a2 == wanted }
  @dasm1.gui.focus_addr(hit[0]) if hit
end
436
+
437
# Focus window 2 on the function matched with the current function of
# window 1; does nothing (and returns nil) when there is no such match.
def sync2
  hit = match_funcs[curfunc1]
  @dasm2.gui.focus_addr(hit[0]) if hit
end
442
+ end
443
+
444
# Top-level window of the standalone bindiff tool: hosts a BinDiffWidget and
# a menu to load the two programs and drive the comparison.
class BinDiffWindow < Gui::Window
  # Window constructor hook: create the diff widget for the given disassemblers.
  def initialize_window(d1=nil, d2=nil)
    self.widget = BinDiffWidget.new(d1, d2)
  end

  # Populate the '_File' menu.
  def build_menu
    menu = new_menu

    addsubmenu(menu, 'load file 1') do
      openfile('file 1') { |f| loadfile1(f) }
    end
    addsubmenu(menu, 'load file 2') do
      openfile('file 2') { |f| loadfile2(f) }
    end
    addsubmenu(menu)

    addsubmenu(menu, '_disassemble from there', '^C') do
      widget.disassemble(:disassemble_fast_deep)
    end
    addsubmenu(menu, 'co_mpare current functions', 'm') do
      widget.match_one_func(widget.curfunc1, widget.curfunc2)
    end
    addsubmenu(menu, 'compare all funct_ions', 'M') do
      widget.show_match_funcs
    end
    addsubmenu(menu, '_goto', 'g') do
      widget.keypress ?g
    end
    addsubmenu(menu)

    addsubmenu(menu, 'sync win 2', '2') do
      widget.sync2
    end
    addsubmenu(menu, 'sync win 1', '1') do
      widget.sync1
    end
    addsubmenu(menu)

    addsubmenu(menu, '_quit', 'Q') do
      Gui.main_quit
    end

    addsubmenu(@menu, '_File', menu)
  end

  # Load file +f+ as program 1, show it in its own disassembler window and
  # hand its disassembler to the widget.
  def loadfile1(f)
    d = bindiff_open_dasm(f, 1)
    widget.dasm1 = d
  end

  # Load file +f+ as program 2, show it in its own disassembler window and
  # hand its disassembler to the widget.
  def loadfile2(f)
    d = bindiff_open_dasm(f, 2)
    widget.dasm2 = d
  end

  private

  # Decode +f+ (falling back to a raw Ia32 shellcode), display it in a new
  # DasmWindow titled after +idx+ and return its disassembler.
  def bindiff_open_dasm(f, idx)
    exe = AutoExe.orshellcode { Ia32.new }.decode_file(f)
    d = exe.init_disassembler
    Gui::DasmWindow.new("bindiff - #{idx} - #{f}").display(d)
    d
  end
end
481
+ end
482
+
483
# Standalone entry point: open both programs in their own disassembler window,
# then open the bindiff window on top of the two disassemblers.
# The local variable names below are visible to the code given with -e
# (it is eval'd in this scope).
if $0 == __FILE__ and not defined? $bindiff_loaded
  # allow reloading the file for easier diff algorithm test
  $bindiff_loaded = true

  require 'optparse'

  $VERBOSE = true

  # parse arguments
  opts = {}
  OptionParser.new { |opt|
    # two executables are read from the command line, followed by optional entrypoints
    opt.banner = 'Usage: bindiff.rb [options] <executable1> <executable2> [<entrypoints>]'
    opt.on('-P <plugin>', '--plugin <plugin>', 'load a metasm disassembler plugin') { |h| (opts[:plugin] ||= []) << h }
    opt.on('-e <code>', '--eval <code>', 'eval a ruby code') { |h| (opts[:hookstr] ||= []) << h }
    opt.on('--map1 <mapfile>', 'load a map file (addr <-> name association)') { |f| opts[:map1] = f }
    opt.on('--map2 <mapfile>', 'load a map file (addr <-> name association)') { |f| opts[:map2] = f }
    opt.on('-c <header>', '--c-header <header>', 'read C function prototypes (for external library functions)') { |h| opts[:cheader] = h }
    opt.on('-a', '--autoload', 'loads all relevant files with same filename (.h, .map..)') { opts[:autoload] = true }
    opt.on('-v', '--verbose') { $VERBOSE = true }	# default
    opt.on('-q', '--no-verbose') { $VERBOSE = false }
    opt.on('-d', '--debug') { $DEBUG = $VERBOSE = true }
    opt.on('-A', 'match everything on start') { opts[:doit] = true }
  }.parse!(ARGV)

  # first positional argument: program 1
  if exename1 = ARGV.shift
    w1 = Metasm::Gui::DasmWindow.new("#{exename1} - bindiff1 - metasm disassembler")
    exe1 = w1.loadfile(exename1)
    if opts[:autoload]
      # look for <name>.map and <name>.h next to the executable
      basename1 = exename1.sub(/\.\w\w?\w?$/, '')
      opts[:map1] ||= basename1 + '.map' if File.exist?(basename1 + '.map')
      opts[:cheader] ||= basename1 + '.h' if File.exist?(basename1 + '.h')
    end
  end

  # second positional argument: program 2
  if exename2 = ARGV.shift
    w2 = Metasm::Gui::DasmWindow.new("#{exename2} - bindiff2 - metasm disassembler")
    exe2 = w2.loadfile(exename2)
    if opts[:autoload]
      basename2 = exename2.sub(/\.\w\w?\w?$/, '')
      opts[:map2] ||= basename2 + '.map' if File.exist?(basename2 + '.map')
      # the C header option is shared: a header found for program 1 is kept
      opts[:cheader] ||= basename2 + '.h' if File.exist?(basename2 + '.h')
    end
  end

  if exe1
    dasm1 = exe1.init_disassembler
    dasm1.load_map opts[:map1] if opts[:map1]
    dasm1.parse_c_file opts[:cheader] if opts[:cheader]
  end

  if exe2
    dasm2 = exe2.init_disassembler
    dasm2.load_map opts[:map2] if opts[:map2]
    dasm2.parse_c_file opts[:cheader] if opts[:cheader]
  end

  # the remaining arguments are entrypoints, used for both programs
  ep = ARGV.dup

  w1.dasm_widget.focus_addr ep.first if w1 and not ep.empty?
  w2.dasm_widget.focus_addr ep.first if w2 and not ep.empty?

  opts[:plugin].to_a.each { |p| dasm1.load_plugin(p) if dasm1 ; dasm2.load_plugin(p) if dasm2 }
  # NOTE: runs the ruby code given on the command line with -e
  opts[:hookstr].to_a.each { |f| eval f }

  ep.each { |e| dasm1.disassemble_fast_deep(e) if dasm1 ; dasm2.disassemble_fast_deep(e) if dasm2 }

  bd = Metasm::BinDiffWindow.new(dasm1, dasm2)

  # -A: same as pressing 'A' in the bindiff window
  bd.widget.keypress ?A if opts[:doit]

  Metasm::Gui.main

end