metasm 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192) hide show
  1. data/BUGS +11 -0
  2. data/CREDITS +17 -0
  3. data/README +270 -0
  4. data/TODO +114 -0
  5. data/doc/code_organisation.txt +146 -0
  6. data/doc/const_missing.txt +16 -0
  7. data/doc/core_classes.txt +75 -0
  8. data/doc/feature_list.txt +53 -0
  9. data/doc/index.txt +59 -0
  10. data/doc/install_notes.txt +170 -0
  11. data/doc/style.css +3 -0
  12. data/doc/use_cases.txt +18 -0
  13. data/lib/metasm.rb +80 -0
  14. data/lib/metasm/arm.rb +12 -0
  15. data/lib/metasm/arm/debug.rb +39 -0
  16. data/lib/metasm/arm/decode.rb +167 -0
  17. data/lib/metasm/arm/encode.rb +77 -0
  18. data/lib/metasm/arm/main.rb +75 -0
  19. data/lib/metasm/arm/opcodes.rb +177 -0
  20. data/lib/metasm/arm/parse.rb +130 -0
  21. data/lib/metasm/arm/render.rb +55 -0
  22. data/lib/metasm/compile_c.rb +1457 -0
  23. data/lib/metasm/dalvik.rb +8 -0
  24. data/lib/metasm/dalvik/decode.rb +196 -0
  25. data/lib/metasm/dalvik/main.rb +60 -0
  26. data/lib/metasm/dalvik/opcodes.rb +366 -0
  27. data/lib/metasm/decode.rb +213 -0
  28. data/lib/metasm/decompile.rb +2659 -0
  29. data/lib/metasm/disassemble.rb +2068 -0
  30. data/lib/metasm/disassemble_api.rb +1280 -0
  31. data/lib/metasm/dynldr.rb +1329 -0
  32. data/lib/metasm/encode.rb +333 -0
  33. data/lib/metasm/exe_format/a_out.rb +194 -0
  34. data/lib/metasm/exe_format/autoexe.rb +82 -0
  35. data/lib/metasm/exe_format/bflt.rb +189 -0
  36. data/lib/metasm/exe_format/coff.rb +455 -0
  37. data/lib/metasm/exe_format/coff_decode.rb +901 -0
  38. data/lib/metasm/exe_format/coff_encode.rb +1078 -0
  39. data/lib/metasm/exe_format/dex.rb +457 -0
  40. data/lib/metasm/exe_format/dol.rb +145 -0
  41. data/lib/metasm/exe_format/elf.rb +923 -0
  42. data/lib/metasm/exe_format/elf_decode.rb +979 -0
  43. data/lib/metasm/exe_format/elf_encode.rb +1375 -0
  44. data/lib/metasm/exe_format/macho.rb +827 -0
  45. data/lib/metasm/exe_format/main.rb +228 -0
  46. data/lib/metasm/exe_format/mz.rb +164 -0
  47. data/lib/metasm/exe_format/nds.rb +172 -0
  48. data/lib/metasm/exe_format/pe.rb +437 -0
  49. data/lib/metasm/exe_format/serialstruct.rb +246 -0
  50. data/lib/metasm/exe_format/shellcode.rb +114 -0
  51. data/lib/metasm/exe_format/xcoff.rb +167 -0
  52. data/lib/metasm/gui.rb +23 -0
  53. data/lib/metasm/gui/cstruct.rb +373 -0
  54. data/lib/metasm/gui/dasm_coverage.rb +199 -0
  55. data/lib/metasm/gui/dasm_decomp.rb +369 -0
  56. data/lib/metasm/gui/dasm_funcgraph.rb +103 -0
  57. data/lib/metasm/gui/dasm_graph.rb +1354 -0
  58. data/lib/metasm/gui/dasm_hex.rb +543 -0
  59. data/lib/metasm/gui/dasm_listing.rb +599 -0
  60. data/lib/metasm/gui/dasm_main.rb +906 -0
  61. data/lib/metasm/gui/dasm_opcodes.rb +291 -0
  62. data/lib/metasm/gui/debug.rb +1228 -0
  63. data/lib/metasm/gui/gtk.rb +884 -0
  64. data/lib/metasm/gui/qt.rb +495 -0
  65. data/lib/metasm/gui/win32.rb +3004 -0
  66. data/lib/metasm/gui/x11.rb +621 -0
  67. data/lib/metasm/ia32.rb +14 -0
  68. data/lib/metasm/ia32/compile_c.rb +1523 -0
  69. data/lib/metasm/ia32/debug.rb +193 -0
  70. data/lib/metasm/ia32/decode.rb +1167 -0
  71. data/lib/metasm/ia32/decompile.rb +564 -0
  72. data/lib/metasm/ia32/encode.rb +314 -0
  73. data/lib/metasm/ia32/main.rb +233 -0
  74. data/lib/metasm/ia32/opcodes.rb +872 -0
  75. data/lib/metasm/ia32/parse.rb +327 -0
  76. data/lib/metasm/ia32/render.rb +91 -0
  77. data/lib/metasm/main.rb +1193 -0
  78. data/lib/metasm/mips.rb +11 -0
  79. data/lib/metasm/mips/compile_c.rb +7 -0
  80. data/lib/metasm/mips/decode.rb +253 -0
  81. data/lib/metasm/mips/encode.rb +51 -0
  82. data/lib/metasm/mips/main.rb +72 -0
  83. data/lib/metasm/mips/opcodes.rb +443 -0
  84. data/lib/metasm/mips/parse.rb +51 -0
  85. data/lib/metasm/mips/render.rb +43 -0
  86. data/lib/metasm/os/gnu_exports.rb +270 -0
  87. data/lib/metasm/os/linux.rb +1112 -0
  88. data/lib/metasm/os/main.rb +1686 -0
  89. data/lib/metasm/os/remote.rb +527 -0
  90. data/lib/metasm/os/windows.rb +2027 -0
  91. data/lib/metasm/os/windows_exports.rb +745 -0
  92. data/lib/metasm/parse.rb +876 -0
  93. data/lib/metasm/parse_c.rb +3938 -0
  94. data/lib/metasm/pic16c/decode.rb +42 -0
  95. data/lib/metasm/pic16c/main.rb +17 -0
  96. data/lib/metasm/pic16c/opcodes.rb +68 -0
  97. data/lib/metasm/ppc.rb +11 -0
  98. data/lib/metasm/ppc/decode.rb +264 -0
  99. data/lib/metasm/ppc/decompile.rb +251 -0
  100. data/lib/metasm/ppc/encode.rb +51 -0
  101. data/lib/metasm/ppc/main.rb +129 -0
  102. data/lib/metasm/ppc/opcodes.rb +410 -0
  103. data/lib/metasm/ppc/parse.rb +52 -0
  104. data/lib/metasm/preprocessor.rb +1277 -0
  105. data/lib/metasm/render.rb +130 -0
  106. data/lib/metasm/sh4.rb +8 -0
  107. data/lib/metasm/sh4/decode.rb +336 -0
  108. data/lib/metasm/sh4/main.rb +292 -0
  109. data/lib/metasm/sh4/opcodes.rb +381 -0
  110. data/lib/metasm/x86_64.rb +12 -0
  111. data/lib/metasm/x86_64/compile_c.rb +1025 -0
  112. data/lib/metasm/x86_64/debug.rb +59 -0
  113. data/lib/metasm/x86_64/decode.rb +268 -0
  114. data/lib/metasm/x86_64/encode.rb +264 -0
  115. data/lib/metasm/x86_64/main.rb +135 -0
  116. data/lib/metasm/x86_64/opcodes.rb +118 -0
  117. data/lib/metasm/x86_64/parse.rb +68 -0
  118. data/misc/bottleneck.rb +61 -0
  119. data/misc/cheader-findpppath.rb +58 -0
  120. data/misc/hexdiff.rb +74 -0
  121. data/misc/hexdump.rb +55 -0
  122. data/misc/metasm-all.rb +13 -0
  123. data/misc/objdiff.rb +47 -0
  124. data/misc/objscan.rb +40 -0
  125. data/misc/pdfparse.rb +661 -0
  126. data/misc/ppc_pdf2oplist.rb +192 -0
  127. data/misc/tcp_proxy_hex.rb +84 -0
  128. data/misc/txt2html.rb +440 -0
  129. data/samples/a.out.rb +31 -0
  130. data/samples/asmsyntax.rb +77 -0
  131. data/samples/bindiff.rb +555 -0
  132. data/samples/compilation-steps.rb +49 -0
  133. data/samples/cparser_makestackoffset.rb +55 -0
  134. data/samples/dasm-backtrack.rb +38 -0
  135. data/samples/dasmnavig.rb +318 -0
  136. data/samples/dbg-apihook.rb +228 -0
  137. data/samples/dbghelp.rb +143 -0
  138. data/samples/disassemble-gui.rb +102 -0
  139. data/samples/disassemble.rb +133 -0
  140. data/samples/dump_upx.rb +95 -0
  141. data/samples/dynamic_ruby.rb +1929 -0
  142. data/samples/elf_list_needed.rb +46 -0
  143. data/samples/elf_listexports.rb +33 -0
  144. data/samples/elfencode.rb +25 -0
  145. data/samples/exeencode.rb +128 -0
  146. data/samples/factorize-headers-elfimports.rb +77 -0
  147. data/samples/factorize-headers-peimports.rb +109 -0
  148. data/samples/factorize-headers.rb +43 -0
  149. data/samples/gdbclient.rb +583 -0
  150. data/samples/generate_libsigs.rb +102 -0
  151. data/samples/hotfix_gtk_dbg.rb +59 -0
  152. data/samples/install_win_env.rb +78 -0
  153. data/samples/lindebug.rb +924 -0
  154. data/samples/linux_injectsyscall.rb +95 -0
  155. data/samples/machoencode.rb +31 -0
  156. data/samples/metasm-shell.rb +91 -0
  157. data/samples/pe-hook.rb +69 -0
  158. data/samples/pe-ia32-cpuid.rb +203 -0
  159. data/samples/pe-mips.rb +35 -0
  160. data/samples/pe-shutdown.rb +78 -0
  161. data/samples/pe-testrelocs.rb +51 -0
  162. data/samples/pe-testrsrc.rb +24 -0
  163. data/samples/pe_listexports.rb +31 -0
  164. data/samples/peencode.rb +19 -0
  165. data/samples/peldr.rb +494 -0
  166. data/samples/preprocess-flatten.rb +19 -0
  167. data/samples/r0trace.rb +308 -0
  168. data/samples/rubstop.rb +399 -0
  169. data/samples/scan_pt_gnu_stack.rb +54 -0
  170. data/samples/scanpeexports.rb +62 -0
  171. data/samples/shellcode-c.rb +40 -0
  172. data/samples/shellcode-dynlink.rb +146 -0
  173. data/samples/source.asm +34 -0
  174. data/samples/struct_offset.rb +47 -0
  175. data/samples/testpe.rb +32 -0
  176. data/samples/testraw.rb +45 -0
  177. data/samples/win32genloader.rb +132 -0
  178. data/samples/win32hooker-advanced.rb +169 -0
  179. data/samples/win32hooker.rb +96 -0
  180. data/samples/win32livedasm.rb +33 -0
  181. data/samples/win32remotescan.rb +133 -0
  182. data/samples/wintrace.rb +92 -0
  183. data/tests/all.rb +8 -0
  184. data/tests/dasm.rb +39 -0
  185. data/tests/dynldr.rb +35 -0
  186. data/tests/encodeddata.rb +132 -0
  187. data/tests/ia32.rb +82 -0
  188. data/tests/mips.rb +116 -0
  189. data/tests/parse_c.rb +239 -0
  190. data/tests/preprocessor.rb +269 -0
  191. data/tests/x86_64.rb +62 -0
  192. metadata +255 -0
@@ -0,0 +1,31 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+ #
7
+ # here we build a simple a.out executable
8
+ #
9
+
10
+ require 'metasm'
11
+
12
+ Metasm::AOut.assemble(Metasm::Ia32.new, <<EOS).encode_file('m-a.out')
13
+ .text
14
+ .entrypoint
15
+ mov eax, 4
16
+ mov ebx, 1
17
+
18
+ .data
19
+ str db "kikoo\\n"
20
+ strend:
21
+
22
+ .text
23
+ mov ecx, str
24
+ mov edx, strend - str
25
+ int 80h // linux sys_write
26
+
27
+ mov eax, 1
28
+ mov ebx, 42
29
+ int 80h // linux sys_exit
30
+ ret
31
+ EOS
@@ -0,0 +1,77 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+ # this script shows the assembler syntax understood by the framework
7
+
8
+ require 'metasm'
9
+
10
+ edata = Metasm::Shellcode.assemble(Metasm::Ia32.new, <<EOS).encoded
11
+ #line 12 // preprocessor directive (useful in case of syntax error)
12
+
13
+ // data specification
14
+ db 42h ; a single byte
15
+ db 'a' ; same thing
16
+ dd 0x48, 0x7, 4 dup(0x19) ; 6 double words
17
+ dd toto + 4*pre_pad ; data may refer to labels in arbitrary expressions
18
+ db "foo", 0 ; null-terminated string
19
+ dw "foo", 0 ; null-terminated wide string
20
+ dd 0b010111101100 ; binary constant
21
+ dd someexternalvar+12 ; an external variable to be fixed up later
22
+
23
+ // code
24
+ inc ebx
25
+ jmp toto
26
+ mov eax, [fs:ebx + ((kikoo<<1) - 4*lol)] ; all immediate values can be an arbitrary arithmetic/logic expression
27
+ push.i16 0x1234 ; specific opcode forms are defined using this kind of syntax
28
+
29
+ // labels
30
+ pre_pad:
31
+ // parser instructions
32
+ .pad 90h ; this statement will be replaced by the right number of 0x90 to honor the next .offset directive
33
+ post_pad:
34
+
35
+ toto:
36
+ .offset 74 + (12-48>>4)*0 ; we are now at 74 bytes from the beginning of the shellcode (db 42h)
37
+ ; .offset accepts an arbitrary expression
38
+
39
+ .padto toto+38, db 3 dup(0b0110_0110) ; fill space with the specified data structure until 38 bytes after toto (same as .pad + .offset)
40
+
41
+ inc eax
42
+
43
+ .align 16, dw foobar + 42
44
+
45
+ local1:
46
+ 1: // a local label (any integer allowed)
47
+ jmp 1b // 1b => last '1' label (same as local1)
48
+ jmp 1f // 1f => next '1' label (same as local2)
49
+
50
+ local2:
51
+ 1: // local labels can be redefined as often as needed
52
+ mov eax, 1b // same as local2
53
+
54
+ ret
55
+
56
+ #ifdef BLABLA
57
+ you can also use any preprocessor directive (gcc-like syntax)
58
+ #pragma include_dir "/some/directory"
59
+ #include <foobar>
60
+ # elif defined(HOHOHO) && 42
61
+ #error 'infamous error message'
62
+ #else
63
+ #define test(ic) ((ic) - \
64
+ 4)
65
+ #endif
66
+
67
+ EOS
68
+
69
+ edata.fixup 'foobar' => 1 # fixup the value of 'foobar'
70
+ newdata = 'somestring'
71
+ edata.patch 'pre_pad', 'post_pad', newdata # replace the (beginning of the) segment beetween the labels by a string
72
+ #edata.patch 'pre_pad', 'post_pad', 'waaaaaaaaaay tooooooooooooooooooooooooooooooooooooooooo big !!!!' # raise an error
73
+
74
+ edata.fixup 'kikoo' => 8, 'lol' => 42 # fixup the immediate values
75
+ edata.fixup 'someexternalvar' => 0x30303030 # fixup the external used in the data segment
76
+
77
+ p edata.data # show the resulting raw string
@@ -0,0 +1,555 @@
1
+ #!/usr/bin/env ruby
2
+ # This file is part of Metasm, the Ruby assembly manipulation suite
3
+ # Copyright (C) 2006-2009 Yoann GUILLOT
4
+ #
5
+ # Licence is LGPL, see LICENCE in the top-level directory
6
+
7
+
8
+ # This sample implements a trivial binary diffing algorithm between two programs
9
+ # the programs have first to be disassembled, and then the diff algorithm will
10
+ # (try to) identify identical functions in both dasm graphs
11
+ # Currently there is NO fuzzy matching whatsoever, so the function graphs have to
12
+ # be exactly the same in both programs to be recognized.
13
+ # You can still force a comparison between two functions, but the results will be bad.
14
+ #
15
+ # This file can be run as a standalone application (eg 'ruby bindiff file1 file2')
16
+ # or as a disassembler plugin (see dasm-plugin/bindiff)
17
+
18
+ require 'metasm'
19
+
20
+ module ::Metasm
21
+ class BinDiffWidget < Gui::DrawableWidget
22
+ attr_accessor :status
23
+
24
+ COLORS = { :same => '8f8', :similar => 'cfc', :badarg => 'fcc', :badop => 'faa', :default => '888' }
25
+
26
# Widget setup hook: attaches whichever disassemblers were supplied and
# starts with no status message.
#
# d1, d2:: optional disassemblers for the first and second program; a nil
#          argument leaves that side untouched.
def initialize_widget(d1=nil, d2=nil)
  # go through the setters so the per-disassembler caches get initialised
  [[:dasm1=, d1], [:dasm2=, d2]].each { |setter, dasm| send(setter, dasm) if dasm }
  @status = nil
end
31
+
32
# Disassembler attached to the first program (nil until one is assigned).
def dasm1; @dasm1 end

# Attaches +d+ as the disassembler of the first program.
#
# Every cache derived from the previous disassembler is dropped: the function
# graphs (@func1), their statistics (@funcstat1), the per-address colour map
# (@dasmcol1) and the table of matched functions (@match_funcs).
# The disassembler gui is given a background colour callback that looks the
# address up in @dasmcol1 and falls back to the :default colour.
def dasm1=(d)
  @dasm1 = d
  @func1 = nil
  @funcstat1 = nil
  @dasmcol1 = {}
  @dasm1.gui.bg_color_callback = lambda { |a1| COLORS[@dasmcol1[a1] || :default] }
  # the matches are stored in @match_funcs (see match_funcs); the previous code
  # reset a misspelled @match_func and so kept stale matches alive
  @match_funcs = nil
end
41
+
42
# Disassembler attached to the second program (nil until one is assigned).
def dasm2; @dasm2 end

# Attaches +d+ as the disassembler of the second program.
#
# Every cache derived from the previous disassembler is dropped: the function
# graphs (@func2), their statistics (@funcstat2), the per-address colour map
# (@dasmcol2) and the table of matched functions (@match_funcs).
# The disassembler gui is given a background colour callback that looks the
# address up in @dasmcol2 and falls back to the :default colour.
def dasm2=(d)
  @dasm2 = d
  @func2 = nil
  # was @funcstat1: a copy/paste slip that wiped the stats of program 1 and
  # left the stats of the replaced program 2 in place
  @funcstat2 = nil
  @dasmcol2 = {}
  @dasm2.gui.bg_color_callback = lambda { |a2| COLORS[@dasmcol2[a2] || :default] }
  # the matches are stored in @match_funcs (see match_funcs); the previous code
  # reset a misspelled @match_func and so kept stale matches alive
  @match_funcs = nil
end
51
+
52
# address currently selected in the gui of the first disassembler
def curaddr1
  @dasm1.gui.curaddr
end

# address currently selected in the gui of the second disassembler
def curaddr2
  @dasm2.gui.curaddr
end

# start of the function holding the current address, first program
def curfunc1
  @dasm1.find_function_start(curaddr1)
end

# start of the function holding the current address, second program
def curfunc2
  @dasm2.find_function_start(curaddr2)
end

# function graphs of the first program, built on first use and then cached
def func1
  @func1 ||= set_status('funcs 1') { create_funcs(@dasm1) }
end

# function graphs of the second program, built on first use and then cached
def func2
  @func2 ||= set_status('funcs 2') { create_funcs(@dasm2) }
end

# per-function statistics of the first program, built on first use and then cached
def funcstat1
  @funcstat1 ||= set_status('func stats 1') { create_funcs_stats(func1, @dasm1) }
end

# per-function statistics of the second program, built on first use and then cached
def funcstat2
  @funcstat2 ||= set_status('func stats 2') { create_funcs_stats(func2, @dasm2) }
end
60
+
61
# Draws the current status text in black, or 'idle' when no status is set.
def paint
  label = @status || 'idle'
  draw_string_color(:black, @font_width, 3*@font_height, label)
end
64
+
65
# Refreshes both disassembler guis, then redraws this widget.
# A failing refresh (for instance when a disassembler is not loaded yet) is
# deliberately ignored so that the other side still gets updated.
def gui_update
  [@dasm1, @dasm2].each do |dasm|
    begin
      dasm.gui.gui_update
    rescue StandardError
      nil
    end
  end
  redraw
end
70
+
71
# Changes the status text shown by paint and redraws the widget.
#
# Without a block the new status simply stays in place and nil is returned.
# With a block, the block runs through the protect helper (defined outside
# this file), the previous status is put back afterwards and the value
# handed back by protect is returned.
def set_status(st=nil)
  previous = @status
  @status = st
  redraw
  return unless block_given?

  result = protect { yield }
  set_status(previous)
  result
end
81
+
82
# Handler for plain key presses.
#
#  A  run the D, f and i actions in sequence
#  D  disassemble the '.text' section of both programs
#  c  disassemble from the current address of each program
#  C  same as c, using the :disassemble_fast method
#  f  compute the function statistics of both programs
#  g  prompt for an address and focus both guis on it
#  M  match all functions and show the result list
#  m  compare the two functions under the cursors
#  r  reload this source file
#  Q  leave the gui main loop
#
# Any other key falls through the case and does nothing.
def keypress(key)
  case key
  when ?A
    keypress(?D)
    keypress(?f)
    keypress(?i)	# no branch handles ?i here, so this call has no effect
  when ?D then disassemble_all
  when ?c then disassemble
  when ?C then disassemble(:disassemble_fast)
  when ?f
    funcstat1
    funcstat2
  when ?g
    inputbox('address to go', :text => Expression[@dasm1.gui.curaddr]) do |v|
      @dasm1.gui.focus_addr_autocomplete(v)
      @dasm2.gui.focus_addr_autocomplete(v)
    end
  when ?M then show_match_funcs
  when ?m then match_one_func(curfunc1, curfunc2)
  when ?r
    puts 'reload'
    load __FILE__
    gui_update
  when ?Q then Gui.main_quit
  end
end
116
+
117
# Handler for key presses made with the control modifier.
#
#  ^C  disassemble from the current addresses with :disassemble_fast_deep
#  ^r  prompt for Ruby code, evaluate it and show the inspected result
#      (truncated to 512 characters) in a message box
def keypress_ctrl(key)
  case key
  when ?C then disassemble(:disassemble_fast_deep)
  when ?r
    # NOTE(security): this evaluates arbitrary Ruby typed into the input box,
    # inside the widget. The block parameter must stay named 'c': the
    # evaluated code can see it.
    inputbox('code to eval') do |c|
      messagebox eval(c).inspect[0, 512], 'eval'
    end
  end
end
125
+
126
# Disassembles the whole '.text' section of both programs through the
# 'dasm_all' disassembler plugin, after dropping the cached function graphs
# and statistics, then refreshes the display.
def disassemble_all
  @func1 = @func2 = @funcstat1 = @funcstat2 = nil
  [@dasm1, @dasm2].each { |dasm| dasm.load_plugin 'dasm_all' }
  set_status('dasm_all 1') { @dasm1.dasm_all_section '.text' }
  set_status('dasm_all 2') { @dasm2.dasm_all_section '.text' }
  gui_update
end
134
+
135
# Disassembles each program from the address selected in its own gui, then
# focuses the gui on that address in graph view and refreshes the display.
# The cached function graphs and statistics are dropped first.
#
# method:: name of the disassembler method to invoke with the address
#          (:disassemble by default)
def disassemble(method=:disassemble)
  @func1 = @func2 = @funcstat1 = @funcstat2 = nil

  set_status('dasm 1') do
    @dasm1.send(method, curaddr1)
    @dasm1.gui.focus_addr(curaddr1, :graph)
  end

  set_status('dasm 2') do
    @dasm2.send(method, curaddr2)
    @dasm2.gui.focus_addr(curaddr2, :graph)
  end

  gui_update
end
147
+
148
+
149
# Matches the functions of both programs and lists the outcome in a window.
#
# The list has one row per matched pair (label or address in each program and
# the score with 4 decimals), followed by the functions of program 1 and then
# of program 2 that found no counterpart (shown with '?' and 'nomatch').
# Selecting a row focuses each disassembler gui on the corresponding entry.
def show_match_funcs
  # match once and reuse the table: calling match_funcs a second time reran
  # the whole matching pass and printed its summary twice
  matches = match_funcs

  gui_update
  Gui.main_iter

  # prefer the label defined at an address, fall back to the address itself
  name1 = lambda { |a| @dasm1.get_label_at(a) || Expression[a] }
  name2 = lambda { |a| @dasm2.get_label_at(a) || Expression[a] }

  list = [['addr 1', 'addr 2', 'score']]
  f1 = func1.keys
  f2 = func2.keys
  matches.each { |a1, (a2, s)|
    list << [name1[a1], name2[a2], '%.4f' % s]
    f1.delete a1
    f2.delete a2
  }
  # whatever is left in f1/f2 was not matched
  f1.each { |a1| list << [name1[a1], '?', 'nomatch'] }
  f2.each { |a2| list << ['?', name2[a2], 'nomatch'] }

  listwindow("matches", list) { |i| @dasm1.gui.focus_addr i[0], nil, true ; @dasm2.gui.focus_addr i[1], nil, true }
end
166
+
167
# Builds the block graph of every function known to +dasm+.
# Returns a hash: func addr => { funcblock => list of funcblock to }
#
# Each entrypoint of the program is first registered as a function if it is
# not one already. Functions with no decoded instruction at their address
# are left out. The gui main loop is iterated after each function.
def create_funcs(dasm)
  dasm.entrypoints.to_a.each { |ep| dasm.function[ep] ||= DecodedFunction.new }

  graphs = {}
  dasm.function.each_key do |addr|
    next unless dasm.di_at(addr)
    graphs[addr] = create_func(dasm, addr)
    Gui.main_iter
  end
  graphs
end
178
+
179
# Walks the blocks of the function starting at +a+ and returns its graph as
# a hash: block address => list of the addresses it goes to inside the same
# function. Targets with no decoded instruction are ignored.
def create_func(dasm, a)
  graph = {}
  pending = [a]
  while (addr = pending.pop)
    # block already visited
    next if graph[addr]

    succs = graph[addr] = []
    dasm.decoded[addr].block.each_to_samefunc(dasm) do |to|
      next unless dasm.di_at(to)
      pending << to
      succs << to
    end
  end
  graph
end
193
+
194
# Computes the statistics of every function graph in +f+ (as returned by
# create_funcs) and returns them as a hash: func addr => stats.
# The gui main loop is iterated after each function.
def create_funcs_stats(f, dasm)
  stats = {}
  f.each do |addr, graph|
    stats[addr] = create_func_stats(dasm, addr, graph)
    Gui.main_iter
  end
  stats
end
202
+
203
# Computes the layout statistics of one function.
#
# dasm:: disassembler owning the function
# a::    address of the function entry block
# g::    graph of the function (block address => list of successor addresses)
#
# Returns a hash with the keys
#  :blocks     number of entries in g
#  :edges      number of edges between the blocks reachable from a
#  :leaves     number of reachable blocks with no successor
#  :ext_calls  number of jumps out of the function
#  :loops      number of loops removed until eliminate_one_loop finds none
#
# g itself is not modified.
def create_func_stats(dasm, a, g)
  s = {}
  s[:blocks] = g.length

  s[:edges] = 0		# nr of edges
  s[:leaves] = 0	# nr of nodes with no successor
  s[:ext_calls] = 0	# nr of jumps out_of_func
  s[:loops] = 0		# nr of jump back

  todo = [a]
  # visited blocks are kept in a hash: the Array#include? probe used before
  # made this walk quadratic in the number of blocks
  seen = {}
  while aa = todo.pop
    next if seen[aa]
    seen[aa] = true

    succs = g[aa]
    todo.concat succs

    s[:edges] += succs.length
    s[:leaves] += 1 if succs.empty?
    dasm.decoded[aa].block.each_to_otherfunc(dasm) { s[:ext_calls] += 1 }
  end

  # loop detection: cut one loop at a time from a copy of the graph and
  # count how many cuts it takes before none is left
  g = g.dup
  s[:loops] += 1 while eliminate_one_loop(a, g)

  s
end
233
+
234
# Looks for one loop in the graph +g+ reachable from +a+ and cuts it.
#
# The graph is walked depth-first while tracking, for each node, its visit
# index and the lowest index it can reach through the nodes still on the
# stack. When a node turns out to be the entry of a loop, one edge coming
# back to it is dropped from g and the walk stops.
#
# g is patched by replacing the successor list of the cut node with a new
# array, so lists shared with another copy of the graph are left intact.
#
# Returns true if a loop was cut (the caller should try again), false if
# none was found.
def eliminate_one_loop(a, g)
  stack = []
  index = {}
  lowlink = {}
  found = false
  counter = 0

  tarjan = lambda do |node|
    index[node] = lowlink[node] = counter
    counter += 1
    stack << node

    g[node].each do |succ|
      if !index[succ]
        tarjan[succ]
        break if found
        lowlink[node] = [lowlink[node], lowlink[succ]].min
      elsif stack.include?(succ)
        lowlink[node] = [lowlink[node], lowlink[succ]].min
      end
    end

    # a loop was cut deeper in the walk: unwind without touching anything
    next if found
    # node can reach something visited earlier: it is not a loop entry
    next unless index[node] == lowlink[node]

    if node == stack.last && !g[node].include?(node)
      # node is alone and has no edge to itself: not a loop
      stack.pop
      next
    end

    # node is the entry in the loop, cut the loop here
    tail = lowlink.keys.find { |n| lowlink[n] == index[node] && g[n].include?(node) }
    g[tail] -= [node]	# patch g, but don't modify the original g value (ie -= instead of delete)
    found = true	# one loop found & removed, try again
  end

  tarjan[a]
  found
end
272
+
273
# Forgets every known match and runs the function matching from scratch.
# Returns the new match table.
def rematch_funcs
  @match_funcs = nil	# match_funcs only considers functions with no entry yet
  match_funcs
end
277
+
278
# Matches the functions of program 1 against those of program 2.
#
# Works in two passes:
# 1. layout: for every function of program 1 that has no match yet, collect
#    the functions of program 2 whose statistics are exactly the same
# 2. refine: among these candidates, keep the one with the lowest
#    match_func score; a function of program 2 is given to one function of
#    program 1 only
#
# Results accumulate in @match_funcs (addr in program 1 => [addr in
# program 2, score]) which is also the return value. Functions matched by a
# previous call or forced through match_one_func are kept as is.
# An Interrupt raised while a function is being matched skips that function only.
def match_funcs
  @match_funcs ||= {}

  layout_match = {}

  set_status('match func layout') {
    funcstat1.each { |a, s|
      next if @match_funcs[a]
      layout_match[a] = []
      funcstat2.each { |aa, ss|
        layout_match[a] << aa if s == ss
      }
      Gui.main_iter
    }
  }

  set_status('match funcs') {
    # refine the layout matching with actual function matching
    # Start from the functions of program 2 that are already taken: with an
    # empty list, a later call could hand the same function to a second
    # function of program 1.
    already_matched = @match_funcs.values.map { |a2, _score| a2 }
    layout_match.each { |f1, list|
      puts "matching #{Expression[f1]}" if $VERBOSE
      begin
        f2 = (list - already_matched).min_by { |f| match_func(f1, f, false, false) }
        if f2
          already_matched << f2
          score = match_func(f1, f2)
          @match_funcs[f1] = [f2, score]
        end
      rescue Interrupt
        puts 'abort this one'
        sleep 0.2	# allow a 2nd ^c to escalate
      end
      Gui.main_iter
    }
  }

  puts "matched #{@match_funcs.length} - unmatched #{func1.length - @match_funcs.length}"
  @match_funcs
end
318
+
319
# Forces the comparison of function +a1+ (program 1) with function +a2+
# (program 2): prints the score, records the pair in the match table
# whatever the score is, and refreshes the display.
def match_one_func(a1, a2)
  score = match_func(a1, a2)
  puts "match score: #{score}"
  (@match_funcs ||= {})[a1] = [a2, score]
  gui_update
end
326
+
327
# return how much match a func in d1 and a func in d2
#
# a1, a2::      addresses of the functions in program 1 and program 2
# do_colorize:: when true, the paired blocks are coloured in both guis
# verb::        not used by this method
#
# The score is the sum of the match_block score of each pair of blocks
# divided by the block count of the larger function; lower is better.
# 0.01 is added when the two functions carry different labels, and 3 per
# block of difference in the block counts.
# Raises a RuntimeError if either address is not a known function.
def match_func(a1, a2, do_colorize=true, verb=true)
  f1 = func1[a1]
  f2 = func2[a2]
  raise "dasm1 has no function at #{Expression[a1]}" if not f1
  raise "dasm2 has no function at #{Expression[a2]}" if not f2

  score_div = [f1.length, f2.length].max.to_f
  score = 0.0	# average of the (local best) match_block scores
  score += 0.01 if @dasm1.get_label_at(a1) != @dasm2.get_label_at(a2)	# for thunks

  pending1 = [a1]
  pending2 = [a2]
  seen1 = []
  seen2 = []
  # XXX this is stupid and only good for perfect matches (and even then it may fail)
  # TODO handle block split etc (eg instr-level diff VS block-level)
  while (blk1 = pending1.shift)
    next if seen1.include?(blk1)

    # pair blk1 with the pending block of program 2 that scores best
    candidates = pending2.map { |a| [a, match_block(@dasm1.decoded[blk1].block, @dasm2.decoded[a].block)] }
    best = candidates.sort_by { |_a, s| s }.first
    break unless best

    blk2, blk_score = best
    score += blk_score / score_div
    seen1 << blk1
    seen2 << blk2
    pending1.concat f1[blk1]
    pending2.concat f2[blk2]
    pending2 -= seen2
    colorize_blocks(blk1, blk2) if do_colorize
  end

  score += (f1.length - f2.length).abs * 3 / score_div	# block count difference -> +3 per block

  score
end
363
+
364
# Scores how close two blocks are; lower is better. Each pair of
# instructions is rated by match_instr:
# 0 = perfect match (same opcodes, same args)
# 1 = same opcodes, same arg type
# 2 = same opcodes, diff argtypes
# 3 = some opcode difference
# The ratings are averaged over the instruction count of the longer block,
# plus 3 per instruction of difference in the block lengths.
#
# If a block is given, it receives the number of leading and of trailing
# instructions that compared well (rating of 1 or less).
def match_block(b1, b2)
  l1 = b1.list
  l2 = b2.list
  score = 0
  score_div = [l1.length, l2.length].max.to_f
  len = [l1.length, l2.length].min

  # basic diff-style: compare start while it's good, then end, then whats left
  # should handle most simples cases well
  common_start = 0
  while common_start < len && (s = match_instr(l1[common_start], l2[common_start])) <= 1
    score += s / score_div
    common_start += 1
  end

  common_end = 0
  while common_start + common_end < len && (s = match_instr(l1[-1-common_end], l2[-1-common_end])) <= 1
    score += s / score_div
    common_end += 1
  end

  # TODO improve the middle part matching (allow insertions/suppressions/swapping)
  middle = common_start..(-1-common_end)
  l1[middle].zip(l2[middle]).each { |di1, di2| score += match_instr(di1, di2) / score_div }

  yield(common_start, common_end) if block_given?	# used by colorize_blocks

  score += (l1.length - l2.length).abs * 3 / score_div	# instr count difference -> +3 per instr

  score
end
399
+
400
# Records, for each pair of instructions of the blocks at +a1+ (program 1)
# and +a2+ (program 2), how well they compare, in the per-address colour
# maps read by the gui colour callbacks.
#
# The instructions are paired from the start of the blocks, except for the
# common tail reported by match_block, which is paired from the end.
def colorize_blocks(a1, a2)
  b1 = @dasm1.decoded[a1].block
  b2 = @dasm2.decoded[a2].block

  common_end = 0
  match_block(b1, b2) { |_common_start, tail| common_end = tail }

  # match_instr rating => colour key
  verdicts = [:same, :similar, :badarg, :badop]
  mark = lambda { |range|
    b1.list[range].zip(b2.list[range]).each { |di1, di2|
      next if not di1 or not di2
      @dasmcol1[di1.address] = @dasmcol2[di2.address] = verdicts[match_instr(di1, di2)]
    }
  }

  mark[0..(-1-common_end)]
  # with no common tail this range is -0..-1, ie the whole block again
  mark[(-common_end)..-1]
end
416
+
417
# Rates how close two decoded instructions are:
#  0  same opcode, same text once the loc_xxx label names are blanked
#  1  same opcode, same argument classes, different text
#  2  same opcode, different argument classes
#  3  different opcode name, or one of the instructions is missing
def match_instr(di1, di2)
  return 3 if !di1 || !di2 || di1.opcode.name != di2.opcode.name

  i1 = di1.instruction
  i2 = di2.instruction
  return 2 if i1.args.map { |a| a.class } != i2.args.map { |a| a.class }

  # local labels TODO compare blocks targeted
  text = lambda { |i| i.to_s.gsub(/loc_\w+/, 'loc_') }
  text[i1] == text[i2] ? 0 : 1
end
428
+
429
# show in window 1 the match of the function found in win 2
# (does nothing when the current function of program 2 has no match)
def sync1
  target = curfunc2
  pair = match_funcs.find { |_a1, (a2, _s)| a2 == target }
  @dasm1.gui.focus_addr(pair[0]) if pair
end
436
+
437
# show in window 2 the match of the function found in win 1
# (does nothing when the current function of program 1 has no match)
def sync2
  match = match_funcs[curfunc1]
  @dasm2.gui.focus_addr(match[0]) if match
end
442
+ end
443
+
444
+ class BinDiffWindow < Gui::Window
445
# Window setup hook: creates the diff widget for the two (optional)
# disassemblers and installs it as the content of the window.
def initialize_window(d1=nil, d2=nil)
  diff_widget = BinDiffWidget.new(d1, d2)
  self.widget = diff_widget
end
448
+
449
# Builds the '_File' menu of the window: loading of the two programs,
# the disassembly / comparison / goto actions, the synchronisation of the
# two views and quit. An addsubmenu call with no label separates the groups.
def build_menu
  menu = new_menu

  # program loading
  addsubmenu(menu, 'load file 1') do
    openfile('file 1') { |f| loadfile1(f) }
  end
  addsubmenu(menu, 'load file 2') do
    openfile('file 2') { |f| loadfile2(f) }
  end
  addsubmenu(menu)

  # analysis
  addsubmenu(menu, '_disassemble from there', '^C') do
    widget.disassemble(:disassemble_fast_deep)
  end
  addsubmenu(menu, 'co_mpare current functions', 'm') do
    widget.match_one_func(widget.curfunc1, widget.curfunc2)
  end
  addsubmenu(menu, 'compare all funct_ions', 'M') do
    widget.show_match_funcs
  end
  addsubmenu(menu, '_goto', 'g') do
    widget.keypress ?g
  end
  addsubmenu(menu)

  # navigation between the two views
  addsubmenu(menu, 'sync win 2', '2') do
    widget.sync2
  end
  addsubmenu(menu, 'sync win 1', '1') do
    widget.sync1
  end
  addsubmenu(menu)

  addsubmenu(menu, '_quit', 'Q') do
    Gui.main_quit
  end

  addsubmenu(@menu, '_File', menu)
end
466
+
467
# Loads the file +f+ as the first program: decodes it (AutoExe, with an
# Ia32 cpu handed to orshellcode), opens a disassembler window on it and
# attaches the disassembler to the diff widget.
def loadfile1(f)
  dasm = AutoExe.orshellcode { Ia32.new }.decode_file(f).init_disassembler
  Gui::DasmWindow.new("bindiff - 1 - #{f}").display(dasm)
  widget.dasm1 = dasm
end
473
+
474
# Loads the file +f+ as the second program: decodes it (AutoExe, with an
# Ia32 cpu handed to orshellcode), opens a disassembler window on it and
# attaches the disassembler to the diff widget.
def loadfile2(f)
  dasm = AutoExe.orshellcode { Ia32.new }.decode_file(f).init_disassembler
  Gui::DasmWindow.new("bindiff - 2 - #{f}").display(dasm)
  widget.dasm2 = dasm
end
480
+ end
481
+ end
482
+
483
# Standalone entry point: ruby bindiff.rb [options] file1 file2 [entrypoints]
# Skipped when the file is loaded as a library, or reloaded from the widget.
if $0 == __FILE__ and not defined? $bindiff_loaded
  # allow reloading the file for easier diff algorithm test
  $bindiff_loaded = true

  require 'optparse'

  $VERBOSE = true

  # parse arguments
  opts = {}
  OptionParser.new { |opt|
    # two executables are read from the command line below; the banner used
    # to document a single one
    opt.banner = 'Usage: bindiff.rb [options] <executable1> <executable2> [<entrypoints>]'
    opt.on('-P <plugin>', '--plugin <plugin>', 'load a metasm disassembler plugin') { |h| (opts[:plugin] ||= []) << h }
    opt.on('-e <code>', '--eval <code>', 'eval a ruby code') { |h| (opts[:hookstr] ||= []) << h }
    opt.on('--map1 <mapfile>', 'load a map file (addr <-> name association)') { |f| opts[:map1] = f }
    opt.on('--map2 <mapfile>', 'load a map file (addr <-> name association)') { |f| opts[:map2] = f }
    opt.on('-c <header>', '--c-header <header>', 'read C function prototypes (for external library functions)') { |h| opts[:cheader] = h }
    opt.on('-a', '--autoload', 'loads all relevant files with same filename (.h, .map..)') { opts[:autoload] = true }
    opt.on('-v', '--verbose') { $VERBOSE = true }	# default
    opt.on('-q', '--no-verbose') { $VERBOSE = false }
    opt.on('-d', '--debug') { $DEBUG = $VERBOSE = true }
    opt.on('-A', 'match everything on start') { opts[:doit] = true }
  }.parse!(ARGV)

  # NOTE the local names below (opts, w1, exe1, dasm1, ...) are visible to the
  # code given with -e, which is evaluated in this scope: do not rename them

  # first program: open its disassembler window and load the file
  if exename1 = ARGV.shift
    w1 = Metasm::Gui::DasmWindow.new("#{exename1} - bindiff1 - metasm disassembler")
    exe1 = w1.loadfile(exename1)
    if opts[:autoload]
      # look for a .map / .h file named after the executable
      basename1 = exename1.sub(/\.\w\w?\w?$/, '')
      opts[:map1] ||= basename1 + '.map' if File.exist?(basename1 + '.map')
      opts[:cheader] ||= basename1 + '.h' if File.exist?(basename1 + '.h')
    end
  end

  # second program, same thing
  if exename2 = ARGV.shift
    w2 = Metasm::Gui::DasmWindow.new("#{exename2} - bindiff2 - metasm disassembler")
    exe2 = w2.loadfile(exename2)
    if opts[:autoload]
      basename2 = exename2.sub(/\.\w\w?\w?$/, '')
      opts[:map2] ||= basename2 + '.map' if File.exist?(basename2 + '.map')
      opts[:cheader] ||= basename2 + '.h' if File.exist?(basename2 + '.h')
    end
  end

  if exe1
    dasm1 = exe1.init_disassembler
    dasm1.load_map opts[:map1] if opts[:map1]
    dasm1.parse_c_file opts[:cheader] if opts[:cheader]
  end

  if exe2
    dasm2 = exe2.init_disassembler
    dasm2.load_map opts[:map2] if opts[:map2]
    dasm2.parse_c_file opts[:cheader] if opts[:cheader]
  end

  # the remaining arguments are entrypoints, used for both programs
  ep = ARGV.dup

  w1.dasm_widget.focus_addr ep.first if w1 and not ep.empty?
  w2.dasm_widget.focus_addr ep.first if w2 and not ep.empty?

  opts[:plugin].to_a.each { |p| dasm1.load_plugin(p) if dasm1 ; dasm2.load_plugin(p) if dasm2 }
  # NOTE(security): runs the ruby code given on the command line as is
  opts[:hookstr].to_a.each { |f| eval f }

  ep.each { |e| dasm1.disassemble_fast_deep(e) if dasm1 ; dasm2.disassemble_fast_deep(e) if dasm2 }

  bd = Metasm::BinDiffWindow.new(dasm1, dasm2)

  # -A: simulate the 'A' key (disassemble, function stats, ...)
  bd.widget.keypress ?A if opts[:doit]

  Metasm::Gui.main

end