metasm 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +1 -0
- data/.hgtags +3 -0
- data/Gemfile +1 -0
- data/INSTALL +61 -0
- data/LICENCE +458 -0
- data/README +29 -21
- data/Rakefile +10 -0
- data/TODO +10 -12
- data/doc/code_organisation.txt +2 -0
- data/doc/core/DynLdr.txt +247 -0
- data/doc/core/ExeFormat.txt +43 -0
- data/doc/core/Expression.txt +220 -0
- data/doc/core/GNUExports.txt +27 -0
- data/doc/core/Ia32.txt +236 -0
- data/doc/core/SerialStruct.txt +108 -0
- data/doc/core/VirtualString.txt +145 -0
- data/doc/core/WindowsExports.txt +61 -0
- data/doc/core/index.txt +1 -0
- data/doc/style.css +6 -3
- data/doc/usage/debugger.txt +327 -0
- data/doc/usage/index.txt +1 -0
- data/doc/use_cases.txt +2 -2
- data/metasm.gemspec +22 -0
- data/{lib/metasm.rb → metasm.rb} +11 -3
- data/{lib/metasm → metasm}/compile_c.rb +13 -7
- data/metasm/cpu/arc.rb +8 -0
- data/metasm/cpu/arc/decode.rb +425 -0
- data/metasm/cpu/arc/main.rb +191 -0
- data/metasm/cpu/arc/opcodes.rb +588 -0
- data/{lib/metasm → metasm/cpu}/arm.rb +7 -5
- data/{lib/metasm → metasm/cpu}/arm/debug.rb +2 -2
- data/{lib/metasm → metasm/cpu}/arm/decode.rb +13 -12
- data/{lib/metasm → metasm/cpu}/arm/encode.rb +23 -8
- data/{lib/metasm → metasm/cpu}/arm/main.rb +0 -3
- data/metasm/cpu/arm/opcodes.rb +324 -0
- data/{lib/metasm → metasm/cpu}/arm/parse.rb +25 -13
- data/{lib/metasm → metasm/cpu}/arm/render.rb +2 -2
- data/metasm/cpu/arm64.rb +15 -0
- data/metasm/cpu/arm64/debug.rb +38 -0
- data/metasm/cpu/arm64/decode.rb +289 -0
- data/metasm/cpu/arm64/encode.rb +41 -0
- data/metasm/cpu/arm64/main.rb +105 -0
- data/metasm/cpu/arm64/opcodes.rb +232 -0
- data/metasm/cpu/arm64/parse.rb +20 -0
- data/metasm/cpu/arm64/render.rb +95 -0
- data/{lib/metasm/ppc.rb → metasm/cpu/bpf.rb} +2 -4
- data/metasm/cpu/bpf/decode.rb +142 -0
- data/metasm/cpu/bpf/main.rb +60 -0
- data/metasm/cpu/bpf/opcodes.rb +81 -0
- data/metasm/cpu/bpf/render.rb +41 -0
- data/metasm/cpu/cy16.rb +9 -0
- data/metasm/cpu/cy16/decode.rb +253 -0
- data/metasm/cpu/cy16/main.rb +63 -0
- data/metasm/cpu/cy16/opcodes.rb +78 -0
- data/metasm/cpu/cy16/render.rb +41 -0
- data/metasm/cpu/dalvik.rb +11 -0
- data/{lib/metasm → metasm/cpu}/dalvik/decode.rb +35 -13
- data/{lib/metasm → metasm/cpu}/dalvik/main.rb +51 -2
- data/{lib/metasm → metasm/cpu}/dalvik/opcodes.rb +19 -11
- data/metasm/cpu/ia32.rb +17 -0
- data/{lib/metasm → metasm/cpu}/ia32/compile_c.rb +5 -7
- data/{lib/metasm → metasm/cpu}/ia32/debug.rb +5 -5
- data/{lib/metasm → metasm/cpu}/ia32/decode.rb +246 -59
- data/{lib/metasm → metasm/cpu}/ia32/decompile.rb +7 -7
- data/{lib/metasm → metasm/cpu}/ia32/encode.rb +19 -13
- data/{lib/metasm → metasm/cpu}/ia32/main.rb +51 -8
- data/metasm/cpu/ia32/opcodes.rb +1424 -0
- data/{lib/metasm → metasm/cpu}/ia32/parse.rb +47 -16
- data/{lib/metasm → metasm/cpu}/ia32/render.rb +31 -4
- data/metasm/cpu/mips.rb +14 -0
- data/{lib/metasm → metasm/cpu}/mips/compile_c.rb +1 -1
- data/metasm/cpu/mips/debug.rb +42 -0
- data/{lib/metasm → metasm/cpu}/mips/decode.rb +46 -16
- data/{lib/metasm → metasm/cpu}/mips/encode.rb +4 -3
- data/{lib/metasm → metasm/cpu}/mips/main.rb +11 -4
- data/{lib/metasm → metasm/cpu}/mips/opcodes.rb +86 -17
- data/{lib/metasm → metasm/cpu}/mips/parse.rb +1 -1
- data/{lib/metasm → metasm/cpu}/mips/render.rb +1 -1
- data/{lib/metasm/dalvik.rb → metasm/cpu/msp430.rb} +1 -1
- data/metasm/cpu/msp430/decode.rb +247 -0
- data/metasm/cpu/msp430/main.rb +62 -0
- data/metasm/cpu/msp430/opcodes.rb +101 -0
- data/{lib/metasm → metasm/cpu}/pic16c/decode.rb +6 -7
- data/{lib/metasm → metasm/cpu}/pic16c/main.rb +0 -0
- data/{lib/metasm → metasm/cpu}/pic16c/opcodes.rb +1 -1
- data/{lib/metasm/mips.rb → metasm/cpu/ppc.rb} +4 -4
- data/{lib/metasm → metasm/cpu}/ppc/decode.rb +18 -12
- data/{lib/metasm → metasm/cpu}/ppc/decompile.rb +3 -3
- data/{lib/metasm → metasm/cpu}/ppc/encode.rb +2 -2
- data/{lib/metasm → metasm/cpu}/ppc/main.rb +17 -12
- data/{lib/metasm → metasm/cpu}/ppc/opcodes.rb +11 -5
- data/metasm/cpu/ppc/parse.rb +55 -0
- data/metasm/cpu/python.rb +8 -0
- data/metasm/cpu/python/decode.rb +136 -0
- data/metasm/cpu/python/main.rb +36 -0
- data/metasm/cpu/python/opcodes.rb +180 -0
- data/{lib/metasm → metasm/cpu}/sh4.rb +1 -1
- data/{lib/metasm → metasm/cpu}/sh4/decode.rb +48 -17
- data/{lib/metasm → metasm/cpu}/sh4/main.rb +13 -4
- data/{lib/metasm → metasm/cpu}/sh4/opcodes.rb +7 -8
- data/metasm/cpu/x86_64.rb +15 -0
- data/{lib/metasm → metasm/cpu}/x86_64/compile_c.rb +28 -17
- data/{lib/metasm → metasm/cpu}/x86_64/debug.rb +4 -4
- data/{lib/metasm → metasm/cpu}/x86_64/decode.rb +57 -15
- data/{lib/metasm → metasm/cpu}/x86_64/encode.rb +55 -26
- data/{lib/metasm → metasm/cpu}/x86_64/main.rb +14 -6
- data/metasm/cpu/x86_64/opcodes.rb +136 -0
- data/{lib/metasm → metasm/cpu}/x86_64/parse.rb +10 -2
- data/metasm/cpu/x86_64/render.rb +35 -0
- data/metasm/cpu/z80.rb +9 -0
- data/metasm/cpu/z80/decode.rb +313 -0
- data/metasm/cpu/z80/main.rb +67 -0
- data/metasm/cpu/z80/opcodes.rb +224 -0
- data/metasm/cpu/z80/render.rb +59 -0
- data/{lib/metasm/os/main.rb → metasm/debug.rb} +160 -401
- data/{lib/metasm → metasm}/decode.rb +35 -4
- data/{lib/metasm → metasm}/decompile.rb +15 -16
- data/{lib/metasm → metasm}/disassemble.rb +201 -45
- data/{lib/metasm → metasm}/disassemble_api.rb +651 -87
- data/{lib/metasm → metasm}/dynldr.rb +220 -133
- data/{lib/metasm → metasm}/encode.rb +10 -1
- data/{lib/metasm → metasm}/exe_format/a_out.rb +9 -6
- data/{lib/metasm → metasm}/exe_format/autoexe.rb +1 -0
- data/{lib/metasm → metasm}/exe_format/bflt.rb +57 -27
- data/{lib/metasm → metasm}/exe_format/coff.rb +11 -3
- data/{lib/metasm → metasm}/exe_format/coff_decode.rb +53 -20
- data/{lib/metasm → metasm}/exe_format/coff_encode.rb +11 -13
- data/{lib/metasm → metasm}/exe_format/dex.rb +13 -5
- data/{lib/metasm → metasm}/exe_format/dol.rb +1 -0
- data/{lib/metasm → metasm}/exe_format/elf.rb +93 -57
- data/{lib/metasm → metasm}/exe_format/elf_decode.rb +143 -34
- data/{lib/metasm → metasm}/exe_format/elf_encode.rb +122 -31
- data/metasm/exe_format/gb.rb +65 -0
- data/metasm/exe_format/javaclass.rb +424 -0
- data/{lib/metasm → metasm}/exe_format/macho.rb +204 -16
- data/{lib/metasm → metasm}/exe_format/main.rb +26 -3
- data/{lib/metasm → metasm}/exe_format/mz.rb +1 -0
- data/{lib/metasm → metasm}/exe_format/nds.rb +7 -4
- data/{lib/metasm → metasm}/exe_format/pe.rb +71 -8
- data/metasm/exe_format/pyc.rb +167 -0
- data/{lib/metasm → metasm}/exe_format/serialstruct.rb +67 -14
- data/{lib/metasm → metasm}/exe_format/shellcode.rb +7 -3
- data/metasm/exe_format/shellcode_rwx.rb +114 -0
- data/metasm/exe_format/swf.rb +205 -0
- data/{lib/metasm → metasm}/exe_format/xcoff.rb +7 -7
- data/metasm/exe_format/zip.rb +335 -0
- data/metasm/gui.rb +13 -0
- data/{lib/metasm → metasm}/gui/cstruct.rb +35 -41
- data/{lib/metasm → metasm}/gui/dasm_coverage.rb +11 -11
- data/{lib/metasm → metasm}/gui/dasm_decomp.rb +7 -20
- data/{lib/metasm → metasm}/gui/dasm_funcgraph.rb +0 -0
- data/metasm/gui/dasm_graph.rb +1695 -0
- data/{lib/metasm → metasm}/gui/dasm_hex.rb +12 -8
- data/{lib/metasm → metasm}/gui/dasm_listing.rb +43 -28
- data/{lib/metasm → metasm}/gui/dasm_main.rb +310 -53
- data/{lib/metasm → metasm}/gui/dasm_opcodes.rb +5 -19
- data/{lib/metasm → metasm}/gui/debug.rb +93 -27
- data/{lib/metasm → metasm}/gui/gtk.rb +162 -40
- data/{lib/metasm → metasm}/gui/qt.rb +12 -2
- data/{lib/metasm → metasm}/gui/win32.rb +179 -42
- data/{lib/metasm → metasm}/gui/x11.rb +59 -59
- data/{lib/metasm → metasm}/main.rb +389 -264
- data/{lib/metasm/os/remote.rb → metasm/os/gdbremote.rb} +146 -54
- data/{lib/metasm → metasm}/os/gnu_exports.rb +1 -1
- data/{lib/metasm → metasm}/os/linux.rb +628 -151
- data/metasm/os/main.rb +330 -0
- data/{lib/metasm → metasm}/os/windows.rb +132 -42
- data/{lib/metasm → metasm}/os/windows_exports.rb +141 -0
- data/{lib/metasm → metasm}/parse.rb +26 -24
- data/{lib/metasm → metasm}/parse_c.rb +221 -116
- data/{lib/metasm → metasm}/preprocessor.rb +55 -40
- data/{lib/metasm → metasm}/render.rb +14 -38
- data/misc/hexdump.rb +2 -1
- data/misc/lint.rb +58 -0
- data/misc/txt2html.rb +9 -7
- data/samples/bindiff.rb +3 -4
- data/samples/dasm-plugins/bindiff.rb +15 -0
- data/samples/dasm-plugins/bookmark.rb +133 -0
- data/samples/dasm-plugins/c_constants.rb +57 -0
- data/samples/dasm-plugins/colortheme_solarized.rb +125 -0
- data/samples/dasm-plugins/cppobj_funcall.rb +60 -0
- data/samples/dasm-plugins/dasm_all.rb +70 -0
- data/samples/dasm-plugins/demangle_cpp.rb +31 -0
- data/samples/dasm-plugins/deobfuscate.rb +251 -0
- data/samples/dasm-plugins/dump_text.rb +35 -0
- data/samples/dasm-plugins/export_graph_svg.rb +86 -0
- data/samples/dasm-plugins/findgadget.rb +75 -0
- data/samples/dasm-plugins/hl_opcode.rb +32 -0
- data/samples/dasm-plugins/hotfix_gtk_dbg.rb +19 -0
- data/samples/dasm-plugins/imm2off.rb +34 -0
- data/samples/dasm-plugins/match_libsigs.rb +93 -0
- data/samples/dasm-plugins/patch_file.rb +95 -0
- data/samples/dasm-plugins/scanfuncstart.rb +36 -0
- data/samples/dasm-plugins/scanxrefs.rb +26 -0
- data/samples/dasm-plugins/selfmodify.rb +197 -0
- data/samples/dasm-plugins/stringsxrefs.rb +28 -0
- data/samples/dasmnavig.rb +1 -1
- data/samples/dbg-apihook.rb +24 -9
- data/samples/dbg-plugins/heapscan.rb +283 -0
- data/samples/dbg-plugins/heapscan/compiled_heapscan_lin.c +155 -0
- data/samples/dbg-plugins/heapscan/compiled_heapscan_win.c +128 -0
- data/samples/dbg-plugins/heapscan/graphheap.rb +616 -0
- data/samples/dbg-plugins/heapscan/heapscan.rb +709 -0
- data/samples/dbg-plugins/heapscan/winheap.h +174 -0
- data/samples/dbg-plugins/heapscan/winheap7.h +307 -0
- data/samples/dbg-plugins/trace_func.rb +214 -0
- data/samples/disassemble-gui.rb +35 -5
- data/samples/disassemble.rb +31 -6
- data/samples/dump_upx.rb +24 -12
- data/samples/dynamic_ruby.rb +12 -3
- data/samples/exeencode.rb +6 -5
- data/samples/factorize-headers-peimports.rb +1 -1
- data/samples/lindebug.rb +175 -381
- data/samples/metasm-shell.rb +1 -2
- data/samples/peldr.rb +2 -2
- data/tests/all.rb +1 -1
- data/tests/arc.rb +26 -0
- data/tests/dynldr.rb +22 -4
- data/tests/expression.rb +55 -0
- data/tests/graph_layout.rb +285 -0
- data/tests/ia32.rb +79 -26
- data/tests/mips.rb +9 -2
- data/tests/x86_64.rb +66 -18
- metadata +330 -218
- data/lib/metasm/arm/opcodes.rb +0 -177
- data/lib/metasm/gui.rb +0 -23
- data/lib/metasm/gui/dasm_graph.rb +0 -1354
- data/lib/metasm/ia32.rb +0 -14
- data/lib/metasm/ia32/opcodes.rb +0 -873
- data/lib/metasm/ppc/parse.rb +0 -52
- data/lib/metasm/x86_64.rb +0 -12
- data/lib/metasm/x86_64/opcodes.rb +0 -118
- data/samples/gdbclient.rb +0 -583
- data/samples/rubstop.rb +0 -399
|
@@ -134,9 +134,10 @@ class EncodedData
|
|
|
134
134
|
# bytes from rawsize to virtsize are returned as zeroes
|
|
135
135
|
# ignores self.relocations
|
|
136
136
|
def read(len=@virtsize-@ptr)
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
str =
|
|
137
|
+
vlen = len
|
|
138
|
+
vlen = @virtsize-@ptr if len > @virtsize-@ptr
|
|
139
|
+
str = (@ptr < @data.length) ? @data[@ptr, vlen] : ''
|
|
140
|
+
str = str.to_str.ljust(vlen, "\0") if str.length < vlen
|
|
140
141
|
@ptr += len
|
|
141
142
|
str
|
|
142
143
|
end
|
|
@@ -182,7 +183,7 @@ class CPU
|
|
|
182
183
|
# returns a DecodedInstruction or nil
|
|
183
184
|
def decode_instruction(edata, addr)
|
|
184
185
|
@bin_lookaside ||= build_bin_lookaside
|
|
185
|
-
di = decode_findopcode edata
|
|
186
|
+
di = decode_findopcode edata if edata.ptr <= edata.length
|
|
186
187
|
di.address = addr if di
|
|
187
188
|
di = decode_instr_op(edata, di) if di
|
|
188
189
|
decode_instr_interpret(di, addr) if di
|
|
@@ -209,5 +210,35 @@ class CPU
|
|
|
209
210
|
def delay_slot(di=nil)
|
|
210
211
|
0
|
|
211
212
|
end
|
|
213
|
+
|
|
214
|
+
def disassembler_default_func
|
|
215
|
+
DecodedFunction.new
|
|
216
|
+
end
|
|
217
|
+
|
|
218
|
+
# return something like backtrace_binding in the forward direction
|
|
219
|
+
# set pc_reg to some reg name (eg :pc) to include effects on the instruction pointer
|
|
220
|
+
def get_fwdemu_binding(di, pc_reg=nil)
|
|
221
|
+
fdi = di.backtrace_binding ||= get_backtrace_binding(di)
|
|
222
|
+
fdi = fix_fwdemu_binding(di, fdi)
|
|
223
|
+
if pc_reg
|
|
224
|
+
if di.opcode.props[:setip]
|
|
225
|
+
xr = get_xrefs_x(nil, di)
|
|
226
|
+
if xr and xr.length == 1
|
|
227
|
+
fdi[pc_reg] = xr[0]
|
|
228
|
+
else
|
|
229
|
+
fdi[:incomplete_binding] = Expression[1]
|
|
230
|
+
end
|
|
231
|
+
else
|
|
232
|
+
fdi[pc_reg] = Expression[pc_reg, :+, di.bin_length]
|
|
233
|
+
end
|
|
234
|
+
end
|
|
235
|
+
fdi
|
|
236
|
+
end
|
|
237
|
+
|
|
238
|
+
# patch a forward binding from the backtrace binding
|
|
239
|
+
# useful only on specific instructions that update a register *and* dereference that register (eg push)
|
|
240
|
+
def fix_fwdemu_binding(di, fbd)
|
|
241
|
+
fbd
|
|
242
|
+
end
|
|
212
243
|
end
|
|
213
244
|
end
|
|
@@ -69,7 +69,7 @@ class Decompiler
|
|
|
69
69
|
@c_parser.toplevel.symbol.delete func.name
|
|
70
70
|
decompile_func(entry)
|
|
71
71
|
@recurse = pre_recurse
|
|
72
|
-
if not
|
|
72
|
+
if not @c_parser.toplevel.statements.grep(C::Declaration).find { |decl| decl.var.name == func.name }
|
|
73
73
|
@c_parser.toplevel.statements << C::Declaration.new(func)
|
|
74
74
|
end
|
|
75
75
|
end
|
|
@@ -208,7 +208,7 @@ class Decompiler
|
|
|
208
208
|
@c_parser.toplevel.statements.delete_if { |ts| ts.kind_of? C::Declaration and ts.var.name == name }
|
|
209
209
|
aoff = 1
|
|
210
210
|
ptype.args.to_a.each { |a|
|
|
211
|
-
|
|
211
|
+
aoff = (aoff + @c_parser.typesize[:ptr] - 1) / @c_parser.typesize[:ptr] * @c_parser.typesize[:ptr]
|
|
212
212
|
f.decompdata[:stackoff_type][aoff] ||= a.type
|
|
213
213
|
f.decompdata[:stackoff_name][aoff] ||= a.name if a.name
|
|
214
214
|
aoff += sizeof(a) # ary ?
|
|
@@ -293,7 +293,7 @@ class Decompiler
|
|
|
293
293
|
@dasm.function[ta] = DecodedFunction.new
|
|
294
294
|
puts "autofunc #{Expression[ta]}" if $VERBOSE
|
|
295
295
|
end
|
|
296
|
-
|
|
296
|
+
|
|
297
297
|
if @dasm.function[ta] and type != :subfuncret
|
|
298
298
|
f = dasm.auto_label_at(ta, 'func')
|
|
299
299
|
ta = dasm.normalize($1) if f =~ /^thunk_(.*)/
|
|
@@ -350,7 +350,7 @@ class Decompiler
|
|
|
350
350
|
:include_start => i_s, :no_check => true, :terminals => [:frameptr])
|
|
351
351
|
if vals.length == 1 and ee = vals.first and (ee.kind_of? Expression and (ee == Expression[:frameptr] or
|
|
352
352
|
(ee.lexpr == :frameptr and ee.op == :+ and ee.rexpr.kind_of? ::Integer)))
|
|
353
|
-
|
|
353
|
+
ee
|
|
354
354
|
else e
|
|
355
355
|
end
|
|
356
356
|
end
|
|
@@ -602,12 +602,12 @@ class Decompiler
|
|
|
602
602
|
when C::If
|
|
603
603
|
patch_test[ce.test]
|
|
604
604
|
if ce.bthen.kind_of? C::Block
|
|
605
|
-
|
|
605
|
+
case ce.bthen.statements.length
|
|
606
606
|
when 1
|
|
607
607
|
walk(ce.bthen.statements) { |sst| sst.outer = ce.bthen.outer if sst.kind_of? C::Block and sst.outer == ce.bthen }
|
|
608
608
|
ce.bthen = ce.bthen.statements.first
|
|
609
609
|
when 0
|
|
610
|
-
|
|
610
|
+
if not ce.belse and i = ce.bthen.outer.statements.index(ce)
|
|
611
611
|
ce.bthen.outer.statements[i] = ce.test # TODO remove sideeffectless parts
|
|
612
612
|
end
|
|
613
613
|
end
|
|
@@ -1521,7 +1521,7 @@ class Decompiler
|
|
|
1521
1521
|
tabidx = off / sizeof(st)
|
|
1522
1522
|
off -= tabidx * sizeof(st)
|
|
1523
1523
|
ptr = C::CExpression[:&, [ptr, :'[]', [tabidx]]] if tabidx != 0 or ptr.type.untypedef.kind_of? C::Array
|
|
1524
|
-
return ptr if off == 0 and (not msz or
|
|
1524
|
+
return ptr if off == 0 and (not msz or # avoid infinite recursion with eg chained list
|
|
1525
1525
|
(ptr.kind_of? C::CExpression and ((ptr.op == :& and not ptr.lexpr and s=ptr.rexpr) or (ptr.op == :'.' and s=ptr)) and
|
|
1526
1526
|
not s.type.untypedef.kind_of? C::Union))
|
|
1527
1527
|
|
|
@@ -1656,13 +1656,12 @@ class Decompiler
|
|
|
1656
1656
|
ce.rexpr = p if ce.rexpr == v1
|
|
1657
1657
|
}
|
|
1658
1658
|
}
|
|
1659
|
-
|
|
1660
1659
|
}
|
|
1661
1660
|
end
|
|
1662
1661
|
|
|
1663
1662
|
# to be run with scope = function body with only CExpr/Decl/Label/Goto/IfGoto/Return, with correct variables types
|
|
1664
1663
|
# will transform += 1 to ++, inline them to prev/next statement ('++x; if (x)..' => 'if (++x)..')
|
|
1665
|
-
|
|
1664
|
+
# remove useless variables ('int i;', i never used or 'i = 1; j = i;', i never read after => 'j = 1;')
|
|
1666
1665
|
# remove useless casts ('(int)i' with 'int i;' => 'i')
|
|
1667
1666
|
def optimize(scope)
|
|
1668
1667
|
optimize_code(scope)
|
|
@@ -1681,7 +1680,7 @@ class Decompiler
|
|
|
1681
1680
|
t2 = t2.pointed.untypedef if t2.pointer? and t2.pointed.untypedef.kind_of? C::Function
|
|
1682
1681
|
t1 == t2 or
|
|
1683
1682
|
(t1.kind_of? C::Function and t2.kind_of? C::Function and sametype[t1.type, t2.type] and t1.args.to_a.length == t2.args.to_a.length and
|
|
1684
|
-
|
|
1683
|
+
t1.args.to_a.zip(t2.args.to_a).all? { |st1, st2| sametype[st1.type, st2.type] }) or
|
|
1685
1684
|
(t1.kind_of? C::BaseType and t1.integral? and t2.kind_of? C::BaseType and t2.integral? and sizeof(nil, t1) == sizeof(nil, t2)) or
|
|
1686
1685
|
(t1.pointer? and t2.pointer? and sametype[t1.type, t2.type])
|
|
1687
1686
|
}
|
|
@@ -1871,7 +1870,7 @@ class Decompiler
|
|
|
1871
1870
|
when ::Array; exp.any? { |_e| sideeffect _e, scope }
|
|
1872
1871
|
when C::Variable; (scope and not scope.symbol[exp.name]) or exp.type.qualifier.to_a.include? :volatile
|
|
1873
1872
|
when C::CExpression; (exp.op == :* and not exp.lexpr) or exp.op == :funcall or AssignOp.include?(exp.op) or
|
|
1874
|
-
|
|
1873
|
+
sideeffect(exp.lexpr, scope) or sideeffect(exp.rexpr, scope)
|
|
1875
1874
|
else true # failsafe
|
|
1876
1875
|
end
|
|
1877
1876
|
end
|
|
@@ -2009,7 +2008,7 @@ class Decompiler
|
|
|
2009
2008
|
}.compact
|
|
2010
2009
|
|
|
2011
2010
|
tw = to - [:write]
|
|
2012
|
-
|
|
2011
|
+
if to.include? :split or tw.length > 1
|
|
2013
2012
|
:split
|
|
2014
2013
|
elsif tw.length == 1
|
|
2015
2014
|
tw.first
|
|
@@ -2089,7 +2088,7 @@ class Decompiler
|
|
|
2089
2088
|
if (e.op == :'++' or e.op == :'--') and v = (e.lexpr || e.rexpr) and v.kind_of? C::Variable and
|
|
2090
2089
|
scope.symbol[v.name] and not v.type.qualifier.to_a.include? :volatile
|
|
2091
2090
|
next if !((pos = :post.to_sym) and (oe = find_next_read_bl[label, i, v]) and oe.kind_of? C::CExpression) and
|
|
2092
|
-
|
|
2091
|
+
!((pos = :prev.to_sym) and (oe = find_prev_read[label, i-2, v]) and oe.kind_of? C::CExpression)
|
|
2093
2092
|
next if oe.op == :& and not oe.lexpr # no &(++eax)
|
|
2094
2093
|
|
|
2095
2094
|
# merge pre/postincrement into next/prev var usage
|
|
@@ -2221,7 +2220,7 @@ class Decompiler
|
|
|
2221
2220
|
}
|
|
2222
2221
|
case cnt
|
|
2223
2222
|
when 0
|
|
2224
|
-
|
|
2223
|
+
break if bad
|
|
2225
2224
|
next
|
|
2226
2225
|
when 1 # good
|
|
2227
2226
|
break if e.complexity > 10 and ce_.complexity > 3 # try to keep the C readable
|
|
@@ -2443,7 +2442,7 @@ class Decompiler
|
|
|
2443
2442
|
end
|
|
2444
2443
|
# compare type.type cause var is an Array and the cast is a Pointer
|
|
2445
2444
|
countderef[r.rexpr.name] += 1 if r.kind_of? C::CExpression and not r.op and r.rexpr.kind_of? C::Variable and
|
|
2446
|
-
|
|
2445
|
+
sizeof(nil, r.type.type) == sizeof(nil, r.rexpr.type.type) rescue nil
|
|
2447
2446
|
}
|
|
2448
2447
|
vars.each { |n|
|
|
2449
2448
|
if countref[n] == countderef[n]
|
|
@@ -2453,7 +2452,7 @@ class Decompiler
|
|
|
2453
2452
|
v.initializer = v.initializer.first if v.initializer.kind_of? ::Array
|
|
2454
2453
|
walk_ce(tl) { |ce|
|
|
2455
2454
|
if ce.op == :'->' and C::CExpression[ce.lexpr] == C::CExpression[v]
|
|
2456
|
-
ce.op = :'.'
|
|
2455
|
+
ce.op = :'.'
|
|
2457
2456
|
elsif ce.lexpr == target
|
|
2458
2457
|
ce.lexpr = v
|
|
2459
2458
|
end
|
|
@@ -24,6 +24,8 @@ class DecodedInstruction
|
|
|
24
24
|
attr_accessor :comment
|
|
25
25
|
# a cache of the binding used by the backtracker to emulate this instruction
|
|
26
26
|
attr_accessor :backtrace_binding
|
|
27
|
+
# used during fixed-size instruction decoding to hold the decoded raw opcode
|
|
28
|
+
attr_accessor :raw_data
|
|
27
29
|
|
|
28
30
|
# create a new DecodedInstruction with an Instruction whose cpu is the argument
|
|
29
31
|
# can take an existing Instruction as argument
|
|
@@ -233,6 +235,11 @@ class DecodedFunction
|
|
|
233
235
|
attr_accessor :finalized
|
|
234
236
|
# bool, if true the function does not return (eg exit() or ExitProcess())
|
|
235
237
|
attr_accessor :noreturn
|
|
238
|
+
# hash stackoff => varname
|
|
239
|
+
# varname is a single String object shared by all ExpressionStrings (to allow renames)
|
|
240
|
+
attr_accessor :localvars
|
|
241
|
+
# hash stack offset => di address
|
|
242
|
+
attr_accessor :localvars_xrefs
|
|
236
243
|
|
|
237
244
|
# if btbind_callback is defined, calls it with args [dasm, binding, funcaddr, calladdr, expr, origin, maxdepth]
|
|
238
245
|
# else update lazily the binding from expr.externals, and return backtrace_binding
|
|
@@ -264,6 +271,16 @@ class DecodedFunction
|
|
|
264
271
|
@backtracked_for = []
|
|
265
272
|
@backtrace_binding = {}
|
|
266
273
|
end
|
|
274
|
+
|
|
275
|
+
def get_localvar_stackoff(off, di=nil, str=nil)
|
|
276
|
+
if di
|
|
277
|
+
@localvars_xrefs ||= {}
|
|
278
|
+
@localvars_xrefs[off] ||= []
|
|
279
|
+
@localvars_xrefs[off] |= [di.address]
|
|
280
|
+
end
|
|
281
|
+
@localvars ||= {}
|
|
282
|
+
@localvars[off] ||= (str || (off > 0 ? 'arg_%X' % off : 'var_%X' % -off))
|
|
283
|
+
end
|
|
267
284
|
end
|
|
268
285
|
|
|
269
286
|
class CPU
|
|
@@ -438,7 +455,9 @@ class Disassembler
|
|
|
438
455
|
when ::Integer
|
|
439
456
|
when ::String
|
|
440
457
|
raise "invalid section base #{base.inspect} - not at section start" if encoded.export[base] and encoded.export[base] != 0
|
|
441
|
-
|
|
458
|
+
if ed = get_edata_at(base)
|
|
459
|
+
ed.del_export(base)
|
|
460
|
+
end
|
|
442
461
|
encoded.add_export base, 0
|
|
443
462
|
else raise "invalid section base #{base.inspect} - expected string or integer"
|
|
444
463
|
end
|
|
@@ -451,7 +470,7 @@ class Disassembler
|
|
|
451
470
|
|
|
452
471
|
# update section_edata.reloc
|
|
453
472
|
# label -> list of relocs that refers to it
|
|
454
|
-
@inv_section_reloc
|
|
473
|
+
@inv_section_reloc ||= {}
|
|
455
474
|
@sections.each { |b, e|
|
|
456
475
|
e.reloc.each { |o, r|
|
|
457
476
|
r.target.externals.grep(::String).each { |ext| (@inv_section_reloc[ext] ||= []) << [b, e, o, r] }
|
|
@@ -485,14 +504,16 @@ class Disassembler
|
|
|
485
504
|
|
|
486
505
|
# add pseudo-xrefs for exe relocs
|
|
487
506
|
if (not type or type == :reloc) and l = get_label_at(addr) and a = @inv_section_reloc[l]
|
|
507
|
+
x_more = []
|
|
488
508
|
a.each { |b, e, o, r|
|
|
489
509
|
addr = Expression[b]+o
|
|
490
510
|
# ignore relocs embedded in an already-listed instr
|
|
491
|
-
|
|
511
|
+
x_more << Xref.new(:reloc, addr) if not x.find { |x_|
|
|
492
512
|
next if not x_.origin or not di_at(x_.origin)
|
|
493
|
-
(addr - x_.origin
|
|
513
|
+
(addr - x_.origin) < @decoded[x_.origin].bin_length rescue false
|
|
494
514
|
}
|
|
495
515
|
}
|
|
516
|
+
x.concat x_more
|
|
496
517
|
end
|
|
497
518
|
|
|
498
519
|
x.each { |x_| yield x_ }
|
|
@@ -505,9 +526,18 @@ class Disassembler
|
|
|
505
526
|
|
|
506
527
|
# parses a C string for function prototypes
|
|
507
528
|
def parse_c(str, filename=nil, lineno=1)
|
|
529
|
+
@c_parser_constcache = nil
|
|
508
530
|
@c_parser ||= @cpu.new_cparser
|
|
509
531
|
@c_parser.lexer.define_weak('__METASM__DECODE__')
|
|
510
532
|
@c_parser.parse(str, filename, lineno)
|
|
533
|
+
rescue ParseError
|
|
534
|
+
@c_parser.lexer.feed! ''
|
|
535
|
+
raise
|
|
536
|
+
end
|
|
537
|
+
|
|
538
|
+
# list the constants ([name, integer value]) defined in the C code (#define / enums)
|
|
539
|
+
def c_constants
|
|
540
|
+
@c_parser_constcache ||= @c_parser.numeric_constants
|
|
511
541
|
end
|
|
512
542
|
|
|
513
543
|
# returns the canonical form of addr (absolute address integer or label of start of section + section offset)
|
|
@@ -568,6 +598,7 @@ class Disassembler
|
|
|
568
598
|
end
|
|
569
599
|
|
|
570
600
|
# returns a hash associating addr => list of labels at this addr
|
|
601
|
+
# label_alias[a] may be nil if a new label is created elsewhere in the edata with the same name
|
|
571
602
|
def label_alias
|
|
572
603
|
if not @label_alias_cache
|
|
573
604
|
@label_alias_cache = {}
|
|
@@ -622,17 +653,16 @@ class Disassembler
|
|
|
622
653
|
if not f.finalized
|
|
623
654
|
f.finalized = true
|
|
624
655
|
puts " finalize subfunc #{Expression[addr]}" if debug_backtrace
|
|
625
|
-
|
|
656
|
+
backtrace_update_function_binding(addr, f)
|
|
626
657
|
if not f.return_address
|
|
627
658
|
detect_function_thunk(addr)
|
|
628
659
|
end
|
|
629
660
|
end
|
|
630
|
-
@comment[addr] ||= []
|
|
631
661
|
bd = f.backtrace_binding.reject { |k, v| Expression[k] == Expression[v] or Expression[v] == Expression::Unknown }
|
|
632
662
|
unk = f.backtrace_binding.map { |k, v| k if v == Expression::Unknown }.compact
|
|
633
663
|
bd[unk.map { |u| Expression[u].to_s }.sort.join(',')] = Expression::Unknown if not unk.empty?
|
|
634
|
-
|
|
635
|
-
|
|
664
|
+
add_comment(addr, "function binding: " + bd.map { |k, v| "#{k} -> #{v}" }.sort.join(', '))
|
|
665
|
+
add_comment(addr, "function ends at " + f.return_address.map { |ra| Expression[ra] }.join(', ')) if f.return_address
|
|
636
666
|
}
|
|
637
667
|
end
|
|
638
668
|
|
|
@@ -658,7 +688,7 @@ puts " finalize subfunc #{Expression[addr]}" if debug_backtrace
|
|
|
658
688
|
next if not f = @function[subfunc] or f.finalized
|
|
659
689
|
f.finalized = true
|
|
660
690
|
puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
|
|
661
|
-
|
|
691
|
+
backtrace_update_function_binding(subfunc, f)
|
|
662
692
|
if not f.return_address
|
|
663
693
|
detect_function_thunk(subfunc)
|
|
664
694
|
end
|
|
@@ -667,7 +697,7 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
|
|
|
667
697
|
|
|
668
698
|
if di = @decoded[addr]
|
|
669
699
|
if di.kind_of? DecodedInstruction
|
|
670
|
-
split_block(di.block, di.address) if not di.block_head? # this updates di.block
|
|
700
|
+
split_block(di.block, di.address, true) if not di.block_head? # this updates di.block
|
|
671
701
|
di.block.add_from(from, from_subfuncret ? :subfuncret : :normal) if from and from != :default
|
|
672
702
|
bf = di.block
|
|
673
703
|
elsif di == true
|
|
@@ -726,20 +756,22 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
|
|
|
726
756
|
end
|
|
727
757
|
|
|
728
758
|
# splits an InstructionBlock, updates the blocks backtracked_for
|
|
729
|
-
def split_block(block, address=nil)
|
|
759
|
+
def split_block(block, address=nil, rebacktrace=false)
|
|
730
760
|
if not address # invoked as split_block(0x401012)
|
|
731
761
|
return if not @decoded[block].kind_of? DecodedInstruction
|
|
732
762
|
block, address = @decoded[block].block, block
|
|
733
763
|
end
|
|
734
764
|
return block if address == block.address
|
|
735
765
|
new_b = block.split address
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
766
|
+
if rebacktrace
|
|
767
|
+
new_b.backtracked_for.dup.each { |btt|
|
|
768
|
+
backtrace(btt.expr, btt.address,
|
|
769
|
+
:only_upto => block.list.last.address,
|
|
770
|
+
:include_start => !btt.exclude_instr, :from_subfuncret => btt.from_subfuncret,
|
|
771
|
+
:origin => btt.origin, :orig_expr => btt.orig_expr, :type => btt.type, :len => btt.len,
|
|
772
|
+
:detached => btt.detached, :maxdepth => btt.maxdepth)
|
|
773
|
+
}
|
|
774
|
+
end
|
|
743
775
|
new_b
|
|
744
776
|
end
|
|
745
777
|
|
|
@@ -763,8 +795,7 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
|
|
|
763
795
|
each_xref(waddr, :w) { |x|
|
|
764
796
|
#next if off + x.len < 0
|
|
765
797
|
puts "W: disasm: self-modifying code at #{Expression[waddr]}" if $VERBOSE
|
|
766
|
-
|
|
767
|
-
@comment[di_addr] |= ["overwritten by #{@decoded[x.origin]}"]
|
|
798
|
+
add_comment(di_addr, "overwritten by #{@decoded[x.origin]}")
|
|
768
799
|
@callback_selfmodifying[di_addr] if callback_selfmodifying
|
|
769
800
|
return
|
|
770
801
|
}
|
|
@@ -775,7 +806,8 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
|
|
|
775
806
|
block.edata.ptr = di_addr - block.address + block.edata_ptr
|
|
776
807
|
if not di = @cpu.decode_instruction(block.edata, di_addr)
|
|
777
808
|
ed = block.edata
|
|
778
|
-
|
|
809
|
+
break if ed.ptr >= ed.length and get_section_at(di_addr) and di = block.list.last
|
|
810
|
+
puts "#{ed.ptr >= ed.length ? "end of section reached" : "unknown instruction #{ed.data[di_addr-block.address+block.edata_ptr, 4].to_s.unpack('H*').first}"} at #{Expression[di_addr]}" if $VERBOSE
|
|
779
811
|
return
|
|
780
812
|
end
|
|
781
813
|
|
|
@@ -783,7 +815,18 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
|
|
|
783
815
|
block.add_di di
|
|
784
816
|
puts di if $DEBUG
|
|
785
817
|
|
|
786
|
-
|
|
818
|
+
if callback_newinstr
|
|
819
|
+
ndi = @callback_newinstr[di]
|
|
820
|
+
if not ndi or not ndi.block
|
|
821
|
+
block.list.delete di
|
|
822
|
+
if ndi
|
|
823
|
+
block.add_di ndi
|
|
824
|
+
ndi.bin_length = di.bin_length if ndi.bin_length == 0
|
|
825
|
+
@decoded[di_addr] = ndi
|
|
826
|
+
end
|
|
827
|
+
end
|
|
828
|
+
di = ndi
|
|
829
|
+
end
|
|
787
830
|
return if not di
|
|
788
831
|
block = di.block
|
|
789
832
|
|
|
@@ -793,7 +836,7 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
|
|
|
793
836
|
|
|
794
837
|
if not di_addr or di.opcode.props[:stopexec] or not @program.get_xrefs_x(self, di).empty?
|
|
795
838
|
# do not backtrace until delay slot is finished (eg MIPS: di is a
|
|
796
|
-
|
|
839
|
+
# ret and the delay slot holds stack fixup needed to calc func_binding)
|
|
797
840
|
# XXX if the delay slot is also xref_x or :stopexec it is ignored
|
|
798
841
|
delay_slot ||= [di, @cpu.delay_slot(di)]
|
|
799
842
|
end
|
|
@@ -835,6 +878,8 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
|
|
|
835
878
|
@entrypoints |= entrypoints
|
|
836
879
|
|
|
837
880
|
entrypoints.each { |ep| do_disassemble_fast_deep(normalize(ep)) }
|
|
881
|
+
|
|
882
|
+
@callback_finished[] if callback_finished
|
|
838
883
|
end
|
|
839
884
|
|
|
840
885
|
def do_disassemble_fast_deep(ep)
|
|
@@ -896,8 +941,7 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
|
|
|
896
941
|
}
|
|
897
942
|
if func
|
|
898
943
|
auto_label_at(addr, 'sub', 'loc', 'xref')
|
|
899
|
-
|
|
900
|
-
@function[addr] = DecodedFunction.new
|
|
944
|
+
@function[addr] = (@function[:default] || DecodedFunction.new).dup
|
|
901
945
|
@function[addr].finalized = true
|
|
902
946
|
detect_function_thunk(addr)
|
|
903
947
|
puts "found new function #{get_label_at(addr)} at #{Expression[addr]}" if $VERBOSE
|
|
@@ -909,7 +953,7 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
|
|
|
909
953
|
# does not recurse into subfunctions
|
|
910
954
|
# assumes all :saveip returns, except those pointing to a subfunc with noreturn
|
|
911
955
|
# yields subfunction addresses (targets of :saveip)
|
|
912
|
-
#
|
|
956
|
+
# no backtrace for :x (change with backtrace_maxblocks_fast)
|
|
913
957
|
# returns a todo-style ary
|
|
914
958
|
# assumes @addrs_todo is empty
|
|
915
959
|
def disassemble_fast_block(block, &b)
|
|
@@ -927,6 +971,7 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
|
|
|
927
971
|
# decode instruction
|
|
928
972
|
block.edata.ptr = di_addr - block.address + block.edata_ptr
|
|
929
973
|
if not di = @cpu.decode_instruction(block.edata, di_addr)
|
|
974
|
+
break if block.edata.ptr >= block.edata.length and get_section_at(di_addr) and di = block.list.last
|
|
930
975
|
return ret
|
|
931
976
|
end
|
|
932
977
|
|
|
@@ -934,7 +979,18 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
|
|
|
934
979
|
block.add_di di
|
|
935
980
|
puts di if $DEBUG
|
|
936
981
|
|
|
937
|
-
|
|
982
|
+
if callback_newinstr
|
|
983
|
+
ndi = @callback_newinstr[di]
|
|
984
|
+
if not ndi or not ndi.block
|
|
985
|
+
block.list.delete di
|
|
986
|
+
if ndi
|
|
987
|
+
block.add_di ndi
|
|
988
|
+
ndi.bin_length = di.bin_length if ndi.bin_length == 0
|
|
989
|
+
@decoded[di_addr] = ndi
|
|
990
|
+
end
|
|
991
|
+
end
|
|
992
|
+
di = ndi
|
|
993
|
+
end
|
|
938
994
|
return ret if not di
|
|
939
995
|
|
|
940
996
|
di_addr = di.next_addr
|
|
@@ -942,7 +998,9 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
|
|
|
942
998
|
if di.opcode.props[:stopexec] or di.opcode.props[:setip]
|
|
943
999
|
if di.opcode.props[:setip]
|
|
944
1000
|
@addrs_todo = []
|
|
945
|
-
@program.get_xrefs_x(self, di)
|
|
1001
|
+
ar = @program.get_xrefs_x(self, di)
|
|
1002
|
+
ar = @callback_newaddr[di.address, ar] || ar if callback_newaddr
|
|
1003
|
+
ar.each { |expr|
|
|
946
1004
|
backtrace(expr, di.address, :origin => di.address, :type => :x, :maxdepth => @backtrace_maxblocks_fast)
|
|
947
1005
|
}
|
|
948
1006
|
end
|
|
@@ -965,8 +1023,13 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
|
|
|
965
1023
|
end
|
|
966
1024
|
}
|
|
967
1025
|
|
|
968
|
-
|
|
969
|
-
|
|
1026
|
+
ar = [di_addr]
|
|
1027
|
+
ar = @callback_newaddr[block.list.last.address, ar] || ar if callback_newaddr
|
|
1028
|
+
ar.each { |a|
|
|
1029
|
+
di.block.add_to_normal(a)
|
|
1030
|
+
ret << [a, di.address]
|
|
1031
|
+
}
|
|
1032
|
+
ret
|
|
970
1033
|
end
|
|
971
1034
|
|
|
972
1035
|
# handles when disassemble_fast encounters a call to a subfunction
|
|
@@ -1037,7 +1100,7 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
|
|
|
1037
1100
|
count = 0
|
|
1038
1101
|
while b = block_at(addr)
|
|
1039
1102
|
count += 1
|
|
1040
|
-
return if count > 5 or b.list.length >
|
|
1103
|
+
return if count > 5 or b.list.length > 5
|
|
1041
1104
|
if b.to_subfuncret and not b.to_subfuncret.empty?
|
|
1042
1105
|
return if b.to_subfuncret.length != 1
|
|
1043
1106
|
addr = normalize(b.to_subfuncret.first)
|
|
@@ -1047,7 +1110,7 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
|
|
|
1047
1110
|
return if not btb = sf.backtrace_binding
|
|
1048
1111
|
btb = btb.dup
|
|
1049
1112
|
btb.delete_if { |k, v| Expression[k] == Expression[v] }
|
|
1050
|
-
|
|
1113
|
+
return if btb.length > 2 or btb.values.include? Expression::Unknown
|
|
1051
1114
|
else
|
|
1052
1115
|
return if not bt = b.to_normal
|
|
1053
1116
|
if bt.include? :default
|
|
@@ -1291,6 +1354,88 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
|
|
|
1291
1354
|
end
|
|
1292
1355
|
end
|
|
1293
1356
|
|
|
1357
|
+
# iterates over all instructions of a function from a given entrypoint
|
|
1358
|
+
# carries an object while walking, the object is yielded every instruction
|
|
1359
|
+
# every block is walked only once, after all previous blocks are done (if possible)
|
|
1360
|
+
# on a 'jz', a [:clone] event is yielded for every path beside the first
|
|
1361
|
+
# on a junction (eg a -> b -> d, a -> c -> d), a [:merge] event occurs if froms have different objs
|
|
1362
|
+
# event list:
|
|
1363
|
+
# [:di, <addr>, <decoded_instruction>, <object>]
|
|
1364
|
+
# [:clone, <newaddr>, <oldaddr>, <object>]
|
|
1365
|
+
# [:merge, <newaddr>, {<oldaddr1> => <object1>, <oldaddr2> => <object2>, ...}, <object1>]
|
|
1366
|
+
# [:subfunc, <subfunc_addr>, <call_addr>, <object>]
|
|
1367
|
+
# all events should return an object
|
|
1368
|
+
# :merge has a copy of object1 at the end so that uninterested callers can always return args[-1]
|
|
1369
|
+
# if an event returns false, the trace stops for the current branch
|
|
1370
|
+
def function_walk(addr_start, obj_start)
  # Walks the instructions reachable from addr_start, yielding one event array
  # per step ([:di, ...], [:merge, ...], [:subfunc, ...], [:clone, ...]).
  # The value returned by the block becomes the object carried further along
  # that path; a block returning false stops the walk of the current branch.
  # Always returns nil.

  # address of every instruction already walked => object carried when it was reached
  done = {}
  # pending hops: [addr, obj], or [addr, obj, true] once the hop has been deferred
  todo = [[addr_start, obj_start]]

  while hop = todo.pop
    addr, obj = hop
    # a block start that was already walked must not be merged/walked again
    next if done.has_key?(addr)

    di = di_at(addr)
    next if not di

    if done.empty?
      # very first hop: start from di itself, which may not be the first of its block
      dilist = di.block.list[di.block.list.index(di)..-1]
    else
      # new block, check all 'from' have been seen
      if not hop[2]
        # may retry later: requeue at the other end of todo, flagged so that
        # the second attempt proceeds even if some 'from' is still missing
        all_ok = true
        di.block.each_from_samefunc(self) { |fa| all_ok = false unless done.has_key?(fa) }
        if not all_ok
          todo.unshift([addr, obj, true])
          next
        end
      end

      # collect the objects carried by the predecessors walked so far
      froms = {}
      di.block.each_from_samefunc(self) { |fa| froms[fa] = done[fa] if done[fa] }
      if froms.values.uniq.length > 1
        obj = yield([:merge, addr, froms, froms.values.first])
        next if obj == false
      end

      dilist = di.block.list
    end

    # Array#each returns the array (truthy) when it runs to completion and nil
    # when interrupted by 'break', so the body below only runs for a full walk
    if dilist.each { |_di|
        break if done.has_key?(_di.address) # looped back into addr_start
        done[_di.address] = obj
        obj = yield([:di, _di.address, _di, obj])
        break if obj == false # also return false for the previous 'if'
      }

      from = dilist.last.address

      if di.block.to_normal and di.block.to_normal[0] and
          di.block.to_subfuncret and di.block.to_subfuncret[0]
        # current instruction block calls into a subfunction
        obj = di.block.to_normal.map { |subf|
          yield([:subfunc, subf, from, obj])
        }.first # propagate 1st subfunc result
        next if obj == false
      end

      # the first successor inherits obj, every other one gets its own clone
      wantclone = false
      di.block.each_to_samefunc(self) { |ta|
        if wantclone
          nobj = yield([:clone, ta, from, obj])
          # a false clone means the caller does not want this path walked
          next if nobj == false
          todo << [ta, nobj]
        else
          todo << [ta, obj]
          wantclone = true
        end
      }
    end
  end
end
|
|
1438
|
+
|
|
1294
1439
|
# holds a backtrace result until a snapshot_addr is encountered
|
|
1295
1440
|
class StoppedExpr
|
|
1296
1441
|
attr_accessor :exprs
|
|
@@ -1356,7 +1501,7 @@ puts " not backtracking stack address #{expr}" if debug_backtrace
|
|
|
1356
1501
|
end
|
|
1357
1502
|
|
|
1358
1503
|
if vals = (no_check ? (!need_backtrace(expr, terminals) and [expr]) : backtrace_check_found(expr,
|
|
1359
|
-
di, origin, type, len, maxdepth, detached))
|
|
1504
|
+
di, origin, type, len, maxdepth, detached, snapshot_addr))
|
|
1360
1505
|
# no need to update backtracked_for
|
|
1361
1506
|
return vals
|
|
1362
1507
|
elsif maxdepth <= 0
|
|
@@ -1396,7 +1541,7 @@ puts " backtrace up #{Expression[h[:addr]]} #{oldexpr}#{" => #{expr}" if expr
|
|
|
1396
1541
|
if expr != oldexpr and not snapshot_addr and vals = (no_check ?
|
|
1397
1542
|
(!need_backtrace(expr, terminals) and [expr]) :
|
|
1398
1543
|
backtrace_check_found(expr, nil, origin, type, len,
|
|
1399
|
-
maxdepth-h[:loopdetect].length, detached))
|
|
1544
|
+
maxdepth-h[:loopdetect].length, detached, snapshot_addr))
|
|
1400
1545
|
result |= vals
|
|
1401
1546
|
next
|
|
1402
1547
|
end
|
|
@@ -1437,7 +1582,7 @@ puts " backtrace up #{Expression[h[:from]]}->#{Expression[h[:to]]} #{oldexpr}#
|
|
|
1437
1582
|
|
|
1438
1583
|
if expr != oldexpr and vals = (no_check ? (!need_backtrace(expr, terminals) and [expr]) :
|
|
1439
1584
|
backtrace_check_found(expr, @decoded[h[:from]], origin, type, len,
|
|
1440
|
-
maxdepth-h[:loopdetect].length, detached))
|
|
1585
|
+
maxdepth-h[:loopdetect].length, detached, snapshot_addr))
|
|
1441
1586
|
if snapshot_addr
|
|
1442
1587
|
expr = StoppedExpr.new vals
|
|
1443
1588
|
next expr
|
|
@@ -1498,7 +1643,7 @@ oldexpr = expr
|
|
|
1498
1643
|
when :func
|
|
1499
1644
|
expr = backtrace_emu_subfunc(h[:func], h[:funcaddr], h[:addr], expr, origin, maxdepth-h[:loopdetect].length)
|
|
1500
1645
|
if snapshot_addr and snapshot_addr == h[:funcaddr]
|
|
1501
|
-
# XXX recursiveness detection needs to be fixed
|
|
1646
|
+
# XXX recursiveness detection needs to be fixed
|
|
1502
1647
|
puts " backtrace: recursive function #{Expression[h[:funcaddr]]}" if debug_backtrace
|
|
1503
1648
|
next false
|
|
1504
1649
|
end
|
|
@@ -1506,7 +1651,7 @@ puts " backtrace: recursive function #{Expression[h[:funcaddr]]}" if debug_back
|
|
|
1506
1651
|
end
|
|
1507
1652
|
puts " backtrace #{h[:di] || Expression[h[:funcaddr]]} #{oldexpr} => #{expr}" if debug_backtrace and expr != oldexpr
|
|
1508
1653
|
if vals = (no_check ? (!need_backtrace(expr, terminals) and [expr]) : backtrace_check_found(expr,
|
|
1509
|
-
h[:di], origin, type, len, maxdepth-h[:loopdetect].length, detached))
|
|
1654
|
+
h[:di], origin, type, len, maxdepth-h[:loopdetect].length, detached, snapshot_addr))
|
|
1510
1655
|
if snapshot_addr
|
|
1511
1656
|
expr = StoppedExpr.new vals
|
|
1512
1657
|
else
|
|
@@ -1588,10 +1733,14 @@ puts " backtrace addrs_todo << #{Expression[retaddr]} from #{di} (funcret)" if
|
|
|
1588
1733
|
(ab = @address_binding[addr]) ? Expression[expr.bind(ab).reduce] : expr
|
|
1589
1734
|
end
|
|
1590
1735
|
|
|
1736
|
+
def backtrace_update_function_binding(addr, func=@function[addr], retaddrs=func.return_address)
  # forwards the request to the cpu object, adding the disassembler itself as
  # first argument; func defaults to @function[addr] and retaddrs to
  # func.return_address
  forwarded = [self, addr, func, retaddrs]
  @cpu.backtrace_update_function_binding(*forwarded)
end
|
|
1739
|
+
|
|
1591
1740
|
# static resolution of indirections
|
|
1592
1741
|
def resolve(expr)
|
|
1593
1742
|
binding = Expression[expr].expr_indirections.inject(@old_prog_binding) { |binding_, ind|
|
|
1594
|
-
e
|
|
1743
|
+
e = get_edata_at(resolve(ind.target))
|
|
1595
1744
|
return expr if not e
|
|
1596
1745
|
binding_.merge ind => Expression[ e.decode_imm("u#{8*ind.len}".to_sym, @cpu.endianness) ]
|
|
1597
1746
|
}
|
|
@@ -1619,7 +1768,7 @@ puts " backtrace addrs_todo << #{Expression[retaddr]} from #{di} (funcret)" if
|
|
|
1619
1768
|
# TODO trace expr evolution through backtrace, to modify immediates to an expr involving label names
|
|
1620
1769
|
# TODO mov [ptr], imm ; <...> ; jmp [ptr] => rename imm as loc_XX
|
|
1621
1770
|
# eg. mov eax, 42 ; add eax, 4 ; jmp eax => mov eax, some_label-4
|
|
1622
|
-
def backtrace_check_found(expr, di, origin, type, len, maxdepth, detached)
|
|
1771
|
+
def backtrace_check_found(expr, di, origin, type, len, maxdepth, detached, snapshot_addr=nil)
|
|
1623
1772
|
# only entrypoints or block starts called by a :saveip are checked for being a function
|
|
1624
1773
|
# want to execute [esp] from a block start
|
|
1625
1774
|
if type == :x and di and di == di.block.list.first and @cpu.backtrace_is_function_return(expr, @decoded[origin]) and (
|
|
@@ -1649,11 +1798,14 @@ puts " backtrace addrs_todo << #{Expression[retaddr]} from #{di} (funcret)" if
|
|
|
1649
1798
|
end
|
|
1650
1799
|
|
|
1651
1800
|
return if need_backtrace(expr)
|
|
1801
|
+
if snapshot_addr
|
|
1802
|
+
return if expr.expr_externals(true).find { |ee| ee.kind_of?(Indirection) }
|
|
1803
|
+
end
|
|
1652
1804
|
|
|
1653
1805
|
puts "backtrace #{type} found #{expr} from #{di} orig #{@decoded[origin] || Expression[origin] if origin}" if debug_backtrace
|
|
1654
1806
|
result = backtrace_value(expr, maxdepth)
|
|
1655
1807
|
# keep the ori pointer in the results to emulate volatile memory (eg decompiler prefers this)
|
|
1656
|
-
result << expr if not type
|
|
1808
|
+
#result << expr if not type # XXX returning multiple values for nothing is too confusing, TODO fix decompiler
|
|
1657
1809
|
result.uniq!
|
|
1658
1810
|
|
|
1659
1811
|
# create xrefs/labels
|
|
@@ -1695,7 +1847,7 @@ puts "backtrace #{type} found #{expr} from #{di} orig #{@decoded[origin] || Expr
|
|
|
1695
1847
|
ret = []
|
|
1696
1848
|
|
|
1697
1849
|
decode_imm = lambda { |addr, len|
|
|
1698
|
-
edata
|
|
1850
|
+
edata = get_edata_at(addr)
|
|
1699
1851
|
if edata
|
|
1700
1852
|
Expression[ edata.decode_imm("u#{8*len}".to_sym, @cpu.endianness) ]
|
|
1701
1853
|
else
|
|
@@ -1803,7 +1955,7 @@ puts " backtrace_indirection for #{ind.target} failed: #{ev}" if debug_backtra
|
|
|
1803
1955
|
# TODO trace expression evolution to allow handling of
|
|
1804
1956
|
# mov eax, 28 ; add eax, 4 ; jmp eax
|
|
1805
1957
|
# => mov eax, (loc_xx-4)
|
|
1806
|
-
if di and not unk # and di.address == origin
|
|
1958
|
+
if di and not unk and expr != n # and di.address == origin
|
|
1807
1959
|
@cpu.replace_instr_arg_immediate(di.instruction, expr, n)
|
|
1808
1960
|
end
|
|
1809
1961
|
if @decoded[origin] and not unk
|
|
@@ -1850,6 +2002,10 @@ puts " backtrace_indirection for #{ind.target} failed: #{ev}" if debug_backtra
|
|
|
1850
2002
|
end
|
|
1851
2003
|
end
|
|
1852
2004
|
|
|
2005
|
+
def inspect
  # compact representation: a fixed class tag followed by the object_id in hex
  format("<Metasm::Disassembler @%x>", object_id)
end
|
|
2008
|
+
|
|
1853
2009
|
def to_s
|
|
1854
2010
|
a = ''
|
|
1855
2011
|
dump { |l| a << l << "\n" }
|
|
@@ -1916,7 +2072,7 @@ puts " backtrace_indirection for #{ind.target} failed: #{ev}" if debug_backtra
|
|
|
1916
2072
|
if not xr.empty?
|
|
1917
2073
|
b["\n// Xrefs: #{xr[0, 8].join(' ')}#{' ...' if xr.length > 8}"]
|
|
1918
2074
|
end
|
|
1919
|
-
if block.edata.inv_export[block.edata_ptr]
|
|
2075
|
+
if block.edata.inv_export[block.edata_ptr] and label_alias[block.address]
|
|
1920
2076
|
b["\n"] if xr.empty?
|
|
1921
2077
|
label_alias[block.address].each { |name| b["#{name}:"] }
|
|
1922
2078
|
end
|
|
@@ -1933,8 +2089,8 @@ puts " backtrace_indirection for #{ind.target} failed: #{ev}" if debug_backtra
|
|
|
1933
2089
|
# TODO array-style data access
|
|
1934
2090
|
def dump_data(addr, edata, off, &b)
|
|
1935
2091
|
b ||= lambda { |l| puts l }
|
|
1936
|
-
if l = edata.inv_export[off]
|
|
1937
|
-
l_list = label_alias[addr].
|
|
2092
|
+
if l = edata.inv_export[off] and label_alias[addr]
|
|
2093
|
+
l_list = label_alias[addr].sort
|
|
1938
2094
|
l = l_list.pop || l
|
|
1939
2095
|
l_list.each { |ll|
|
|
1940
2096
|
b["#{ll}:"]
|