metasm 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/BUGS +11 -0
- data/CREDITS +17 -0
- data/README +270 -0
- data/TODO +114 -0
- data/doc/code_organisation.txt +146 -0
- data/doc/const_missing.txt +16 -0
- data/doc/core_classes.txt +75 -0
- data/doc/feature_list.txt +53 -0
- data/doc/index.txt +59 -0
- data/doc/install_notes.txt +170 -0
- data/doc/style.css +3 -0
- data/doc/use_cases.txt +18 -0
- data/lib/metasm.rb +80 -0
- data/lib/metasm/arm.rb +12 -0
- data/lib/metasm/arm/debug.rb +39 -0
- data/lib/metasm/arm/decode.rb +167 -0
- data/lib/metasm/arm/encode.rb +77 -0
- data/lib/metasm/arm/main.rb +75 -0
- data/lib/metasm/arm/opcodes.rb +177 -0
- data/lib/metasm/arm/parse.rb +130 -0
- data/lib/metasm/arm/render.rb +55 -0
- data/lib/metasm/compile_c.rb +1457 -0
- data/lib/metasm/dalvik.rb +8 -0
- data/lib/metasm/dalvik/decode.rb +196 -0
- data/lib/metasm/dalvik/main.rb +60 -0
- data/lib/metasm/dalvik/opcodes.rb +366 -0
- data/lib/metasm/decode.rb +213 -0
- data/lib/metasm/decompile.rb +2659 -0
- data/lib/metasm/disassemble.rb +2068 -0
- data/lib/metasm/disassemble_api.rb +1280 -0
- data/lib/metasm/dynldr.rb +1329 -0
- data/lib/metasm/encode.rb +333 -0
- data/lib/metasm/exe_format/a_out.rb +194 -0
- data/lib/metasm/exe_format/autoexe.rb +82 -0
- data/lib/metasm/exe_format/bflt.rb +189 -0
- data/lib/metasm/exe_format/coff.rb +455 -0
- data/lib/metasm/exe_format/coff_decode.rb +901 -0
- data/lib/metasm/exe_format/coff_encode.rb +1078 -0
- data/lib/metasm/exe_format/dex.rb +457 -0
- data/lib/metasm/exe_format/dol.rb +145 -0
- data/lib/metasm/exe_format/elf.rb +923 -0
- data/lib/metasm/exe_format/elf_decode.rb +979 -0
- data/lib/metasm/exe_format/elf_encode.rb +1375 -0
- data/lib/metasm/exe_format/macho.rb +827 -0
- data/lib/metasm/exe_format/main.rb +228 -0
- data/lib/metasm/exe_format/mz.rb +164 -0
- data/lib/metasm/exe_format/nds.rb +172 -0
- data/lib/metasm/exe_format/pe.rb +437 -0
- data/lib/metasm/exe_format/serialstruct.rb +246 -0
- data/lib/metasm/exe_format/shellcode.rb +114 -0
- data/lib/metasm/exe_format/xcoff.rb +167 -0
- data/lib/metasm/gui.rb +23 -0
- data/lib/metasm/gui/cstruct.rb +373 -0
- data/lib/metasm/gui/dasm_coverage.rb +199 -0
- data/lib/metasm/gui/dasm_decomp.rb +369 -0
- data/lib/metasm/gui/dasm_funcgraph.rb +103 -0
- data/lib/metasm/gui/dasm_graph.rb +1354 -0
- data/lib/metasm/gui/dasm_hex.rb +543 -0
- data/lib/metasm/gui/dasm_listing.rb +599 -0
- data/lib/metasm/gui/dasm_main.rb +906 -0
- data/lib/metasm/gui/dasm_opcodes.rb +291 -0
- data/lib/metasm/gui/debug.rb +1228 -0
- data/lib/metasm/gui/gtk.rb +884 -0
- data/lib/metasm/gui/qt.rb +495 -0
- data/lib/metasm/gui/win32.rb +3004 -0
- data/lib/metasm/gui/x11.rb +621 -0
- data/lib/metasm/ia32.rb +14 -0
- data/lib/metasm/ia32/compile_c.rb +1523 -0
- data/lib/metasm/ia32/debug.rb +193 -0
- data/lib/metasm/ia32/decode.rb +1167 -0
- data/lib/metasm/ia32/decompile.rb +564 -0
- data/lib/metasm/ia32/encode.rb +314 -0
- data/lib/metasm/ia32/main.rb +233 -0
- data/lib/metasm/ia32/opcodes.rb +872 -0
- data/lib/metasm/ia32/parse.rb +327 -0
- data/lib/metasm/ia32/render.rb +91 -0
- data/lib/metasm/main.rb +1193 -0
- data/lib/metasm/mips.rb +11 -0
- data/lib/metasm/mips/compile_c.rb +7 -0
- data/lib/metasm/mips/decode.rb +253 -0
- data/lib/metasm/mips/encode.rb +51 -0
- data/lib/metasm/mips/main.rb +72 -0
- data/lib/metasm/mips/opcodes.rb +443 -0
- data/lib/metasm/mips/parse.rb +51 -0
- data/lib/metasm/mips/render.rb +43 -0
- data/lib/metasm/os/gnu_exports.rb +270 -0
- data/lib/metasm/os/linux.rb +1112 -0
- data/lib/metasm/os/main.rb +1686 -0
- data/lib/metasm/os/remote.rb +527 -0
- data/lib/metasm/os/windows.rb +2027 -0
- data/lib/metasm/os/windows_exports.rb +745 -0
- data/lib/metasm/parse.rb +876 -0
- data/lib/metasm/parse_c.rb +3938 -0
- data/lib/metasm/pic16c/decode.rb +42 -0
- data/lib/metasm/pic16c/main.rb +17 -0
- data/lib/metasm/pic16c/opcodes.rb +68 -0
- data/lib/metasm/ppc.rb +11 -0
- data/lib/metasm/ppc/decode.rb +264 -0
- data/lib/metasm/ppc/decompile.rb +251 -0
- data/lib/metasm/ppc/encode.rb +51 -0
- data/lib/metasm/ppc/main.rb +129 -0
- data/lib/metasm/ppc/opcodes.rb +410 -0
- data/lib/metasm/ppc/parse.rb +52 -0
- data/lib/metasm/preprocessor.rb +1277 -0
- data/lib/metasm/render.rb +130 -0
- data/lib/metasm/sh4.rb +8 -0
- data/lib/metasm/sh4/decode.rb +336 -0
- data/lib/metasm/sh4/main.rb +292 -0
- data/lib/metasm/sh4/opcodes.rb +381 -0
- data/lib/metasm/x86_64.rb +12 -0
- data/lib/metasm/x86_64/compile_c.rb +1025 -0
- data/lib/metasm/x86_64/debug.rb +59 -0
- data/lib/metasm/x86_64/decode.rb +268 -0
- data/lib/metasm/x86_64/encode.rb +264 -0
- data/lib/metasm/x86_64/main.rb +135 -0
- data/lib/metasm/x86_64/opcodes.rb +118 -0
- data/lib/metasm/x86_64/parse.rb +68 -0
- data/misc/bottleneck.rb +61 -0
- data/misc/cheader-findpppath.rb +58 -0
- data/misc/hexdiff.rb +74 -0
- data/misc/hexdump.rb +55 -0
- data/misc/metasm-all.rb +13 -0
- data/misc/objdiff.rb +47 -0
- data/misc/objscan.rb +40 -0
- data/misc/pdfparse.rb +661 -0
- data/misc/ppc_pdf2oplist.rb +192 -0
- data/misc/tcp_proxy_hex.rb +84 -0
- data/misc/txt2html.rb +440 -0
- data/samples/a.out.rb +31 -0
- data/samples/asmsyntax.rb +77 -0
- data/samples/bindiff.rb +555 -0
- data/samples/compilation-steps.rb +49 -0
- data/samples/cparser_makestackoffset.rb +55 -0
- data/samples/dasm-backtrack.rb +38 -0
- data/samples/dasmnavig.rb +318 -0
- data/samples/dbg-apihook.rb +228 -0
- data/samples/dbghelp.rb +143 -0
- data/samples/disassemble-gui.rb +102 -0
- data/samples/disassemble.rb +133 -0
- data/samples/dump_upx.rb +95 -0
- data/samples/dynamic_ruby.rb +1929 -0
- data/samples/elf_list_needed.rb +46 -0
- data/samples/elf_listexports.rb +33 -0
- data/samples/elfencode.rb +25 -0
- data/samples/exeencode.rb +128 -0
- data/samples/factorize-headers-elfimports.rb +77 -0
- data/samples/factorize-headers-peimports.rb +109 -0
- data/samples/factorize-headers.rb +43 -0
- data/samples/gdbclient.rb +583 -0
- data/samples/generate_libsigs.rb +102 -0
- data/samples/hotfix_gtk_dbg.rb +59 -0
- data/samples/install_win_env.rb +78 -0
- data/samples/lindebug.rb +924 -0
- data/samples/linux_injectsyscall.rb +95 -0
- data/samples/machoencode.rb +31 -0
- data/samples/metasm-shell.rb +91 -0
- data/samples/pe-hook.rb +69 -0
- data/samples/pe-ia32-cpuid.rb +203 -0
- data/samples/pe-mips.rb +35 -0
- data/samples/pe-shutdown.rb +78 -0
- data/samples/pe-testrelocs.rb +51 -0
- data/samples/pe-testrsrc.rb +24 -0
- data/samples/pe_listexports.rb +31 -0
- data/samples/peencode.rb +19 -0
- data/samples/peldr.rb +494 -0
- data/samples/preprocess-flatten.rb +19 -0
- data/samples/r0trace.rb +308 -0
- data/samples/rubstop.rb +399 -0
- data/samples/scan_pt_gnu_stack.rb +54 -0
- data/samples/scanpeexports.rb +62 -0
- data/samples/shellcode-c.rb +40 -0
- data/samples/shellcode-dynlink.rb +146 -0
- data/samples/source.asm +34 -0
- data/samples/struct_offset.rb +47 -0
- data/samples/testpe.rb +32 -0
- data/samples/testraw.rb +45 -0
- data/samples/win32genloader.rb +132 -0
- data/samples/win32hooker-advanced.rb +169 -0
- data/samples/win32hooker.rb +96 -0
- data/samples/win32livedasm.rb +33 -0
- data/samples/win32remotescan.rb +133 -0
- data/samples/wintrace.rb +92 -0
- data/tests/all.rb +8 -0
- data/tests/dasm.rb +39 -0
- data/tests/dynldr.rb +35 -0
- data/tests/encodeddata.rb +132 -0
- data/tests/ia32.rb +82 -0
- data/tests/mips.rb +116 -0
- data/tests/parse_c.rb +239 -0
- data/tests/preprocessor.rb +269 -0
- data/tests/x86_64.rb +62 -0
- metadata +255 -0
@@ -0,0 +1,327 @@
|
|
1
|
+
# This file is part of Metasm, the Ruby assembly manipulation suite
|
2
|
+
# Copyright (C) 2006-2009 Yoann GUILLOT
|
3
|
+
#
|
4
|
+
# Licence is LGPL, see LICENCE in the top-level directory
|
5
|
+
|
6
|
+
|
7
|
+
require 'metasm/ia32/opcodes'
|
8
|
+
require 'metasm/ia32/encode'
|
9
|
+
require 'metasm/parse'
|
10
|
+
|
11
|
+
module Metasm
|
12
|
+
class Ia32
|
13
|
+
class ModRM
|
14
|
+
# may return a SegReg
|
15
|
+
# must be called before SegReg parser (which could match only the seg part of a modrm)
|
16
|
+
# Parses a memory operand (modrm) starting at the already-read token +otok+.
#
# Accepted shape: [<size> ['ptr']] [<seg> ':'] '[' [<seg> ':'] <expression> ']'
#
# Returns:
# * a new ModRM (adsz, ptsz, scale, index, base, imm, seg) on success,
# * a SegReg when a lone segment register (not followed by ':') was read,
# * nil when +otok+ does not start a modrm (nothing else was consumed).
# Raises through +otok+ (raise otok, msg) on a malformed operand.
# +cpu+ is only used for cpu.str_to_reg, to turn names into register objects.
def self.parse(lexer, otok, cpu)
  tok = otok
  ptsz = nil
  seg = nil

  # optional operand size specifier, optionally followed by 'ptr'
  if tok && tok.type == :string && tok.raw =~ /^(?:byte|[dqo]?word|_(\d+)bits)$/
    explicit_bits = $1
    ptsz =
      if explicit_bits
        explicit_bits.to_i
      else
        case tok.raw
        when 'byte' then 8
        when 'word' then 16
        when 'dword' then 32
        when 'qword' then 64
        when 'oword' then 128
        else raise otok, 'mrm: bad ptr size'
        end
      end
    lexer.skip_space
    tok = lexer.readtok
    if tok && tok.type == :string && tok.raw == 'ptr'
      lexer.skip_space
      tok = lexer.readtok
    end
  end

  # optional segment selector in front of the bracket: 'fs:[...]'
  if tok && tok.type == :string && (seg_index = SegReg.s_to_i[tok.raw])
    lexer.skip_space
    seg = SegReg.new(seg_index)
    ntok = lexer.readtok
    unless ntok && ntok.type == :punct && ntok.raw == ':'
      # a bare segment register: not a modrm, hand the SegReg back
      raise otok, 'invalid modrm' if ptsz
      lexer.unreadtok ntok
      return seg
    end
    lexer.skip_space
    tok = lexer.readtok
  end

  # from here on an opening bracket is mandatory
  unless tok && tok.type == :punct && tok.raw == '['
    raise otok, 'invalid modrm' if ptsz || seg
    return
  end
  lexer.skip_space_eol

  # fasm syntax: segment selector inside the brackets, '[fs:eax]'
  tok = lexer.readtok
  if tok && tok.type == :string && !seg && (seg_index = SegReg.s_to_i[tok.raw])
    ntok = lexer.readtok
    raise otok, 'invalid modrm' unless ntok && ntok.type == :punct && ntok.raw == ':'
    seg = SegReg.new(seg_index)
    lexer.skip_space_eol
  else
    lexer.unreadtok tok
  end

  # the bracket content is read as a generic expression, up to the closing ']'
  content = Expression.parse(lexer)
  lexer.skip_space_eol
  closing = content && lexer.readtok
  raise(otok, 'bad modrm') unless closing && closing.type == :punct && closing.raw == ']'

  # replaces, in place, the external names that designate a register by the Reg object
  regify = lambda { |node|
    case node
    when Expression
      node.lexpr = regify[node.lexpr]
      node.rexpr = regify[node.rexpr]
      node
    when String
      cpu.str_to_reg(node) || node
    else
      node
    end
  }

  s = i = b = imm = nil

  # dispatches each term of the sum to base (b), scaled index (s, i) or immediate (imm)
  walker = lambda { |node|
    case node
    when nil
      # empty side of an expression: nothing to record
    when Reg
      if b
        # second plain register: becomes the index, with scale 1
        raise otok, 'mrm: too many regs' if i
        i = node
        s = 1
      else
        b = node
      end
    when Expression
      if node.op == :* && (node.rexpr.kind_of?(Reg) || node.lexpr.kind_of?(Reg))
        # scaled index, written either 'scale*reg' or 'reg*scale'
        raise otok, 'mrm: too many indexes' if i
        s, i = node.lexpr, node.rexpr
        s, i = i, s if s.kind_of?(Reg)
        raise otok, 'mrm: bad scale' unless s.kind_of?(Integer)
      elsif node.op == :+
        walker[node.lexpr]
        walker[node.rexpr]
      else
        # anything else is (a part of) the immediate
        imm = Expression[imm, :+, node]
      end
    else
      # anything else is (a part of) the immediate
      imm = Expression[imm, :+, node]
    end
  }

  walker[regify[content.reduce]]

  # a register left inside the immediate means the expression was not a valid address
  raise otok, 'mrm: reg in imm' if imm.kind_of?(Expression) && !imm.externals.grep(Reg).empty?

  # default address size: the size of the base register, else of the index register
  adsz =
    if b
      b.sz
    elsif i
      i.sz
    end

  # ptsz may still be nil here; parse_instruction_fixup derives it later
  new adsz, ptsz, s, i, b, imm, seg
end
|
135
|
+
end
|
136
|
+
|
137
|
+
|
138
|
+
# handles cpu-specific parser instruction, falls back to Ancestor's version if unknown keyword
|
139
|
+
# XXX changing the cpu size in the middle of the code may have baaad effects...
|
140
|
+
# Handles the ia32-specific parser directives '.mode' / '.bits' (case-insensitive),
# which must be followed by 16 or 32 and set @size accordingly.
# Any other directive is delegated to the superclass implementation.
# Raises through +instr+ on an unsupported mode or trailing garbage on the line.
def parse_parser_instruction(lexer, instr)
  directive = instr.raw.downcase
  return super(lexer, instr) unless directive == '.mode' || directive == '.bits'

  lexer.skip_space
  tok = lexer.readtok
  unless tok && tok.type == :string && (tok.raw == '16' || tok.raw == '32')
    raise instr, 'invalid cpu mode'
  end

  @size = tok.raw.to_i
  lexer.skip_space
  # only peeks: the end-of-line token is left in the lexer
  ntok = lexer.nexttok
  raise instr, 'syntax error' if ntok && ntok.type != :eol
end
|
154
|
+
|
155
|
+
# Records the instruction prefix named +pfx+ into i.prefix.
# Returns the stored value (always truthy) when +pfx+ is a known prefix, nil otherwise.
# i.prefix is initialized to an empty hash in both cases.
# XXX check for redefinition ?
def parse_prefix(i, pfx)
  i.prefix ||= {}

  # prefix keyword => [key in i.prefix, value]
  key, value = {
    'lock'   => [:lock, true],
    'rep'    => [:rep, 'rep'],
    'repe'   => [:rep, 'repz'],
    'repz'   => [:rep, 'repz'],
    'repne'  => [:rep, 'repnz'],
    'repnz'  => [:rep, 'repnz'],
    'code16' => [:sz, 16],
    'code32' => [:sz, 32]
  }[pfx]
  return unless key

  i.prefix[key] = value
end
|
168
|
+
|
169
|
+
# List of the register classes an argument may belong to.
# parse_argument relies on the FP register class being the last element.
def parse_argregclasslist
  [Reg, SimdReg, SegReg, DbgReg, CtrlReg] + [FpReg]
end
|
172
|
+
# Hook used by parse_argument to read a memory operand; forwards to ModRM.parse.
def parse_modrm(lex, tok, cpu)
  modrm_class = ModRM
  modrm_class.parse(lex, tok, cpu)
end
|
175
|
+
|
176
|
+
# parses an arbitrary ia32 instruction argument
|
177
|
+
# Parses an arbitrary ia32 instruction argument from +lexer+
# (a String is first wrapped in an AsmPreprocessor).
#
# Returns, in order of precedence: an FP register for 'ST(n)', whatever
# parse_modrm returns (when truthy), a register object for a reserved name,
# a Farptr for 'expr:expr', an Expression, or nil when nothing could be read.
def parse_argument(lexer)
  lexer = AsmPreprocessor.new(lexer) if lexer.kind_of?(String)

  # reserved names (registers/segments etc), built once and cached
  @args_token ||= begin
    reserved = {}
    parse_argregclasslist.map { |klass| klass.s_to_i.keys }.flatten.each { |name| reserved[name] = true }
    reserved
  end

  lexer.skip_space
  tok = lexer.readtok
  return unless tok

  # 'ST' '(' digit ')' is glued back into a single FP register name
  if tok.type == :string && tok.raw == 'ST'
    lexer.skip_space
    ntok = lexer.readtok
    if ntok && ntok.type == :punct && ntok.raw == '('
      lexer.skip_space
      nntok = lexer.readtok
      digit_ok = nntok && nntok.type == :string && nntok.raw =~ /^[0-9]$/
      if digit_ok
        lexer.skip_space
        ntok = lexer.readtok
      end
      unless digit_ok && ntok && ntok.type == :punct && ntok.raw == ')'
        raise tok, 'invalid FP register'
      end

      tok.raw << '(' << nntok.raw << ')'
      fpr = parse_argregclasslist.last
      raise tok, 'invalid FP register' unless fpr.s_to_i.has_key?(tok.raw)
      return fpr.new(fpr.s_to_i[tok.raw])
    else
      lexer.unreadtok ntok
    end
  end

  # memory operand (or lone segment register); must be tried before plain registers
  ret = parse_modrm(lexer, tok, self)
  return ret if ret

  if @args_token[tok.raw]
    parse_argregclasslist.each { |klass|
      return klass.from_str(tok.raw) if klass.s_to_i.has_key?(tok.raw)
    }
    raise tok, 'internal error'
  end

  # generic expression, possibly the segment part of a far pointer
  lexer.unreadtok tok
  expr = Expression.parse(lexer)
  lexer.skip_space

  # ntok is deliberately left untouched when no expression was read
  ntok = lexer.readtok if expr
  if expr && ntok && ntok.type == :punct && ntok.raw == ':'
    addr = Expression.parse(lexer)
    raise tok, 'invalid farptr' unless addr
    Farptr.new(expr, addr)
  else
    lexer.unreadtok ntok
    Expression[expr.reduce] if expr
  end
end
|
229
|
+
|
230
|
+
# check if the argument matches the opcode's argument spec
|
231
|
+
# Checks whether the parsed argument +arg+ matches the argument
# specification +spec+ (a Symbol) of the opcode +o+.
# Returns a truthy value on match, false/nil otherwise.
# Raises EncodeError when +spec+ is not a known specification.
def parse_arg_valid?(o, spec, arg)
  # movsx/movzx have operands of different sizes and get their own rules
  if o.name == 'movsx' || o.name == 'movzx'
    return unless arg.kind_of?(Reg) || arg.kind_of?(ModRM)
    unless arg.sz
      puts "ambiguous arg size for indirection in #{o.name}" if $VERBOSE
      return
    end
    # reg=dst, modrm=src (smaller)
    return (arg.kind_of?(Reg) && arg.sz >= 16) if spec == :reg
    return arg.sz == o.props[:argsz] if o.props[:argsz]
    return arg.sz <= 16
  end

  # explicit address size of the operand must agree with the one forced by the opcode
  return false if arg.kind_of?(ModRM) && arg.adsz && o.props[:adsz] && arg.adsz != o.props[:adsz]

  # when the opcode forces an operand size, a sized reg/modrm must agree with it
  forced_sz = o.props[:argsz]
  if forced_sz && (arg.kind_of?(Reg) || arg.kind_of?(ModRM))
    size_ok = (!arg.sz || arg.sz == forced_sz || spec == :reg_dx)
    return size_ok unless size_ok
  end

  case spec
  when :reg then arg.kind_of?(Reg) && (arg.sz >= 16 || o.props[:argsz])
  when :modrm then (arg.kind_of?(ModRM) || arg.kind_of?(Reg)) && (!arg.sz || arg.sz >= 16 || o.props[:argsz])
  when :i then arg.kind_of?(Expression)
  when :imm_val1 then arg.kind_of?(Expression) && arg.reduce == 1
  when :imm_val3 then arg.kind_of?(Expression) && arg.reduce == 3
  when :reg_eax then arg.kind_of?(Reg) && arg.val == 0
  when :reg_cl then arg.kind_of?(Reg) && arg.val == 1 && arg.sz == 8
  when :reg_dx then arg.kind_of?(Reg) && arg.val == 2 && arg.sz == 16
  when :seg3 then arg.kind_of?(SegReg)
  when :seg3A then arg.kind_of?(SegReg) && arg.val > 3
  when :seg2 then arg.kind_of?(SegReg) && arg.val < 4
  when :seg2A then arg.kind_of?(SegReg) && arg.val < 4 && arg.val != 1
  when :eeec then arg.kind_of?(CtrlReg)
  when :eeed then arg.kind_of?(DbgReg)
  when :modrmA then arg.kind_of?(ModRM)
  when :mrm_imm then arg.kind_of?(ModRM) && !arg.s && !arg.i && !arg.b
  when :farptr then arg.kind_of?(Farptr)
  when :regfp then arg.kind_of?(FpReg)
  when :regfp0 then arg.kind_of?(FpReg) && (arg.val == nil || arg.val == 0)
  when :modrmmmx then arg.kind_of?(ModRM) || (arg.kind_of?(SimdReg) && (arg.sz == 64 || (arg.sz == 128 && o.props[:xmmx])))
  when :regmmx then arg.kind_of?(SimdReg) && (arg.sz == 64 || (arg.sz == 128 && o.props[:xmmx]))
  when :modrmxmm then arg.kind_of?(ModRM) || (arg.kind_of?(SimdReg) && arg.sz == 128)
  when :regxmm then arg.kind_of?(SimdReg) && arg.sz == 128
  when :i8, :u8, :u16
    # in_range? may answer true or nil (unknown): both are accepted.
    # jz 0x28282828 may fit in :i8 depending on instr addr
    arg.kind_of?(Expression) &&
      (o.props[:setip] || Expression.in_range?(arg, spec) != false)
  else raise EncodeError, "Internal error: unknown argument specification #{spec.inspect}"
  end
end
|
286
|
+
|
287
|
+
# Normalizes cpu-specific shorthand before the generic prototype check.
# 'imul reg, imm' is expanded to 'imul reg, reg, imm' by duplicating the register.
def parse_instruction_checkproto(i)
  if 'imul' == i.opname && i.args.length == 2 &&
     i.args.first.kind_of?(Reg) && i.args.last.kind_of?(Expression)
    i.args.unshift(i.args.first.dup)
  end
  super(i)
end
|
296
|
+
|
297
|
+
# fixup the sz of a modrm argument, defaults to other argument size or current cpu mode
|
298
|
+
# Fills in the missing sizes of the first ModRM argument of instruction +i+.
#
# Operand size (m.sz), when not given explicitly:
# * 8 for movzx/movsx,
# * else the size of the first Reg argument,
# * else the :argsz property when every opcode of that name has one,
# * else the default size (see below).
# Address size (m.adsz), when not given explicitly:
# * the :adsz property when every opcode of that name has one,
# * else the default size.
# The default size is the :sz prefix of the instruction (code16/code32), or
# the current cpu size. i.prefix may be nil (Instruction only stores a prefix
# hash when one is supplied), in which case the cpu size is used.
def parse_instruction_fixup(i)
  m = i.args.grep(ModRM).first
  return unless m

  # this is also the size of ctrlreg/dbgreg etc
  # XXX fpu/simd ?
  default_sz = (i.prefix && i.prefix[:sz]) || @size

  unless m.sz
    m.sz =
      if i.opname == 'movzx' || i.opname == 'movsx'
        8
      elsif (r = i.args.grep(Reg).first)
        r.sz
      elsif opcode_list_byname[i.opname].all? { |o| o.props[:argsz] }
        opcode_list_byname[i.opname].first.props[:argsz]
      else
        default_sz
      end
  end

  unless m.adsz
    m.adsz =
      if opcode_list_byname[i.opname].all? { |o| o.props[:adsz] }
        opcode_list_byname[i.opname].first.props[:adsz]
      else
        default_sz
      end
  end
end
|
322
|
+
|
323
|
+
# Builds the instruction performing an unconditional jump to +target+,
# by parsing the source text "jmp <target>".
def instr_uncond_jump_to(target)
  source = "jmp #{target}"
  parse_instruction(source)
end
|
326
|
+
end
|
327
|
+
end
|
@@ -0,0 +1,91 @@
|
|
1
|
+
# This file is part of Metasm, the Ruby assembly manipulation suite
|
2
|
+
# Copyright (C) 2006-2009 Yoann GUILLOT
|
3
|
+
#
|
4
|
+
# Licence is LGPL, see LICENCE in the top-level directory
|
5
|
+
|
6
|
+
|
7
|
+
require 'metasm/ia32/opcodes'
|
8
|
+
require 'metasm/render'
|
9
|
+
|
10
|
+
# XXX move context in another file ?
|
11
|
+
module Metasm
|
12
|
+
class Ia32
|
13
|
+
# every ia32 argument class is renderable
class Argument
  include Renderable
end
|
16
|
+
|
17
|
+
# registers whose name depends only on their value
[SegReg, DbgReg, CtrlReg, FpReg].each do |klass|
  klass.class_eval do
    def render
      [self.class.i_to_s[@val]]
    end
  end
end

# registers whose name depends on both their size and their value
[Reg, SimdReg].each do |klass|
  klass.class_eval do
    def render
      [self.class.i_to_s[@sz][@val]]
    end

    # label => action hash; 'set sz' changes the register size
    def context
      {'set sz' => lambda { |s| @sz = s }}
    end
  end
end
|
24
|
+
|
25
|
+
class Farptr
  # rendered as segment, ':', address
  def render
    parts = []
    parts << @seg << ':' << @addr
  end
end
|
30
|
+
|
31
|
+
class ModRM
  # name of the operand size specifier for +sz+ bits:
  # 'byte', 'word', 'dword', 'qword', 'oword', or '_<sz>bits' for any other size
  # (the same names ModRM.parse accepts)
  def qualifier(sz)
    {
      8 => 'byte',
      16 => 'word',
      32 => 'dword',
      64 => 'qword',
      128 => 'oword'
    }.fetch(sz) { |k| "_#{sz}bits" }
  end

  # the instruction this modrm is an argument of, set by render_instruction
  attr_accessor :instruction

  # returns the array of elements to render: optional '<size> ptr ', optional
  # segment override, then the bracketed sum of base, immediate and scaled index
  def render
    r = []
    # the size specifier is omitted when a register argument of the same size
    # already appears in the instruction
    r << ( qualifier(@sz) << ' ptr ' ) if @sz and (not instruction or not @instruction.args.find { |a| a.kind_of? Reg and a.sz == @sz })
    r << @seg << ':' if seg

    e = nil
    e = Expression[e, :+, @b] if b
    e = Expression[e, :+, @imm] if imm
    # a scale of 1 is not displayed
    e = Expression[e, :+, (@s == 1 ? @i : [@s, :*, @i])] if s
    r << '[' << e << ']'
  end

  # label => action hash
  # 'set targetsz' changes the operand size, 'set seg' sets the segment override
  def context
    {'set targetsz' => lambda { |s| @sz = s },
     # the segment register class is SegReg (there is no Seg class)
     'set seg' => lambda { |s| @seg = SegReg.new s }}
  end
end
|
60
|
+
|
61
|
+
# Returns the array of elements to render for instruction +i+:
# optional 'lock ' and rep prefixes, the opcode name, then the arguments,
# the first one introduced by ' ' and the following ones by ', '.
# Each ModRM argument gets a back-reference to +i+ (used by ModRM#render).
def render_instruction(i)
  pfx = i.prefix
  out = []
  out << 'lock ' if pfx && pfx[:lock]
  out << pfx[:rep] << ' ' if pfx && pfx[:rep]
  out << i.opname
  i.args.each do |arg|
    arg.instruction = i if arg.kind_of?(ModRM)
    # nothing but the opcode name rendered so far: this is the first argument
    separator = (out.last == i.opname) ? ' ' : ', '
    out << separator << arg
  end
  out
end
|
72
|
+
|
73
|
+
# Returns a label => action (lambda) hash of the edits applicable to
# instruction +i+: rep prefix handling, segment override removal, lock toggle.
# The rep actions depend on the :strop / :stropz properties of the first
# opcode registered under i.opname.
# XXX
def instruction_context(i)
  actions = {}
  props = opcode_list_byname[i.opname].first.props
  has_seg_modrm = lambda { |a| a.kind_of?(ModRM) && a.seg }

  if i.prefix && i.prefix[:rep]
    if props[:stropz]
      actions['toogle repz'] = lambda {
        i.prefix[:rep] = {'repnz' => 'repz', 'repz' => 'repnz'}[i.prefix[:rep]]
      }
    end
    actions['rm rep'] = lambda { i.prefix.delete :rep }
  elsif props[:stropz]
    # :stropz takes precedence over :strop
    actions['set rep'] = lambda { (i.prefix ||= {})[:rep] = 'repz' }
  elsif props[:strop]
    actions['set rep'] = lambda { (i.prefix ||= {})[:rep] = 'rep' }
  end

  if i.args.find(&has_seg_modrm)
    actions['rm seg'] = lambda { i.args.find(&has_seg_modrm).seg = nil }
  end

  actions['toggle lock'] = lambda { (i.prefix ||= {})[:lock] = !i.prefix[:lock] }
  actions
end
|
90
|
+
end
|
91
|
+
end
|
data/lib/metasm/main.rb
ADDED
@@ -0,0 +1,1193 @@
|
|
1
|
+
# This file is part of Metasm, the Ruby assembly manipulation suite
|
2
|
+
# Copyright (C) 2006-2009 Yoann GUILLOT
|
3
|
+
#
|
4
|
+
# Licence is LGPL, see LICENCE in the top-level directory
|
5
|
+
|
6
|
+
|
7
|
+
module Metasm
|
8
|
+
|
9
|
+
VERSION = 0x0001 # major major minor minor
|
10
|
+
|
11
|
+
# superclass for all metasm exceptions
|
12
|
+
class Exception < RuntimeError
end

# parse error
class ParseError < Exception
end

# invalid exeformat signature
class InvalidExeFormat < Exception
end

# cannot honor .offset specification, reloc fixup overflow
class EncodeError < Exception
end
|
19
|
+
|
20
|
+
# holds context of a processor
|
21
|
+
# endianness, current mode, opcode list...
|
22
|
+
class CPU
  attr_accessor :valid_args, :valid_props, :fields_mask,
                :endianness, :size,
                :generate_PIC

  # the opcode list is built lazily by init_opcode_list, unless assigned explicitly
  attr_writer :opcode_list
  def opcode_list
    @opcode_list ||= init_opcode_list
  end

  def initialize
    @generate_PIC = true
    @valid_props = [:setip, :saveip, :stopexec]
    @valid_args = []
    @fields_shift= {}
    @fields_mask = {}
  end

  # returns a hash opcode_name => array of opcodes with this name
  # the hash is computed once, then cached
  def opcode_list_byname
    return @opcode_list_byname if @opcode_list_byname

    byname = {}
    opcode_list.each { |op| (byname[op.name] ||= []) << op }
    @opcode_list_byname = byname
  end

  # sets up the C parser : standard macro definitions, type model (size of int etc)
  def tune_cparser(cp)
    if @size == 64
      cp.lp64
    elsif @size == 32
      cp.ilp32
    elsif @size == 16
      cp.ilp16
    end
    cp.endianness = @endianness
    cp.lexer.define_weak('_STDC', 1)
    # TODO gcc -dM -E - </dev/null
    tune_prepro(cp.lexer)
  end

  # hook to customize a preprocessor; does nothing here
  def tune_prepro(pp)
    # TODO pp.define('BIGENDIAN')
  end

  # return a new AsmPreprocessor, tuned by this cpu then by +exe+ (if given)
  def new_asmprepro(str='', exe=nil)
    prepro = AsmPreprocessor.new(str, exe)
    tune_prepro(prepro)
    exe.tune_prepro(prepro) if exe
    prepro
  end

  # returns a new C::Parser built for this cpu
  def new_cparser
    C::Parser.new(self)
  end

  # returns a new C::Compiler
  # +exe+ is bound to this cpu unless its @cpu is already set
  def new_ccompiler(parser, exe=ExeFormat.new)
    exe.cpu = self unless exe.instance_variable_get("@cpu")
    C::Compiler.new(parser, exe)
  end

  # the lowercased class name, without its namespace
  def shortname
    class_name = self.class.name
    class_name.sub(/.*::/, '').downcase
  end
end
|
85
|
+
|
86
|
+
# generic CPU, with no instructions, just size/endianness
|
87
|
+
class UnknownCPU < CPU
  # +size+ and +endianness+ are stored as given
  def initialize(size, endianness)
    super()
    @size = size
    @endianness = endianness
  end
end
|
93
|
+
|
94
|
+
# a cpu instruction 'formal' description
|
95
|
+
class Opcode
  # the name of the instruction
  attr_accessor :name
  # formal description of arguments (array of cpu-specific symbols)
  attr_accessor :args
  # binary encoding of the opcode (integer for risc, array of bytes for cisc)
  attr_accessor :bin
  # list of bit fields in the binary encoding
  # hash position => field
  # position is bit shift for risc, [byte index, bit shift] for cisc
  # field is cpu-specific
  attr_accessor :fields
  # hash of opcode generic properties/restrictions (mostly property => true/false)
  attr_accessor :props
  # binary mask for decoding
  attr_accessor :bin_mask

  # args, fields and props start empty; bin_mask is left unset
  def initialize(name, bin=nil)
    @name, @bin = name, bin
    @args = []
    @fields, @props = {}, {}
  end

  # the name with everything from its first '.' removed
  def basename
    suffix = /\..*/
    @name.sub(suffix, '')
  end
end
|
124
|
+
|
125
|
+
# defines an attribute self.backtrace (array of filename/lineno)
|
126
|
+
# and a method backtrace_str which dumps this array to a human-readable form
|
127
|
+
module Backtrace
  # array [file, lineno, file, lineno]
  # if file 'A' does #include 'B' you'll get ['A', linenoA, 'B', linenoB]
  attr_accessor :backtrace

  # builds a readable string from self.backtrace
  def backtrace_str
    Backtrace.backtrace_str(@backtrace)
  end

  # builds a readable backtrace string from an array of [file, lineno, file, lineno, ..]
  # the pairs are listed from the last one to the first one
  # returns an empty string when +ary+ is nil
  def self.backtrace_str(ary)
    return '' unless ary

    text = ''
    # top is the index just past the current [file, lineno] pair
    ary.length.step(1, -2) { |top|
      text << ",\n\tincluded from " if ary[top]
      text << "#{ary[top - 2].inspect} line #{ary[top - 1]}"
    }
    text
  end

  # builds the ParseError describing +msg+ at this location
  # this is what makes 'raise obj, msg' work with objects including Backtrace
  def exception(msg='syntax error')
    location = backtrace_str
    ParseError.new "at #{location}: #{msg}"
  end
end
|
154
|
+
|
155
|
+
# an instruction: opcode name + arguments
|
156
|
+
class Instruction
  # arguments (cpu-specific objects)
  attr_accessor :args
  # hash of prefixes (unused in simple cpus)
  attr_accessor :prefix
  # name of the associated opcode
  attr_accessor :opname
  # reference to the cpu which issued this instruction (used for rendering)
  attr_accessor :cpu

  include Backtrace

  # @prefix is only set when +pfx+ is given, it is nil otherwise
  def initialize(cpu, opname=nil, args=[], pfx=nil, backtrace=nil)
    @cpu, @opname, @args = cpu, opname, args
    @prefix = pfx if pfx
    @backtrace = backtrace
  end

  # duplicates the argument list and prefix hash
  # the opcode name and the backtrace are duplicated too; the cpu is shared,
  # and the arguments themselves are not copied
  def dup
    name_copy = (@opname.dup if opname)
    prefix_copy = (@prefix.dup if prefix)
    backtrace_copy = (@backtrace.dup if backtrace)
    Instruction.new(@cpu, name_copy, @args.dup, prefix_copy, backtrace_copy)
  end
end
|
181
|
+
|
182
|
+
# all kind of data description (including repeated/uninitialized)
|
183
|
+
class Data
  # maps data type to Expression parameters (signedness/bit size)
  INT_TYPE = {'db' => :a8, 'dw' => :a16, 'dd' => :a32, 'dq' => :a64}

  # an Expression, an Array of Data, a String, or :uninitialized
  attr_accessor :data
  # the data type, from INT_TYPE (TODO store directly Expression parameters ?)
  attr_accessor :type
  # the repetition count of the data parameter (dup constructs)
  attr_accessor :count

  include Backtrace

  # note the argument order: the type comes before the data
  def initialize(type, data, count=1, backtrace=nil)
    @type = type
    @data = data
    @count = count
    @backtrace = backtrace
  end
end
|
200
|
+
|
201
|
+
# a name for a location
|
202
|
+
class Label
  # the name of the label
  attr_accessor :name

  include Backtrace

  def initialize(name, backtrace=nil)
    @name = name
    @backtrace = backtrace
  end
end
|
211
|
+
|
212
|
+
# alignment directive
|
213
|
+
class Align
  # the size to align to
  attr_accessor :val
  # the Data used to pad
  attr_accessor :fillwith

  include Backtrace

  def initialize(val, fillwith=nil, backtrace=nil)
    @val = val
    @fillwith = fillwith
    @backtrace = backtrace
  end
end
|
225
|
+
|
226
|
+
# padding directive
|
227
|
+
class Padding
  # Data used to pad
  attr_accessor :fillwith

  include Backtrace

  def initialize(fillwith=nil, backtrace=nil)
    @fillwith = fillwith
    @backtrace = backtrace
  end
end
|
237
|
+
|
238
|
+
# offset directive
|
239
|
+
# can be used to fix padding length or to assert some code/data compiled length
|
240
|
+
class Offset
  # the assembler will arrange to make this pseudo-instruction
  # be at this offset from beginning of current section
  attr_accessor :val

  include Backtrace

  def initialize(val, backtrace=nil)
    @val = val
    @backtrace = backtrace
  end
end
|
251
|
+
|
252
|
+
# the superclass of all real executable formats
|
253
|
+
# main methods:
|
254
|
+
# self.decode(str) => decodes the file format (imports/relocs/etc), no asm disassembly
|
255
|
+
# parse(source) => parses assembler source, fills self.source
|
256
|
+
# assemble => assembles self.source in binary sections/segments/whatever
|
257
|
+
# encode => builds imports/relocs tables, put all this together, links everything in self.encoded
|
258
|
+
class ExeFormat
  # array of Data/Instruction/Align/Padding/Offset/Label, populated in parse
  attr_accessor :cursource
  # contains the binary version of the compiled program (EncodedData)
  attr_accessor :encoded
  # hash of labels generated by new_label
  attr_accessor :unique_labels_cache

  # initializes self.cpu, creates an empty self.encoded
  def initialize(cpu=nil)
    @unique_labels_cache = {}
    @encoded = EncodedData.new
    @cpu = cpu
  end

  attr_writer :cpu
  # custom reader: when no cpu is set, it is taken from cpu_from_headers and kept
  def cpu
    @cpu ||= cpu_from_headers
  end

  # return the label name corresponding to the specified offset of the encodeddata, creates it if necessary
  def label_at(edata, offset, base = '')
    existing = edata.inv_export[offset]
    return existing if existing

    created = new_label(base)
    edata.add_export(created, offset)
    created
  end

  # creates a new label, that is guaranteed to never be returned again as long as this object (ExeFormat) exists
  # characters of +base+ other than letters, digits and '_' are replaced by '_'
  def new_label(base = '')
    label = base.dup.tr('^a-zA-Z0-9_', '_')
    if label.empty? || @unique_labels_cache[label]
      # already taken (or no name given): make it unique with the string's object id
      # use %x instead of to_s(16) for negative values
      label << '_uuid' << ('%08x' % label.object_id)
      label.freeze
    end
    @unique_labels_cache[label] = true
    label
  end

  # share self.unique_labels_cache with other, checks for conflicts, returns self
  # raises when a label is present in both caches
  def share_namespace(other)
    return self if other.unique_labels_cache.equal?(@unique_labels_cache)

    clashes = other.unique_labels_cache.keys & @unique_labels_cache.keys
    raise "share_ns #{clashes.inspect}" unless clashes.empty?

    @unique_labels_cache.update(other.unique_labels_cache)
    other.unique_labels_cache = @unique_labels_cache
    self
  end
end
|
304
|
+
|
305
|
+
# superclass for classes similar to Expression
|
306
|
+
# must define #bind, #reduce_rec, #match_rec, #externals
|
307
|
+
# Common ancestor of Expression-like classes.
# Provides + and -, both building an Expression and returning its reduced form.
class ExpressionType
  # self + o, reduced
  def +(o)
    Expression[self, :+, o].reduce
  end

  # self - o, reduced
  def -(o)
    Expression[self, :-, o].reduce
  end
end
|
311
|
+
|
312
|
+
# handle immediate values, and arbitrary arithmetic/logic expression involving variables
|
313
|
+
# boolean values are treated as in C : true is 1, false is 0
|
314
|
+
# TODO replace #type with #size => bits + #type => [:signed/:unsigned/:any/:floating]
|
315
|
+
# TODO handle floats
|
316
|
+
class Expression < ExpressionType
|
317
|
+
# per-type tables, keyed by :i<bits> / :u<bits> / :a<bits> for 8, 16, 32 and 64 bits
# INT_SIZE:: width in bits
# INT_MIN::  lowest accepted value (the :a types share the :i minimum)
# INT_MAX::  highest accepted value (the :a types share the :u maximum)
INT_SIZE = {}
INT_MIN = {}
INT_MAX = {}

[8, 16, 32, 64].each do |bits|
  any = :"a#{bits}"
  unsigned = :"u#{bits}"
  signed = :"i#{bits}"
  half = 1 << (bits - 1)

  [any, unsigned, signed].each { |type| INT_SIZE[type] = bits }

  INT_MIN[signed] = -half	# -0x8000
  INT_MIN[any] = -half
  INT_MIN[unsigned] = 0

  INT_MAX[signed] = half - 1	# 0x7fff
  INT_MAX[unsigned] = (1 << bits) - 1	# 0xffff
  INT_MAX[any] = (1 << bits) - 1
end
|
334
|
+
|
335
|
+
# alternative constructor
|
336
|
+
# in operands order, and allows nesting using sub-arrays
|
337
|
+
# ex: Expression[[:-, 42], :*, [1, :+, [4, :*, 7]]]
|
338
|
+
# with a single argument, return it if already an Expression, else construct a new one (using unary +/-)
|
339
|
+
# alternative constructor taking its arguments in reading order
# Expression[l, op, r]:: binary form; Array operands are built recursively
# Expression[op, r]::    unary form
# Expression[x]::        returns x if it is already an Expression, otherwise wraps it
#                        in a unary + (or a unary - of the opposite for a negative Numeric)
# raises ArgumentError for Expression[nil]
def self.[](l, op=nil, r=nil)
  if r
    # binary form: only nested arrays need to be built
    l = self[*l] if l.kind_of? ::Array
  elsif op
    # unary form: shift the arguments one slot to the right
    l, op, r = nil, l, op
  else
    raise ArgumentError, 'invalid Expression[nil]' if not l
    return l if l.kind_of? Expression
    negative = (l.kind_of?(::Numeric) and l < 0)
    l, op, r = nil, (negative ? :'-' : :'+'), (negative ? -l : l)
  end
  r = self[*r] if r.kind_of? ::Array
  new(op, r, l)
end
|
362
|
+
|
363
|
+
# checks if a given Expression/Integer is in the type range
|
364
|
+
# returns true if it is, false if it overflows, and nil if cannot be determined (eg unresolved variable)
|
365
|
+
# tells whether val (an Expression, reduced first, or a Numeric) fits in +type+
# returns true/false for a Numeric and a type listed in INT_MIN,
# nil when val is not Numeric after reduction or when the type is not listed
def self.in_range?(val, type)
  val = val.reduce if val.kind_of? self
  return unless val.kind_of? ::Numeric

  lower = INT_MIN[type]
  return unless lower

  # val == val.to_i rejects values with a fractional part
  val == val.to_i and val >= lower and val <= INT_MAX[type]
end
|
374
|
+
|
375
|
+
# casts an unsigned value to a two-complement signed if the sign bit is set
|
376
|
+
# reinterprets an Integer as a bitlength-wide two-complement value:
# when val >> (bitlength-1) equals 1, returns val - (1 << bitlength)
# any other value (including non-Integers) is returned unchanged
def self.make_signed(val, bitlength)
  return val unless val.kind_of? Integer

  sign_bit_set = (val >> (bitlength - 1)) == 1
  sign_bit_set ? val - (1 << bitlength) : val
end
|
382
|
+
|
383
|
+
# the operator (symbol)
|
384
|
+
attr_accessor :op
|
385
|
+
# the lefthandside expression (nil for unary expressions)
|
386
|
+
attr_accessor :lexpr
|
387
|
+
# the righthandside expression
|
388
|
+
attr_accessor :rexpr
|
389
|
+
|
390
|
+
# basic constructor
|
391
|
+
# XXX funny args order, you should use +Expression[]+ instead
|
392
|
+
# basic constructor, note the argument order: operator, right operand, left operand
# raises ArgumentError when op is not a Symbol
def initialize(op, rexpr, lexpr)
  unless op.kind_of? ::Symbol
    raise ArgumentError, "Expression: invalid arg order: #{[lexpr, op, rexpr].inspect}"
  end
  @op = op
  @lexpr = lexpr
  @rexpr = rexpr
end
|
396
|
+
|
397
|
+
# recursive check of equity using #==
|
398
|
+
# will not match 1+2 and 2+1
|
399
|
+
# structural equality: same object, or an Expression with == op, lexpr and rexpr
# operands are compared in place, so 1+2 does not equal 2+1
def ==(o)
  # shortcircuit recursion
  return true if o.object_id == object_id
  return false unless o.kind_of?(Expression)

  @op == o.op and @lexpr == o.lexpr and @rexpr == o.rexpr
end
|
403
|
+
|
404
|
+
# make it useable as Hash key (see +==+)
|
405
|
+
# sum of the hashes of lexpr, op and rexpr, masked to 31 bits
# together with eql? (aliased to ==) this makes Expressions usable as Hash keys
def hash
  total = [@lexpr, @op, @rexpr].inject(0) { |sum, part| sum + part.hash }
  total & 0x7fff_ffff
end
alias eql? ==
|
409
|
+
|
410
|
+
# returns a new Expression with all variables found in the binding replaced with their value
|
411
|
+
# does not check the binding's key class except for numeric
|
412
|
+
# calls lexpr/rexpr #bind if they respond_to? it
|
413
|
+
# returns a new Expression where operands found in +binding+ are replaced by their value
# if self is itself a key of binding, returns a dup of the bound value
# operands that are ExpressionType and not bound directly are bound recursively
# raises if a Numeric operand is found as a binding key
def bind(binding = {})
  whole = binding[self]
  return whole.dup if whole

  substitute = lambda { |operand|
    bound = (operand and binding[operand])
    if bound
      raise "internal error - bound #{operand.inspect}" if operand.kind_of? ::Numeric
      bound
    elsif operand.kind_of? ExpressionType
      operand.bind(binding)
    else
      operand
    end
  }

  l = substitute[@lexpr]
  r = substitute[@rexpr]
  Expression[l, @op, r]
end
|
433
|
+
|
434
|
+
# bind in place (replace self.lexpr/self.rexpr with the binding value)
|
435
|
+
# only recurse with Expressions (does not use respond_to?)
|
436
|
+
# in-place variant of bind: replaces @lexpr/@rexpr with their bound value when there is one
# Expression operands are bound recursively in place; other ExpressionTypes are only looked up
# returns self
def bind!(binding = {})
  [:@lexpr, :@rexpr].each { |slot|
    operand = instance_variable_get(slot)
    if operand.kind_of?(Expression)
      operand.bind!(binding)
    elsif operand
      instance_variable_set(slot, binding[operand] || operand)
    end
  }
  self
end
|
449
|
+
|
450
|
+
# reduce_lambda is a callback called after the standard reduction procedure for custom algorithms
|
451
|
+
# the lambda may return a new expression or nil (to keep the old expr)
|
452
|
+
# exemple: lambda { |e| e.lexpr if e.kind_of? Expression and e.op == :& and e.rexpr == 0xffff_ffff }
|
453
|
+
# returns old lambda
|
454
|
+
# returns the current reduce callback
# when called with a block, installs that block as the new callback and returns the previous one
def self.reduce_lambda(&b)
  previous = @@reduce_lambda
  @@reduce_lambda = b if b
  previous
end
|
459
|
+
# installs p (a callable or nil) as the reduce callback
def self.reduce_lambda=(p)
  @@reduce_lambda = p
end

# no callback by default
@@reduce_lambda = nil
|
463
|
+
|
464
|
+
# returns a simplified copy of self
|
465
|
+
# can return an +Expression+ or a +Numeric+, may return self
|
466
|
+
# see +reduce_rec+ for simplifications description
|
467
|
+
# if given a block, it will temporarily overwrite the global @@reduce_lambda XXX THIS IS NOT THREADSAFE
|
468
|
+
# returns the simplified form of self as computed by reduce_rec
# an Expression or Numeric result is returned as is, anything else is wrapped with Expression[]
# a block, if given, replaces the class-wide @@reduce_lambda for the duration of the call
# (the previous value is restored in the ensure clause; this swap is global, hence not threadsafe)
def reduce(&b)
  if b
    old_rp = @@reduce_lambda
    @@reduce_lambda = b
  end

  e = reduce_rec
  if e.kind_of?(Expression) or e.kind_of?(Numeric)
    e
  else
    Expression[e]
  end
ensure
  @@reduce_lambda = old_rp if b
end
|
477
|
+
|
478
|
+
# resolves logic operations (true || false, etc)
|
479
|
+
# computes numeric operations (1 + 3)
|
480
|
+
# expands substractions to addition of the opposite
|
481
|
+
# reduces double-oppositions (-(-1) => 1)
|
482
|
+
# reduces addition of 0 and unary +
|
483
|
+
# canonicalize additions: put variables in the lhs, descend addition tree in the rhs => (a + (b + (c + 12)))
|
484
|
+
# make formal reduction if finds somewhere in addition tree (a) and (-a)
|
485
|
+
# one reduction pass over self, operands first
# - operands that are ExpressionType are reduced recursively
# - if @@reduce_lambda is set, it is applied to operands that were not Expressions,
#   and to the final result when that result is an ExpressionType
# - all-numeric operands are evaluated (comparisons and && || yield 1 or 0)
# - otherwise a per-operator rewrite is attempted; when none applies the
#   expression is rebuilt from the reduced operands (or self is returned if
#   nothing changed)
# returns a Numeric, an Expression, :unknown, or whatever the lambda/rewrites yield
def reduce_rec
  l = @lexpr.kind_of?(ExpressionType) ? @lexpr.reduce_rec : @lexpr
  r = @rexpr.kind_of?(ExpressionType) ? @rexpr.reduce_rec : @rexpr

  if @@reduce_lambda
    l = @@reduce_lambda[l] || l if not @lexpr.kind_of? Expression
    r = @@reduce_lambda[r] || r if not @rexpr.kind_of? Expression
  end

  # v is the rewritten value, or nil when no rewrite rule applied
  v =
  if r.kind_of?(::Numeric) and (l == nil or l.kind_of?(::Numeric))
    # calculate numerics
    if [:'&&', :'||', :'>', :'<', :'>=', :'<=', :'==', :'!='].include?(@op)
      # bool expr
      raise 'internal error' if not l
      case @op
      when :'&&'; (l != 0) && (r != 0)
      when :'||'; (l != 0) || (r != 0)
      when :'>' ; l > r
      when :'>='; l >= r
      when :'<' ; l < r
      when :'<='; l <= r
      when :'=='; l == r
      when :'!='; l != r
      end ? 1 : 0
    elsif not l
      case @op
      when :'!'; (r == 0) ? 1 : 0
      when :+; r
      when :-; -r
      when :~; ~r
      end
    else
      # use ruby evaluator
      l.send(@op, r)
    end

  elsif @op == :'&&'
    if l == 0	# shortcircuit eval
      0
    elsif l == 1
      Expression[r, :'!=', 0].reduce_rec
    elsif r == 0
      0	# XXX l could be a special ExprType with sideeffects ?
    end
  elsif @op == :'||'
    if l.kind_of? ::Numeric and l != 0	# shortcircuit eval
      1
    elsif l == 0
      Expression[r, :'!=', 0].reduce_rec
    elsif r == 0
      Expression[l, :'!=', 0].reduce_rec
    end
  elsif @op == :>> or @op == :<<
    if l == 0; 0
    elsif r == 0; l
    elsif l.kind_of? Expression and l.op == @op
      Expression[l.lexpr, @op, [l.rexpr, :+, r]].reduce_rec
    # XXX (a >> 1) << 1  !=  a (lose low bit)
    # XXX (a << 1) >> 1  !=  a (with real cpus, lose high bit)
    # (a | b) << i
    elsif r.kind_of? Integer and l.kind_of? Expression and [:&, :|, :^].include? l.op
      Expression[[l.lexpr, @op, r], l.op, [l.rexpr, @op, r]].reduce_rec
    end
  elsif @op == :'!'
    # !(a cmp b) => a (opposite cmp) b
    if r.kind_of? Expression and op = {:'==' => :'!=', :'!=' => :'==', :< => :>=, :> => :<=, :<= => :>, :>= => :<}[r.op]
      Expression[r.lexpr, op, r.rexpr].reduce_rec
    end
  elsif @op == :==
    if l == r; 1
    elsif r == 0 and l.kind_of? Expression and op = {:'==' => :'!=', :'!=' => :'==', :< => :>=, :> => :<=, :<= => :>, :>= => :<}[l.op]
      # (a cmp b) == 0 => a (opposite cmp) b
      Expression[l.lexpr, op, l.rexpr].reduce_rec
    elsif r == 1 and l.kind_of? Expression and op = {:'==' => :'!=', :'!=' => :'==', :< => :>=, :> => :<=, :<= => :>, :>= => :<}[l.op]
      # (a cmp b) == 1 => a cmp b
      l
    elsif r == 0 and l.kind_of? Expression and l.op == :+
      if l.rexpr.kind_of? Expression and l.rexpr.op == :- and not l.rexpr.lexpr
        # a + (-b) == 0 => a == b
        Expression[l.lexpr, @op, l.rexpr.rexpr].reduce_rec
      elsif l.rexpr.kind_of? ::Integer
        # a + 4 == 0 => a == -4
        Expression[l.lexpr, @op, -l.rexpr].reduce_rec
      end
    end
  elsif @op == :'!='
    if l == r; 0
    end
  elsif @op == :^
    if l == :unknown or r == :unknown; :unknown
    elsif l == 0; r
    elsif r == 0; l
    elsif l == r; 0
    elsif r == 1 and l.kind_of? Expression and [:'==', :'!=', :<, :>, :<=, :>=].include? l.op
      Expression[nil, :'!', l].reduce_rec
    elsif l.kind_of?(::Numeric)
      if r.kind_of? Expression and r.op == :^
        # 1^(x^y) => x^(y^1)
        Expression[r.lexpr, :^, [r.rexpr, :^, l]].reduce_rec
      else
        # 1^a => a^1
        Expression[r, :^, l].reduce_rec
      end
    elsif l.kind_of? Expression and l.op == :^
      # (a^b)^c => a^(b^c)
      Expression[l.lexpr, :^, [l.rexpr, :^, r]].reduce_rec
    elsif r.kind_of? Expression and r.op == :^
      if r.rexpr == l
        # a^(b^a) => b
        r.lexpr
      elsif r.lexpr == l
        # a^(a^b) => b
        r.rexpr
      else
        # a^(b^(c^(a^d)))  =>  b^(a^(c^(a^d)))
        # XXX ugly..
        tr = r
        found = false
        while not found and tr.kind_of?(Expression) and tr.op == :^
          found = true if tr.lexpr == l or tr.rexpr == l
          tr = tr.rexpr
        end
        if found
          Expression[r.lexpr, :^, [l, :^, r.rexpr]].reduce_rec
        end
      end
    elsif l.kind_of?(Expression) and l.op == :& and l.rexpr.kind_of?(::Integer) and (l.rexpr & (l.rexpr+1)) == 0
      if r.kind_of?(::Integer) and r & l.rexpr == r
        # (a&0xfff)^12 => (a^12)&0xfff
        Expression[[l.lexpr, :^, r], :&, l.rexpr].reduce_rec
      elsif r.kind_of?(Expression) and r.op == :& and r.rexpr.kind_of?(::Integer) and r.rexpr == l.rexpr
        # (a&0xfff)^(b&0xfff) => (a^b)&0xfff
        Expression[[l.lexpr, :^, r.lexpr], :&, l.rexpr].reduce_rec
      end
    end
  elsif @op == :&
    if l == 0 or r == 0; 0
    elsif r == 1 and l.kind_of?(Expression) and [:'==', :'!=', :<, :>, :<=, :>=].include?(l.op)
      l
    elsif l == r; l
    elsif l.kind_of?(Integer); Expression[r, @op, l].reduce_rec
    elsif l.kind_of?(Expression) and l.op == @op; Expression[l.lexpr, @op, [l.rexpr, @op, r]].reduce_rec
    elsif l.kind_of?(Expression) and [:|, :^].include?(l.op) and r.kind_of?(Integer) and (l.op == :| or (r & (r+1)) != 0)
      # (a ^| b) & i => (a&i ^| b&i)
      Expression[[l.lexpr, :&, r], l.op, [l.rexpr, :&, r]].reduce_rec
    elsif r.kind_of?(::Integer) and l.kind_of?(Expression) and (r & (r+1)) == 0
      # foo & 0xffff
      reduce_rec_mod2(l, r)
    end
  elsif @op == :|
    if    l == 0; r
    elsif r == 0; l
    elsif l == -1 or r == -1; -1
    elsif l == r; l
    elsif l.kind_of? Integer; Expression[r, @op, l].reduce_rec
    elsif l.kind_of? Expression and l.op == @op; Expression[l.lexpr, @op, [l.rexpr, @op, r]].reduce_rec
    end
  elsif @op == :*
    if    l == 0 or r == 0; 0
    elsif l == 1; r
    elsif r == 1; l
    elsif r.kind_of? Integer; Expression[r, @op, l].reduce_rec
    elsif r.kind_of? Expression and r.op == @op; Expression[[l, @op, r.lexpr], @op, r.rexpr].reduce_rec
    elsif l.kind_of? Integer and r.kind_of? Expression and r.op == :* and r.lexpr.kind_of? Integer; Expression[l*r.lexpr, :*, r.rexpr].reduce_rec	# XXX need & regsize..
    elsif l.kind_of? Integer and r.kind_of? Expression and r.op == :+ and r.rexpr.kind_of? Integer; Expression[[l, :*, r.lexpr], :+, l*r.rexpr].reduce_rec
    end
  elsif @op == :/
    if r == 0
      # division by zero: leave the expression unreduced
    elsif r.kind_of? Integer and l.kind_of? Expression and l.op == :+ and l.rexpr.kind_of? Integer and l.rexpr % r == 0
      Expression[[l.lexpr, :/, r], :+, l.rexpr/r].reduce_rec
    elsif r.kind_of? Integer and l.kind_of? Expression and l.op == :* and l.lexpr.kind_of?(::Numeric) and l.lexpr % r == 0
      # the Numeric guard keeps a symbolic factor (eg (a*b)/2) from being sent #%
      Expression[l.lexpr/r, :*, l.rexpr].reduce_rec
    end
  elsif @op == :-
    if l == :unknown or r == :unknown; :unknown
    elsif not l and r.kind_of? Expression and (r.op == :- or r.op == :+)
      if r.op == :-	# no lexpr (reduced)
        # -(-x) => x
        r.rexpr
      else	# :+ and lexpr (r is reduced)
        # -(a+b) => (-a)+(-b)
        Expression[[:-, r.lexpr], :+, [:-, r.rexpr]].reduce_rec
      end
    elsif l.kind_of? Expression and l.op == :+ and l.lexpr == r
      # shortcircuit for a common occurence [citation needed]
      # (a+b)-a
      l.rexpr
    elsif l
      # a-b => a+(-b)
      Expression[l, :+, [:-, r]].reduce_rec
    end
  elsif @op == :+
    if l == :unknown or r == :unknown; :unknown
    elsif not l; r	# +x  => x
    elsif r == 0; l	# x+0 => x
    elsif l.kind_of?(::Numeric)
      if r.kind_of? Expression and r.op == :+
        # 1+(x+y) => x+(y+1)
        Expression[r.lexpr, :+, [r.rexpr, :+, l]].reduce_rec
      else
        # 1+a => a+1
        Expression[r, :+, l].reduce_rec
      end
    # (a+b)+foo => a+(b+foo)
    elsif l.kind_of? Expression and l.op == @op; Expression[l.lexpr, @op, [l.rexpr, @op, r]].reduce_rec
    elsif l.kind_of? Expression and r.kind_of? Expression and l.op == :% and r.op == :% and l.rexpr.kind_of?(::Integer) and l.rexpr == r.rexpr
      # (a%n)+(b%n) => (a+b)%n
      Expression[[l.lexpr, :+, r.lexpr], :%, l.rexpr].reduce_rec
    else
      reduce_rec_add(l, r)
    end
  end

  ret = case v
  when nil
    # no dup if no new value
    (r == :unknown or l == :unknown) ? :unknown :
    ((r == @rexpr and l == @lexpr) ? self : Expression[l, @op, r])
  when Expression
    (v.lexpr == :unknown or v.rexpr == :unknown) ? :unknown : v
  else v
  end
  # give the custom callback a chance to rewrite the result
  if @@reduce_lambda and ret.kind_of? ExpressionType and newret = @@reduce_lambda[ret] and newret != ret
    if newret.kind_of? ExpressionType
      ret = newret.reduce_rec
    else
      ret = newret
    end
  end
  ret
end
|
711
|
+
|
712
|
+
|
713
|
+
# a+(b+(c+(-a))) => b+c+0
|
714
|
+
# a+((-a)+(b+c)) => 0+b+c
|
715
|
+
# looks for the opposite of l somewhere in the addition tree r
# when found, returns r with that term replaced by 0 (and reduced again), eg
#  a+(b+(c+(-a))) => b+c+0
#  a+((-a)+(b+c)) => 0+b+c
# returns nil when the opposite of l does not appear
def reduce_rec_add(l, r)
  unary_minus = (l.kind_of?(Expression) and l.op == :- and not l.lexpr)
  opposite = unary_minus ? l.rexpr : Expression[:-, l]

  # recursive search & replace -lexpr by 0
  cancel = lambda { |node|
    # -l found
    next 0 if opposite == node
    next unless node.kind_of?(Expression) and node.op == :+

    # recurse, left operand first
    if (left = cancel[node.lexpr])
      Expression[left, node.op, node.rexpr].reduce_rec
    elsif (right = cancel[node.rexpr])
      Expression[node.lexpr, node.op, right].reduce_rec
    end
  }

  cancel[r]
end
|
739
|
+
|
740
|
+
# expr & 0xffff
|
741
|
+
# reduction of (e & mask), called by reduce_rec with an Expression e and an Integer mask
# for + and ^, drops an inner '& m' on either operand when m has all the bits of mask set
# for |, delegates to reduce_rec_composerol
# anything else is returned as Expression[e, :&, mask]
def reduce_rec_mod2(e, mask)
  # true if sub is (x & m) with m an Integer containing every bit of mask
  covers_mask = lambda { |sub|
    sub.kind_of?(Expression) and sub.op == :& and
    sub.rexpr.kind_of?(::Integer) and sub.rexpr & mask == mask
  }

  if [:+, :^].include?(e.op)
    if covers_mask[e.lexpr]
      # ((a&m) + b) & m  =>  (a+b) & m
      Expression[[e.lexpr.lexpr, e.op, e.rexpr], :&, mask].reduce_rec
    elsif covers_mask[e.rexpr]
      # (a + (b&m)) & m  =>  (a+b) & m
      Expression[[e.lexpr, e.op, e.rexpr.lexpr], :&, mask].reduce_rec
    else
      Expression[e, :&, mask]
    end
  elsif :| == e.op
    # rol/ror composition
    reduce_rec_composerol e, mask
  else
    Expression[e, :&, mask]
  end
end
|
762
|
+
|
763
|
+
# a check to see if an Expr is the composition of two rotations (rol eax, 4 ; rol eax, 6 => rol eax, 10)
|
764
|
+
# this is a bit too ugly to stay in the main reduce_rec body.
|
765
|
+
# a check to see if an Expr is the composition of two rotations (rol eax, 4 ; rol eax, 6 => rol eax, 10)
# e is matched against the pattern (var sh_op amt) | (var inv_sh_op inv_amt), where the two
# shift operators must be opposite (<< and >>); the same pattern is then matched again
# on the inner operand of var (which must be '<inner> & mask').
# when both levels match, the shift amounts are summed modulo ampl and a single
# rotation is rebuilt and reduced; otherwise returns Expression[e, :&, mask].
# NOTE: the condition assigns vars, ampl and ivars as it goes; ampl is assigned a first
# time for the outer rotation (and compared to mask), then again for the inner one,
# so the order of the clauses matters.
def reduce_rec_composerol(e, mask)
  m = Expression[['var', :sh_op, 'amt'], :|, ['var', :inv_sh_op, 'inv_amt']]
  # outer rotation: opposite shifts, amounts either both Integer (ampl = their sum)
  # or both '% n' with the same Integer n (ampl = n)
  if vars = e.match(m, 'var', :sh_op, 'amt', :inv_sh_op, 'inv_amt') and vars[:sh_op] == {:>> => :<<, :<< => :>>}[vars[:inv_sh_op]] and
     ((vars['amt'].kind_of?(::Integer) and vars['inv_amt'].kind_of?(::Integer) and ampl = vars['amt'] + vars['inv_amt']) or
      (vars['amt'].kind_of? Expression and vars['amt'].op == :% and vars['amt'].rexpr.kind_of? ::Integer and
       vars['inv_amt'].kind_of? Expression and vars['inv_amt'].op == :% and vars['amt'].rexpr == vars['inv_amt'].rexpr and ampl = vars['amt'].rexpr)) and
     mask == (1<<ampl)-1 and vars['var'].kind_of? Expression and	# it's a rotation

     vars['var'].op == :& and vars['var'].rexpr == mask and
     ivars = vars['var'].lexpr.match(m, 'var', :sh_op, 'amt', :inv_sh_op, 'inv_amt') and ivars[:sh_op] == {:>> => :<<, :<< => :>>}[ivars[:inv_sh_op]] and
     ((ivars['amt'].kind_of?(::Integer) and ivars['inv_amt'].kind_of?(::Integer) and ampl = ivars['amt'] + ivars['inv_amt']) or
      (ivars['amt'].kind_of? Expression and ivars['amt'].op == :% and ivars['amt'].rexpr.kind_of? ::Integer and
       ivars['inv_amt'].kind_of? Expression and ivars['inv_amt'].op == :% and ivars['amt'].rexpr == ivars['inv_amt'].rexpr and ampl = ivars['amt'].rexpr))
    if ivars[:sh_op] != vars[:sh_op]
      # ensure the rotations are the same orientation
      ivars[:sh_op], ivars[:inv_sh_op] = ivars[:inv_sh_op], ivars[:sh_op]
      ivars['amt'],  ivars['inv_amt']  = ivars['inv_amt'],  ivars['amt']
    end
    # compose: add the amounts of both rotations, modulo ampl
    amt = Expression[[vars['amt'], :+, ivars['amt']], :%, ampl]
    invamt = Expression[[vars['inv_amt'], :+, ivars['inv_amt']], :%, ampl]
    Expression[[[[ivars['var'], :&, mask], vars[:sh_op], amt], :|, [[ivars['var'], :&, mask], vars[:inv_sh_op], invamt]], :&, mask].reduce_rec
  else
    # not a composition of rotations: plain masking
    Expression[e, :&, mask]
  end
end
|
790
|
+
|
791
|
+
# a pattern-matching method
|
792
|
+
# Expression[42, :+, 28].match(Expression['any', :+, 28], 'any') => {'any' => 42}
|
793
|
+
# Expression[42, :+, 28].match(Expression['any', :+, 'any'], 'any') => false
|
794
|
+
# Expression[42, :+, 42].match(Expression['any', :+, 'any'], 'any') => {'any' => 42}
|
795
|
+
# vars can match anything except nil
|
796
|
+
# pattern matching entry point
# target is the pattern (an Expression), vars lists the pattern members acting as wildcards
# returns what match_rec returns: a hash wildcard => matched value, or false
def match(target, *vars)
  slots = {}
  vars.each { |name| slots[name] = nil }
  match_rec(target, slots)
end
|
799
|
+
|
800
|
+
# compares self to the pattern target, member by member (lexpr, op, rexpr)
# vars maps each wildcard to its captured value (nil while not captured yet) and is updated in place
# returns vars on success, false on the first mismatch or if target is not an Expression
def match_rec(target, vars)
  return false unless target.kind_of? Expression

  pattern = [target.lexpr, target.op, target.rexpr]
  actual = [@lexpr, @op, @rexpr]

  pattern.each_with_index { |targ, idx|
    exp = actual[idx]
    if targ and vars[targ]
      # wildcard already captured: the value must be the same
      return false if exp != vars[targ]
    elsif targ and vars.has_key? targ
      # first capture; a nil/false value cannot be captured
      vars[targ] = exp
      return false if not exp
    elsif targ.kind_of? ExpressionType
      return false if not exp.kind_of? ExpressionType or not exp.match_rec(targ, vars)
    else
      return false if targ != exp
    end
  }
  vars
end
|
815
|
+
|
816
|
+
# returns the array of non-numeric members of the expression
|
817
|
+
# if a variables appears 3 times, it will be present 3 times in the returned array
|
818
|
+
# lists the non-numeric leaves of the expression, right operand first
# ExpressionType operands contribute their own #externals
# nothing is deduplicated: a variable used 3 times is listed 3 times
def externals
  found = []
  [@rexpr, @lexpr].each { |operand|
    case operand
    when ExpressionType; found.concat operand.externals
    when nil, ::Numeric	# nothing to report
    else found << operand
    end
  }
  found
end
|
827
|
+
|
828
|
+
# returns the externals that appears in the expression, does not walk through other ExpressionType
|
829
|
+
# same as externals, but only recurses into Expression operands:
# other ExpressionType operands are ignored altogether
def expr_externals
  found = []
  [@rexpr, @lexpr].each { |operand|
    case operand
    when Expression; found.concat operand.expr_externals
    when nil, ::Numeric, ExpressionType	# skipped
    else found << operand
    end
  }
  found
end
|
838
|
+
|
839
|
+
# compact textual form: Expression[lexpr, op, rexpr]
# lexpr is omitted when absent, and the operator too for a unary +
# the leading 'Expression' of a nested operand's inspect is stripped
def inspect
  strip = lambda { |operand| operand.inspect.sub(/^Expression/, '') }

  out = 'Expression['.dup
  out << strip[@lexpr] << ', ' if @lexpr
  out << @op.inspect << ', ' if @lexpr or @op != :+
  out << strip[@rexpr] << ']'
end
|
842
|
+
|
843
|
+
Unknown = self[:unknown]
|
844
|
+
end
|
845
|
+
|
846
|
+
# an EncodedData relocation, specifies a value to patch in
|
847
|
+
# A value to be patched into an EncodedData once it can be computed.
class Relocation
  # the relocation value (an Expression)
  attr_accessor :target
  # the relocation expression type
  attr_accessor :type
  # the endianness of the relocation
  attr_accessor :endianness

  include Backtrace

  # raises ArgumentError unless target is an Expression and type/endianness are Symbols
  def initialize(target, type, endianness, backtrace = nil)
    unless target.kind_of?(Expression) and type.kind_of?(::Symbol) and endianness.kind_of?(::Symbol)
      raise ArgumentError, "bad args #{[target, type, endianness].inspect}"
    end
    @target = target
    @type = type
    @endianness = endianness
    @backtrace = backtrace
  end

  # fixup the encodeddata with value (reloc starts at off)
  # value is encoded with Expression.encode_imm using this relocation's type and endianness,
  # edata is filled up to off, then the encoded string overwrites edata.data at off
  def fixup(edata, off, value)
    encoded = Expression.encode_imm(value, @type, @endianness, @backtrace)
    edata.fill off
    edata.data[off, encoded.length] = encoded
  end

  # size of the relocation field, in bytes (bit size from Expression::INT_SIZE divided by 8)
  def length
    bits = Expression::INT_SIZE[@type]
    bits/8
  end
end
|
874
|
+
|
875
|
+
# a String-like, with export/relocation informations added
|
876
|
+
class EncodedData
|
877
|
+
# string with raw data
|
878
|
+
attr_accessor :data
|
879
|
+
# hash, key = offset within data, value = +Relocation+
|
880
|
+
attr_accessor :reloc
|
881
|
+
# hash, key = export name, value = offset within data - use add_export to update
|
882
|
+
attr_accessor :export
|
883
|
+
# hash, key = offset, value = 1st export name
|
884
|
+
attr_accessor :inv_export
|
885
|
+
# virtual size of data (all 0 by default, see +fill+)
|
886
|
+
attr_accessor :virtsize
|
887
|
+
# arbitrary pointer, often used when decoding immediates
|
888
|
+
# may be initialized with an export value
|
889
|
+
attr_reader :ptr # custom writer
|
890
|
+
# sets the pointer; an export name is translated to its offset
def ptr=(p)
  @ptr = (@export[p] or p)
end
|
891
|
+
|
892
|
+
# opts' keys in :reloc, :export, :virtsize, defaults to empty/empty/data.length
|
893
|
+
# data:: the raw data (String-like)
# opts:: may hold :reloc, :export and :virtsize, defaulting to {}, {} and data.length
# the reverse export map is derived from the exports, and ptr starts at 0
def initialize(data = '', opts={})
  relocs = opts[:reloc]
  exports = opts[:export]
  vsize = opts[:virtsize]

  @data = data
  @reloc = relocs || {}
  @export = exports || {}
  @inv_export = @export.invert
  @virtsize = vsize || @data.length
  @ptr = 0
end
|
901
|
+
|
902
|
+
# exports label at offset off (defaults to the current ptr)
# the reverse map entry for off is written only if there is none yet, or if set_inv is true
def add_export(label, off=@ptr, set_inv=false)
  @export[label] = off
  @inv_export[off] = label if set_inv or not @inv_export[off]
end
|
908
|
+
|
909
|
+
# removes the export named label
# off (defaults to the current ptr) is the offset whose reverse map entry is refreshed:
# it is pointed to another export still at that offset if there is one, else deleted
def del_export(label, off=@ptr)
  @export.delete label
  # Hash#key replaces Hash#index, which no longer exists in ruby 3
  if e = @export.key(off)
    @inv_export[off] = e
  else
    @inv_export.delete off
  end
end
|
917
|
+
|
918
|
+
# returns the size of raw data, that is [data.length, last relocation end].max
|
919
|
+
# size of the raw data: the largest of data.length and the end offset of every relocation
def rawsize
  reloc_ends = @reloc.map { |off, rel| off + rel.length }
  reloc_ends.inject(@data.length) { |best, stop| stop > best ? stop : best }
end
|
922
|
+
# String-like
|
923
|
+
alias length virtsize
|
924
|
+
# String-like
|
925
|
+
alias size virtsize
|
926
|
+
|
927
|
+
# true when the virtual size is 0
def empty?
  @virtsize == 0
end
|
930
|
+
|
931
|
+
# true when ptr (converted with to_i) has reached or passed the virtual size
def eos?
  position = ptr.to_i
  position >= @virtsize
end
|
934
|
+
|
935
|
+
# returns a copy of itself, with reloc/export duped (but not deep)
|
936
|
+
# returns a new object of the same class built from dups of data, reloc and export
# (shallow dups: the relocation objects and label names themselves are shared)
# ptr is not copied
def dup
  options = { :reloc => @reloc.dup, :export => @export.dup, :virtsize => @virtsize }
  self.class.new(@data.dup, options)
end
|
939
|
+
|
940
|
+
# resolve relocations:
|
941
|
+
# calculate each reloc target using Expression#bind(binding)
|
942
|
+
# if numeric, replace the raw data with the encoding of this value (+fill+s preceding data if needed) and remove the reloc
|
943
|
+
# if replace_target is true, the reloc target is replaced with its bound counterpart
|
944
|
+
# resolve relocations:
# each reloc target is bound with +binding+ then reduced
# an Integer result is written into the data through Relocation#fixup and the reloc is removed
# otherwise, if replace_target is true, the reloc target is replaced by the bound value
def fixup_choice(binding, replace_target)
  # iterate on a snapshot of the keys, as entries are deleted on the way
  @reloc.keys.each { |off|
    rel = @reloc[off]
    val = rel.target.bind(binding).reduce
    if val.kind_of? Integer
      rel.fixup(self, off, val)
      @reloc.delete(off)	# delete only if not overflowed
    elsif replace_target
      rel.target = val
    end
  }
end
|
956
|
+
|
957
|
+
# +fixup_choice+ binding, false
|
958
|
+
# resolves what can be resolved with binding, leaving unresolved reloc targets untouched
# (fixup_choice with replace_target = false)
def fixup(binding)
  fixup_choice(binding, false)
end
|
961
|
+
|
962
|
+
# +fixup_choice+ binding, true
|
963
|
+
# same as fixup, but unresolved reloc targets are replaced by their bound version
# (fixup_choice with replace_target = true)
def fixup!(binding)
  fixup_choice(binding, true)
end
|
966
|
+
|
967
|
+
# returns a default binding suitable for use in +fixup+
|
968
|
+
# every export is expressed as base + offset
|
969
|
+
# base defaults to the first export name + its offset
|
970
|
+
# returns a default binding suitable for use in +fixup+
# every export is expressed as base + offset
# when base is not given, it is derived from the export with the lowest offset:
# its name if that offset is 0, else Expression[name, :-, offset]
# returns {} when there is no export and no base
def binding(base = nil)
  if not base
    # Hash#key replaces Hash#index, which no longer exists in ruby 3
    key = @export.key(@export.values.min)
    return {} if not key
    base = (@export[key] == 0 ? key : Expression[key, :-, @export[key]])
  end
  @export.inject({}) { |bound, (n, o)| bound.update n => Expression.new(:+, o, base) }
end
|
978
|
+
|
979
|
+
# returns an array of variables that needs to be defined for a complete #fixup
|
980
|
+
# ie the list of externals for all relocations
|
981
|
+
# returns the variables that must be defined for a complete #fixup:
# the externals of every relocation target (flattened, without duplicates),
# minus the names exported by self
def reloc_externals
  per_reloc = @reloc.values.map { |rel| rel.target.externals }
  wanted = per_reloc.flatten.uniq
  wanted - @export.keys
end
|
984
|
+
|
985
|
+
# returns the offset where the relocation for target t is to be applied
|
986
|
+
# returns the offset of the first relocation whose target equals t
# (t is converted with Expression[] first), nil if there is none
def offset_of_reloc(t)
  wanted = Expression[t]
  @reloc.each { |off, rel| return off if rel.target == wanted }
  nil
end
|
990
|
+
|
991
|
+
# fill virtual space by repeating pattern (String) up to len
|
992
|
+
# expand self if len is larger than self.virtsize
|
993
|
+
# makes sure the raw data is at least len bytes long (default: the virtual size),
# padding it on the right by repeating pattern (default: a zero byte)
# the virtual size is raised to len when it is smaller; nothing is ever shrunk
def fill(len = @virtsize, pattern = [0].pack('C'))
  if len > @virtsize
    @virtsize = len
  end
  if len > @data.length
    @data = @data.to_str.ljust(len, pattern)
  end
end
|
997
|
+
|
998
|
+
# rounds up virtsize to next multiple of len
|
999
|
+
# rounds up virtsize to next multiple of len (see EncodedData.align_size)
# the raw data is only extended, through fill, when a pattern is given
def align(len, pattern=nil)
  @virtsize = EncodedData.align_size(@virtsize, len)
  return unless pattern
  fill(@virtsize, pattern)
end
|
1003
|
+
|
1004
|
+
# returns the value val rounded up to next multiple of len
|
1005
|
+
# returns the value val rounded up to next multiple of len
# a len of 0 means no alignment: val is returned unchanged
def self.align_size(val, len)
  len == 0 ? val : ((val + len - 1) / len).to_i * len
end
|
1009
|
+
|
1010
|
+
# concatenation of another +EncodedData+ (or nil/Fixnum/anything supporting String#<<)
|
1011
|
+
# concatenation of another +EncodedData+ (or nil/Integer/anything supporting String#<<)
# nil::         no-op
# Integer::     appended to the data with String#<<, virtsize grows by 1
# EncodedData:: its relocs and exports are copied with their offsets shifted by the
#               current virtsize, its data is appended, virtsize grows by other.virtsize;
#               raises if a label is exported by both at different final offsets
# other::       appended with String#<<, virtsize grows by other.length
# the pending virtual space is materialised (fill) before any data is appended
# returns self
def << other
  case other
  when nil
  when ::Integer	# was ::Fixnum, a constant recent rubies no longer define
    fill
    @data = @data.to_str if not @data.kind_of? String
    @data << other
    @virtsize += 1
  when EncodedData
    fill if not other.data.empty?
    other.reloc.each  { |k, v| @reloc[k + @virtsize] = v  } if not other.reloc.empty?
    if not other.export.empty?
      other.export.each { |k, v|
        if @export[k] and @export[k] != v + @virtsize
          # collect every conflicting label for the error message
          cf = (other.export.keys & @export.keys).find_all { |k_| other.export[k_] != @export[k_] - @virtsize }
          raise "edata merge: label conflict #{cf.inspect}"
        end
        @export[k] = v + @virtsize
      }
      other.inv_export.each { |k, v| @inv_export[@virtsize + k] = v }
    end
    if @data.empty?; @data = other.data.dup
    elsif not @data.kind_of?(String); @data = @data.to_str << other.data
    else @data << other.data
    end
    @virtsize += other.virtsize
  else
    fill
    if @data.empty?; @data = other.dup
    elsif not @data.kind_of?(String); @data = @data.to_str << other
    else @data << other
    end
    @virtsize += other.length
  end

  self
end
|
1048
|
+
|
1049
|
+
# equivalent to dup << other, filters out Integers & nil
|
1050
|
+
# equivalent to dup << other, but refuses nil/false and Integers (ArgumentError)
def + other
  rejected = (!other or other.kind_of?(Integer))
  raise ArgumentError if rejected
  dup << other
end
|
1054
|
+
|
1055
|
+
# slice
|
1056
|
+
# Slices this EncodedData.
#
# Call forms:
#   edata[from]        returns @data[from]
#   edata[range]       returns a new EncodedData covering the range
#   edata[from, len]   returns a new EncodedData of (at most) len bytes
#
# from:: Integer offset (negative values are relative to @virtsize), a label
#        found in @export, or a Range whose bounds follow the same rules
# len::  length of the slice; clipped so that it does not exceed @virtsize
#
# The returned EncodedData carries the relocs fully contained in the slice and
# the exports located in it (end offset included), rebased to the slice start.
# Returns nil when +from+ is out of bounds or when the length is negative.
def [](from, len=nil)
  if not len and from.kind_of? Range
    b = from.begin
    e = from.end
    b = @export[b] if @export[b]
    e = @export[e] if @export[e]
    b = b + @virtsize if b < 0
    e = e + @virtsize if e < 0
    len = e - b
    len += 1 if not from.exclude_end?
    from = b
  end
  from = @export[from] if @export[from]
  from = from + @virtsize if from < 0
  return if from > @virtsize or from < 0

  return @data[from] if not len
  # same convention as String#[] : a negative length yields nil
  return if len < 0
  len = @virtsize - from if from+len > @virtsize
  # @data[from, len] is nil when from lies past the real data (purely virtual
  # area): the slice then has no real data, only a virtual size
  ret = EncodedData.new(@data[from, len] || String.new)
  ret.virtsize = len
  @reloc.each { |o, r|
    ret.reloc[o - from] = r if o >= from and o + r.length <= from+len
  }
  @export.each { |e_, o|
    ret.export[e_] = o - from if o >= from and o <= from+len # XXX include end ?
  }
  @inv_export.each { |o, e_|
    ret.inv_export[o-from] = e_ if o >= from and o <= from+len
  }
  ret
end
|
1087
|
+
|
1088
|
+
# slice replacement, supports size change (shifts following relocs/exports)
|
1089
|
+
# discards old exports/relocs from the overwritten space
|
1090
|
+
# Replaces a slice of this EncodedData with +val+. The replacement may have a
# different size than the slice: relocs/exports located after the slice are
# shifted accordingly. Metadata lying inside the overwritten span is discarded,
# and the metadata carried by +val+ is merged in at +from+.
#
# Call forms:
#   edata[from] = val        overwrite val.length bytes at from
#   edata[range] = val       replace the range (bounds may be labels/negative)
#   edata[from, len] = val   replace len bytes at from
#
# from:: Integer offset (negative values are relative to @virtsize) or a label
#        found in @export
# len::  number of bytes replaced; clipped so the slice stays within @virtsize
# val::  an Integer (converted with #chr in the 2-argument form) or anything
#        accepted by EncodedData#<<
#
# Raises a RuntimeError on a non-Integer offset or an invalid slice length.
def []=(from, len, val=nil)
  # 2-argument form: the second argument is the value
  if not val
    val = len
    len = nil
  end
  if not len and from.kind_of? ::Range
    b = from.begin
    e = from.end
    b = @export[b] if @export[b]
    e = @export[e] if @export[e]
    b = b + @virtsize if b < 0
    e = e + @virtsize if e < 0
    len = e - b
    len += 1 if not from.exclude_end?
    from = b
  end
  from = @export[from] || from
  raise "invalid offset #{from}" if not from.kind_of? ::Integer
  from = from + @virtsize if from < 0

  if not len
    val = val.chr if val.kind_of? ::Integer
    len = val.length
  end
  raise "invalid slice length #{len}" if not len.kind_of?(::Integer) or len < 0

  # the replaced span cannot extend past the current end
  if from >= @virtsize
    len = 0
  elsif from+len > @virtsize
    len = @virtsize-from
  end

  val = EncodedData.new << val

  # remove overwritten metadata
  # a label sitting exactly at +from+ is kept: it still marks that position
  @export.delete_if { |name, off| off > from and off < from + len }
  @inv_export.delete_if { |off, name| off > from and off < from + len }
  # a reloc is dropped as soon as it overlaps the overwritten span
  @reloc.delete_if { |off, rel| off + rel.length > from and off < from + len }

  # shrink/grow
  if val.length != len
    diff = val.length - len
    @export.keys.each { |name| @export[name] = @export[name] + diff if @export[name] > from }
    # offset-keyed tables are re-keyed in two passes, so that a shifted entry
    # never overwrites a neighbour that has not been moved yet
    moved = {}
    @inv_export.keys.each { |off| moved[off + diff] = @inv_export.delete(off) if off > from }
    @inv_export.update(moved)
    moved = {}
    @reloc.keys.each { |off| moved[off + diff] = @reloc.delete(off) if off > from }
    @reloc.update(moved)
    if @virtsize >= from+len
      @virtsize += diff
    end
  end

  @virtsize = from + val.length if @virtsize < from + val.length

  if from + len < @data.length # patch real data
    val.fill
    @data[from, len] = val.data
  elsif not val.data.empty? # patch end of real data
    # zero-pad the gap between the end of the real data and +from+
    @data << ([0].pack('C')*(from-@data.length)) if @data.length < from
    @data[from..-1] = val.data
  else # patch end of real data with fully virtual
    @data = @data[0, from]
  end
  val.export.each { |name, off| @export[name] = from + off }
  val.inv_export.each { |off, name| @inv_export[from+off] = name }
  val.reloc.each { |off, rel| @reloc[from + off] = rel }
end
|
1153
|
+
|
1154
|
+
# replace a portion of self
|
1155
|
+
# from/to may be Integers (offsets) or labels (from self.export)
|
1156
|
+
# content is a String or an EncodedData, which will be inserted in the specified location (padded if necessary)
|
1157
|
+
# raise if the string does not fit in.
|
1158
|
+
# Overwrites the start of the area [from, to) with +content+.
#
# from, to:: Integer offsets (negative values are relative to @virtsize, as in
#            #[] and #[]=) or labels found in @export
# content::  String or EncodedData written at +from+; only content.length bytes
#            are replaced, the rest of the area up to +to+ is left untouched
#
# Raises a RuntimeError when an offset is neither an Integer nor a known label,
# and EncodeError when +content+ is longer than the area.
def patch(from, to, content)
  from = @export[from] || from
  raise "invalid offset specification #{from}" if not from.kind_of? Integer
  to = @export[to] || to
  raise "invalid offset specification #{to}" if not to.kind_of? Integer
  # normalise both bounds before measuring the area: #[]= interprets a
  # negative +from+ relative to the end, so the fit check must do the same
  from += @virtsize if from < 0
  to += @virtsize if to < 0
  raise EncodeError, 'cannot patch data: new content too long' if to - from < content.length
  self[from, content.length] = content
end
|
1166
|
+
|
1167
|
+
# returns a list of offsets where /pat/ can be found inside @data
|
1168
|
+
# scan is done per chunk of chunksz bytes, with a margin for chunk-overlapping patterns
|
1169
|
+
# if a block is given, yields each offset found and only includes it in the result when the block returns a true value
|
1170
|
+
# Returns the list of offsets at which +pat+ matches inside @data.
#
# pat::     a Regexp, or a String (searched literally)
# chunksz:: number of bytes scanned per iteration (default 4MB)
# margin::  extra bytes read past each chunk, so that a match straddling a
#           chunk boundary is still found (default 64kB)
#
# When a block is given, each offset is yielded and kept in the result only if
# the block returns a true value.
# Raises ArgumentError when +chunksz+ is not strictly positive.
def pattern_scan(pat, chunksz=nil, margin=nil)
  chunksz ||= 4*1024*1024 # scan 4MB at a time
  margin ||= 65536 # add this much bytes at each chunk to find /pat/ over chunk boundaries
  # the scan advances by chunksz bytes per iteration: it would never terminate otherwise
  raise ArgumentError, "invalid chunk size #{chunksz}" if chunksz <= 0
  pat = Regexp.new(Regexp.escape(pat)) if pat.kind_of? ::String

  found = []
  chunkoff = 0
  while chunkoff < @data.length
    chunk = @data[chunkoff, chunksz+margin].to_str
    off = 0
    while (match_off = chunk[off..-1] =~ pat)
      # a match starting in the margin belongs to the next chunk
      break if off+match_off >= chunksz # match fully in margin
      match_addr = chunkoff + off + match_off
      found << match_addr if not block_given? or yield(match_addr)
      off += match_off + 1
      # XXX +1 or +lastmatch.length ?
      # 'aaaabc'.pattern_scan(/a*bc/) will match 5 times here
    end
    chunkoff += chunksz
  end
  found
end
|
1192
|
+
end
|
1193
|
+
end
|