metasm 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/BUGS +11 -0
- data/CREDITS +17 -0
- data/README +270 -0
- data/TODO +114 -0
- data/doc/code_organisation.txt +146 -0
- data/doc/const_missing.txt +16 -0
- data/doc/core_classes.txt +75 -0
- data/doc/feature_list.txt +53 -0
- data/doc/index.txt +59 -0
- data/doc/install_notes.txt +170 -0
- data/doc/style.css +3 -0
- data/doc/use_cases.txt +18 -0
- data/lib/metasm.rb +80 -0
- data/lib/metasm/arm.rb +12 -0
- data/lib/metasm/arm/debug.rb +39 -0
- data/lib/metasm/arm/decode.rb +167 -0
- data/lib/metasm/arm/encode.rb +77 -0
- data/lib/metasm/arm/main.rb +75 -0
- data/lib/metasm/arm/opcodes.rb +177 -0
- data/lib/metasm/arm/parse.rb +130 -0
- data/lib/metasm/arm/render.rb +55 -0
- data/lib/metasm/compile_c.rb +1457 -0
- data/lib/metasm/dalvik.rb +8 -0
- data/lib/metasm/dalvik/decode.rb +196 -0
- data/lib/metasm/dalvik/main.rb +60 -0
- data/lib/metasm/dalvik/opcodes.rb +366 -0
- data/lib/metasm/decode.rb +213 -0
- data/lib/metasm/decompile.rb +2659 -0
- data/lib/metasm/disassemble.rb +2068 -0
- data/lib/metasm/disassemble_api.rb +1280 -0
- data/lib/metasm/dynldr.rb +1329 -0
- data/lib/metasm/encode.rb +333 -0
- data/lib/metasm/exe_format/a_out.rb +194 -0
- data/lib/metasm/exe_format/autoexe.rb +82 -0
- data/lib/metasm/exe_format/bflt.rb +189 -0
- data/lib/metasm/exe_format/coff.rb +455 -0
- data/lib/metasm/exe_format/coff_decode.rb +901 -0
- data/lib/metasm/exe_format/coff_encode.rb +1078 -0
- data/lib/metasm/exe_format/dex.rb +457 -0
- data/lib/metasm/exe_format/dol.rb +145 -0
- data/lib/metasm/exe_format/elf.rb +923 -0
- data/lib/metasm/exe_format/elf_decode.rb +979 -0
- data/lib/metasm/exe_format/elf_encode.rb +1375 -0
- data/lib/metasm/exe_format/macho.rb +827 -0
- data/lib/metasm/exe_format/main.rb +228 -0
- data/lib/metasm/exe_format/mz.rb +164 -0
- data/lib/metasm/exe_format/nds.rb +172 -0
- data/lib/metasm/exe_format/pe.rb +437 -0
- data/lib/metasm/exe_format/serialstruct.rb +246 -0
- data/lib/metasm/exe_format/shellcode.rb +114 -0
- data/lib/metasm/exe_format/xcoff.rb +167 -0
- data/lib/metasm/gui.rb +23 -0
- data/lib/metasm/gui/cstruct.rb +373 -0
- data/lib/metasm/gui/dasm_coverage.rb +199 -0
- data/lib/metasm/gui/dasm_decomp.rb +369 -0
- data/lib/metasm/gui/dasm_funcgraph.rb +103 -0
- data/lib/metasm/gui/dasm_graph.rb +1354 -0
- data/lib/metasm/gui/dasm_hex.rb +543 -0
- data/lib/metasm/gui/dasm_listing.rb +599 -0
- data/lib/metasm/gui/dasm_main.rb +906 -0
- data/lib/metasm/gui/dasm_opcodes.rb +291 -0
- data/lib/metasm/gui/debug.rb +1228 -0
- data/lib/metasm/gui/gtk.rb +884 -0
- data/lib/metasm/gui/qt.rb +495 -0
- data/lib/metasm/gui/win32.rb +3004 -0
- data/lib/metasm/gui/x11.rb +621 -0
- data/lib/metasm/ia32.rb +14 -0
- data/lib/metasm/ia32/compile_c.rb +1523 -0
- data/lib/metasm/ia32/debug.rb +193 -0
- data/lib/metasm/ia32/decode.rb +1167 -0
- data/lib/metasm/ia32/decompile.rb +564 -0
- data/lib/metasm/ia32/encode.rb +314 -0
- data/lib/metasm/ia32/main.rb +233 -0
- data/lib/metasm/ia32/opcodes.rb +872 -0
- data/lib/metasm/ia32/parse.rb +327 -0
- data/lib/metasm/ia32/render.rb +91 -0
- data/lib/metasm/main.rb +1193 -0
- data/lib/metasm/mips.rb +11 -0
- data/lib/metasm/mips/compile_c.rb +7 -0
- data/lib/metasm/mips/decode.rb +253 -0
- data/lib/metasm/mips/encode.rb +51 -0
- data/lib/metasm/mips/main.rb +72 -0
- data/lib/metasm/mips/opcodes.rb +443 -0
- data/lib/metasm/mips/parse.rb +51 -0
- data/lib/metasm/mips/render.rb +43 -0
- data/lib/metasm/os/gnu_exports.rb +270 -0
- data/lib/metasm/os/linux.rb +1112 -0
- data/lib/metasm/os/main.rb +1686 -0
- data/lib/metasm/os/remote.rb +527 -0
- data/lib/metasm/os/windows.rb +2027 -0
- data/lib/metasm/os/windows_exports.rb +745 -0
- data/lib/metasm/parse.rb +876 -0
- data/lib/metasm/parse_c.rb +3938 -0
- data/lib/metasm/pic16c/decode.rb +42 -0
- data/lib/metasm/pic16c/main.rb +17 -0
- data/lib/metasm/pic16c/opcodes.rb +68 -0
- data/lib/metasm/ppc.rb +11 -0
- data/lib/metasm/ppc/decode.rb +264 -0
- data/lib/metasm/ppc/decompile.rb +251 -0
- data/lib/metasm/ppc/encode.rb +51 -0
- data/lib/metasm/ppc/main.rb +129 -0
- data/lib/metasm/ppc/opcodes.rb +410 -0
- data/lib/metasm/ppc/parse.rb +52 -0
- data/lib/metasm/preprocessor.rb +1277 -0
- data/lib/metasm/render.rb +130 -0
- data/lib/metasm/sh4.rb +8 -0
- data/lib/metasm/sh4/decode.rb +336 -0
- data/lib/metasm/sh4/main.rb +292 -0
- data/lib/metasm/sh4/opcodes.rb +381 -0
- data/lib/metasm/x86_64.rb +12 -0
- data/lib/metasm/x86_64/compile_c.rb +1025 -0
- data/lib/metasm/x86_64/debug.rb +59 -0
- data/lib/metasm/x86_64/decode.rb +268 -0
- data/lib/metasm/x86_64/encode.rb +264 -0
- data/lib/metasm/x86_64/main.rb +135 -0
- data/lib/metasm/x86_64/opcodes.rb +118 -0
- data/lib/metasm/x86_64/parse.rb +68 -0
- data/misc/bottleneck.rb +61 -0
- data/misc/cheader-findpppath.rb +58 -0
- data/misc/hexdiff.rb +74 -0
- data/misc/hexdump.rb +55 -0
- data/misc/metasm-all.rb +13 -0
- data/misc/objdiff.rb +47 -0
- data/misc/objscan.rb +40 -0
- data/misc/pdfparse.rb +661 -0
- data/misc/ppc_pdf2oplist.rb +192 -0
- data/misc/tcp_proxy_hex.rb +84 -0
- data/misc/txt2html.rb +440 -0
- data/samples/a.out.rb +31 -0
- data/samples/asmsyntax.rb +77 -0
- data/samples/bindiff.rb +555 -0
- data/samples/compilation-steps.rb +49 -0
- data/samples/cparser_makestackoffset.rb +55 -0
- data/samples/dasm-backtrack.rb +38 -0
- data/samples/dasmnavig.rb +318 -0
- data/samples/dbg-apihook.rb +228 -0
- data/samples/dbghelp.rb +143 -0
- data/samples/disassemble-gui.rb +102 -0
- data/samples/disassemble.rb +133 -0
- data/samples/dump_upx.rb +95 -0
- data/samples/dynamic_ruby.rb +1929 -0
- data/samples/elf_list_needed.rb +46 -0
- data/samples/elf_listexports.rb +33 -0
- data/samples/elfencode.rb +25 -0
- data/samples/exeencode.rb +128 -0
- data/samples/factorize-headers-elfimports.rb +77 -0
- data/samples/factorize-headers-peimports.rb +109 -0
- data/samples/factorize-headers.rb +43 -0
- data/samples/gdbclient.rb +583 -0
- data/samples/generate_libsigs.rb +102 -0
- data/samples/hotfix_gtk_dbg.rb +59 -0
- data/samples/install_win_env.rb +78 -0
- data/samples/lindebug.rb +924 -0
- data/samples/linux_injectsyscall.rb +95 -0
- data/samples/machoencode.rb +31 -0
- data/samples/metasm-shell.rb +91 -0
- data/samples/pe-hook.rb +69 -0
- data/samples/pe-ia32-cpuid.rb +203 -0
- data/samples/pe-mips.rb +35 -0
- data/samples/pe-shutdown.rb +78 -0
- data/samples/pe-testrelocs.rb +51 -0
- data/samples/pe-testrsrc.rb +24 -0
- data/samples/pe_listexports.rb +31 -0
- data/samples/peencode.rb +19 -0
- data/samples/peldr.rb +494 -0
- data/samples/preprocess-flatten.rb +19 -0
- data/samples/r0trace.rb +308 -0
- data/samples/rubstop.rb +399 -0
- data/samples/scan_pt_gnu_stack.rb +54 -0
- data/samples/scanpeexports.rb +62 -0
- data/samples/shellcode-c.rb +40 -0
- data/samples/shellcode-dynlink.rb +146 -0
- data/samples/source.asm +34 -0
- data/samples/struct_offset.rb +47 -0
- data/samples/testpe.rb +32 -0
- data/samples/testraw.rb +45 -0
- data/samples/win32genloader.rb +132 -0
- data/samples/win32hooker-advanced.rb +169 -0
- data/samples/win32hooker.rb +96 -0
- data/samples/win32livedasm.rb +33 -0
- data/samples/win32remotescan.rb +133 -0
- data/samples/wintrace.rb +92 -0
- data/tests/all.rb +8 -0
- data/tests/dasm.rb +39 -0
- data/tests/dynldr.rb +35 -0
- data/tests/encodeddata.rb +132 -0
- data/tests/ia32.rb +82 -0
- data/tests/mips.rb +116 -0
- data/tests/parse_c.rb +239 -0
- data/tests/preprocessor.rb +269 -0
- data/tests/x86_64.rb +62 -0
- metadata +255 -0
|
@@ -0,0 +1,327 @@
|
|
|
1
|
+
# This file is part of Metasm, the Ruby assembly manipulation suite
|
|
2
|
+
# Copyright (C) 2006-2009 Yoann GUILLOT
|
|
3
|
+
#
|
|
4
|
+
# Licence is LGPL, see LICENCE in the top-level directory
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
require 'metasm/ia32/opcodes'
|
|
8
|
+
require 'metasm/ia32/encode'
|
|
9
|
+
require 'metasm/parse'
|
|
10
|
+
|
|
11
|
+
module Metasm
|
|
12
|
+
class Ia32
|
|
13
|
+
class ModRM
  # Parses a memory operand such as "dword ptr fs:[eax + 4*ebx + 12]",
  # +otok+ being the first token of the argument (already read by the caller).
  #
  # Must be called before the SegReg parser, which could otherwise match only
  # the segment part of a modrm.
  #
  # Returns
  # * a new ModRM built from (address size, pointed size, scale, index, base,
  #   immediate, segment),
  # * a bare SegReg when a segment register name is not followed by ':',
  # * nil when no '[' is found (and no size/segment prefix was seen).
  # Errors are reported with <tt>raise otok, message</tt>.
  def self.parse(lexer, otok, cpu)
    tok = otok

    # optional operand size specifier: byte/word/dword/qword/oword or _<N>bits
    if tok and tok.type == :string and tok.raw =~ /^(?:byte|[dqo]?word|_(\d+)bits)$/
      ptsz =
        if $1
          $1.to_i
        else
          {'byte' => 8, 'word' => 16, 'dword' => 32, 'qword' => 64, 'oword' => 128}.fetch(tok.raw) {
            raise otok, 'mrm: bad ptr size'
          }
        end
      lexer.skip_space
      tok = lexer.readtok
      # the 'ptr' keyword after the size is optional
      if tok and tok.type == :string and tok.raw == 'ptr'
        lexer.skip_space
        tok = lexer.readtok
      end
    end

    # optional segment selector in front of the brackets (fs:[...])
    if tok and tok.type == :string and seg = SegReg.s_to_i[tok.raw]
      lexer.skip_space
      seg = SegReg.new(seg)
      ntok = lexer.readtok
      unless ntok and ntok.type == :punct and ntok.raw == ':'
        # a lone segment register: only valid without a size specifier
        raise otok, 'invalid modrm' if ptsz
        lexer.unreadtok ntok
        return seg
      end
      lexer.skip_space
      tok = lexer.readtok
    end

    # without an opening bracket this is not a modrm
    unless tok and tok.type == :punct and tok.raw == '['
      raise otok, 'invalid modrm' if ptsz or seg
      return
    end
    lexer.skip_space_eol

    # fasm syntax: segment selector inside the brackets ([fs:eax])
    tok = lexer.readtok
    if tok and tok.type == :string and not seg and seg = SegReg.s_to_i[tok.raw]
      ntok = lexer.readtok
      raise otok, 'invalid modrm' unless ntok and ntok.type == :punct and ntok.raw == ':'
      seg = SegReg.new(seg)
      lexer.skip_space_eol
    else
      lexer.unreadtok tok
    end

    # the bracket content is read as a generic expression, then ']' is required
    content = Expression.parse(lexer)
    lexer.skip_space_eol
    closing = lexer.readtok if content
    raise(otok, 'bad modrm') unless content and closing and closing.type == :punct and closing.raw == ']'

    # replaces, in place, the String leaves naming a register by cpu.str_to_reg's result
    to_regs = lambda { |node|
      case node
      when Expression
        node.lexpr = to_regs.call(node.lexpr)
        node.rexpr = to_regs.call(node.rexpr)
        node
      when String
        cpu.str_to_reg(node) || node
      else
        node
      end
    }

    scale = index = base = imm = nil

    # dispatches every term of the sum to base, scaled index or immediate
    collect = lambda { |node|
      case node
      when nil
        # empty side of an expression: nothing to record
      when Reg
        if base
          # second plain register: it becomes the index, with scale 1
          raise otok, 'mrm: too many regs' if index
          index = node
          scale = 1
        else
          base = node
        end
      when Expression
        if node.op == :* and (node.rexpr.kind_of?(Reg) or node.lexpr.kind_of?(Reg))
          # scaled index, written either scale*reg or reg*scale
          raise otok, 'mrm: too many indexes' if index
          scale = node.lexpr
          index = node.rexpr
          scale, index = index, scale if scale.kind_of? Reg
          raise otok, 'mrm: bad scale' unless scale.kind_of? Integer
        elsif node.op == :+
          collect.call(node.lexpr)
          collect.call(node.rexpr)
        else
          # found (a part of) the immediate
          imm = Expression[imm, :+, node]
        end
      else
        # found (a part of) the immediate
        imm = Expression[imm, :+, node]
      end
    }

    collect.call(to_regs.call(content.reduce))

    # a register left inside the immediate means it was used in an unsupported way
    raise otok, 'mrm: reg in imm' if imm.kind_of?(Expression) and not imm.externals.grep(Reg).empty?

    # default address size comes from the base register, else from the index
    adsz =
      if base
        base.sz
      elsif index
        index.sz
      end

    # ptsz may still be nil here, it is fixed up later (parse_instruction_fixup)
    new adsz, ptsz, scale, index, base, imm, seg
  end
end
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
# handles cpu-specific parser instruction, falls back to Ancestor's version if unknown keyword
|
|
139
|
+
# XXX changing the cpu size in the middle of the code may have baaad effects...
|
|
140
|
+
def parse_parser_instruction(lexer, instr)
  # '.mode' and '.bits' (compared case-insensitively) take a single '16' or
  # '32' token and store it, as an Integer, in @size; any other keyword is
  # forwarded to super.
  directive = instr.raw.downcase
  return super(lexer, instr) unless directive == '.mode' or directive == '.bits'

  lexer.skip_space
  tok = lexer.readtok
  raise instr, 'invalid cpu mode' unless tok and tok.type == :string and (tok.raw == '16' or tok.raw == '32')

  @size = tok.raw.to_i
  lexer.skip_space
  # only an end of line (or the end of input) may follow the size
  raise instr, 'syntax error' if ntok = lexer.nexttok and ntok.type != :eol
end
|
|
154
|
+
|
|
155
|
+
def parse_prefix(i, pfx)
  # Stores the instruction prefix named +pfx+ into i.prefix.
  # Returns the stored value (always truthy) when pfx is a known prefix and
  # nil otherwise; i.prefix is initialised to {} in both cases.
  # XXX check for redefinition ?
  i.prefix ||= {}

  # the table is rebuilt on every call so that each instruction gets its own strings
  key, value = {
    'lock'   => [:lock, true],
    'rep'    => [:rep, 'rep'],
    'repe'   => [:rep, 'repz'],
    'repz'   => [:rep, 'repz'],
    'repne'  => [:rep, 'repnz'],
    'repnz'  => [:rep, 'repnz'],
    'code16' => [:sz, 16],
    'code32' => [:sz, 32]
  }[pfx]

  i.prefix[key] = value if key
end
|
|
168
|
+
|
|
169
|
+
def parse_argregclasslist
  # Register classes whose names are reserved words in instruction arguments.
  # The FP register class must stay last: parse_argument fetches it with
  # .last to handle the ST(n) syntax.
  [Reg, SimdReg, SegReg, DbgReg, CtrlReg, FpReg]
end
|
|
172
|
+
def parse_modrm(lex, tok, cpu)
  # hook used by parse_argument: delegates to this cpu's ModRM class parser
  ModRM.parse lex, tok, cpu
end
|
|
175
|
+
|
|
176
|
+
# parses an arbitrary ia32 instruction argument
|
|
177
|
+
def parse_argument(lexer)
  # Reads one instruction argument from +lexer+ (a String is first wrapped in
  # an AsmPreprocessor). Tried in this order:
  # 1. the ST(n) floating point register syntax,
  # 2. a modrm (which may also yield a bare segment register),
  # 3. a reserved register name,
  # 4. an expression, possibly the first half of a seg:addr far pointer.
  # Returns nil when there is no token left or nothing could be parsed.
  lexer = AsmPreprocessor.new(lexer) if lexer.kind_of? String

  # reserved names (registers/segments etc), computed once and cached
  @args_token ||= begin
    reserved = {}
    parse_argregclasslist.map { |klass| klass.s_to_i.keys }.flatten.each { |name| reserved[name] = true }
    reserved
  end

  lexer.skip_space
  tok = lexer.readtok
  return unless tok

  if tok.type == :string and tok.raw == 'ST'
    lexer.skip_space
    ntok = lexer.readtok
    if ntok and ntok.type == :punct and ntok.raw == '('
      # ST(n): a single digit, then the closing parenthesis
      lexer.skip_space
      digit = lexer.readtok
      raise tok, 'invalid FP register' unless digit and digit.type == :string and digit.raw =~ /^[0-9]$/
      lexer.skip_space
      ntok = lexer.readtok
      raise tok, 'invalid FP register' unless ntok and ntok.type == :punct and ntok.raw == ')'

      # rebuild the full register name inside the token itself
      tok.raw << '(' << digit.raw << ')'
      fpr = parse_argregclasslist.last
      raise tok, 'invalid FP register' unless fpr.s_to_i.has_key? tok.raw
      return fpr.new(fpr.s_to_i[tok.raw])
    else
      lexer.unreadtok ntok
    end
  end

  ret = parse_modrm(lexer, tok, self)
  return ret if ret

  if @args_token[tok.raw]
    parse_argregclasslist.each { |klass|
      return klass.from_str(tok.raw) if klass.s_to_i.has_key? tok.raw
    }
    # the name is in the cache but no class knows it
    raise tok, 'internal error'
  end

  lexer.unreadtok tok
  expr = Expression.parse(lexer)
  lexer.skip_space

  # may be a farptr
  ntok = lexer.readtok if expr
  if expr and ntok and ntok.type == :punct and ntok.raw == ':'
    addr = Expression.parse(lexer)
    raise tok, 'invalid farptr' unless addr
    Farptr.new expr, addr
  else
    lexer.unreadtok ntok
    Expression[expr.reduce] if expr
  end
end
|
|
229
|
+
|
|
230
|
+
# check if the argument matches the opcode's argument spec
|
|
231
|
+
def parse_arg_valid?(o, spec, arg)
  # o is the candidate opcode, spec one symbol of its argument list and arg
  # the parsed argument. Returns a truthy value when arg is acceptable for
  # spec, false/nil otherwise; raises EncodeError for an unknown spec.

  # movsx/movzx: the source is smaller than the destination, so the generic
  # size checks below do not apply
  if o.name == 'movsx' or o.name == 'movzx'
    return unless arg.kind_of?(Reg) or arg.kind_of?(ModRM)

    unless arg.sz
      puts "ambiguous arg size for indirection in #{o.name}" if $VERBOSE
      return
    end

    # reg=dst, modrm=src (smaller)
    return (arg.kind_of?(Reg) && arg.sz >= 16) if spec == :reg
    return arg.sz == o.props[:argsz] if o.props[:argsz]
    return arg.sz <= 16
  end

  # an explicit address size must agree with the one imposed by the opcode
  return false if arg.kind_of?(ModRM) and arg.adsz and o.props[:adsz] and arg.adsz != o.props[:adsz]

  # same for the operand size, except for the :reg_dx spec
  argsz = o.props[:argsz]
  if argsz and (arg.kind_of?(Reg) or arg.kind_of?(ModRM))
    return false unless !arg.sz or arg.sz == argsz or spec == :reg_dx
  end

  case spec
  when :reg      then arg.kind_of?(Reg) && (arg.sz >= 16 || o.props[:argsz])
  when :modrm    then (arg.kind_of?(ModRM) || arg.kind_of?(Reg)) && (!arg.sz || arg.sz >= 16 || o.props[:argsz])
  when :i        then arg.kind_of?(Expression)
  when :imm_val1 then arg.kind_of?(Expression) && arg.reduce == 1
  when :imm_val3 then arg.kind_of?(Expression) && arg.reduce == 3
  when :reg_eax  then arg.kind_of?(Reg) && arg.val == 0
  when :reg_cl   then arg.kind_of?(Reg) && arg.val == 1 && arg.sz == 8
  when :reg_dx   then arg.kind_of?(Reg) && arg.val == 2 && arg.sz == 16
  when :seg3     then arg.kind_of?(SegReg)
  when :seg3A    then arg.kind_of?(SegReg) && arg.val > 3
  when :seg2     then arg.kind_of?(SegReg) && arg.val < 4
  when :seg2A    then arg.kind_of?(SegReg) && arg.val < 4 && arg.val != 1
  when :eeec     then arg.kind_of?(CtrlReg)
  when :eeed     then arg.kind_of?(DbgReg)
  when :modrmA   then arg.kind_of?(ModRM)
  when :mrm_imm  then arg.kind_of?(ModRM) && !arg.s && !arg.i && !arg.b
  when :farptr   then arg.kind_of?(Farptr)
  when :regfp    then arg.kind_of?(FpReg)
  when :regfp0   then arg.kind_of?(FpReg) && (arg.val == nil || arg.val == 0)
  when :modrmmmx then arg.kind_of?(ModRM) || (arg.kind_of?(SimdReg) && (arg.sz == 64 || (arg.sz == 128 && o.props[:xmmx])))
  when :regmmx   then arg.kind_of?(SimdReg) && (arg.sz == 64 || (arg.sz == 128 && o.props[:xmmx]))
  when :modrmxmm then arg.kind_of?(ModRM) || (arg.kind_of?(SimdReg) && arg.sz == 128)
  when :regxmm   then arg.kind_of?(SimdReg) && arg.sz == 128
  when :i8, :u8, :u16
    # in_range? may answer true or nil, both are accepted:
    # jz 0x28282828 may fit in :i8 depending on instr addr
    arg.kind_of?(Expression) && (o.props[:setip] || Expression.in_range?(arg, spec) != false)
  else
    raise EncodeError, "Internal error: unknown argument specification #{spec.inspect}"
  end
end
|
|
286
|
+
|
|
287
|
+
def parse_instruction_checkproto(i)
  # 'imul reg, imm' is accepted as a shorthand for the three-operand form:
  # a copy of the register is prepended to the argument list before the
  # generic check done by super
  if i.opname == 'imul'
    args = i.args
    args.unshift(args.first.dup) if args.length == 2 and args.first.kind_of?(Reg) and args.last.kind_of?(Expression)
  end
  super(i)
end
|
|
296
|
+
|
|
297
|
+
# fixup the sz of a modrm argument, defaults to other argument size or current cpu mode
|
|
298
|
+
def parse_instruction_fixup(i)
  # Fills the missing sizes of the first ModRM argument of +i+, if any.
  m = i.args.grep(ModRM).first
  return unless m

  # pointed data size: 8 for movzx/movsx, else the size of the first Reg
  # argument, else the opcode's argsz when every opcode of that name has one,
  # else the size prefix or the cpu size
  unless m.sz
    m.sz =
      if i.opname == 'movzx' or i.opname == 'movsx'
        8
      elsif r = i.args.grep(Reg).first
        r.sz
      elsif opcode_list_byname[i.opname].all? { |o| o.props[:argsz] }
        opcode_list_byname[i.opname].first.props[:argsz]
      else
        # this is also the size of ctrlreg/dbgreg etc
        # XXX fpu/simd ?
        i.prefix[:sz] || @size
      end
  end

  # address size: the opcode's adsz when every opcode of that name has one,
  # else the size prefix or the cpu size
  unless m.adsz
    m.adsz =
      if opcode_list_byname[i.opname].all? { |o| o.props[:adsz] }
        opcode_list_byname[i.opname].first.props[:adsz]
      else
        i.prefix[:sz] || @size
      end
  end
end
|
|
322
|
+
|
|
323
|
+
def instr_uncond_jump_to(target)
  # builds the instruction by parsing the text of an unconditional jump to +target+
  source = "jmp #{target}"
  parse_instruction(source)
end
|
|
326
|
+
end
|
|
327
|
+
end
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
# This file is part of Metasm, the Ruby assembly manipulation suite
|
|
2
|
+
# Copyright (C) 2006-2009 Yoann GUILLOT
|
|
3
|
+
#
|
|
4
|
+
# Licence is LGPL, see LICENCE in the top-level directory
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
require 'metasm/ia32/opcodes'
|
|
8
|
+
require 'metasm/render'
|
|
9
|
+
|
|
10
|
+
# XXX move context in another file ?
|
|
11
|
+
module Metasm
|
|
12
|
+
class Ia32
|
|
13
|
+
# every Ia32 argument is renderable
class Argument
  include Renderable
end

# registers identified by their value only: the name is looked up in the class i_to_s table
[SegReg, DbgReg, CtrlReg, FpReg].each do |klass|
  klass.class_eval do
    def render
      [self.class.i_to_s[@val]]
    end
  end
end

# sized registers: the name is looked up by size, then by value
[Reg, SimdReg].each do |klass|
  klass.class_eval do
    def render
      [self.class.i_to_s[@sz][@val]]
    end

    # a single entry, which changes the register size
    def context
      {'set sz' => lambda { |s| @sz = s }}
    end
  end
end
|
|
24
|
+
|
|
25
|
+
class Farptr
  # rendered as segment, ':' and address
  def render
    parts = [@seg]
    parts << ':' << @addr
  end
end
|
|
30
|
+
|
|
31
|
+
class ModRM
  # Returns the assembler keyword for a pointed data size given in bits
  # ('byte', 'word', 'dword', 'qword' or 'oword'). Any other size is written
  # "_<sz>bits", which is the form the operand size parser reads back.
  # A new String is returned on every call, so callers may append to it.
  def qualifier(sz)
    {
      8   => 'byte',
      16  => 'word',
      32  => 'dword',
      64  => 'qword',
      128 => 'oword'
    }.fetch(sz) { |k| "_#{sz}bits" }
  end

  # the instruction this modrm is an argument of (set by render_instruction)
  attr_accessor :instruction

  # Returns the array of elements to render: optional "<size> ptr ",
  # optional segment and ':', then the bracketed address expression.
  def render
    r = []
    # the size qualifier is skipped when a Reg argument of the owning
    # instruction already has the same size
    r << ( qualifier(@sz) << ' ptr ' ) if @sz and (not instruction or not @instruction.args.find { |a| a.kind_of? Reg and a.sz == @sz })
    r << @seg << ':' if seg

    # address expression: base + immediate + scale*index, whichever are set
    e = nil
    e = Expression[e, :+, @b] if b
    e = Expression[e, :+, @imm] if imm
    e = Expression[e, :+, (@s == 1 ? @i : [@s, :*, @i])] if s
    r << '[' << e << ']'
  end

  # Returns a hash label => lambda of the modifications offered on a modrm:
  # changing the pointed data size, and changing the segment override.
  def context
    {'set targetsz' => lambda { |s| @sz = s },
     # segment registers are SegReg objects everywhere else in the Ia32 code;
     # the former 'Seg' constant is not defined there
     'set seg' => lambda { |s| @seg = SegReg.new s }}
  end
end
|
|
60
|
+
|
|
61
|
+
def render_instruction(i)
  # Returns the array of elements to render for +i+: the lock and rep
  # prefixes when present, the opcode name, then the arguments.
  out = []
  if pfx = i.prefix
    out << 'lock ' if pfx[:lock]
    out << pfx[:rep] << ' ' if pfx[:rep]
  end
  out << i.opname

  i.args.each { |arg|
    # a modrm needs to know its instruction to decide whether to show its size
    arg.instruction = i if arg.kind_of? ModRM
    # a space right after the opcode name, a comma between arguments
    separator = (out.last == i.opname ? ' ' : ', ')
    out << separator << arg
  }
  out
end
|
|
72
|
+
|
|
73
|
+
def instruction_context(i)
  # XXX
  # Returns a hash label => lambda of the modifications offered on +i+.
  # The entries depend on the first opcode registered under i.opname.
  menu = {}
  op = opcode_list_byname[i.opname].first

  if i.prefix and i.prefix[:rep]
    if op.props[:stropz]
      menu['toogle repz'] = lambda {
        i.prefix[:rep] =
          case i.prefix[:rep]
          when 'repnz' then 'repz'
          when 'repz' then 'repnz'
          end
      }
    end
    menu['rm rep'] = lambda { i.prefix.delete :rep }
  elsif op.props[:stropz]
    # when both properties are set, :stropz takes precedence over :strop
    menu['set rep'] = lambda { (i.prefix ||= {})[:rep] = 'repz' }
  elsif op.props[:strop]
    menu['set rep'] = lambda { (i.prefix ||= {})[:rep] = 'rep' }
  end

  # offered only when a modrm argument carries a segment override
  with_seg = lambda { i.args.find { |a| a.kind_of? ModRM and a.seg } }
  menu['rm seg'] = lambda { with_seg.call.seg = nil } if with_seg.call

  menu['toggle lock'] = lambda { (i.prefix ||= {})[:lock] = !i.prefix[:lock] }
  menu
end
|
|
90
|
+
end
|
|
91
|
+
end
|
data/lib/metasm/main.rb
ADDED
|
@@ -0,0 +1,1193 @@
|
|
|
1
|
+
# This file is part of Metasm, the Ruby assembly manipulation suite
|
|
2
|
+
# Copyright (C) 2006-2009 Yoann GUILLOT
|
|
3
|
+
#
|
|
4
|
+
# Licence is LGPL, see LICENCE in the top-level directory
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
module Metasm
|
|
8
|
+
|
|
9
|
+
VERSION = 0x0001 # major major minor minor
|
|
10
|
+
|
|
11
|
+
# superclass for all metasm exceptions
|
|
12
|
+
class Exception < RuntimeError ; end
|
|
13
|
+
# parse error
|
|
14
|
+
class ParseError < Exception ; end
|
|
15
|
+
# invalid exeformat signature
|
|
16
|
+
class InvalidExeFormat < Exception ; end
|
|
17
|
+
# cannot honor .offset specification, reloc fixup overflow
|
|
18
|
+
class EncodeError < Exception ; end
|
|
19
|
+
|
|
20
|
+
# holds context of a processor
|
|
21
|
+
# endianness, current mode, opcode list...
|
|
22
|
+
class CPU
  attr_accessor :valid_args, :valid_props, :fields_mask
  attr_accessor :endianness, :size
  attr_accessor :generate_PIC
  attr_writer :opcode_list

  def initialize
    @fields_mask = {}
    @fields_shift = {}
    @valid_args = []
    @valid_props = [:setip, :saveip, :stopexec]
    @generate_PIC = true
  end

  # the opcode list, built with init_opcode_list on first access unless it was assigned
  def opcode_list
    @opcode_list ||= init_opcode_list
  end

  # returns a hash opcode_name => array of opcodes with this name
  # (computed once, then cached)
  def opcode_list_byname
    @opcode_list_byname ||= begin
      byname = {}
      opcode_list.each { |op| (byname[op.name] ||= []) << op }
      byname
    end
  end

  # sets up the C parser : standard macro definitions, type model (size of int etc)
  def tune_cparser(cp)
    # pick the data model from the cpu size; other sizes leave the parser as is
    if @size == 64
      cp.lp64
    elsif @size == 32
      cp.ilp32
    elsif @size == 16
      cp.ilp16
    end
    cp.endianness = @endianness
    cp.lexer.define_weak('_STDC', 1)
    # TODO gcc -dM -E - </dev/null
    tune_prepro(cp.lexer)
  end

  # hook to add cpu-specific definitions to a preprocessor, does nothing here
  def tune_prepro(pp)
    # TODO pp.define('BIGENDIAN')
  end

  # return a new AsmPreprocessor, tuned by this cpu and by exe when given
  def new_asmprepro(str='', exe=nil)
    prepro = AsmPreprocessor.new(str, exe)
    tune_prepro(prepro)
    exe.tune_prepro(prepro) if exe
    prepro
  end

  # returns a new & tuned C::Parser
  def new_cparser
    C::Parser.new(self)
  end

  # returns a new C::Compiler
  def new_ccompiler(parser, exe=ExeFormat.new)
    # the ivar is read directly, not through the exe's cpu reader
    exe.cpu = self unless exe.instance_variable_get("@cpu")
    C::Compiler.new(parser, exe)
  end

  # the class name, without its namespace, in lower case
  def shortname
    full = self.class.name
    full.sub(/.*::/, '').downcase
  end
end
|
|
85
|
+
|
|
86
|
+
# generic CPU, with no instructions, just size/endianness
|
|
87
|
+
class UnknownCPU < CPU
  # only records the size and endianness on top of the CPU defaults
  def initialize(size, endianness)
    super()
    @size = size
    @endianness = endianness
  end
end
|
|
93
|
+
|
|
94
|
+
# a cpu instruction 'formal' description
|
|
95
|
+
class Opcode
  # name:: the name of the instruction
  # args:: formal description of arguments (array of cpu-specific symbols)
  # bin:: binary encoding of the opcode (integer for risc, array of bytes for cisc)
  attr_accessor :name, :args, :bin

  # list of bit fields in the binary encoding
  # hash position => field
  # position is bit shift for risc, [byte index, bit shift] for cisc
  # field is cpu-specific
  attr_accessor :fields

  # props:: hash of opcode generic properties/restrictions (mostly property => true/false)
  # bin_mask:: binary mask for decoding
  attr_accessor :props, :bin_mask

  # a new opcode has no argument, no field and no property
  def initialize(name, bin=nil)
    @name = name
    @bin = bin
    @args = []
    @fields = {}
    @props = {}
  end

  # the name up to (excluding) its first dot
  def basename
    name.sub(/\..*/, '')
  end
end
|
|
124
|
+
|
|
125
|
+
# defines an attribute self.backtrace (array of filename/lineno)
|
|
126
|
+
# and a method backtrace_str which dumps this array to a human-readable form
|
|
127
|
+
module Backtrace
  # array [file, lineno, file, lineno]
  # if file 'A' does #include 'B' you'll get ['A', linenoA, 'B', linenoB]
  attr_accessor :backtrace

  # builds a readable backtrace string from an array of [file, lineno, file, lineno, ..]
  # the last pair comes first, each earlier pair is introduced by "included from"
  # returns an empty string for a nil array
  def self.backtrace_str(ary)
    return '' unless ary

    out = ''
    idx = ary.length
    until idx <= 0
      # ary[idx] is the pair written during the previous iteration, if any
      out << ",\n\tincluded from " if ary[idx]
      idx -= 2
      out << "#{ary[idx].inspect} line #{ary[idx + 1]}"
    end
    out
  end

  # builds a readable string from self.backtrace
  def backtrace_str
    Backtrace.backtrace_str(@backtrace)
  end

  # allows 'raise obj, msg': returns a ParseError whose message starts with the location
  def exception(msg='syntax error')
    ParseError.new "at #{backtrace_str}: #{msg}"
  end
end
|
|
154
|
+
|
|
155
|
+
# an instruction: opcode name + arguments
|
|
156
|
+
class Instruction
  include Backtrace

  # args:: arguments (cpu-specific objects)
  # prefix:: hash of prefixes (unused in simple cpus)
  # opname:: name of the associated opcode
  # cpu:: reference to the cpu which issued this instruction (used for rendering)
  attr_accessor :args, :prefix, :opname, :cpu

  # the prefix hash is left unset unless +pfx+ is given
  def initialize(cpu, opname=nil, args=[], pfx=nil, backtrace=nil)
    @cpu = cpu
    @opname = opname
    @args = args
    @prefix = pfx if pfx
    @backtrace = backtrace
  end

  # duplicates the argument list and prefix hash
  # the opcode name and the backtrace are copied too, the cpu is shared;
  # the copies are shallow: the arguments themselves are not duplicated
  def dup
    name_copy = @opname.dup if opname
    prefix_copy = @prefix.dup if prefix
    backtrace_copy = @backtrace.dup if backtrace
    Instruction.new(@cpu, name_copy, @args.dup, prefix_copy, backtrace_copy)
  end
end
|
|
181
|
+
|
|
182
|
+
# all kind of data description (including repeated/uninitialized)
|
|
183
|
+
class Data
  include Backtrace

  # maps data type to Expression parameters (signedness/bit size)
  INT_TYPE = {'db' => :a8, 'dw' => :a16, 'dd' => :a32, 'dq' => :a64}

  # data:: an Expression, an Array of Data, a String, or :uninitialized
  # type:: the data type, from INT_TYPE (TODO store directly Expression parameters ?)
  # count:: the repetition count of the data parameter (dup constructs)
  attr_accessor :data, :type, :count

  # note the argument order: the type comes first, then the data
  def initialize(type, data, count=1, backtrace=nil)
    @type = type
    @data = data
    @count = count
    @backtrace = backtrace
  end
end
|
|
200
|
+
|
|
201
|
+
# a name for a location
|
|
202
|
+
class Label
  include Backtrace

  # the name given to the location
  attr_accessor :name

  def initialize(name, backtrace=nil)
    @name = name
    @backtrace = backtrace
  end
end
|
|
211
|
+
|
|
212
|
+
# alignment directive
|
|
213
|
+
class Align
  include Backtrace

  # val:: the size to align to
  # fillwith:: the Data used to pad
  attr_accessor :val, :fillwith

  def initialize(val, fillwith=nil, backtrace=nil)
    @val = val
    @fillwith = fillwith
    @backtrace = backtrace
  end
end
|
|
225
|
+
|
|
226
|
+
# padding directive
|
|
227
|
+
class Padding
  include Backtrace

  # Data used to pad
  attr_accessor :fillwith

  def initialize(fillwith=nil, backtrace=nil)
    @fillwith = fillwith
    @backtrace = backtrace
  end
end
|
|
237
|
+
|
|
238
|
+
# offset directive
|
|
239
|
+
# can be used to fix padding length or to assert some code/data compiled length
|
|
240
|
+
class Offset
  include Backtrace

  # the offset, from the beginning of the current section, at which
  # the assembler will arrange to place this pseudo-instruction
  attr_accessor :val

  # stores the requested offset and the optional backtrace (nil by default)
  def initialize(val, backtrace=nil)
    @val = val
    @backtrace = backtrace
  end
end
|
|
251
|
+
|
|
252
|
+
# the superclass of all real executable formats
|
|
253
|
+
# main methods:
|
|
254
|
+
# self.decode(str) => decodes the file format (imports/relocs/etc), no asm disassembly
|
|
255
|
+
# parse(source) => parses assembler source, fills self.source
|
|
256
|
+
# assemble => assembles self.source in binary sections/segments/whatever
|
|
257
|
+
# encode => builds imports/relocs tables, put all this together, links everything in self.encoded
|
|
258
|
+
class ExeFormat
  # array of Data/Instruction/Align/Padding/Offset/Label, populated in parse
  attr_accessor :cursource
  # contains the binary version of the compiled program (EncodedData)
  attr_accessor :encoded
  # hash of labels generated by new_label (key = label name, value = true)
  attr_accessor :unique_labels_cache

  # initializes self.cpu, creates an empty self.encoded and an empty label cache
  def initialize(cpu=nil)
    @cpu = cpu
    @encoded = EncodedData.new
    @unique_labels_cache = {}
  end

  attr_writer :cpu	# custom reader
  # returns the cpu; when none was set, asks #cpu_from_headers (not defined
  # in this class) and memoizes the answer
  def cpu
    @cpu ||= cpu_from_headers
  end

  # returns the label name corresponding to the specified offset of the encodeddata,
  # creates it (from +base+, through new_label) if necessary
  def label_at(edata, offset, base = '')
    if not l = edata.inv_export[offset]
      edata.add_export(l = new_label(base), offset)
    end
    l
  end

  # creates a new label, that is guaranteed to never be returned again as long as this object (ExeFormat) exists
  # base is the wished name: any character outside [a-zA-Z0-9_] is replaced by '_'
  # base may be anything responding to #to_s (String, Symbol, nil...), it is never modified
  # if the sanitized name is empty or already taken, a '_uuid<hex>' suffix is appended
  def new_label(base = '')
    # String#tr returns a fresh String, so the caller's object is left untouched
    base = base.to_s.tr('^a-zA-Z0-9_', '_')
    if base.empty? or @unique_labels_cache[base]
      # use %x instead of to_s(16) for negative values
      # the suffixed string is frozen before being used as Hash key
      base = (base << '_uuid' << ('%08x' % base.object_id)).freeze
    end
    @unique_labels_cache[base] = true
    base
  end

  # share self.unique_labels_cache with other, checks for conflicts, returns self
  # raises a RuntimeError listing the label names present in both caches
  def share_namespace(other)
    return self if other.unique_labels_cache.equal? @unique_labels_cache
    conflicts = other.unique_labels_cache.keys & @unique_labels_cache.keys
    raise "share_ns #{conflicts.inspect}" if !conflicts.empty?
    @unique_labels_cache.update other.unique_labels_cache
    other.unique_labels_cache = @unique_labels_cache
    self
  end
end
|
|
304
|
+
|
|
305
|
+
# superclass for classes similar to Expression
|
|
306
|
+
# must define #bind, #reduce_rec, #match_rec, #externals
|
|
307
|
+
class ExpressionType
  # returns the reduced Expression self + o
  def +(o)
    Expression[self, :+, o].reduce
  end

  # returns the reduced Expression self - o
  def -(o)
    Expression[self, :-, o].reduce
  end
end

# handle immediate values, and arbitrary arithmetic/logic expression involving variables
# boolean values are treated as in C : true is 1, false is 0
# TODO replace #type with #size => bits + #type => [:signed/:unsigned/:any/:floating]
# TODO handle floats
class Expression < ExpressionType
  # type symbol (:i8, :u8, :a8, ... :a64) => size in bits
  INT_SIZE = {}
  # type symbol => smallest value accepted for this type
  INT_MIN = {}
  # type symbol => biggest value accepted for this type
  INT_MAX = {}

  # iXX is signed, uXX is unsigned, aXX accepts both ranges
  [8, 16, 32, 64].each { |sz|
    INT_SIZE["i#{sz}".to_sym] =
    INT_SIZE["u#{sz}".to_sym] =
    INT_SIZE["a#{sz}".to_sym] = sz

    INT_MIN["a#{sz}".to_sym] =
    INT_MIN["i#{sz}".to_sym] = -(1 << (sz-1))	# -0x8000
    INT_MIN["u#{sz}".to_sym] = 0

    INT_MAX["i#{sz}".to_sym] = (1 << (sz-1)) - 1	# 0x7fff
    INT_MAX["a#{sz}".to_sym] =
    INT_MAX["u#{sz}".to_sym] = (1 << sz) - 1	# 0xffff
  }

  # alternative constructor
  # in operands order, and allows nesting using sub-arrays
  # ex: Expression[[:-, 42], :*, [1, :+, [4, :*, 7]]]
  # with a single argument, return it if already an Expression, else construct a new one (using unary +/-)
  # with two arguments, they are the operator and the operand of an unary expression
  # raises ArgumentError when called with a single nil/false argument
  def self.[](l, op=nil, r=nil)
    if not r	# need to shift args
      if not op
        raise ArgumentError, 'invalid Expression[nil]' if not l
        return l if l.kind_of? Expression
        if l.kind_of? ::Numeric and l < 0
          # negative immediates are stored as unary minus of the absolute value
          r = -l
          op = :'-'
        else
          r = l
          op = :'+'
        end
      else
        r = op
        op = l
      end
      l = nil
    else
      l = self[*l] if l.kind_of? ::Array
    end
    r = self[*r] if r.kind_of? ::Array
    new(op, r, l)
  end

  # checks if a given Expression/Integer is in the type range
  # returns true if it is, false if it overflows, and nil if cannot be determined (eg unresolved variable)
  # also returns nil when type is not a key of INT_MIN
  def self.in_range?(val, type)
    val = val.reduce if val.kind_of? self
    return unless val.kind_of? ::Numeric

    if INT_MIN[type]
      val == val.to_i and
      val >= INT_MIN[type] and val <= INT_MAX[type]
    end
  end

  # casts an unsigned value to a two-complement signed if the sign bit is set
  # non-Integer values are returned unchanged
  def self.make_signed(val, bitlength)
    if val.kind_of? Integer
      val = val - (1 << bitlength) if val >> (bitlength - 1) == 1
    end
    val
  end

  # the operator (symbol)
  attr_accessor :op
  # the lefthandside expression (nil for unary expressions)
  attr_accessor :lexpr
  # the righthandside expression
  attr_accessor :rexpr

  # basic constructor
  # XXX funny args order, you should use +Expression[]+ instead
  # raises ArgumentError if op is not a Symbol
  def initialize(op, rexpr, lexpr)
    raise ArgumentError, "Expression: invalid arg order: #{[lexpr, op, rexpr].inspect}" if not op.kind_of? ::Symbol
    @op, @lexpr, @rexpr = op, lexpr, rexpr
  end

  # recursive check of equity using #==
  # will not match 1+2 and 2+1
  def ==(o)
    # shortcircuit recursion
    o.object_id == object_id or (o.kind_of?(Expression) and @op == o.op and @lexpr == o.lexpr and @rexpr == o.rexpr)
  end

  # make it useable as Hash key (see +==+)
  def hash
    (@lexpr.hash + @op.hash + @rexpr.hash) & 0x7fff_ffff
  end
  alias eql? ==

  # returns a new Expression with all variables found in the binding replaced with their value
  # does not check the binding's key class except for numeric
  # calls lexpr/rexpr #bind if they are an ExpressionType
  # if self is itself a key of the binding, returns a dup of the associated value
  def bind(binding = {})
    if binding[self]
      return binding[self].dup
    end

    l, r = @lexpr, @rexpr
    if l and binding[l]
      raise "internal error - bound #{l.inspect}" if l.kind_of? ::Numeric
      l = binding[l]
    elsif l.kind_of? ExpressionType
      l = l.bind(binding)
    end
    if r and binding[r]
      raise "internal error - bound #{r.inspect}" if r.kind_of? ::Numeric
      r = binding[r]
    elsif r.kind_of? ExpressionType
      r = r.bind(binding)
    end
    Expression[l, @op, r]
  end

  # bind in place (replace self.lexpr/self.rexpr with the binding value)
  # only recurse with Expressions (does not use respond_to?)
  # returns self
  def bind!(binding = {})
    if @lexpr.kind_of?(Expression)
      @lexpr.bind!(binding)
    elsif @lexpr
      @lexpr = binding[@lexpr] || @lexpr
    end
    if @rexpr.kind_of?(Expression)
      @rexpr.bind!(binding)
    elsif @rexpr
      @rexpr = binding[@rexpr] || @rexpr
    end
    self
  end

  # reduce_lambda is a callback called after the standard reduction procedure for custom algorithms
  # the lambda may return a new expression or nil (to keep the old expr)
  # exemple: lambda { |e| e.lexpr if e.kind_of? Expression and e.op == :& and e.rexpr == 0xffff_ffff }
  # returns old lambda
  def self.reduce_lambda(&b)
    old = @@reduce_lambda
    @@reduce_lambda = b if block_given?
    old
  end
  def self.reduce_lambda=(p)
    @@reduce_lambda = p
  end
  @@reduce_lambda = nil

  # returns a simplified copy of self
  # can return an +Expression+ or a +Numeric+, may return self
  # see +reduce_rec+ for simplifications description
  # if given a block, it will temporarily overwrite the global @@reduce_lambda XXX THIS IS NOT THREADSAFE
  def reduce(&b)
    old_rp, @@reduce_lambda = @@reduce_lambda, b if b
    case e = reduce_rec
    when Expression, Numeric; e
    else Expression[e]
    end
  ensure
    # restore the previous callback, even if the reduction raised
    @@reduce_lambda = old_rp if b
  end

  # resolves logic operations (true || false, etc)
  # computes numeric operations (1 + 3)
  # expands substractions to addition of the opposite
  # reduces double-oppositions (-(-1) => 1)
  # reduces addition of 0 and unary +
  # canonicalize additions: put variables in the lhs, descend addition tree in the rhs => (a + (b + (c + 12)))
  # make formal reduction if finds somewhere in addition tree (a) and (-a)
  # may return self, a new Expression, a Numeric, a bare variable or :unknown
  def reduce_rec
    l = @lexpr.kind_of?(ExpressionType) ? @lexpr.reduce_rec : @lexpr
    r = @rexpr.kind_of?(ExpressionType) ? @rexpr.reduce_rec : @rexpr

    if @@reduce_lambda
      # sub-Expressions already went through the callback at the end of their own reduce_rec
      l = @@reduce_lambda[l] || l if not @lexpr.kind_of? Expression
      r = @@reduce_lambda[r] || r if not @rexpr.kind_of? Expression
    end

    # v is the simplified value, or nil if no simplification rule applies
    v =
    if r.kind_of?(::Numeric) and (l == nil or l.kind_of?(::Numeric))
      # calculate numerics
      if [:'&&', :'||', :'>', :'<', :'>=', :'<=', :'==', :'!='].include?(@op)
        # bool expr
        raise 'internal error' if not l
        case @op
        when :'&&'; (l != 0) && (r != 0)
        when :'||'; (l != 0) || (r != 0)
        when :'>' ; l > r
        when :'>='; l >= r
        when :'<' ; l < r
        when :'<='; l <= r
        when :'=='; l == r
        when :'!='; l != r
        end ? 1 : 0
      elsif not l
        case @op
        when :'!'; (r == 0) ? 1 : 0
        when :+; r
        when :-; -r
        when :~; ~r
        end
      else
        # use ruby evaluator
        l.send(@op, r)
      end

    elsif @op == :'&&'
      if l == 0	# shortcircuit eval
        0
      elsif l == 1
        Expression[r, :'!=', 0].reduce_rec
      elsif r == 0
        0	# XXX l could be a special ExprType with sideeffects ?
      end
    elsif @op == :'||'
      if l.kind_of? ::Numeric and l != 0	# shortcircuit eval
        1
      elsif l == 0
        Expression[r, :'!=', 0].reduce_rec
      elsif r == 0
        Expression[l, :'!=', 0].reduce_rec
      end
    elsif @op == :>> or @op == :<<
      if l == 0; 0
      elsif r == 0; l
      elsif l.kind_of? Expression and l.op == @op
        # (a << i) << j => a << (i+j)
        Expression[l.lexpr, @op, [l.rexpr, :+, r]].reduce_rec
      # XXX (a >> 1) << 1  !=  a (lose low bit)
      # XXX (a << 1) >> 1  !=  a (with real cpus, lose high bit)
      # (a | b) << i
      elsif r.kind_of? Integer and l.kind_of? Expression and [:&, :|, :^].include? l.op
        Expression[[l.lexpr, @op, r], l.op, [l.rexpr, @op, r]].reduce_rec
      end
    elsif @op == :'!'
      # !(a == b) => a != b, etc
      if r.kind_of? Expression and op = {:'==' => :'!=', :'!=' => :'==', :< => :>=, :> => :<=, :<= => :>, :>= => :<}[r.op]
        Expression[r.lexpr, op, r.rexpr].reduce_rec
      end
    elsif @op == :==
      if l == r; 1
      elsif r == 0 and l.kind_of? Expression and op = {:'==' => :'!=', :'!=' => :'==', :< => :>=, :> => :<=, :<= => :>, :>= => :<}[l.op]
        # (a < b) == 0 => a >= b
        Expression[l.lexpr, op, l.rexpr].reduce_rec
      elsif r == 1 and l.kind_of? Expression and op = {:'==' => :'!=', :'!=' => :'==', :< => :>=, :> => :<=, :<= => :>, :>= => :<}[l.op]
        # (a < b) == 1 => a < b
        l
      elsif r == 0 and l.kind_of? Expression and l.op == :+
        if l.rexpr.kind_of? Expression and l.rexpr.op == :- and not l.rexpr.lexpr
          # a + (-b) == 0 => a == b
          Expression[l.lexpr, @op, l.rexpr.rexpr].reduce_rec
        elsif l.rexpr.kind_of? ::Integer
          # a + 4 == 0 => a == -4
          Expression[l.lexpr, @op, -l.rexpr].reduce_rec
        end
      end
    elsif @op == :'!='
      if l == r; 0
      end
    elsif @op == :^
      if l == :unknown or r == :unknown; :unknown
      elsif l == 0; r
      elsif r == 0; l
      elsif l == r; 0
      elsif r == 1 and l.kind_of? Expression and [:'==', :'!=', :<, :>, :<=, :>=].include? l.op
        # (a < b) ^ 1 => !(a < b)
        Expression[nil, :'!', l].reduce_rec
      elsif l.kind_of?(::Numeric)
        if r.kind_of? Expression and r.op == :^
          # 1^(x^y) => x^(y^1)
          Expression[r.lexpr, :^, [r.rexpr, :^, l]].reduce_rec
        else
          # 1^a => a^1
          Expression[r, :^, l].reduce_rec
        end
      elsif l.kind_of? Expression and l.op == :^
        # (a^b)^c => a^(b^c)
        Expression[l.lexpr, :^, [l.rexpr, :^, r]].reduce_rec
      elsif r.kind_of? Expression and r.op == :^
        if r.rexpr == l
          # a^(b^a) => b
          r.lexpr
        elsif r.lexpr == l
          # a^(a^b) => b
          r.rexpr
        else
          # a^(b^(c^(a^d)))  =>  b^(a^(c^(a^d)))
          # XXX ugly..
          tr = r
          found = false
          while not found and tr.kind_of?(Expression) and tr.op == :^
            found = true if tr.lexpr == l or tr.rexpr == l
            tr = tr.rexpr
          end
          if found
            Expression[r.lexpr, :^, [l, :^, r.rexpr]].reduce_rec
          end
        end
      elsif l.kind_of?(Expression) and l.op == :& and l.rexpr.kind_of?(::Integer) and (l.rexpr & (l.rexpr+1)) == 0
        # l.rexpr is a mask of the form 2**n - 1
        if r.kind_of?(::Integer) and r & l.rexpr == r
          # (a&0xfff)^12 => (a^12)&0xfff
          Expression[[l.lexpr, :^, r], :&, l.rexpr].reduce_rec
        elsif r.kind_of?(Expression) and r.op == :& and r.rexpr.kind_of?(::Integer) and r.rexpr == l.rexpr
          # (a&0xfff)^(b&0xfff) => (a^b)&0xfff
          Expression[[l.lexpr, :^, r.lexpr], :&, l.rexpr].reduce_rec
        end
      end
    elsif @op == :&
      if l == 0 or r == 0; 0
      elsif r == 1 and l.kind_of?(Expression) and [:'==', :'!=', :<, :>, :<=, :>=].include?(l.op)
        l
      elsif l == r; l
      elsif l.kind_of?(Integer); Expression[r, @op, l].reduce_rec
      elsif l.kind_of?(Expression) and l.op == @op; Expression[l.lexpr, @op, [l.rexpr, @op, r]].reduce_rec
      elsif l.kind_of?(Expression) and [:|, :^].include?(l.op) and r.kind_of?(Integer) and (l.op == :| or (r & (r+1)) != 0)
        # (a ^| b) & i => (a&i ^| b&i)
        Expression[[l.lexpr, :&, r], l.op, [l.rexpr, :&, r]].reduce_rec
      elsif r.kind_of?(::Integer) and l.kind_of?(Expression) and (r & (r+1)) == 0
        # foo & 0xffff
        reduce_rec_mod2(l, r)
      end
    elsif @op == :|
      if l == 0; r
      elsif r == 0; l
      elsif l == -1 or r == -1; -1
      elsif l == r; l
      elsif l.kind_of? Integer; Expression[r, @op, l].reduce_rec
      elsif l.kind_of? Expression and l.op == @op; Expression[l.lexpr, @op, [l.rexpr, @op, r]].reduce_rec
      end
    elsif @op == :*
      if l == 0 or r == 0; 0
      elsif l == 1; r
      elsif r == 1; l
      elsif r.kind_of? Integer; Expression[r, @op, l].reduce_rec
      elsif r.kind_of? Expression and r.op == @op; Expression[[l, @op, r.lexpr], @op, r.rexpr].reduce_rec
      # NOTE this branch is shadowed by the previous one (same r.op == :* condition)
      elsif l.kind_of? Integer and r.kind_of? Expression and r.op == :* and r.lexpr.kind_of? Integer; Expression[l*r.lexpr, :*, r.rexpr].reduce_rec	# XXX need & regsize..
      elsif l.kind_of? Integer and r.kind_of? Expression and r.op == :+ and r.rexpr.kind_of? Integer; Expression[[l, :*, r.lexpr], :+, l*r.rexpr].reduce_rec
      end
    elsif @op == :/
      if r == 0
        # division by 0: no simplification
      elsif r.kind_of? Integer and l.kind_of? Expression and l.op == :+ and l.rexpr.kind_of? Integer and l.rexpr % r == 0
        # (a + 8) / 4 => (a / 4) + 2
        Expression[[l.lexpr, :/, r], :+, l.rexpr/r].reduce_rec
      elsif r.kind_of? Integer and l.kind_of? Expression and l.op == :* and l.lexpr.kind_of?(::Numeric) and l.lexpr % r == 0
        # (8 * a) / 4 => 2 * a
        # the Numeric check is needed: l.lexpr may be a variable or an Expression, which do not implement the modulo
        Expression[l.lexpr/r, :*, l.rexpr].reduce_rec
      end
    elsif @op == :-
      if l == :unknown or r == :unknown; :unknown
      elsif not l and r.kind_of? Expression and (r.op == :- or r.op == :+)
        if r.op == :-	# no lexpr (reduced)
          # -(-x) => x
          r.rexpr
        else		# :+ and lexpr (r is reduced)
          # -(a+b) => (-a)+(-b)
          Expression[[:-, r.lexpr], :+, [:-, r.rexpr]].reduce_rec
        end
      elsif l.kind_of? Expression and l.op == :+ and l.lexpr == r
        # shortcircuit for a common occurence [citation needed]
        # (a+b)-a
        l.rexpr
      elsif l
        # a-b => a+(-b)
        Expression[l, :+, [:-, r]].reduce_rec
      end
    elsif @op == :+
      if l == :unknown or r == :unknown; :unknown
      elsif not l; r	# +x  => x
      elsif r == 0; l	# x+0 => x
      elsif l.kind_of?(::Numeric)
        if r.kind_of? Expression and r.op == :+
          # 1+(x+y) => x+(y+1)
          Expression[r.lexpr, :+, [r.rexpr, :+, l]].reduce_rec
        else
          # 1+a => a+1
          Expression[r, :+, l].reduce_rec
        end
      # (a+b)+foo => a+(b+foo)
      elsif l.kind_of? Expression and l.op == @op; Expression[l.lexpr, @op, [l.rexpr, @op, r]].reduce_rec
      elsif l.kind_of? Expression and r.kind_of? Expression and l.op == :% and r.op == :% and l.rexpr.kind_of?(::Integer) and l.rexpr == r.rexpr
        # (a%m) + (b%m) => (a+b) % m
        Expression[[l.lexpr, :+, r.lexpr], :%, l.rexpr].reduce_rec
      else
        reduce_rec_add(l, r)
      end
    end

    ret = case v
    when nil
      # no dup if no new value
      (r == :unknown or l == :unknown) ? :unknown :
      ((r == @rexpr and l == @lexpr) ? self : Expression[l, @op, r])
    when Expression
      (v.lexpr == :unknown or v.rexpr == :unknown) ? :unknown : v
    else v
    end
    # give the custom callback a chance to rewrite the result, and reduce what it returns
    if @@reduce_lambda and ret.kind_of? ExpressionType and newret = @@reduce_lambda[ret] and newret != ret
      if newret.kind_of? ExpressionType
        ret = newret.reduce_rec
      else
        ret = newret
      end
    end
    ret
  end


  # a+(b+(c+(-a))) => b+c+0
  # a+((-a)+(b+c)) => 0+b+c
  # returns nil if the opposite of l is not found in the addition tree r
  def reduce_rec_add(l, r)
    if l.kind_of? Expression and l.op == :- and not l.lexpr
      neg_l = l.rexpr
    else
      neg_l = Expression[:-, l]
    end

    # recursive search & replace -lexpr by 0
    simplifier = lambda { |cur|
      if neg_l == cur
        # -l found
        0
      elsif cur.kind_of? Expression and cur.op == :+
        # recurse
        if newl = simplifier[cur.lexpr]
          Expression[newl, cur.op, cur.rexpr].reduce_rec
        elsif newr = simplifier[cur.rexpr]
          Expression[cur.lexpr, cur.op, newr].reduce_rec
        end
      end
    }

    simplifier[r]
  end

  # expr & 0xffff
  # e is the (already reduced) Expression being masked, mask the Integer mask
  def reduce_rec_mod2(e, mask)
    case e.op
    when :+, :^
      if e.lexpr.kind_of?(Expression) and e.lexpr.op == :& and
        e.lexpr.rexpr.kind_of?(::Integer) and e.lexpr.rexpr & mask == mask
        # ((a&m) + b) & m  =>  (a+b) & m
        Expression[[e.lexpr.lexpr, e.op, e.rexpr], :&, mask].reduce_rec
      elsif e.rexpr.kind_of?(Expression) and e.rexpr.op == :& and
        e.rexpr.rexpr.kind_of?(::Integer) and e.rexpr.rexpr & mask == mask
        # (a + (b&m)) & m  =>  (a+b) & m
        Expression[[e.lexpr, e.op, e.rexpr.lexpr], :&, mask].reduce_rec
      else
        Expression[e, :&, mask]
      end
    when :|
      # rol/ror composition
      reduce_rec_composerol e, mask
    else
      Expression[e, :&, mask]
    end
  end

  # a check to see if an Expr is the composition of two rotations (rol eax, 4 ; rol eax, 6 => rol eax, 10)
  # this is a bit too ugly to stay in the main reduce_rec body.
  # returns Expression[e, :&, mask] when the pattern is not recognized
  def reduce_rec_composerol(e, mask)
    m = Expression[['var', :sh_op, 'amt'], :|, ['var', :inv_sh_op, 'inv_amt']]
    if vars = e.match(m, 'var', :sh_op, 'amt', :inv_sh_op, 'inv_amt') and vars[:sh_op] == {:>> => :<<, :<< => :>>}[vars[:inv_sh_op]] and
       ((vars['amt'].kind_of?(::Integer) and vars['inv_amt'].kind_of?(::Integer) and ampl = vars['amt'] + vars['inv_amt']) or
        (vars['amt'].kind_of? Expression and vars['amt'].op == :% and vars['amt'].rexpr.kind_of? ::Integer and
         vars['inv_amt'].kind_of? Expression and vars['inv_amt'].op == :% and vars['amt'].rexpr == vars['inv_amt'].rexpr and ampl = vars['amt'].rexpr)) and
       mask == (1<<ampl)-1 and vars['var'].kind_of? Expression and	# it's a rotation

       vars['var'].op == :& and vars['var'].rexpr == mask and
       ivars = vars['var'].lexpr.match(m, 'var', :sh_op, 'amt', :inv_sh_op, 'inv_amt') and ivars[:sh_op] == {:>> => :<<, :<< => :>>}[ivars[:inv_sh_op]] and
       ((ivars['amt'].kind_of?(::Integer) and ivars['inv_amt'].kind_of?(::Integer) and ampl = ivars['amt'] + ivars['inv_amt']) or
        (ivars['amt'].kind_of? Expression and ivars['amt'].op == :% and ivars['amt'].rexpr.kind_of? ::Integer and
         ivars['inv_amt'].kind_of? Expression and ivars['inv_amt'].op == :% and ivars['amt'].rexpr == ivars['inv_amt'].rexpr and ampl = ivars['amt'].rexpr))
      if ivars[:sh_op] != vars[:sh_op]
        # ensure the rotations are the same orientation
        ivars[:sh_op], ivars[:inv_sh_op] = ivars[:inv_sh_op], ivars[:sh_op]
        ivars['amt'],  ivars['inv_amt']  = ivars['inv_amt'],  ivars['amt']
      end
      amt = Expression[[vars['amt'], :+, ivars['amt']], :%, ampl]
      invamt = Expression[[vars['inv_amt'], :+, ivars['inv_amt']], :%, ampl]
      Expression[[[[ivars['var'], :&, mask], vars[:sh_op], amt], :|, [[ivars['var'], :&, mask], vars[:inv_sh_op], invamt]], :&, mask].reduce_rec
    else
      Expression[e, :&, mask]
    end
  end

  # a pattern-matching method
  # Expression[42, :+, 28].match(Expression['any', :+, 28], 'any') => {'any' => 42}
  # Expression[42, :+, 28].match(Expression['any', :+, 'any'], 'any') => false
  # Expression[42, :+, 42].match(Expression['any', :+, 'any'], 'any') => {'any' => 42}
  # vars can match anything except nil
  def match(target, *vars)
    match_rec(target, vars.inject({}) { |h, v| h.update v => nil })
  end

  # recursive part of #match
  # vars is a Hash whose keys are the pattern variables, it is filled with the matched values
  # returns vars on success, false otherwise
  def match_rec(target, vars)
    return false if not target.kind_of? Expression
    [target.lexpr, target.op, target.rexpr].zip([@lexpr, @op, @rexpr]) { |targ, exp|
      if targ and vars[targ]
        # variable already bound: must match the same value again
        return false if exp != vars[targ]
      elsif targ and vars.has_key? targ
        # unbound variable: bind it (nil/false cannot be matched)
        return false if not vars[targ] = exp
      elsif targ.kind_of? ExpressionType
        return false if not exp.kind_of? ExpressionType or not exp.match_rec(targ, vars)
      else
        return false if targ != exp
      end
    }
    vars
  end

  # returns the array of non-numeric members of the expression
  # if a variables appears 3 times, it will be present 3 times in the returned array
  def externals
    [@rexpr, @lexpr].inject([]) { |a, e|
      case e
      when ExpressionType; a.concat e.externals
      when nil, ::Numeric; a
      else a << e
      end
    }
  end

  # returns the externals that appears in the expression, does not walk through other ExpressionType
  def expr_externals
    [@rexpr, @lexpr].inject([]) { |a, e|
      case e
      when Expression; a.concat e.expr_externals
      when nil, ::Numeric, ExpressionType; a
      else a << e
      end
    }
  end

  # compact representation mimicking the Expression[] constructor syntax
  # the operator is omitted for an unary +
  def inspect
    "Expression[#{@lexpr.inspect.sub(/^Expression/, '') + ', ' if @lexpr}#{@op.inspect + ', ' if @lexpr or @op != :+}#{@rexpr.inspect.sub(/^Expression/, '')}]"
  end

  # the Expression holding the :unknown value
  Unknown = self[:unknown]
end
|
|
845
|
+
|
|
846
|
+
# an EncodedData relocation, specifies a value to patch in
|
|
847
|
+
class Relocation
  include Backtrace

  # target:     the relocation value (an Expression)
  # type:       the relocation expression type
  # endianness: the endianness of the relocation
  attr_accessor :target, :type, :endianness

  # raises ArgumentError unless target is an Expression and type/endianness are Symbols
  def initialize(target, type, endianness, backtrace = nil)
    valid = target.kind_of?(Expression) && type.kind_of?(::Symbol) && endianness.kind_of?(::Symbol)
    raise ArgumentError, "bad args #{[target, type, endianness].inspect}" unless valid

    @target = target
    @type = type
    @endianness = endianness
    @backtrace = backtrace
  end

  # fixup the encodeddata with value (reloc starts at off)
  # the value is encoded through Expression.encode_imm, and edata is filled up to off beforehand
  def fixup(edata, off, value)
    encoded = Expression.encode_imm(value, @type, @endianness, @backtrace)
    edata.fill(off)
    edata.data[off, encoded.length] = encoded
  end

  # size of the relocation field, in bytes
  def length
    bits = Expression::INT_SIZE[@type]
    bits / 8
  end
end
|
|
874
|
+
|
|
875
|
+
# a String-like, with export/relocation informations added
|
|
876
|
+
class EncodedData
|
|
877
|
+
# string with raw data
|
|
878
|
+
attr_accessor :data
|
|
879
|
+
# hash, key = offset within data, value = +Relocation+
|
|
880
|
+
attr_accessor :reloc
|
|
881
|
+
# hash, key = export name, value = offset within data - use add_export to update
|
|
882
|
+
attr_accessor :export
|
|
883
|
+
# hash, key = offset, value = 1st export name
|
|
884
|
+
attr_accessor :inv_export
|
|
885
|
+
# virtual size of data (all 0 by default, see +fill+)
|
|
886
|
+
attr_accessor :virtsize
|
|
887
|
+
# arbitrary pointer, often used when decoding immediates
|
|
888
|
+
# may be initialized with an export value
|
|
889
|
+
attr_reader :ptr # custom writer
|
|
890
|
+
# custom writer: an export name is translated to its offset,
# any other value is stored unchanged
def ptr=(p)
  offset = @export[p]
  @ptr = offset ? offset : p
end
|
|
891
|
+
|
|
892
|
+
# opts' keys in :reloc, :export, :virtsize, defaults to empty/empty/data.length
|
|
893
|
+
# opts' keys in :reloc, :export, :virtsize, defaults to empty/empty/data.length
# the inverse export table is built from the export table, ptr starts at 0
def initialize(data = '', opts={})
  @ptr = 0
  @data = data
  @reloc = (opts[:reloc] or {})
  @export = (opts[:export] or {})
  @inv_export = @export.invert
  @virtsize = (opts[:virtsize] or @data.length)
end
|
|
901
|
+
|
|
902
|
+
# records label at offset off (defaults to the current ptr)
# the inverse table is updated only if off has no name yet, or if set_inv is true
def add_export(label, off=@ptr, set_inv=false)
  @export[label] = off
  return unless set_inv || !@inv_export[off]
  @inv_export[off] = label
end
|
|
908
|
+
|
|
909
|
+
# removes label from the export table
# off (defaults to the current ptr) is the offset whose inverse entry is refreshed:
# it is set to another export located at off if there is one, deleted otherwise
def del_export(label, off=@ptr)
  @export.delete label
  # Hash#key replaces Hash#index, which is not available anymore since ruby 3.0
  if e = @export.key(off)
    @inv_export[off] = e
  else
    @inv_export.delete off
  end
end
|
|
917
|
+
|
|
918
|
+
# returns the size of raw data, that is [data.length, last relocation end].max
|
|
919
|
+
# returns the size of raw data, that is [data.length, last relocation end].max
def rawsize
  @reloc.inject(@data.length) { |size, (off, rel)|
    reloc_end = off + rel.length
    reloc_end > size ? reloc_end : size
  }
end
|
|
922
|
+
# String-like
|
|
923
|
+
alias length virtsize
|
|
924
|
+
# String-like
|
|
925
|
+
alias size virtsize
|
|
926
|
+
|
|
927
|
+
# String-like: true if the virtual size is 0
def empty?
  return true if @virtsize == 0
  false
end
|
|
930
|
+
|
|
931
|
+
# true if ptr (converted with to_i) is at or past the virtual size
def eos?
  position = ptr.to_i
  position >= @virtsize
end
|
|
934
|
+
|
|
935
|
+
# returns a copy of itself, with reloc/export duped (but not deep)
|
|
936
|
+
# returns a copy of itself, with reloc/export duped (but not deep)
# the raw data is duped too, ptr is not copied
def dup
  opts = { :reloc => @reloc.dup, :export => @export.dup, :virtsize => @virtsize }
  self.class.new(@data.dup, opts)
end
|
|
939
|
+
|
|
940
|
+
# resolve relocations:
|
|
941
|
+
# calculate each reloc target using Expression#bind(binding)
|
|
942
|
+
# if numeric, replace the raw data with the encoding of this value (+fill+s preceding data if needed) and remove the reloc
|
|
943
|
+
# if replace_target is true, the reloc target is replaced with its bound counterpart
|
|
944
|
+
# resolve relocations:
# calculate each reloc target using Expression#bind(binding)
# if it reduces to an Integer, patch the raw data through Relocation#fixup and remove the reloc
# otherwise, if replace_target is true, the reloc target is replaced with its bound counterpart
def fixup_choice(binding, replace_target)
  # iterate on a copy of the keys: @reloc is modified during the walk
  @reloc.keys.each do |off|
    reloc = @reloc[off]
    val = reloc.target.bind(binding).reduce
    unless val.kind_of?(Integer)
      @reloc[off].target = val if replace_target
      next
    end
    reloc.fixup(self, off, val)
    @reloc.delete(off)	# delete only if not overflowed
  end
end
|
|
956
|
+
|
|
957
|
+
# +fixup_choice+ binding, false
|
|
958
|
+
# +fixup_choice+ binding, false: unresolved reloc targets are left untouched
def fixup(binding)
  replace_target = false
  fixup_choice(binding, replace_target)
end
|
|
961
|
+
|
|
962
|
+
# +fixup_choice+ binding, true
|
|
963
|
+
# +fixup_choice+ binding, true: unresolved reloc targets are replaced by their bound version
def fixup!(binding)
  replace_target = true
  fixup_choice(binding, replace_target)
end
|
|
966
|
+
|
|
967
|
+
# returns a default binding suitable for use in +fixup+
|
|
968
|
+
# every export is expressed as base + offset
|
|
969
|
+
# base defaults to the first export name + its offset
|
|
970
|
+
# returns a default binding suitable for use in +fixup+
# every export is expressed as base + offset
# base defaults to the first export name + its offset
# returns an empty Hash if no base is given and there is no export
def binding(base = nil)
  if not base
    # name of an export with the lowest offset
    # Hash#key replaces Hash#index, which is not available anymore since ruby 3.0
    key = @export.key(@export.values.min)
    return {} if not key
    base = (@export[key] == 0 ? key : Expression[key, :-, @export[key]])
  end
  @export.inject({}) { |binding, (n, o)| binding.update n => Expression.new(:+, o, base) }
end
|
|
978
|
+
|
|
979
|
+
# returns an array of variables that needs to be defined for a complete #fixup
|
|
980
|
+
# ie the list of externals for all relocations
|
|
981
|
+
# returns an array of variables that needs to be defined for a complete #fixup
# ie the list of externals for all relocations, without the names exported here
def reloc_externals
  found = []
  @reloc.each_value { |r| found.concat r.target.externals }
  found.flatten.uniq - @export.keys
end
|
|
984
|
+
|
|
985
|
+
# Looks up the first offset in @reloc whose relocation target equals
# Expression[t]; returns nil when no relocation matches.
def offset_of_reloc(t)
  wanted = Expression[t]
  @reloc.each { |off, rel| return off if rel.target == wanted }
  nil
end
|
|
990
|
+
|
|
991
|
+
# Materializes virtual space: right-pads @data with +pattern+ (a String,
# repeated as needed; a single NUL byte by default) until it is +len+ bytes long.
# +len+ defaults to the current @virtsize; @virtsize is raised to +len+ when
# +len+ is larger. Nothing is padded when @data is already at least +len+ long.
def fill(len = @virtsize, pattern = [0].pack('C'))
  @virtsize = len if @virtsize < len
  return unless @data.length < len
  @data = @data.to_str.ljust(len, pattern)
end
|
|
997
|
+
|
|
998
|
+
# Rounds @virtsize up to the next multiple of +len+ (via EncodedData.align_size).
# When +pattern+ is given, the data is additionally filled up to the new
# @virtsize with it (see #fill); otherwise the added space stays virtual.
def align(len, pattern=nil)
  aligned = EncodedData.align_size(@virtsize, len)
  @virtsize = aligned
  return unless pattern
  fill(aligned, pattern)
end
|
|
1003
|
+
|
|
1004
|
+
# Rounds +val+ up to the next multiple of +len+.
# +val+ is returned untouched when +len+ is 0.
def self.align_size(val, len)
  if len == 0
    val
  else
    blocks = ((val + len - 1) / len).to_i
    blocks * len
  end
end
|
|
1009
|
+
|
|
1010
|
+
# Appends +other+ to self and returns self. +other+ may be:
# * nil: nothing happens
# * an Integer: appended to @data through String#<<, @virtsize grows by 1
# * an EncodedData: its data is appended, and its relocs, exports and
#   inv_exports are merged in with their offsets shifted by the current
#   @virtsize; @virtsize grows by other.virtsize.
#   Raises a RuntimeError ("edata merge: label conflict ...") when an export
#   name already exists in self at a different resulting offset.
# * anything else (e.g. a String): appended to @data, @virtsize grows by
#   other.length
# Pending virtual space is materialized with #fill before real data is appended.
def <<(other)
  case other
  when nil
    # nothing to append
  when ::Integer
    # ::Integer rather than ::Fixnum: the Fixnum constant was removed in
    # Ruby 3.2, which made every non-nil append raise a NameError
    fill
    @data = @data.to_str if not @data.kind_of? String
    @data << other
    @virtsize += 1
  when EncodedData
    fill if not other.data.empty?
    other.reloc.each { |k, v| @reloc[k + @virtsize] = v } if not other.reloc.empty?
    if not other.export.empty?
      other.export.each { |k, v|
        if @export[k] and @export[k] != v + @virtsize
          # collect every conflicting label for the error message
          cf = (other.export.keys & @export.keys).find_all { |k_| other.export[k_] != @export[k_] - @virtsize }
          raise "edata merge: label conflict #{cf.inspect}"
        end
        @export[k] = v + @virtsize
      }
      other.inv_export.each { |k, v| @inv_export[@virtsize + k] = v }
    end
    if @data.empty?
      @data = other.data.dup
    elsif not @data.kind_of?(String)
      @data = @data.to_str << other.data
    else
      @data << other.data
    end
    @virtsize += other.virtsize
  else
    fill
    if @data.empty?
      @data = other.dup
    elsif not @data.kind_of?(String)
      @data = @data.to_str << other
    else
      @data << other
    end
    @virtsize += other.length
  end

  self
end
|
|
1048
|
+
|
|
1049
|
+
# Non-destructive concatenation: returns dup << other, leaving self untouched.
# Raises ArgumentError when +other+ is nil/false or an Integer.
def +(other)
  rejected = !other || other.kind_of?(Integer)
  raise ArgumentError if rejected
  copy = dup
  copy << other
end
|
|
1054
|
+
|
|
1055
|
+
# Slice: self[from], self[from, len] or self[range].
# Offsets (including range bounds) may be export names, which are looked up in
# @export; negative offsets are counted from @virtsize.
# Returns nil when the start offset is below 0 or above @virtsize.
# Without a length, returns @data[from].
# Otherwise returns a new EncodedData holding @data[from, len] (len is clamped
# so the slice ends at @virtsize) together with, rebased to the slice start:
# * the relocs lying entirely inside the slice
# * the exports/inv_exports whose offset is within from..from+len (end included)
def [](from, len=nil)
  if !len && from.kind_of?(Range)
    first = from.begin
    last = from.end
    first = @export[first] if @export[first]
    last = @export[last] if @export[last]
    first += @virtsize if first < 0
    last += @virtsize if last < 0
    len = last - first
    len += 1 unless from.exclude_end?
    from = first
  end
  from = @export[from] if @export[from]
  from += @virtsize if from < 0
  return if from > @virtsize || from < 0

  return @data[from] unless len

  len = @virtsize - from if from + len > @virtsize
  slice = EncodedData.new @data[from, len]
  slice.virtsize = len
  @reloc.each do |off, rel|
    next unless off >= from && off + rel.length <= from + len
    slice.reloc[off - from] = rel
  end
  @export.each do |name, off|
    next unless off >= from && off <= from + len # XXX include end ?
    slice.export[name] = off - from
  end
  @inv_export.each do |off, name|
    next unless off >= from && off <= from + len
    slice.inv_export[off - from] = name
  end
  slice
end
|
|
1087
|
+
|
|
1088
|
+
# Slice replacement: self[from, len] = val, self[range] = val or self[from] = val.
# Supports size changes: exports, inv_exports and relocs located after +from+
# are shifted by the size difference, and @virtsize is adjusted.
# Old exports strictly inside the overwritten space and old relocs overlapping
# it are discarded; an export located exactly at +from+ is kept.
# +from+ (and range bounds) may be export names, resolved through @export;
# negative offsets are counted from @virtsize.
# +val+ is wrapped with EncodedData.new << val, and its own
# exports/inv_exports/relocs are merged in at +from+.
# When no length is given it defaults to val.length (an Integer +val+ is
# converted with #chr first). The replaced length is clamped to @virtsize.
# Raises a RuntimeError on a non-Integer offset or an invalid length.
def []=(from, len, val=nil)
  # two-argument form: the value arrives in +len+
  if not val
    val = len
    len = nil
  end
  if not len and from.kind_of? ::Range
    b = from.begin
    e = from.end
    b = @export[b] if @export[b]
    e = @export[e] if @export[e]
    b = b + @virtsize if b < 0
    e = e + @virtsize if e < 0
    len = e - b
    len += 1 if not from.exclude_end?
    from = b
  end
  from = @export[from] || from
  raise "invalid offset #{from}" if not from.kind_of? ::Integer
  from = from + @virtsize if from < 0

  if not len
    val = val.chr if val.kind_of? ::Integer
    len = val.length
  end
  raise "invalid slice length #{len}" if not len.kind_of? ::Integer or len < 0

  # never replace anything past the end of the virtual space
  if from >= @virtsize
    len = 0
  elsif from+len > @virtsize
    len = @virtsize-from
  end

  val = EncodedData.new << val

  # remove overwritten metadata
  @export.delete_if { |name, off| off > from and off < from + len }
  # keep the inverse map in sync with @export
  @inv_export.delete_if { |off, name| off > from and off < from + len }
  # a reloc is overwritten when its bytes [off, off+length) overlap [from, from+len)
  @reloc.delete_if { |off, rel| len > 0 and off + rel.length > from and off < from + len }

  # shrink/grow
  if val.length != len
    diff = val.length - len
    @export.keys.each { |name| @export[name] = @export[name] + diff if @export[name] > from }
    # offset-keyed tables: pull out every entry to move before reinserting, so a
    # shifted key can never overwrite an entry that has not been moved yet
    [@inv_export, @reloc].each { |table|
      moved = {}
      table.keys.each { |off| moved[off + diff] = table.delete(off) if off > from }
      table.update(moved)
    }
    @virtsize += diff if @virtsize >= from+len
  end

  @virtsize = from + val.length if @virtsize < from + val.length

  if from + len < @data.length # patch real data
    val.fill
    @data[from, len] = val.data
  elsif not val.data.empty? # patch end of real data
    @data << ([0].pack('C')*(from-@data.length)) if @data.length < from
    @data[from..-1] = val.data
  else # patch end of real data with fully virtual
    @data = @data[0, from]
  end
  val.export.each { |name, off| @export[name] = from + off }
  val.inv_export.each { |off, name| @inv_export[from+off] = name }
  val.reloc.each { |off, rel| @reloc[from + off] = rel }
end
|
|
1153
|
+
|
|
1154
|
+
# Overwrites part of self with +content+, starting at +from+.
# +from+ and +to+ are Integer offsets or export names (resolved via @export);
# a RuntimeError is raised for anything that does not resolve to an Integer.
# +content+ must fit between +from+ and +to+: an EncodeError is raised when
# content.length exceeds to - from. Exactly content.length bytes starting at
# +from+ are replaced (through #[]=).
def patch(from, to, content)
  start = @export[from] || from
  raise "invalid offset specification #{start}" unless start.kind_of? Integer
  stop = @export[to] || to
  raise "invalid offset specification #{stop}" unless stop.kind_of? Integer
  size = content.length
  raise EncodeError, 'cannot patch data: new content too long' if stop - start < size
  self[start, size] = content
end
|
|
1166
|
+
|
|
1167
|
+
# Searches @data for +pat+ (a Regexp, or a String matched literally) and
# returns the Array of offsets where it matches.
# @data is scanned in chunks of +chunksz+ bytes (4MB by default), each read
# with +margin+ extra bytes (64KB by default) so that matches straddling a
# chunk boundary are found; a match starting inside the margin is left to the
# next chunk.
# When a block is given, each offset is yielded and only kept in the result
# if the block returns a true value.
# After a match the scan restarts one byte further, so overlapping matches are
# all reported (e.g. /a*bc/ matches 5 times in 'aaaabc').
def pattern_scan(pat, chunksz=nil, margin=nil)
  chunksz ||= 4*1024*1024
  margin ||= 65536
  pat = Regexp.new(Regexp.escape(pat)) if pat.kind_of? ::String

  hits = []
  base = 0
  until base >= @data.length
    window = @data[base, chunksz+margin].to_str
    cursor = 0
    while (rel = (window[cursor..-1] =~ pat))
      pos = cursor + rel
      break if pos >= chunksz # match fully in margin
      addr = base + pos
      hits << addr if !block_given? || yield(addr)
      cursor = pos + 1
    end
    base += chunksz
  end
  hits
end
|
|
1192
|
+
end
|
|
1193
|
+
end
|