metasm 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (192) hide show
  1. data/BUGS +11 -0
  2. data/CREDITS +17 -0
  3. data/README +270 -0
  4. data/TODO +114 -0
  5. data/doc/code_organisation.txt +146 -0
  6. data/doc/const_missing.txt +16 -0
  7. data/doc/core_classes.txt +75 -0
  8. data/doc/feature_list.txt +53 -0
  9. data/doc/index.txt +59 -0
  10. data/doc/install_notes.txt +170 -0
  11. data/doc/style.css +3 -0
  12. data/doc/use_cases.txt +18 -0
  13. data/lib/metasm.rb +80 -0
  14. data/lib/metasm/arm.rb +12 -0
  15. data/lib/metasm/arm/debug.rb +39 -0
  16. data/lib/metasm/arm/decode.rb +167 -0
  17. data/lib/metasm/arm/encode.rb +77 -0
  18. data/lib/metasm/arm/main.rb +75 -0
  19. data/lib/metasm/arm/opcodes.rb +177 -0
  20. data/lib/metasm/arm/parse.rb +130 -0
  21. data/lib/metasm/arm/render.rb +55 -0
  22. data/lib/metasm/compile_c.rb +1457 -0
  23. data/lib/metasm/dalvik.rb +8 -0
  24. data/lib/metasm/dalvik/decode.rb +196 -0
  25. data/lib/metasm/dalvik/main.rb +60 -0
  26. data/lib/metasm/dalvik/opcodes.rb +366 -0
  27. data/lib/metasm/decode.rb +213 -0
  28. data/lib/metasm/decompile.rb +2659 -0
  29. data/lib/metasm/disassemble.rb +2068 -0
  30. data/lib/metasm/disassemble_api.rb +1280 -0
  31. data/lib/metasm/dynldr.rb +1329 -0
  32. data/lib/metasm/encode.rb +333 -0
  33. data/lib/metasm/exe_format/a_out.rb +194 -0
  34. data/lib/metasm/exe_format/autoexe.rb +82 -0
  35. data/lib/metasm/exe_format/bflt.rb +189 -0
  36. data/lib/metasm/exe_format/coff.rb +455 -0
  37. data/lib/metasm/exe_format/coff_decode.rb +901 -0
  38. data/lib/metasm/exe_format/coff_encode.rb +1078 -0
  39. data/lib/metasm/exe_format/dex.rb +457 -0
  40. data/lib/metasm/exe_format/dol.rb +145 -0
  41. data/lib/metasm/exe_format/elf.rb +923 -0
  42. data/lib/metasm/exe_format/elf_decode.rb +979 -0
  43. data/lib/metasm/exe_format/elf_encode.rb +1375 -0
  44. data/lib/metasm/exe_format/macho.rb +827 -0
  45. data/lib/metasm/exe_format/main.rb +228 -0
  46. data/lib/metasm/exe_format/mz.rb +164 -0
  47. data/lib/metasm/exe_format/nds.rb +172 -0
  48. data/lib/metasm/exe_format/pe.rb +437 -0
  49. data/lib/metasm/exe_format/serialstruct.rb +246 -0
  50. data/lib/metasm/exe_format/shellcode.rb +114 -0
  51. data/lib/metasm/exe_format/xcoff.rb +167 -0
  52. data/lib/metasm/gui.rb +23 -0
  53. data/lib/metasm/gui/cstruct.rb +373 -0
  54. data/lib/metasm/gui/dasm_coverage.rb +199 -0
  55. data/lib/metasm/gui/dasm_decomp.rb +369 -0
  56. data/lib/metasm/gui/dasm_funcgraph.rb +103 -0
  57. data/lib/metasm/gui/dasm_graph.rb +1354 -0
  58. data/lib/metasm/gui/dasm_hex.rb +543 -0
  59. data/lib/metasm/gui/dasm_listing.rb +599 -0
  60. data/lib/metasm/gui/dasm_main.rb +906 -0
  61. data/lib/metasm/gui/dasm_opcodes.rb +291 -0
  62. data/lib/metasm/gui/debug.rb +1228 -0
  63. data/lib/metasm/gui/gtk.rb +884 -0
  64. data/lib/metasm/gui/qt.rb +495 -0
  65. data/lib/metasm/gui/win32.rb +3004 -0
  66. data/lib/metasm/gui/x11.rb +621 -0
  67. data/lib/metasm/ia32.rb +14 -0
  68. data/lib/metasm/ia32/compile_c.rb +1523 -0
  69. data/lib/metasm/ia32/debug.rb +193 -0
  70. data/lib/metasm/ia32/decode.rb +1167 -0
  71. data/lib/metasm/ia32/decompile.rb +564 -0
  72. data/lib/metasm/ia32/encode.rb +314 -0
  73. data/lib/metasm/ia32/main.rb +233 -0
  74. data/lib/metasm/ia32/opcodes.rb +872 -0
  75. data/lib/metasm/ia32/parse.rb +327 -0
  76. data/lib/metasm/ia32/render.rb +91 -0
  77. data/lib/metasm/main.rb +1193 -0
  78. data/lib/metasm/mips.rb +11 -0
  79. data/lib/metasm/mips/compile_c.rb +7 -0
  80. data/lib/metasm/mips/decode.rb +253 -0
  81. data/lib/metasm/mips/encode.rb +51 -0
  82. data/lib/metasm/mips/main.rb +72 -0
  83. data/lib/metasm/mips/opcodes.rb +443 -0
  84. data/lib/metasm/mips/parse.rb +51 -0
  85. data/lib/metasm/mips/render.rb +43 -0
  86. data/lib/metasm/os/gnu_exports.rb +270 -0
  87. data/lib/metasm/os/linux.rb +1112 -0
  88. data/lib/metasm/os/main.rb +1686 -0
  89. data/lib/metasm/os/remote.rb +527 -0
  90. data/lib/metasm/os/windows.rb +2027 -0
  91. data/lib/metasm/os/windows_exports.rb +745 -0
  92. data/lib/metasm/parse.rb +876 -0
  93. data/lib/metasm/parse_c.rb +3938 -0
  94. data/lib/metasm/pic16c/decode.rb +42 -0
  95. data/lib/metasm/pic16c/main.rb +17 -0
  96. data/lib/metasm/pic16c/opcodes.rb +68 -0
  97. data/lib/metasm/ppc.rb +11 -0
  98. data/lib/metasm/ppc/decode.rb +264 -0
  99. data/lib/metasm/ppc/decompile.rb +251 -0
  100. data/lib/metasm/ppc/encode.rb +51 -0
  101. data/lib/metasm/ppc/main.rb +129 -0
  102. data/lib/metasm/ppc/opcodes.rb +410 -0
  103. data/lib/metasm/ppc/parse.rb +52 -0
  104. data/lib/metasm/preprocessor.rb +1277 -0
  105. data/lib/metasm/render.rb +130 -0
  106. data/lib/metasm/sh4.rb +8 -0
  107. data/lib/metasm/sh4/decode.rb +336 -0
  108. data/lib/metasm/sh4/main.rb +292 -0
  109. data/lib/metasm/sh4/opcodes.rb +381 -0
  110. data/lib/metasm/x86_64.rb +12 -0
  111. data/lib/metasm/x86_64/compile_c.rb +1025 -0
  112. data/lib/metasm/x86_64/debug.rb +59 -0
  113. data/lib/metasm/x86_64/decode.rb +268 -0
  114. data/lib/metasm/x86_64/encode.rb +264 -0
  115. data/lib/metasm/x86_64/main.rb +135 -0
  116. data/lib/metasm/x86_64/opcodes.rb +118 -0
  117. data/lib/metasm/x86_64/parse.rb +68 -0
  118. data/misc/bottleneck.rb +61 -0
  119. data/misc/cheader-findpppath.rb +58 -0
  120. data/misc/hexdiff.rb +74 -0
  121. data/misc/hexdump.rb +55 -0
  122. data/misc/metasm-all.rb +13 -0
  123. data/misc/objdiff.rb +47 -0
  124. data/misc/objscan.rb +40 -0
  125. data/misc/pdfparse.rb +661 -0
  126. data/misc/ppc_pdf2oplist.rb +192 -0
  127. data/misc/tcp_proxy_hex.rb +84 -0
  128. data/misc/txt2html.rb +440 -0
  129. data/samples/a.out.rb +31 -0
  130. data/samples/asmsyntax.rb +77 -0
  131. data/samples/bindiff.rb +555 -0
  132. data/samples/compilation-steps.rb +49 -0
  133. data/samples/cparser_makestackoffset.rb +55 -0
  134. data/samples/dasm-backtrack.rb +38 -0
  135. data/samples/dasmnavig.rb +318 -0
  136. data/samples/dbg-apihook.rb +228 -0
  137. data/samples/dbghelp.rb +143 -0
  138. data/samples/disassemble-gui.rb +102 -0
  139. data/samples/disassemble.rb +133 -0
  140. data/samples/dump_upx.rb +95 -0
  141. data/samples/dynamic_ruby.rb +1929 -0
  142. data/samples/elf_list_needed.rb +46 -0
  143. data/samples/elf_listexports.rb +33 -0
  144. data/samples/elfencode.rb +25 -0
  145. data/samples/exeencode.rb +128 -0
  146. data/samples/factorize-headers-elfimports.rb +77 -0
  147. data/samples/factorize-headers-peimports.rb +109 -0
  148. data/samples/factorize-headers.rb +43 -0
  149. data/samples/gdbclient.rb +583 -0
  150. data/samples/generate_libsigs.rb +102 -0
  151. data/samples/hotfix_gtk_dbg.rb +59 -0
  152. data/samples/install_win_env.rb +78 -0
  153. data/samples/lindebug.rb +924 -0
  154. data/samples/linux_injectsyscall.rb +95 -0
  155. data/samples/machoencode.rb +31 -0
  156. data/samples/metasm-shell.rb +91 -0
  157. data/samples/pe-hook.rb +69 -0
  158. data/samples/pe-ia32-cpuid.rb +203 -0
  159. data/samples/pe-mips.rb +35 -0
  160. data/samples/pe-shutdown.rb +78 -0
  161. data/samples/pe-testrelocs.rb +51 -0
  162. data/samples/pe-testrsrc.rb +24 -0
  163. data/samples/pe_listexports.rb +31 -0
  164. data/samples/peencode.rb +19 -0
  165. data/samples/peldr.rb +494 -0
  166. data/samples/preprocess-flatten.rb +19 -0
  167. data/samples/r0trace.rb +308 -0
  168. data/samples/rubstop.rb +399 -0
  169. data/samples/scan_pt_gnu_stack.rb +54 -0
  170. data/samples/scanpeexports.rb +62 -0
  171. data/samples/shellcode-c.rb +40 -0
  172. data/samples/shellcode-dynlink.rb +146 -0
  173. data/samples/source.asm +34 -0
  174. data/samples/struct_offset.rb +47 -0
  175. data/samples/testpe.rb +32 -0
  176. data/samples/testraw.rb +45 -0
  177. data/samples/win32genloader.rb +132 -0
  178. data/samples/win32hooker-advanced.rb +169 -0
  179. data/samples/win32hooker.rb +96 -0
  180. data/samples/win32livedasm.rb +33 -0
  181. data/samples/win32remotescan.rb +133 -0
  182. data/samples/wintrace.rb +92 -0
  183. data/tests/all.rb +8 -0
  184. data/tests/dasm.rb +39 -0
  185. data/tests/dynldr.rb +35 -0
  186. data/tests/encodeddata.rb +132 -0
  187. data/tests/ia32.rb +82 -0
  188. data/tests/mips.rb +116 -0
  189. data/tests/parse_c.rb +239 -0
  190. data/tests/preprocessor.rb +269 -0
  191. data/tests/x86_64.rb +62 -0
  192. metadata +255 -0
@@ -0,0 +1,327 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/ia32/opcodes'
8
+ require 'metasm/ia32/encode'
9
+ require 'metasm/parse'
10
+
11
+ module Metasm
12
+ class Ia32
13
class ModRM
  # Parses a memory operand such as 'dword ptr fs:[eax + 4*ecx + 12]'.
  #
  # lexer:: token source, used through readtok, unreadtok, skip_space and skip_space_eol
  # otok::  first token of the argument (already read by the caller), also used to raise errors
  # cpu::   used to turn names found in the address expression into registers (cpu.str_to_reg)
  #
  # Returns a new ModRM, a bare SegReg when the argument is only a segment register,
  # or nil when the tokens do not start a modrm.
  # Must be called before the SegReg parser (which could match only the seg part of a modrm).
  def self.parse(lexer, otok, cpu)
    tok = otok

    # optional operand size specifier (byte, word, ..., _<n>bits), optionally followed by 'ptr'
    if tok and tok.type == :string and tok.raw =~ /^(?:byte|[dqo]?word|_(\d+)bits)$/
      if $1
        ptsz = $1.to_i
      else
        ptsz =
          case tok.raw
          when 'byte' then 8
          when 'word' then 16
          when 'dword' then 32
          when 'qword' then 64
          when 'oword' then 128
          else raise otok, 'mrm: bad ptr size'
          end
      end
      lexer.skip_space
      tok = lexer.readtok
      if tok and tok.type == :string and tok.raw == 'ptr'
        lexer.skip_space
        tok = lexer.readtok
      end
    end

    # optional segment selector prefix ('fs:'); a lone segment register is returned as is
    if tok and tok.type == :string and seg = SegReg.s_to_i[tok.raw]
      lexer.skip_space
      seg = SegReg.new(seg)
      ntok = lexer.readtok
      unless ntok and ntok.type == :punct and ntok.raw == ':'
        raise otok, 'invalid modrm' if ptsz
        lexer.unreadtok ntok
        return seg
      end
      lexer.skip_space
      tok = lexer.readtok
    end

    # from here on we need the opening bracket
    unless tok and tok.type == :punct and tok.raw == '['
      raise otok, 'invalid modrm' if ptsz or seg
      return
    end
    lexer.skip_space_eol

    # support fasm syntax [fs:eax] for segment selector
    tok = lexer.readtok
    if tok and tok.type == :string and not seg and seg = SegReg.s_to_i[tok.raw]
      ntok = lexer.readtok
      raise otok, 'invalid modrm' unless ntok and ntok.type == :punct and ntok.raw == ':'
      seg = SegReg.new(seg)
      lexer.skip_space_eol
    else
      lexer.unreadtok tok
    end

    # the bracket content is read as a generic expression, then the closing bracket is required
    content = Expression.parse(lexer)
    lexer.skip_space_eol
    raise(otok, 'bad modrm') if not content
    ntok = lexer.readtok
    raise(otok, 'bad modrm') unless ntok and ntok.type == :punct and ntok.raw == ']'

    # replaces, in place, the Strings naming a register by the matching Reg
    regify = lambda do |o|
      case o
      when Expression
        o.lexpr = regify[o.lexpr]
        o.rexpr = regify[o.rexpr]
        o
      when String
        cpu.str_to_reg(o) || o
      else
        o
      end
    end

    # scale, index, base, displacement: filled by the walker below
    s = i = b = imm = nil

    # dispatches every term of the sum to the base, the (scaled) index or the immediate
    walker = lambda do |o|
      case o
      when nil
        # empty side of a unary expression: nothing to do
      when Reg
        if b
          # second bare register: it becomes the index, with a scale of 1
          raise otok, 'mrm: too many regs' if i
          i = o
          s = 1
        else
          b = o
        end
      when Expression
        if o.op == :* and (o.rexpr.kind_of? Reg or o.lexpr.kind_of? Reg)
          # scaled index, written either scale*reg or reg*scale
          raise otok, 'mrm: too many indexes' if i
          s = o.lexpr
          i = o.rexpr
          s, i = i, s if s.kind_of? Reg
          raise otok, 'mrm: bad scale' unless s.kind_of? Integer
        elsif o.op == :+
          walker[o.lexpr]
          walker[o.rexpr]
        else
          # found (a part of) the immediate
          imm = Expression[imm, :+, o]
        end
      else
        # found (a part of) the immediate
        imm = Expression[imm, :+, o]
      end
    end

    walker[regify[content.reduce]]

    # a register left inside the immediate means the expression is not a valid address
    raise otok, 'mrm: reg in imm' if imm.kind_of? Expression and not imm.externals.grep(Reg).empty?

    # default address size comes from the base register, else from the index register
    adsz = b ? b.sz : i ? i.sz : nil
    # ptsz may be nil now, will be fixed up later (in parse_instr_fixup) to match another instruction argument's size
    new adsz, ptsz, s, i, b, imm, seg
  end
end
136
+
137
+
138
# Handles the cpu-specific parser directives '.mode' and '.bits' (case-insensitive),
# which switch the cpu size to 16 or 32; any other keyword goes to the ancestor's version.
# Raises instr on an unsupported size or when the line continues after the size.
# XXX changing the cpu size in the middle of the code may have baaad effects...
def parse_parser_instruction(lexer, instr)
  keyword = instr.raw.downcase
  return super(lexer, instr) unless keyword == '.mode' or keyword == '.bits'

  lexer.skip_space
  tok = lexer.readtok
  if not tok or tok.type != :string or not %w[16 32].include?(tok.raw)
    raise instr, 'invalid cpu mode'
  end

  @size = tok.raw.to_i
  lexer.skip_space
  # only the end of line may follow the size
  ntok = lexer.nexttok
  raise instr, 'syntax error' if ntok and ntok.type != :eol
end
154
+
155
# Records the instruction prefix named pfx in i.prefix (the hash is created on demand).
# Returns the stored value (always true-ish) when pfx is a known prefix, nil otherwise.
# XXX check for redefinition ?
def parse_prefix(i, pfx)
  i.prefix ||= {}
  key, value =
    case pfx
    when 'lock' then [:lock, true]
    when 'rep' then [:rep, 'rep']
    when 'repe', 'repz' then [:rep, 'repz']
    when 'repne', 'repnz' then [:rep, 'repnz']
    when 'code16' then [:sz, 16]
    when 'code32' then [:sz, 32]
    end
  i.prefix[key] = value if key
end
168
+
169
# Lists the register classes tried when parsing an argument.
# The floating-point register class must stay last: parse_argument relies on it.
def parse_argregclasslist
  classes = [Reg, SimdReg, SegReg, DbgReg, CtrlReg]
  classes << FpReg
end
172
# Delegates the parsing of a memory operand to ModRM.parse and returns its result.
def parse_modrm(lex, tok, cpu)
  parsed = ModRM.parse(lex, tok, cpu)
  parsed
end
175
+
176
# Parses an arbitrary ia32 instruction argument.
# lexer may be a String, in which case it is wrapped in an AsmPreprocessor.
# Returns nil when no token is available, otherwise the parsed argument:
# a register object, the result of parse_modrm, a Farptr, or an Expression
# (nil when no expression could be read).
def parse_argument(lexer)
  lexer = AsmPreprocessor.new(lexer) if lexer.kind_of? String

  # reserved names (registers/segments etc), computed once
  unless @args_token
    reserved = {}
    parse_argregclasslist.map { |a| a.s_to_i.keys }.flatten.each { |name| reserved.update name => true }
    @args_token = reserved
  end

  lexer.skip_space
  tok = lexer.readtok
  return if not tok

  # floating-point register written as ST(n)
  if tok.type == :string and tok.raw == 'ST'
    lexer.skip_space
    ntok = lexer.readtok
    if ntok and ntok.type == :punct and ntok.raw == '('
      lexer.skip_space
      nntok = lexer.readtok
      has_digit = (nntok and nntok.type == :string and nntok.raw =~ /^[0-9]$/)
      if has_digit
        lexer.skip_space
        ntok = lexer.readtok
      end
      unless has_digit and ntok and ntok.type == :punct and ntok.raw == ')'
        raise tok, 'invalid FP register'
      end

      # rebuild the full register name in the token itself
      tok.raw << '(' << nntok.raw << ')'
      fpr = parse_argregclasslist.last
      raise tok, 'invalid FP register' if not fpr.s_to_i.has_key? tok.raw
      return fpr.new(fpr.s_to_i[tok.raw])
    else
      lexer.unreadtok ntok
    end
  end

  if ret = parse_modrm(lexer, tok, self)
    ret
  elsif @args_token[tok.raw]
    parse_argregclasslist.each do |a|
      return a.from_str(tok.raw) if a.s_to_i.has_key? tok.raw
    end
    raise tok, 'internal error'
  else
    lexer.unreadtok tok
    expr = Expression.parse(lexer)
    lexer.skip_space

    # may be a farptr
    if expr and ntok = lexer.readtok and ntok.type == :punct and ntok.raw == ':'
      addr = Expression.parse(lexer)
      raise tok, 'invalid farptr' if not addr
      Farptr.new expr, addr
    else
      lexer.unreadtok ntok
      Expression[expr.reduce] if expr
    end
  end
end
229
+
230
# Checks if the argument matches the opcode's argument spec.
# o is the candidate opcode, spec one of its formal argument symbols, arg the parsed argument.
# Returns a true-ish value when arg is acceptable, false/nil otherwise.
# Raises EncodeError on an unknown spec.
def parse_arg_valid?(o, spec, arg)
  # movsx/movzx mix two operand sizes, so they get dedicated rules
  if o.name == 'movsx' or o.name == 'movzx'
    return if not arg.kind_of?(Reg) and not arg.kind_of?(ModRM)
    if not arg.sz
      puts "ambiguous arg size for indirection in #{o.name}" if $VERBOSE
      return
    end
    # reg=dst, modrm=src (smaller)
    return (arg.kind_of?(Reg) and arg.sz >= 16) if spec == :reg
    return arg.sz == o.props[:argsz] if o.props[:argsz]
    return arg.sz <= 16
  end

  # an explicit address size on both sides must agree
  return false if arg.kind_of?(ModRM) and arg.adsz and o.props[:adsz] and arg.adsz != o.props[:adsz]

  # an opcode with a fixed argument size only accepts unsized or same-sized arguments
  argsz = o.props[:argsz]
  if argsz and (arg.kind_of?(Reg) or arg.kind_of?(ModRM))
    return false unless !arg.sz or arg.sz == argsz or spec == :reg_dx
  end

  case spec
  when :reg then arg.kind_of?(Reg) and (arg.sz >= 16 or o.props[:argsz])
  when :modrm then (arg.kind_of?(ModRM) or arg.kind_of?(Reg)) and (!arg.sz or arg.sz >= 16 or o.props[:argsz])
  when :i then arg.kind_of?(Expression)
  when :imm_val1 then arg.kind_of?(Expression) and arg.reduce == 1
  when :imm_val3 then arg.kind_of?(Expression) and arg.reduce == 3
  when :reg_eax then arg.kind_of?(Reg) and arg.val == 0
  when :reg_cl then arg.kind_of?(Reg) and arg.val == 1 and arg.sz == 8
  when :reg_dx then arg.kind_of?(Reg) and arg.val == 2 and arg.sz == 16
  when :seg3 then arg.kind_of?(SegReg)
  when :seg3A then arg.kind_of?(SegReg) and arg.val > 3
  when :seg2 then arg.kind_of?(SegReg) and arg.val < 4
  when :seg2A then arg.kind_of?(SegReg) and arg.val < 4 and arg.val != 1
  when :eeec then arg.kind_of?(CtrlReg)
  when :eeed then arg.kind_of?(DbgReg)
  when :modrmA then arg.kind_of?(ModRM)
  when :mrm_imm then arg.kind_of?(ModRM) and not arg.s and not arg.i and not arg.b
  when :farptr then arg.kind_of?(Farptr)
  when :regfp then arg.kind_of?(FpReg)
  when :regfp0 then arg.kind_of?(FpReg) and (arg.val == nil or arg.val == 0)
  when :modrmmmx then arg.kind_of?(ModRM) or (arg.kind_of?(SimdReg) and (arg.sz == 64 or (arg.sz == 128 and o.props[:xmmx])))
  when :regmmx then arg.kind_of?(SimdReg) and (arg.sz == 64 or (arg.sz == 128 and o.props[:xmmx]))
  when :modrmxmm then arg.kind_of?(ModRM) or (arg.kind_of?(SimdReg) and arg.sz == 128)
  when :regxmm then arg.kind_of?(SimdReg) and arg.sz == 128
  when :i8, :u8, :u16
    # in_range? may answer nil (unknown), which is accepted along with true
    # jz 0x28282828 may fit in :i8 depending on instr addr
    arg.kind_of?(Expression) and
      (o.props[:setip] or Expression.in_range?(arg, spec) != false)
  else
    raise EncodeError, "Internal error: unknown argument specification #{spec.inspect}"
  end
end
286
+
287
# Normalizes the two-argument 'imul reg, imm' form into 'imul reg, reg, imm'
# by duplicating the register, then hands over to the ancestor's check.
def parse_instruction_checkproto(i)
  if i.opname == 'imul'
    args = i.args
    if args.length == 2 and args.first.kind_of?(Reg) and args.last.kind_of?(Expression)
      args.unshift(args.first.dup)
    end
  end
  super(i)
end
296
+
297
# Fixes up the sz and adsz of the first ModRM argument of i when they are unset.
# sz defaults, in order, to: 8 for movzx/movsx, the size of the first Reg argument,
# the argsz shared by every opcode of that name, or the current mode size.
# adsz defaults to the adsz shared by every opcode of that name, or the current mode size.
# The current mode size is the :sz prefix of the instruction if any, else the cpu size.
def parse_instruction_fixup(i)
  m = i.args.grep(ModRM).first
  return if not m

  # i.prefix is nil for an instruction that never received a prefix hash
  mode_sz = (i.prefix ? i.prefix[:sz] : nil) || @size

  if not m.sz
    if i.opname == 'movzx' or i.opname == 'movsx'
      m.sz = 8
    elsif r = i.args.grep(Reg).first
      m.sz = r.sz
    elsif opcode_list_byname[i.opname].all? { |o| o.props[:argsz] }
      m.sz = opcode_list_byname[i.opname].first.props[:argsz]
    else
      # this is also the size of ctrlreg/dbgreg etc
      # XXX fpu/simd ?
      m.sz = mode_sz
    end
  end

  if not m.adsz
    candidates = opcode_list_byname[i.opname]
    if candidates.all? { |o| o.props[:adsz] }
      m.adsz = candidates.first.props[:adsz]
    else
      m.adsz = mode_sz
    end
  end
end
322
+
323
# Builds, through parse_instruction, the unconditional jump to target.
def instr_uncond_jump_to(target)
  source = "jmp #{target}"
  parse_instruction(source)
end
326
+ end
327
+ end
@@ -0,0 +1,91 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/ia32/opcodes'
8
+ require 'metasm/render'
9
+
10
+ # XXX move context in another file ?
11
+ module Metasm
12
+ class Ia32
13
+ class Argument # reopened here only to mix the rendering support in
14
+ include Renderable # Renderable comes from the 'metasm/render' file required above
15
+ end
16
+
17
# registers named from their value alone: render looks the name up in the class i_to_s table
[SegReg, DbgReg, CtrlReg, FpReg].each do |klass|
  klass.class_eval do
    def render
      [self.class.i_to_s[@val]]
    end
  end
end

# registers named from their size and value; their context offers to change the size
[Reg, SimdReg].each do |klass|
  klass.class_eval do
    def render
      [self.class.i_to_s[@sz][@val]]
    end

    def context
      {'set sz' => lambda { |s| @sz = s }}
    end
  end
end
24
+
25
class Farptr
  # Renders the far pointer as segment, ':' then address.
  def render
    parts = []
    parts << @seg << ':' << @addr
  end
end
30
+
31
class ModRM
  # Returns the keyword naming a pointed size of sz bits
  # ('byte', 'word', ...), or '_<sz>bits' for a size without a keyword.
  def qualifier(sz)
    {
      8   => 'byte',
      16  => 'word',
      32  => 'dword',
      64  => 'qword',
      128 => 'oword'
    }.fetch(sz) { |k| "_#{sz}bits" }
  end

  # the instruction holding this modrm, set by render_instruction
  attr_accessor :instruction

  # Renders the modrm as an array of strings and objects:
  # optional size qualifier, optional segment, then the bracketed address expression.
  def render
    r = []
    # the qualifier is left out when a Reg argument of the instruction already has the same size
    r << "#{qualifier(@sz)} ptr " if @sz and (not instruction or not @instruction.args.find { |a| a.kind_of? Reg and a.sz == @sz })
    r << @seg << ':' if seg

    e = nil
    e = Expression[e, :+, @b] if b
    e = Expression[e, :+, @imm] if imm
    # a scale of 1 is not displayed
    e = Expression[e, :+, (@s == 1 ? @i : [@s, :*, @i])] if s
    r << '[' << e << ']'
  end

  # Returns the hash of actions offered on this modrm (label => lambda).
  def context
    {'set targetsz' => lambda { |s| @sz = s },
     'set seg' => lambda { |s| @seg = SegReg.new s }}
  end
end
60
+
61
# Renders an instruction as an array of strings and arguments:
# lock/rep prefixes, the opcode name, then the arguments separated by ', '.
# Every ModRM argument gets a reference to the instruction for its own rendering.
def render_instruction(i)
  pfx = i.prefix
  out = []
  out << 'lock ' if pfx and pfx[:lock]
  if pfx and pfx[:rep]
    out << pfx[:rep] << ' '
  end
  out << i.opname
  i.args.each do |arg|
    arg.instruction = i if arg.kind_of? ModRM
    # a single space after the opcode name, a comma between arguments
    separator = (out.last == i.opname) ? ' ' : ', '
    out << separator << arg
  end
  out
end
72
+
73
# Returns the hash of actions offered on an instruction (label => lambda):
# rep prefix handling, segment override removal and lock toggling.
# Only the first opcode registered under the instruction name is consulted.
# XXX
def instruction_context(i)
  actions = {}
  op = opcode_list_byname[i.opname].first
  with_seg = lambda { |a| a.kind_of? ModRM and a.seg }

  if i.prefix and i.prefix[:rep]
    if op.props[:stropz]
      # NOTE(review): the label is spelled 'toogle'; kept as is since callers may rely on it
      actions['toogle repz'] = lambda { i.prefix[:rep] = {'repnz' => 'repz', 'repz' => 'repnz'}[i.prefix[:rep]] }
    end
    actions['rm rep'] = lambda { i.prefix.delete :rep }
  else
    if op.props[:strop]
      actions['set rep'] = lambda { (i.prefix ||= {})[:rep] = 'rep' }
    end
    # the repz flavour wins when the opcode has both properties
    if op.props[:stropz]
      actions['set rep'] = lambda { (i.prefix ||= {})[:rep] = 'repz' }
    end
  end

  if i.args.find(&with_seg)
    actions['rm seg'] = lambda { i.args.find(&with_seg).seg = nil }
  end

  actions['toggle lock'] = lambda { (i.prefix ||= {})[:lock] = !i.prefix[:lock] }
  actions
end
90
+ end
91
+ end
@@ -0,0 +1,1193 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ module Metasm
8
+
9
+ VERSION = 0x0001 # major major minor minor
10
+
11
+ # superclass for all metasm exceptions (derives from RuntimeError, so a bare rescue catches it)
12
+ class Exception < RuntimeError ; end
13
+ # parse error: this is what Backtrace#exception builds, prefixed with the source location
14
+ class ParseError < Exception ; end
15
+ # invalid exeformat signature
16
+ class InvalidExeFormat < Exception ; end
17
+ # cannot honor .offset specification, reloc fixup overflow
18
+ class EncodeError < Exception ; end
19
+
20
# Holds the context of a processor:
# endianness, current mode (size), opcode list...
class CPU
  attr_accessor :valid_args, :valid_props, :fields_mask,
                :endianness, :size,
                :generate_PIC

  def initialize
    @fields_mask = {}
    @fields_shift= {}
    @valid_args = []
    @valid_props = [:setip, :saveip, :stopexec]
    @generate_PIC = true
  end

  # the list of opcodes, built on first use by init_opcode_list (not defined in this class)
  def opcode_list
    @opcode_list = init_opcode_list unless @opcode_list
    @opcode_list
  end

  def opcode_list=(l)
    @opcode_list = l
  end

  # returns a hash opcode_name => array of opcodes with this name (computed once)
  def opcode_list_byname
    return @opcode_list_byname if @opcode_list_byname
    byname = {}
    opcode_list.each { |o| (byname[o.name] ||= []) << o }
    @opcode_list_byname = byname
  end

  # sets up the C parser : standard macro definitions, type model (size of int etc)
  def tune_cparser(cp)
    if @size == 64
      cp.lp64
    elsif @size == 32
      cp.ilp32
    elsif @size == 16
      cp.ilp16
    end
    cp.endianness = @endianness
    cp.lexer.define_weak('_STDC', 1)
    # TODO gcc -dM -E - </dev/null
    tune_prepro(cp.lexer)
  end

  # hook to customize a preprocessor; does nothing here
  def tune_prepro(pp)
    # TODO pp.define('BIGENDIAN')
  end

  # return a new AsmPreprocessor, tuned by this cpu then by exe (if given)
  def new_asmprepro(str='', exe=nil)
    prepro = AsmPreprocessor.new(str, exe)
    tune_prepro(prepro)
    exe.tune_prepro(prepro) if exe
    prepro
  end

  # returns a new C::Parser created for this cpu
  def new_cparser
    C::Parser.new(self)
  end

  # returns a new C::Compiler; exe is bound to this cpu unless it already has one
  def new_ccompiler(parser, exe=ExeFormat.new)
    exe.cpu = self unless exe.instance_variable_get("@cpu")
    C::Compiler.new(parser, exe)
  end

  # the class name, without its namespace, in lowercase
  def shortname
    unqualified = self.class.name.sub(/.*::/, '')
    unqualified.downcase
  end
end
85
+
86
# generic CPU, with no instructions, just size/endianness
class UnknownCPU < CPU
  def initialize(size, endianness)
    super()
    @size = size
    @endianness = endianness
  end
end
93
+
94
# the 'formal' description of a cpu instruction
class Opcode
  # name::     the name of the instruction
  # args::     formal description of arguments (array of cpu-specific symbols)
  # bin::      binary encoding of the opcode (integer for risc, array of bytes for cisc)
  # fields::   list of bit fields in the binary encoding, as a hash position => field;
  #            position is bit shift for risc, [byte index, bit shift] for cisc;
  #            field is cpu-specific
  # props::    hash of opcode generic properties/restrictions (mostly property => true/false)
  # bin_mask:: binary mask for decoding
  attr_accessor :name, :args, :bin, :fields, :props, :bin_mask

  # starts with no argument, no field and no property
  def initialize(name, bin=nil)
    @name, @bin = name, bin
    @args = []
    @fields = {}
    @props = {}
  end

  # the name up to (excluding) the first dot
  def basename
    suffix = /\..*/
    @name.sub(suffix, '')
  end
end
124
+
125
# defines an attribute self.backtrace (array of filename/lineno)
# and a method backtrace_str which dumps this array to a human-readable form
module Backtrace
  # array [file, lineno, file, lineno]
  # if file 'A' does #include 'B' you'll get ['A', linenoA, 'B', linenoB]
  attr_accessor :backtrace

  # builds a readable string from self.backtrace
  def backtrace_str
    Backtrace.backtrace_str(@backtrace)
  end

  # builds a readable backtrace string from an array of [file, lineno, file, lineno, ..]
  # the array is read from its end: the last pair comes first in the string
  def self.backtrace_str(ary)
    return '' if not ary
    out = ''
    idx = ary.length
    until idx <= 0
      # something was already written for the pair starting at idx
      out << ",\n\tincluded from " if ary[idx]
      idx -= 2
      out << "#{ary[idx].inspect} line #{ary[idx+1]}"
    end
    out
  end

  # returns a ParseError whose message starts with the location;
  # lets an object including this module be used as the first argument of raise
  def exception(msg='syntax error')
    location = backtrace_str
    ParseError.new "at #{location}: #{msg}"
  end
end
154
+
155
# an instruction: opcode name + arguments
class Instruction
  include Backtrace

  # args::   arguments (cpu-specific objects)
  # prefix:: hash of prefixes (unused in simple cpus)
  # opname:: name of the associated opcode
  # cpu::    reference to the cpu which issued this instruction (used for rendering)
  attr_accessor :args, :prefix, :opname, :cpu

  # the prefix is left unset (nil) unless pfx is given
  def initialize(cpu, opname=nil, args=[], pfx=nil, backtrace=nil)
    @cpu = cpu
    @opname = opname
    @args = args
    @prefix = pfx if pfx
    @backtrace = backtrace
  end

  # duplicates the argument list and prefix hash
  # (also the name and the backtrace; the arguments themselves are shared)
  def dup
    name_copy = opname ? @opname.dup : nil
    pfx_copy = prefix ? @prefix.dup : nil
    bt_copy = backtrace ? @backtrace.dup : nil
    Instruction.new(@cpu, name_copy, @args.dup, pfx_copy, bt_copy)
  end
end
181
+
182
# all kind of data description (including repeated/uninitialized)
class Data
  include Backtrace

  # maps data type to Expression parameters (signedness/bit size)
  INT_TYPE = {'db' => :a8, 'dw' => :a16, 'dd' => :a32, 'dq' => :a64}

  # data::  an Expression, an Array of Data, a String, or :uninitialized
  # type::  the data type, from INT_TYPE (TODO store directly Expression parameters ?)
  # count:: the repetition count of the data parameter (dup constructs)
  attr_accessor :data, :type, :count

  # note the argument order: the type comes first
  def initialize(type, data, count=1, backtrace=nil)
    @type = type
    @data = data
    @count = count
    @backtrace = backtrace
  end
end
200
+
201
# a name for a location
class Label
  include Backtrace

  attr_accessor :name

  def initialize(name, backtrace=nil)
    @name = name
    @backtrace = backtrace
  end
end
211
+
212
# alignment directive
class Align
  include Backtrace

  # val::      the size to align to
  # fillwith:: the Data used to pad
  attr_accessor :val, :fillwith

  def initialize(val, fillwith=nil, backtrace=nil)
    @val = val
    @fillwith = fillwith
    @backtrace = backtrace
  end
end
225
+
226
# padding directive
class Padding
  include Backtrace

  # Data used to pad
  attr_accessor :fillwith

  def initialize(fillwith=nil, backtrace=nil)
    @fillwith = fillwith
    @backtrace = backtrace
  end
end
237
+
238
# offset directive
# can be used to fix padding length or to assert some code/data compiled length
class Offset
  include Backtrace

  # the assembler will arrange to make this pseudo-instruction
  # be at this offset from beginning of current section
  attr_accessor :val

  def initialize(val, backtrace=nil)
    @val = val
    @backtrace = backtrace
  end
end
251
+
252
# the superclass of all real executable formats
# main methods:
#  self.decode(str) => decodes the file format (imports/relocs/etc), no asm disassembly
#  parse(source) => parses assembler source, fills self.source
#  assemble => assembles self.source in binary sections/segments/whatever
#  encode => builds imports/relocs tables, put all this together, links everything in self.encoded
class ExeFormat
  # cursource::           array of Data/Instruction/Align/Padding/Offset/Label, populated in parse
  # encoded::             contains the binary version of the compiled program (EncodedData)
  # unique_labels_cache:: hash of labels generated by new_label
  attr_accessor :cursource, :encoded, :unique_labels_cache

  attr_writer :cpu	# custom reader

  # initializes self.cpu, creates an empty self.encoded
  def initialize(cpu=nil)
    @cpu = cpu
    @encoded = EncodedData.new
    @unique_labels_cache = {}
  end

  # the cpu; when unset it is obtained from cpu_from_headers (not defined in this class)
  def cpu
    @cpu ||= cpu_from_headers
  end

  # return the label name corresponding to the specified offset of the encodeddata, creates it if necessary
  def label_at(edata, offset, base = '')
    label = edata.inv_export[offset]
    return label if label

    label = new_label(base)
    edata.add_export(label, offset)
    label
  end

  # creates a new label, that is guaranteed to never be returned again as long as this object (ExeFormat) exists
  # characters outside a-z A-Z 0-9 _ are replaced by '_'
  def new_label(base = '')
    name = base.dup.tr('^a-zA-Z0-9_', '_')
    if name.empty? or @unique_labels_cache[name]
      # empty or already used: append a suffix built from the object id of the string
      # use %x instead of to_s(16) for negative values
      name = (name << '_uuid' << ('%08x' % name.object_id)).freeze
    end
    @unique_labels_cache[name] = true
    name
  end

  # share self.unique_labels_cache with other, checks for conflicts, returns self
  def share_namespace(other)
    return self if other.unique_labels_cache.equal? @unique_labels_cache

    clashes = other.unique_labels_cache.keys & @unique_labels_cache.keys
    raise "share_ns #{clashes.inspect}" if !clashes.empty?

    @unique_labels_cache.update other.unique_labels_cache
    other.unique_labels_cache = @unique_labels_cache
    self
  end
end
304
+
305
# superclass for classes similar to Expression
# must define #bind, #reduce_rec, #match_rec, #externals
class ExpressionType
  # the reduced sum of self and o
  def +(o)
    Expression[self, :+, o].reduce
  end

  # the reduced difference of self and o
  def -(o)
    Expression[self, :-, o].reduce
  end
end
311
+
312
+ # handle immediate values, and arbitrary arithmetic/logic expression involving variables
313
+ # boolean values are treated as in C : true is 1, false is 0
314
+ # TODO replace #type with #size => bits + #type => [:signed/:unsigned/:any/:floating]
315
+ # TODO handle floats
316
+ class Expression < ExpressionType
317
# per integer type (:i8, :u8, :a8, ... :a64): bit size, lowest and highest value
# 'i' types are signed, 'u' types unsigned, 'a' types accept both ranges
INT_SIZE = {}
INT_MIN = {}
INT_MAX = {}

[8, 16, 32, 64].each do |bits|
  signed = :"i#{bits}"
  unsigned = :"u#{bits}"
  any = :"a#{bits}"

  INT_SIZE[any] = bits
  INT_SIZE[unsigned] = bits
  INT_SIZE[signed] = bits

  lowest = -(1 << (bits-1))	# -0x8000
  INT_MIN[signed] = lowest
  INT_MIN[any] = lowest
  INT_MIN[unsigned] = 0

  INT_MAX[signed] = (1 << (bits-1)) - 1	# 0x7fff
  highest = (1 << bits) - 1	# 0xffff
  INT_MAX[unsigned] = highest
  INT_MAX[any] = highest
end
334
+
335
# alternative constructor
# in operands order, and allows nesting using sub-arrays
# ex: Expression[[:-, 42], :*, [1, :+, [4, :*, 7]]]
# with two arguments, they are taken as (operator, operand) of an unary expression
# with a single argument, return it if already an Expression, else construct a new one (using unary +/-)
def self.[](l, op=nil, r=nil)
  if r
    # full binary form: only the operands may need to be expanded
    l = self[*l] if l.kind_of? ::Array
  else
    if op
      # (operator, operand): shift the arguments
      op, r = l, op
    else
      raise ArgumentError, 'invalid Expression[nil]' if not l
      return l if l.kind_of? Expression
      if l.kind_of? ::Numeric and l < 0
        op, r = :'-', -l
      else
        op, r = :'+', l
      end
    end
    l = nil
  end
  r = self[*r] if r.kind_of? ::Array
  new(op, r, l)
end
362
+
363
# checks if a given Expression/Integer is in the type range
# returns true if it is, false if it overflows, and nil if cannot be determined
# (eg unresolved variable, or a type absent from INT_MIN)
def self.in_range?(val, type)
  val = val.reduce if val.kind_of? self
  return unless val.kind_of? ::Numeric

  lowest = INT_MIN[type]
  return unless lowest

  val == val.to_i and val >= lowest and val <= INT_MAX[type]
end
374
+
375
# casts an unsigned value to a two-complement signed if the sign bit is set
# non-Integer values are returned untouched
def self.make_signed(val, bitlength)
  return val unless val.kind_of? Integer

  sign_set = (val >> (bitlength - 1) == 1)
  sign_set ? val - (1 << bitlength) : val
end
382
+
383
+ # the operator (symbol)
384
+ attr_accessor :op
385
+ # the lefthandside expression (nil for unary expressions)
386
+ attr_accessor :lexpr
387
+ # the righthandside expression
388
+ attr_accessor :rexpr
389
+
390
# basic constructor
# XXX funny args order (operator, right operand, left operand), you should use +Expression[]+ instead
# raises ArgumentError if op is not a Symbol
def initialize(op, rexpr, lexpr)
  unless op.kind_of? ::Symbol
    raise ArgumentError, "Expression: invalid arg order: #{[lexpr, op, rexpr].inspect}"
  end
  @op = op
  @lexpr = lexpr
  @rexpr = rexpr
end
396
+
397
# recursive check of equality using #==
# will not match 1+2 and 2+1
def ==(o)
  # shortcircuit recursion
  return true if o.object_id == object_id
  return false unless o.kind_of?(Expression)

  @op == o.op and @lexpr == o.lexpr and @rexpr == o.rexpr
end
403
+
404
# make it useable as Hash key (see +==+)
# sum of the hashes of both operands and the operator, masked to 31 bits
def hash
  sum = @lexpr.hash + @op.hash + @rexpr.hash
  sum & 0x7fff_ffff
end
408
+ alias eql? ==
409
+
410
# returns a new Expression with all variables found in the binding replaced with their value
# if the binding has an entry for self, a dup of that entry is returned
# raises if a Numeric operand is a key of the binding
# recurses through #bind for operands that are ExpressionType
def bind(binding = {})
  whole = binding[self]
  return whole.dup if whole

  resolve = lambda { |side|
    if side and binding[side]
      raise "internal error - bound #{side.inspect}" if side.kind_of? ::Numeric
      binding[side]
    elsif side.kind_of? ExpressionType
      side.bind(binding)
    else
      side
    end
  }

  l = resolve[@lexpr]
  r = resolve[@rexpr]
  Expression[l, @op, r]
end
433
+
434
# bind in place (replace self.lexpr/self.rexpr with the binding value)
# only recurse with Expressions (does not use respond_to?)
# returns self
def bind!(binding = {})
  [:@lexpr, :@rexpr].each { |ivar|
    cur = instance_variable_get(ivar)
    if cur.kind_of?(Expression)
      cur.bind!(binding)
    elsif cur
      instance_variable_set(ivar, binding[cur] || cur)
    end
  }
  self
end
449
+
450
# reduce_lambda is a callback called after the standard reduction procedure for custom algorithms
# the lambda may return a new expression or nil (to keep the old expr)
# example: lambda { |e| e.lexpr if e.kind_of? Expression and e.op == :& and e.rexpr == 0xffff_ffff }
# when called with a block, installs it as the new callback
# returns old lambda
def self.reduce_lambda(&b)
  previous = @@reduce_lambda
  @@reduce_lambda = b if b
  previous
end

# unconditionally replaces the callback (nil disables it)
def self.reduce_lambda=(p)
  @@reduce_lambda = p
end

# no custom reduction by default
@@reduce_lambda = nil
463
+
464
# returns a simplified copy of self
# can return an +Expression+ or a +Numeric+, may return self
# see +reduce_rec+ for simplifications description
# if given a block, it will temporarily overwrite the global @@reduce_lambda XXX THIS IS NOT THREADSAFE
def reduce(&b)
  if b
    saved = @@reduce_lambda
    @@reduce_lambda = b
  end

  res = reduce_rec
  # anything that is neither an Expression nor a Numeric gets wrapped
  (res.kind_of?(Expression) or res.kind_of?(Numeric)) ? res : Expression[res]
ensure
  @@reduce_lambda = saved if b
end
477
+
478
# resolves logic operations (true || false, etc)
# computes numeric operations (1 + 3)
# expands subtractions to addition of the opposite
# reduces double-oppositions (-(-1) => 1)
# reduces addition of 0 and unary +
# canonicalize additions: put variables in the lhs, descend addition tree in the rhs => (a + (b + (c + 12)))
# make formal reduction if finds somewhere in addition tree (a) and (-a)
#
# returns self when nothing changed, a new Expression, a Numeric, or :unknown
# when one of the operands is :unknown
# the @@reduce_lambda callback, if set, is applied to non-Expression operands and to the result
def reduce_rec
  l = @lexpr.kind_of?(ExpressionType) ? @lexpr.reduce_rec : @lexpr
  r = @rexpr.kind_of?(ExpressionType) ? @rexpr.reduce_rec : @rexpr

  if @@reduce_lambda
    l = @@reduce_lambda[l] || l if not @lexpr.kind_of? Expression
    r = @@reduce_lambda[r] || r if not @rexpr.kind_of? Expression
  end

  # v is the simplified value, or nil if no rule applied
  v =
  if r.kind_of?(::Numeric) and (l == nil or l.kind_of?(::Numeric))
    # calculate numerics
    if [:'&&', :'||', :'>', :'<', :'>=', :'<=', :'==', :'!='].include?(@op)
      # bool expr
      raise 'internal error' if not l
      case @op
      when :'&&'; (l != 0) && (r != 0)
      when :'||'; (l != 0) || (r != 0)
      when :'>' ; l > r
      when :'>='; l >= r
      when :'<' ; l < r
      when :'<='; l <= r
      when :'=='; l == r
      when :'!='; l != r
      end ? 1 : 0
    elsif not l
      case @op
      when :'!'; (r == 0) ? 1 : 0
      when :+; r
      when :-; -r
      when :~; ~r
      end
    else
      # use ruby evaluator
      l.send(@op, r)
    end

  elsif @op == :'&&'
    if l == 0	# shortcircuit eval
      0
    elsif l == 1
      Expression[r, :'!=', 0].reduce_rec
    elsif r == 0
      0	# XXX l could be a special ExprType with sideeffects ?
    end
  elsif @op == :'||'
    if l.kind_of? ::Numeric and l != 0	# shortcircuit eval
      1
    elsif l == 0
      Expression[r, :'!=', 0].reduce_rec
    elsif r == 0
      Expression[l, :'!=', 0].reduce_rec
    end
  elsif @op == :>> or @op == :<<
    if l == 0; 0
    elsif r == 0; l
    elsif l.kind_of? Expression and l.op == @op
      Expression[l.lexpr, @op, [l.rexpr, :+, r]].reduce_rec
    # XXX (a >> 1) << 1  !=  a (lose low bit)
    # XXX (a << 1) >> 1  !=  a (with real cpus, lose high bit)
    # (a | b) << i
    elsif r.kind_of? Integer and l.kind_of? Expression and [:&, :|, :^].include? l.op
      Expression[[l.lexpr, @op, r], l.op, [l.rexpr, @op, r]].reduce_rec
    end
  elsif @op == :'!'
    # !(a < b) => a >= b, etc
    if r.kind_of? Expression and op = {:'==' => :'!=', :'!=' => :'==', :< => :>=, :> => :<=, :<= => :>, :>= => :<}[r.op]
      Expression[r.lexpr, op, r.rexpr].reduce_rec
    end
  elsif @op == :==
    if l == r; 1
    elsif r == 0 and l.kind_of? Expression and op = {:'==' => :'!=', :'!=' => :'==', :< => :>=, :> => :<=, :<= => :>, :>= => :<}[l.op]
      # (a < b) == 0 => a >= b
      Expression[l.lexpr, op, l.rexpr].reduce_rec
    elsif r == 1 and l.kind_of? Expression and op = {:'==' => :'!=', :'!=' => :'==', :< => :>=, :> => :<=, :<= => :>, :>= => :<}[l.op]
      # (a < b) == 1 => a < b
      l
    elsif r == 0 and l.kind_of? Expression and l.op == :+
      if l.rexpr.kind_of? Expression and l.rexpr.op == :- and not l.rexpr.lexpr
        # (a + (-b)) == 0 => a == b
        Expression[l.lexpr, @op, l.rexpr.rexpr].reduce_rec
      elsif l.rexpr.kind_of? ::Integer
        # (a + 4) == 0 => a == -4
        Expression[l.lexpr, @op, -l.rexpr].reduce_rec
      end
    end
  elsif @op == :'!='
    if l == r; 0
    end
  elsif @op == :^
    if l == :unknown or r == :unknown; :unknown
    elsif l == 0; r
    elsif r == 0; l
    elsif l == r; 0
    elsif r == 1 and l.kind_of? Expression and [:'==', :'!=', :<, :>, :<=, :>=].include? l.op
      Expression[nil, :'!', l].reduce_rec
    elsif l.kind_of?(::Numeric)
      if r.kind_of? Expression and r.op == :^
        # 1^(x^y) => x^(y^1)
        Expression[r.lexpr, :^, [r.rexpr, :^, l]].reduce_rec
      else
        # 1^a => a^1
        Expression[r, :^, l].reduce_rec
      end
    elsif l.kind_of? Expression and l.op == :^
      # (a^b)^c => a^(b^c)
      Expression[l.lexpr, :^, [l.rexpr, :^, r]].reduce_rec
    elsif r.kind_of? Expression and r.op == :^
      if r.rexpr == l
        # a^(b^a) => b
        r.lexpr
      elsif r.lexpr == l
        # a^(a^b) => b
        r.rexpr
      else
        # a^(b^(c^(a^d)))  =>  b^(a^(c^(a^d)))
        # XXX ugly..
        tr = r
        found = false
        while not found and tr.kind_of?(Expression) and tr.op == :^
          found = true if tr.lexpr == l or tr.rexpr == l
          tr = tr.rexpr
        end
        if found
          Expression[r.lexpr, :^, [l, :^, r.rexpr]].reduce_rec
        end
      end
    elsif l.kind_of?(Expression) and l.op == :& and l.rexpr.kind_of?(::Integer) and (l.rexpr & (l.rexpr+1)) == 0
      if r.kind_of?(::Integer) and r & l.rexpr == r
        # (a&0xfff)^12 => (a^12)&0xfff
        Expression[[l.lexpr, :^, r], :&, l.rexpr].reduce_rec
      elsif r.kind_of?(Expression) and r.op == :& and r.rexpr.kind_of?(::Integer) and r.rexpr == l.rexpr
        # (a&0xfff)^(b&0xfff) => (a^b)&0xfff
        Expression[[l.lexpr, :^, r.lexpr], :&, l.rexpr].reduce_rec
      end
    end
  elsif @op == :&
    if l == 0 or r == 0; 0
    elsif r == 1 and l.kind_of?(Expression) and [:'==', :'!=', :<, :>, :<=, :>=].include?(l.op)
      l
    elsif l == r; l
    elsif l.kind_of?(Integer); Expression[r, @op, l].reduce_rec
    elsif l.kind_of?(Expression) and l.op == @op; Expression[l.lexpr, @op, [l.rexpr, @op, r]].reduce_rec
    elsif l.kind_of?(Expression) and [:|, :^].include?(l.op) and r.kind_of?(Integer) and (l.op == :| or (r & (r+1)) != 0)
      # (a ^| b) & i => (a&i ^| b&i)
      Expression[[l.lexpr, :&, r], l.op, [l.rexpr, :&, r]].reduce_rec
    elsif r.kind_of?(::Integer) and l.kind_of?(Expression) and (r & (r+1)) == 0
      # foo & 0xffff
      reduce_rec_mod2(l, r)
    end
  elsif @op == :|
    if l == 0; r
    elsif r == 0; l
    elsif l == -1 or r == -1; -1
    elsif l == r; l
    elsif l.kind_of? Integer; Expression[r, @op, l].reduce_rec
    elsif l.kind_of? Expression and l.op == @op; Expression[l.lexpr, @op, [l.rexpr, @op, r]].reduce_rec
    end
  elsif @op == :*
    if l == 0 or r == 0; 0
    elsif l == 1; r
    elsif r == 1; l
    elsif r.kind_of? Integer; Expression[r, @op, l].reduce_rec
    elsif r.kind_of? Expression and r.op == @op; Expression[[l, @op, r.lexpr], @op, r.rexpr].reduce_rec
    # NOTE(review): the next branch looks unreachable, the previous one already catches every r.op == :*
    elsif l.kind_of? Integer and r.kind_of? Expression and r.op == :* and r.lexpr.kind_of? Integer; Expression[l*r.lexpr, :*, r.rexpr].reduce_rec	# XXX need & regsize..
    elsif l.kind_of? Integer and r.kind_of? Expression and r.op == :+ and r.rexpr.kind_of? Integer; Expression[[l, :*, r.lexpr], :+, l*r.rexpr].reduce_rec
    end
  elsif @op == :/
    if r == 0
      # division by zero: leave the expression unreduced
    elsif r.kind_of? Integer and l.kind_of? Expression and l.op == :+ and l.rexpr.kind_of? Integer and l.rexpr % r == 0
      # (a + 8) / 4 => a/4 + 2
      Expression[[l.lexpr, :/, r], :+, l.rexpr/r].reduce_rec
    elsif r.kind_of? Integer and l.kind_of? Expression and l.op == :* and l.lexpr.kind_of? Integer and l.lexpr % r == 0
      # (8 * a) / 4 => 2 * a
      # the multiplicand must be an Integer: Symbols/Expressions have no usable #%
      Expression[l.lexpr/r, :*, l.rexpr].reduce_rec
    end
  elsif @op == :-
    if l == :unknown or r == :unknown; :unknown
    elsif not l and r.kind_of? Expression and (r.op == :- or r.op == :+)
      if r.op == :- # no lexpr (reduced)
        # -(-x) => x
        r.rexpr
      else # :+ and lexpr (r is reduced)
        # -(a+b) => (-a)+(-b)
        Expression[[:-, r.lexpr], :+, [:-, r.rexpr]].reduce_rec
      end
    elsif l.kind_of? Expression and l.op == :+ and l.lexpr == r
      # shortcircuit for a common occurence [citation needed]
      # (a+b)-a
      l.rexpr
    elsif l
      # a-b => a+(-b)
      Expression[l, :+, [:-, r]].reduce_rec
    end
  elsif @op == :+
    if l == :unknown or r == :unknown; :unknown
    elsif not l; r	# +x  => x
    elsif r == 0; l	# x+0 => x
    elsif l.kind_of?(::Numeric)
      if r.kind_of? Expression and r.op == :+
        # 1+(x+y) => x+(y+1)
        Expression[r.lexpr, :+, [r.rexpr, :+, l]].reduce_rec
      else
        # 1+a => a+1
        Expression[r, :+, l].reduce_rec
      end
    # (a+b)+foo => a+(b+foo)
    elsif l.kind_of? Expression and l.op == @op; Expression[l.lexpr, @op, [l.rexpr, @op, r]].reduce_rec
    elsif l.kind_of? Expression and r.kind_of? Expression and l.op == :% and r.op == :% and l.rexpr.kind_of?(::Integer) and l.rexpr == r.rexpr
      # (a%n) + (b%n) => (a+b) % n
      Expression[[l.lexpr, :+, r.lexpr], :%, l.rexpr].reduce_rec
    else
      reduce_rec_add(l, r)
    end
  end

  ret = case v
  when nil
    # no dup if no new value
    (r == :unknown or l == :unknown) ? :unknown :
    ((r == @rexpr and l == @lexpr) ? self : Expression[l, @op, r])
  when Expression
    (v.lexpr == :unknown or v.rexpr == :unknown) ? :unknown : v
  else v
  end
  # give the custom callback a chance on the final result
  if @@reduce_lambda and ret.kind_of? ExpressionType and newret = @@reduce_lambda[ret] and newret != ret
    if newret.kind_of? ExpressionType
      ret = newret.reduce_rec
    else
      ret = newret
    end
  end
  ret
end
711
+
712
+
713
# a+(b+(c+(-a))) => b+c+0
# a+((-a)+(b+c)) => 0+b+c
# searches the addition tree +r+ for the opposite of +l+ and replaces it by 0
# returns the rebuilt (reduced) tree, 0 if r itself is the opposite, nil if nothing was found
def reduce_rec_add(l, r)
  # what we are looking for: -l (or x if l is already -x)
  negated = (l.kind_of?(Expression) and l.op == :- and not l.lexpr) ? l.rexpr : Expression[:-, l]

  # recursive search & replace -lexpr by 0
  cancel = lambda { |node|
    # -l found
    next 0 if negated == node
    next nil unless node.kind_of?(Expression) and node.op == :+

    # recurse, left branch first
    if sub = cancel[node.lexpr]
      Expression[sub, node.op, node.rexpr].reduce_rec
    elsif sub = cancel[node.rexpr]
      Expression[node.lexpr, node.op, sub].reduce_rec
    end
  }

  cancel[r]
end
739
+
740
# expr & 0xffff
# drops inner masks that are made redundant by the outer +mask+:
#  ((a&m) + b) & m => (a+b) & m   (same for ^, and for the mask on the right operand)
# delegates :| expressions to reduce_rec_composerol
# otherwise returns a plain Expression[e, :&, mask]
def reduce_rec_mod2(e, mask)
  untouched = lambda { Expression[e, :&, mask] }
  # true if sub is (x & m) with m an Integer covering all the bits of mask
  covers = lambda { |sub|
    sub.kind_of?(Expression) and sub.op == :& and
    sub.rexpr.kind_of?(::Integer) and sub.rexpr & mask == mask
  }

  if e.op == :+ or e.op == :^
    if covers[e.lexpr]
      # ((a&m) + b) & m  =>  (a+b) & m
      Expression[[e.lexpr.lexpr, e.op, e.rexpr], :&, mask].reduce_rec
    elsif covers[e.rexpr]
      # (a + (b&m)) & m  =>  (a+b) & m
      Expression[[e.lexpr, e.op, e.rexpr.lexpr], :&, mask].reduce_rec
    else
      untouched.call
    end
  elsif e.op == :|
    # rol/ror composition
    reduce_rec_composerol e, mask
  else
    untouched.call
  end
end
762
+
763
# Called by reduce_rec_mod2 for (e & mask) when e is an :| expression.
# e is matched against the pattern (var <sh_op> amt) | (var <inv_sh_op> inv_amt), where the two
# shift operators must be opposite (:<< / :>>) and the amounts are either two Integers (their sum
# is the rotation width) or two (x % width) Expressions sharing the same Integer width.
# mask must equal (1 << width) - 1, and var must itself be (inner & mask) with inner matching
# the same pattern.
# On success returns the reduced expression of a single rotation of the innermost variable, with
# both amounts added modulo the width; otherwise returns a plain Expression[e, :&, mask].
+ # a check to see if an Expr is the composition of two rotations (rol eax, 4 ; rol eax, 6 => rol eax, 10)
764
+ # this is a bit too ugly to stay in the main reduce_rec body.
765
+ def reduce_rec_composerol(e, mask)
766
# m is the pattern; the strings and symbols inside it are the match variables passed to #match
+ m = Expression[['var', :sh_op, 'amt'], :|, ['var', :inv_sh_op, 'inv_amt']]
767
# vars holds the outer rotation, ivars the inner one; ampl is assigned as a side effect of the
# condition (the value set while checking ivars is the one used below)
+ if vars = e.match(m, 'var', :sh_op, 'amt', :inv_sh_op, 'inv_amt') and vars[:sh_op] == {:>> => :<<, :<< => :>>}[vars[:inv_sh_op]] and
768
+ ((vars['amt'].kind_of?(::Integer) and vars['inv_amt'].kind_of?(::Integer) and ampl = vars['amt'] + vars['inv_amt']) or
769
+ (vars['amt'].kind_of? Expression and vars['amt'].op == :% and vars['amt'].rexpr.kind_of? ::Integer and
770
+ vars['inv_amt'].kind_of? Expression and vars['inv_amt'].op == :% and vars['amt'].rexpr == vars['inv_amt'].rexpr and ampl = vars['amt'].rexpr)) and
771
+ mask == (1<<ampl)-1 and vars['var'].kind_of? Expression and # it's a rotation
772
+
773
+ vars['var'].op == :& and vars['var'].rexpr == mask and
774
+ ivars = vars['var'].lexpr.match(m, 'var', :sh_op, 'amt', :inv_sh_op, 'inv_amt') and ivars[:sh_op] == {:>> => :<<, :<< => :>>}[ivars[:inv_sh_op]] and
775
+ ((ivars['amt'].kind_of?(::Integer) and ivars['inv_amt'].kind_of?(::Integer) and ampl = ivars['amt'] + ivars['inv_amt']) or
776
+ (ivars['amt'].kind_of? Expression and ivars['amt'].op == :% and ivars['amt'].rexpr.kind_of? ::Integer and
777
+ ivars['inv_amt'].kind_of? Expression and ivars['inv_amt'].op == :% and ivars['amt'].rexpr == ivars['inv_amt'].rexpr and ampl = ivars['amt'].rexpr))
778
+ if ivars[:sh_op] != vars[:sh_op]
779
+ # ensure the rotations are the same orientation
780
+ ivars[:sh_op], ivars[:inv_sh_op] = ivars[:inv_sh_op], ivars[:sh_op]
781
+ ivars['amt'], ivars['inv_amt'] = ivars['inv_amt'], ivars['amt']
782
+ end
783
# add the amounts of both rotations, modulo the rotation width
+ amt = Expression[[vars['amt'], :+, ivars['amt']], :%, ampl]
784
+ invamt = Expression[[vars['inv_amt'], :+, ivars['inv_amt']], :%, ampl]
785
+ Expression[[[[ivars['var'], :&, mask], vars[:sh_op], amt], :|, [[ivars['var'], :&, mask], vars[:inv_sh_op], invamt]], :&, mask].reduce_rec
786
+ else
787
# not a composition of rotations: just apply the mask
+ Expression[e, :&, mask]
788
+ end
789
+ end
790
+
791
# a pattern-matching method
# Expression[42, :+, 28].match(Expression['any', :+, 28], 'any') => {'any' => 42}
# Expression[42, :+, 28].match(Expression['any', :+, 'any'], 'any') => false
# Expression[42, :+, 42].match(Expression['any', :+, 'any'], 'any') => {'any' => 42}
# vars can match anything except nil
def match(target, *vars)
  # every variable starts unassigned
  slots = {}
  vars.each { |v| slots[v] = nil }
  match_rec(target, slots)
end
799
+
800
# recursive worker for #match
# compares target's lexpr/op/rexpr against ours; +vars+ maps the pattern variables to the
# value they are bound to (nil while unbound) and is updated in place
# returns vars on success, false otherwise
def match_rec(target, vars)
  return false if not target.kind_of? Expression

  pairs = [[target.lexpr, @lexpr], [target.op, @op], [target.rexpr, @rexpr]]
  pairs.each { |pattern, actual|
    if pattern and vars[pattern]
      # variable already bound: must match the same value again
      return false if actual != vars[pattern]
    elsif pattern and vars.has_key? pattern
      # unbound variable: bind it (a variable can not match nil)
      vars[pattern] = actual
      return false if not actual
    elsif pattern.kind_of? ExpressionType
      return false if not actual.kind_of? ExpressionType or not actual.match_rec(pattern, vars)
    elsif pattern != actual
      return false
    end
  }
  vars
end
815
+
816
# returns the array of non-numeric members of the expression
# if a variable appears 3 times, it will be present 3 times in the returned array
# the right operand is visited before the left one
def externals
  found = []
  [@rexpr, @lexpr].each { |e|
    case e
    when ExpressionType; found.concat e.externals
    when nil, ::Numeric	# nothing to report
    else found << e
    end
  }
  found
end
827
+
828
# returns the externals that appears in the expression, does not walk through other ExpressionType
# (only nested Expressions are descended into)
def expr_externals
  found = []
  [@rexpr, @lexpr].each { |e|
    case e
    when Expression; found.concat e.expr_externals
    when nil, ::Numeric, ExpressionType	# ignored
    else found << e
    end
  }
  found
end
838
+
839
+ def inspect
840
+ "Expression[#{@lexpr.inspect.sub(/^Expression/, '') + ', ' if @lexpr}#{@op.inspect + ', ' if @lexpr or @op != :+}#{@rexpr.inspect.sub(/^Expression/, '')}]"
841
+ end
842
+
843
+ Unknown = self[:unknown]
844
+ end
845
+
846
# an EncodedData relocation, specifies a value to patch in
class Relocation
  # the relocation value (an Expression)
  attr_accessor :target
  # the relocation expression type
  attr_accessor :type
  # the endianness of the relocation
  attr_accessor :endianness

  include Backtrace

  # target must be an Expression, type and endianness must be Symbols (ArgumentError otherwise)
  def initialize(target, type, endianness, backtrace = nil)
    valid = (target.kind_of?(Expression) and type.kind_of?(::Symbol) and endianness.kind_of?(::Symbol))
    raise ArgumentError, "bad args #{[target, type, endianness].inspect}" if not valid
    @target = target
    @type = type
    @endianness = endianness
    @backtrace = backtrace
  end

  # fixup the encodeddata with value (reloc starts at off)
  # the value is encoded with Expression.encode_imm and written over edata.data
  def fixup(edata, off, value)
    encoded = Expression.encode_imm(value, @type, @endianness, @backtrace)
    edata.fill off
    edata.data[off, encoded.length] = encoded
  end

  # size of the relocation field, in bytes
  def length
    bits = Expression::INT_SIZE[@type]
    bits/8
  end
end
874
+
875
+ # a String-like, with export/relocation informations added
876
+ class EncodedData
877
+ # string with raw data
878
+ attr_accessor :data
879
+ # hash, key = offset within data, value = +Relocation+
880
+ attr_accessor :reloc
881
+ # hash, key = export name, value = offset within data - use add_export to update
882
+ attr_accessor :export
883
+ # hash, key = offset, value = 1st export name
884
+ attr_accessor :inv_export
885
+ # virtual size of data (all 0 by default, see +fill+)
886
+ attr_accessor :virtsize
887
+ # arbitrary pointer, often used when decoding immediates
888
+ # may be initialized with an export value
889
+ attr_reader :ptr # custom writer
890
# custom writer: p may be an export name, in which case its offset is stored
def ptr=(p)
  resolved = @export[p]
  @ptr = resolved || p
end
891
+
892
# opts' keys in :reloc, :export, :virtsize, defaults to empty/empty/data.length
# inv_export is derived from the export hash, ptr starts at 0
def initialize(data = '', opts={})
  @ptr = 0
  @data = data
  @reloc, @export = [:reloc, :export].map { |k| opts[k] || {} }
  @inv_export = @export.invert
  @virtsize = opts[:virtsize] || @data.length
end
901
+
902
# registers label at offset off (defaults to the current ptr)
# the reverse mapping (offset => label) is only updated when the offset has no
# label yet, or when set_inv is true
def add_export(label, off=@ptr, set_inv=false)
  @export[label] = off
  return unless set_inv || !@inv_export[off]
  @inv_export[off] = label
end
908
+
909
# removes label from the exports
# the reverse mapping for off (defaults to the current ptr) is pointed to another
# label exported at the same offset if there is one, and removed otherwise
def del_export(label, off=@ptr)
  @export.delete label
  # Hash#index was removed in ruby 3.0, Hash#key is its replacement
  other = @export.respond_to?(:key) ? @export.key(off) : @export.index(off)
  if other
    @inv_export[off] = other
  else
    @inv_export.delete off
  end
end
917
+
918
# returns the size of raw data, that is [data.length, last relocation end].max
def rawsize
  ends = @reloc.map { |off, rel| off + rel.length }
  ends.unshift(@data.length)
  ends.max
end
922
+ # String-like
923
+ alias length virtsize
924
+ # String-like
925
+ alias size virtsize
926
+
927
# true when the virtual size is 0
def empty?
  current = @virtsize
  current == 0
end
930
+
931
# true when ptr (converted with to_i) has reached or passed the virtual size
def eos?
  pos = ptr.to_i
  pos >= @virtsize
end
934
+
935
# returns a copy of itself, with reloc/export duped (but not deep)
# the copy is built through the constructor, from a dup of the data and the current virtsize
def dup
  opts = { :reloc => @reloc.dup, :export => @export.dup, :virtsize => @virtsize }
  self.class.new(@data.dup, opts)
end
939
+
940
# resolve relocations:
# calculate each reloc target using Expression#bind(binding)
# if numeric, replace the raw data with the encoding of this value (+fill+s preceding data if needed) and remove the reloc
# if replace_target is true, the reloc target is replaced with its bound counterpart
def fixup_choice(binding, replace_target)
  @reloc.keys.each { |off|
    rel = @reloc[off]
    val = rel.target.bind(binding).reduce
    if val.kind_of? Integer
      rel.fixup(self, off, val)
      @reloc.delete(off)	# delete only if not overflowed
    elsif replace_target
      rel.target = val
    end
  }
end
956
+
957
# +fixup_choice+ binding, false
# (relocation targets that can not be resolved are left as is)
def fixup(binding) fixup_choice(binding, false) end
961
+
962
# +fixup_choice+ binding, true
# (relocation targets that can not be resolved are replaced with their bound version)
def fixup!(binding) fixup_choice(binding, true) end
966
+
967
# returns a default binding suitable for use in +fixup+
# every export is expressed as base + offset
# base defaults to the first export name + its offset
# returns an empty hash if there is no export and no base was given
def binding(base = nil)
  if not base
    lowest = @export.values.min
    # Hash#index was removed in ruby 3.0, Hash#key is its replacement
    key = @export.respond_to?(:key) ? @export.key(lowest) : @export.index(lowest)
    return {} if not key
    base = (@export[key] == 0 ? key : Expression[key, :-, @export[key]])
  end
  @export.inject({}) { |binding, (n, o)| binding.update n => Expression.new(:+, o, base) }
end
978
+
979
# returns an array of variables that needs to be defined for a complete #fixup
# ie the list of externals for all relocations, minus our own exports
def reloc_externals
  wanted = @reloc.values.map { |r| r.target.externals }
  wanted.flatten.uniq - @export.keys
end
984
+
985
# returns the offset where the relocation for target t is to be applied
# (nil if no relocation has this target)
def offset_of_reloc(t)
  wanted = Expression[t]
  @reloc.each_key { |off| return off if @reloc[off].target == wanted }
  nil
end
990
+
991
# fill virtual space by repeating pattern (String) up to len
# expand self if len is larger than self.virtsize
# the raw data is only touched when it is shorter than len
def fill(len = @virtsize, pattern = [0].pack('C'))
  if len > @virtsize
    @virtsize = len
  end
  return nil unless len > @data.length
  @data = @data.to_str.ljust(len, pattern)
end
997
+
998
# rounds up virtsize to next multiple of len
# when a pattern is given, the raw data is filled with it up to the new size
def align(len, pattern=nil)
  @virtsize = EncodedData.align_size(@virtsize, len)
  return unless pattern
  fill(@virtsize, pattern)
end
1003
+
1004
# returns the value val rounded up to next multiple of len
# val is returned unchanged when len is 0
def self.align_size(val, len)
  if len == 0
    val
  else
    chunks = ((val + len - 1) / len).to_i
    chunks * len
  end
end
1009
+
1010
# concatenation of another +EncodedData+ (or nil/Integer/anything supporting String#<<)
# nil is ignored
# an Integer is appended as a single byte
# an EncodedData has its relocs/exports merged, shifted by our current virtsize
#  (raises on a label exported by both at a different address)
# anything else is appended to the raw data, and must respond to #length
# the virtual space is filled before any raw data is appended
# returns self
def <<(other)
  case other
  when nil
  when ::Integer	# ::Fixnum does not exist anymore in ruby 3.2+
    fill
    @data = @data.to_str if not @data.kind_of? String
    @data << other
    @virtsize += 1
  when EncodedData
    fill if not other.data.empty?
    other.reloc.each  { |k, v| @reloc[k + @virtsize] = v  } if not other.reloc.empty?
    if not other.export.empty?
      other.export.each { |k, v|
        if @export[k] and @export[k] != v + @virtsize
          cf = (other.export.keys & @export.keys).find_all { |k_| other.export[k_] != @export[k_] - @virtsize }
          raise "edata merge: label conflict #{cf.inspect}"
        end
        @export[k] = v + @virtsize
      }
      other.inv_export.each { |k, v| @inv_export[@virtsize + k] = v }
    end
    if @data.empty?; @data = other.data.dup
    elsif not @data.kind_of?(String); @data = @data.to_str << other.data
    else @data << other.data
    end
    @virtsize += other.virtsize
  else
    fill
    if @data.empty?; @data = other.dup
    elsif not @data.kind_of?(String); @data = @data.to_str << other
    else @data << other
    end
    @virtsize += other.length
  end

  self
end
1048
+
1049
# equivalent to dup << other, filters out Integers & nil
# (raises ArgumentError for those)
def +(other)
  rejected = (!other || other.kind_of?(Integer))
  raise ArgumentError if rejected
  copy = dup
  copy << other
end
1054
+
1055
# slice
# from may be an offset, an export name, or a Range of those; negative offsets count from the end
# with no length (and no Range), returns @data[from]
# otherwise returns a new EncodedData holding the raw data of the slice, with the
# relocations fully contained in it and the exports it covers (rebased on the slice start)
# returns nil if from is out of bounds
def [](from, len=nil)
  if not len and from.kind_of? Range
    b = from.begin
    e = from.end
    b = @export[b] if @export[b]
    e = @export[e] if @export[e]
    b = b + @virtsize if b < 0
    e = e + @virtsize if e < 0
    len = e - b
    len += 1 if not from.exclude_end?
    from = b
  end
  from = @export[from] if @export[from]
  from = from + @virtsize if from < 0
  return if from > @virtsize or from < 0

  return @data[from] if not len
  len = @virtsize - from if from+len > @virtsize
  # String#[] returns nil when from is past the end of the raw data (slice fully
  # in the virtual area): use an empty string so that the constructor does not choke
  ret = EncodedData.new(@data[from, len] || '')
  ret.virtsize = len
  @reloc.each { |o, r|
    ret.reloc[o - from] = r if o >= from and o + r.length <= from+len
  }
  @export.each { |e_, o|
    ret.export[e_] = o - from if o >= from and o <= from+len		# XXX include end ?
  }
  @inv_export.each { |o, e_|
    ret.inv_export[o-from] = e_ if o >= from and o <= from+len
  }
  ret
end
1087
+
1088
# slice replacement, supports size change (shifts following relocs/exports)
# discards old exports/relocs from the overwritten space
# may be called as self[from] = val, self[range] = val or self[from, len] = val
# from (and the range bounds) may be offsets or export names, negative offsets count from the end
# val may be an Integer (one byte), a String or an EncodedData, whose exports/relocs are merged in
def []=(from, len, val=nil)
  if not val
    val = len
    len = nil
  end
  if not len and from.kind_of? ::Range
    b = from.begin
    e = from.end
    b = @export[b] if @export[b]
    e = @export[e] if @export[e]
    b = b + @virtsize if b < 0
    e = e + @virtsize if e < 0
    len = e - b
    len += 1 if not from.exclude_end?
    from = b
  end
  from = @export[from] || from
  raise "invalid offset #{from}" if not from.kind_of? ::Integer
  from = from + @virtsize if from < 0

  if not len
    val = val.chr if val.kind_of? ::Integer
    len = val.length
  end
  raise "invalid slice length #{len}" if not len.kind_of? ::Integer or len < 0

  # clamp the replaced area to our virtual size
  if from >= @virtsize
    len = 0
  elsif from+len > @virtsize
    len = @virtsize-from
  end

  val = EncodedData.new << val

  # remove overwritten metadata
  @export.delete_if { |name, off| off > from and off < from + len }
  # keep the reverse mapping in sync with the exports just removed
  @inv_export.delete_if { |off, name| off > from and off < from + len }
  # a relocation is overwritten as soon as it overlaps the replaced area
  @reloc.delete_if { |off, rel| off + rel.length > from and off < from + len }
  # shrink/grow
  if val.length != len
    diff = val.length - len
    @export.keys.each { |name| @export[name] = @export[name] + diff if @export[name] > from }
    # re-key in two passes, so that a moved entry never lands on one that is still to be moved
    [@inv_export, @reloc].each { |h|
      moved = h.keys.find_all { |off| off > from }.map { |off| [off + diff, h.delete(off)] }
      moved.each { |off, v| h[off] = v }
    }
    if @virtsize >= from+len
      @virtsize += diff
    end
  end

  @virtsize = from + val.length if @virtsize < from + val.length

  if from + len < @data.length	# patch real data
    val.fill
    @data[from, len] = val.data
  elsif not val.data.empty?	# patch end of real data
    @data << ([0].pack('C')*(from-@data.length)) if @data.length < from
    @data[from..-1] = val.data
  else				# patch end of real data with fully virtual
    @data = @data[0, from]
  end
  val.export.each { |name, off| @export[name] = from + off }
  val.inv_export.each { |off, name| @inv_export[from+off] = name }
  val.reloc.each { |off, rel| @reloc[from + off] = rel }
end
1153
+
1154
# replace a portion of self
# from/to may be Integers (offsets) or labels (from self.export)
# content is a String or an EncodedData, which will be inserted in the specified location (padded if necessary)
# raise if the string does not fit in.
def patch(from, to, content)
  start = @export[from] || from
  raise "invalid offset specification #{start}" if not start.kind_of? Integer
  stop = @export[to] || to
  raise "invalid offset specification #{stop}" if not stop.kind_of? Integer

  room = stop - start
  raise EncodeError, 'cannot patch data: new content too long' if room < content.length

  self[start, content.length] = content
end
1166
+
1167
# returns a list of offsets where /pat/ can be found inside @data
# pat may be a Regexp, or a String that is searched literally
# scan is done per chunk of chunksz bytes, with a margin for chunk-overlapping patterns
# yields each offset found, and only include it in the result if the block returns !false
def pattern_scan(pat, chunksz=nil, margin=nil)
  chunksz ||= 4*1024*1024	# scan 4MB at a time
  margin  ||= 65536		# add this much bytes at each chunk to find /pat/ over chunk boundaries
  pat = Regexp.new(Regexp.escape(pat)) if pat.kind_of? ::String

  hits = []
  base = 0
  while base < @data.length
    window = @data[base, chunksz+margin].to_str
    cursor = 0
    # stop at the first match that lies fully in the margin: the next chunk will report it
    while (rel = (window[cursor..-1] =~ pat)) and cursor+rel < chunksz
      addr = base + cursor + rel
      hits << addr if not block_given? or yield(addr)
      cursor += rel + 1
      # XXX +1 or +lastmatch.length ?
      # 'aaaabc'.pattern_scan(/a*bc/) will match 5 times here
    end
    base += chunksz
  end
  hits
end
1192
+ end
1193
+ end