metasm 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192) hide show
  1. data/BUGS +11 -0
  2. data/CREDITS +17 -0
  3. data/README +270 -0
  4. data/TODO +114 -0
  5. data/doc/code_organisation.txt +146 -0
  6. data/doc/const_missing.txt +16 -0
  7. data/doc/core_classes.txt +75 -0
  8. data/doc/feature_list.txt +53 -0
  9. data/doc/index.txt +59 -0
  10. data/doc/install_notes.txt +170 -0
  11. data/doc/style.css +3 -0
  12. data/doc/use_cases.txt +18 -0
  13. data/lib/metasm.rb +80 -0
  14. data/lib/metasm/arm.rb +12 -0
  15. data/lib/metasm/arm/debug.rb +39 -0
  16. data/lib/metasm/arm/decode.rb +167 -0
  17. data/lib/metasm/arm/encode.rb +77 -0
  18. data/lib/metasm/arm/main.rb +75 -0
  19. data/lib/metasm/arm/opcodes.rb +177 -0
  20. data/lib/metasm/arm/parse.rb +130 -0
  21. data/lib/metasm/arm/render.rb +55 -0
  22. data/lib/metasm/compile_c.rb +1457 -0
  23. data/lib/metasm/dalvik.rb +8 -0
  24. data/lib/metasm/dalvik/decode.rb +196 -0
  25. data/lib/metasm/dalvik/main.rb +60 -0
  26. data/lib/metasm/dalvik/opcodes.rb +366 -0
  27. data/lib/metasm/decode.rb +213 -0
  28. data/lib/metasm/decompile.rb +2659 -0
  29. data/lib/metasm/disassemble.rb +2068 -0
  30. data/lib/metasm/disassemble_api.rb +1280 -0
  31. data/lib/metasm/dynldr.rb +1329 -0
  32. data/lib/metasm/encode.rb +333 -0
  33. data/lib/metasm/exe_format/a_out.rb +194 -0
  34. data/lib/metasm/exe_format/autoexe.rb +82 -0
  35. data/lib/metasm/exe_format/bflt.rb +189 -0
  36. data/lib/metasm/exe_format/coff.rb +455 -0
  37. data/lib/metasm/exe_format/coff_decode.rb +901 -0
  38. data/lib/metasm/exe_format/coff_encode.rb +1078 -0
  39. data/lib/metasm/exe_format/dex.rb +457 -0
  40. data/lib/metasm/exe_format/dol.rb +145 -0
  41. data/lib/metasm/exe_format/elf.rb +923 -0
  42. data/lib/metasm/exe_format/elf_decode.rb +979 -0
  43. data/lib/metasm/exe_format/elf_encode.rb +1375 -0
  44. data/lib/metasm/exe_format/macho.rb +827 -0
  45. data/lib/metasm/exe_format/main.rb +228 -0
  46. data/lib/metasm/exe_format/mz.rb +164 -0
  47. data/lib/metasm/exe_format/nds.rb +172 -0
  48. data/lib/metasm/exe_format/pe.rb +437 -0
  49. data/lib/metasm/exe_format/serialstruct.rb +246 -0
  50. data/lib/metasm/exe_format/shellcode.rb +114 -0
  51. data/lib/metasm/exe_format/xcoff.rb +167 -0
  52. data/lib/metasm/gui.rb +23 -0
  53. data/lib/metasm/gui/cstruct.rb +373 -0
  54. data/lib/metasm/gui/dasm_coverage.rb +199 -0
  55. data/lib/metasm/gui/dasm_decomp.rb +369 -0
  56. data/lib/metasm/gui/dasm_funcgraph.rb +103 -0
  57. data/lib/metasm/gui/dasm_graph.rb +1354 -0
  58. data/lib/metasm/gui/dasm_hex.rb +543 -0
  59. data/lib/metasm/gui/dasm_listing.rb +599 -0
  60. data/lib/metasm/gui/dasm_main.rb +906 -0
  61. data/lib/metasm/gui/dasm_opcodes.rb +291 -0
  62. data/lib/metasm/gui/debug.rb +1228 -0
  63. data/lib/metasm/gui/gtk.rb +884 -0
  64. data/lib/metasm/gui/qt.rb +495 -0
  65. data/lib/metasm/gui/win32.rb +3004 -0
  66. data/lib/metasm/gui/x11.rb +621 -0
  67. data/lib/metasm/ia32.rb +14 -0
  68. data/lib/metasm/ia32/compile_c.rb +1523 -0
  69. data/lib/metasm/ia32/debug.rb +193 -0
  70. data/lib/metasm/ia32/decode.rb +1167 -0
  71. data/lib/metasm/ia32/decompile.rb +564 -0
  72. data/lib/metasm/ia32/encode.rb +314 -0
  73. data/lib/metasm/ia32/main.rb +233 -0
  74. data/lib/metasm/ia32/opcodes.rb +872 -0
  75. data/lib/metasm/ia32/parse.rb +327 -0
  76. data/lib/metasm/ia32/render.rb +91 -0
  77. data/lib/metasm/main.rb +1193 -0
  78. data/lib/metasm/mips.rb +11 -0
  79. data/lib/metasm/mips/compile_c.rb +7 -0
  80. data/lib/metasm/mips/decode.rb +253 -0
  81. data/lib/metasm/mips/encode.rb +51 -0
  82. data/lib/metasm/mips/main.rb +72 -0
  83. data/lib/metasm/mips/opcodes.rb +443 -0
  84. data/lib/metasm/mips/parse.rb +51 -0
  85. data/lib/metasm/mips/render.rb +43 -0
  86. data/lib/metasm/os/gnu_exports.rb +270 -0
  87. data/lib/metasm/os/linux.rb +1112 -0
  88. data/lib/metasm/os/main.rb +1686 -0
  89. data/lib/metasm/os/remote.rb +527 -0
  90. data/lib/metasm/os/windows.rb +2027 -0
  91. data/lib/metasm/os/windows_exports.rb +745 -0
  92. data/lib/metasm/parse.rb +876 -0
  93. data/lib/metasm/parse_c.rb +3938 -0
  94. data/lib/metasm/pic16c/decode.rb +42 -0
  95. data/lib/metasm/pic16c/main.rb +17 -0
  96. data/lib/metasm/pic16c/opcodes.rb +68 -0
  97. data/lib/metasm/ppc.rb +11 -0
  98. data/lib/metasm/ppc/decode.rb +264 -0
  99. data/lib/metasm/ppc/decompile.rb +251 -0
  100. data/lib/metasm/ppc/encode.rb +51 -0
  101. data/lib/metasm/ppc/main.rb +129 -0
  102. data/lib/metasm/ppc/opcodes.rb +410 -0
  103. data/lib/metasm/ppc/parse.rb +52 -0
  104. data/lib/metasm/preprocessor.rb +1277 -0
  105. data/lib/metasm/render.rb +130 -0
  106. data/lib/metasm/sh4.rb +8 -0
  107. data/lib/metasm/sh4/decode.rb +336 -0
  108. data/lib/metasm/sh4/main.rb +292 -0
  109. data/lib/metasm/sh4/opcodes.rb +381 -0
  110. data/lib/metasm/x86_64.rb +12 -0
  111. data/lib/metasm/x86_64/compile_c.rb +1025 -0
  112. data/lib/metasm/x86_64/debug.rb +59 -0
  113. data/lib/metasm/x86_64/decode.rb +268 -0
  114. data/lib/metasm/x86_64/encode.rb +264 -0
  115. data/lib/metasm/x86_64/main.rb +135 -0
  116. data/lib/metasm/x86_64/opcodes.rb +118 -0
  117. data/lib/metasm/x86_64/parse.rb +68 -0
  118. data/misc/bottleneck.rb +61 -0
  119. data/misc/cheader-findpppath.rb +58 -0
  120. data/misc/hexdiff.rb +74 -0
  121. data/misc/hexdump.rb +55 -0
  122. data/misc/metasm-all.rb +13 -0
  123. data/misc/objdiff.rb +47 -0
  124. data/misc/objscan.rb +40 -0
  125. data/misc/pdfparse.rb +661 -0
  126. data/misc/ppc_pdf2oplist.rb +192 -0
  127. data/misc/tcp_proxy_hex.rb +84 -0
  128. data/misc/txt2html.rb +440 -0
  129. data/samples/a.out.rb +31 -0
  130. data/samples/asmsyntax.rb +77 -0
  131. data/samples/bindiff.rb +555 -0
  132. data/samples/compilation-steps.rb +49 -0
  133. data/samples/cparser_makestackoffset.rb +55 -0
  134. data/samples/dasm-backtrack.rb +38 -0
  135. data/samples/dasmnavig.rb +318 -0
  136. data/samples/dbg-apihook.rb +228 -0
  137. data/samples/dbghelp.rb +143 -0
  138. data/samples/disassemble-gui.rb +102 -0
  139. data/samples/disassemble.rb +133 -0
  140. data/samples/dump_upx.rb +95 -0
  141. data/samples/dynamic_ruby.rb +1929 -0
  142. data/samples/elf_list_needed.rb +46 -0
  143. data/samples/elf_listexports.rb +33 -0
  144. data/samples/elfencode.rb +25 -0
  145. data/samples/exeencode.rb +128 -0
  146. data/samples/factorize-headers-elfimports.rb +77 -0
  147. data/samples/factorize-headers-peimports.rb +109 -0
  148. data/samples/factorize-headers.rb +43 -0
  149. data/samples/gdbclient.rb +583 -0
  150. data/samples/generate_libsigs.rb +102 -0
  151. data/samples/hotfix_gtk_dbg.rb +59 -0
  152. data/samples/install_win_env.rb +78 -0
  153. data/samples/lindebug.rb +924 -0
  154. data/samples/linux_injectsyscall.rb +95 -0
  155. data/samples/machoencode.rb +31 -0
  156. data/samples/metasm-shell.rb +91 -0
  157. data/samples/pe-hook.rb +69 -0
  158. data/samples/pe-ia32-cpuid.rb +203 -0
  159. data/samples/pe-mips.rb +35 -0
  160. data/samples/pe-shutdown.rb +78 -0
  161. data/samples/pe-testrelocs.rb +51 -0
  162. data/samples/pe-testrsrc.rb +24 -0
  163. data/samples/pe_listexports.rb +31 -0
  164. data/samples/peencode.rb +19 -0
  165. data/samples/peldr.rb +494 -0
  166. data/samples/preprocess-flatten.rb +19 -0
  167. data/samples/r0trace.rb +308 -0
  168. data/samples/rubstop.rb +399 -0
  169. data/samples/scan_pt_gnu_stack.rb +54 -0
  170. data/samples/scanpeexports.rb +62 -0
  171. data/samples/shellcode-c.rb +40 -0
  172. data/samples/shellcode-dynlink.rb +146 -0
  173. data/samples/source.asm +34 -0
  174. data/samples/struct_offset.rb +47 -0
  175. data/samples/testpe.rb +32 -0
  176. data/samples/testraw.rb +45 -0
  177. data/samples/win32genloader.rb +132 -0
  178. data/samples/win32hooker-advanced.rb +169 -0
  179. data/samples/win32hooker.rb +96 -0
  180. data/samples/win32livedasm.rb +33 -0
  181. data/samples/win32remotescan.rb +133 -0
  182. data/samples/wintrace.rb +92 -0
  183. data/tests/all.rb +8 -0
  184. data/tests/dasm.rb +39 -0
  185. data/tests/dynldr.rb +35 -0
  186. data/tests/encodeddata.rb +132 -0
  187. data/tests/ia32.rb +82 -0
  188. data/tests/mips.rb +116 -0
  189. data/tests/parse_c.rb +239 -0
  190. data/tests/preprocessor.rb +269 -0
  191. data/tests/x86_64.rb +62 -0
  192. metadata +255 -0
@@ -0,0 +1,327 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/ia32/opcodes'
8
+ require 'metasm/ia32/encode'
9
+ require 'metasm/parse'
10
+
11
+ module Metasm
12
+ class Ia32
13
class ModRM
  # Parses a memory operand, e.g. an optional size specifier ('dword', '_48bits', optionally
  # followed by 'ptr'), an optional segment override, then a bracketed expression.
  #
  # otok is the token already read by the caller, the following tokens are read from lexer,
  # cpu is asked (through #str_to_reg) to turn names found in the expression into registers.
  #
  # Returns a new ModRM, a SegReg if only a segment register was found, or nil if otok does
  # not start a modrm. Malformed input is reported with `raise otok, message`.
  #
  # must be called before SegReg parser (which could match only the seg part of a modrm)
  def self.parse(lexer, otok, cpu)
    tok = otok
    ptsz = nil
    seg = nil

    # read operand size specifier
    if tok and tok.type == :string and tok.raw =~ /^(?:byte|[dqo]?word|_(\d+)bits)$/
      explicit_bits = $1
      ptsz =
        if explicit_bits
          explicit_bits.to_i
        else
          named_sizes = { 'byte' => 8, 'word' => 16, 'dword' => 32, 'qword' => 64, 'oword' => 128 }
          named_sizes.fetch(tok.raw) { raise otok, 'mrm: bad ptr size' }
        end
      lexer.skip_space
      tok = lexer.readtok
      if tok and tok.type == :string and tok.raw == 'ptr'
        lexer.skip_space
        tok = lexer.readtok
      end
    end

    # read segment selector
    if tok and tok.type == :string and segidx = SegReg.s_to_i[tok.raw]
      lexer.skip_space
      seg = SegReg.new(segidx)
      ntok = lexer.readtok
      unless ntok and ntok.type == :punct and ntok.raw == ':'
        # a lone segment register: give the token back and return the register itself
        raise otok, 'invalid modrm' if ptsz
        lexer.unreadtok ntok
        return seg
      end
      lexer.skip_space
      tok = lexer.readtok
    end

    # ensure we have a modrm
    unless tok and tok.type == :punct and tok.raw == '['
      raise otok, 'invalid modrm' if ptsz or seg
      return
    end
    lexer.skip_space_eol

    # support fasm syntax [fs:eax] for segment selector
    tok = lexer.readtok
    if tok and tok.type == :string and not seg and segidx = SegReg.s_to_i[tok.raw]
      ntok = lexer.readtok
      raise otok, 'invalid modrm' unless ntok and ntok.type == :punct and ntok.raw == ':'
      seg = SegReg.new(segidx)
      lexer.skip_space_eol
    else
      lexer.unreadtok tok
    end

    # read modrm content as generic expression, then the closing bracket
    content = Expression.parse(lexer)
    lexer.skip_space_eol
    closing = lexer.readtok if content
    raise(otok, 'bad modrm') unless content and closing and closing.type == :punct and closing.raw == ']'

    # converts matching externals to Regs in an expression (modifies the expression in place)
    regify = lambda do |node|
      if node.kind_of? Expression
        node.lexpr = regify[node.lexpr]
        node.rexpr = regify[node.rexpr]
        node
      elsif node.kind_of? String
        cpu.str_to_reg(node) || node
      else
        node
      end
    end

    s = i = b = imm = nil

    # assigns the Regs in the expression to base or index field of the modrm,
    # everything else is accumulated in the immediate
    walker = lambda do |node|
      case node
      when nil
        # nothing to do
      when Reg
        if not b
          b = node
        else
          # second plain register: unscaled index
          raise otok, 'mrm: too many regs' if i
          i = node
          s = 1
        end
      when Expression
        if node.op == :* and (node.rexpr.kind_of? Reg or node.lexpr.kind_of? Reg)
          # scaled index, the register may be on either side of the '*'
          raise otok, 'mrm: too many indexes' if i
          s, i = node.lexpr, node.rexpr
          s, i = i, s if s.kind_of? Reg
          raise otok, 'mrm: bad scale' unless s.kind_of? Integer
        elsif node.op == :+
          # recurse on both operands
          walker[node.lexpr]
          walker[node.rexpr]
        else
          # found (a part of) the immediate
          imm = Expression[imm, :+, node]
        end
      else
        # found (a part of) the immediate
        imm = Expression[imm, :+, node]
      end
    end

    # do it
    walker[regify[content.reduce]]

    # ensure found immediate is really an immediate
    raise otok, 'mrm: reg in imm' if imm.kind_of? Expression and not imm.externals.grep(Reg).empty?

    # find default address size: taken from the base register, else from the index register
    adsz =
      if b
        b.sz
      elsif i
        i.sz
      end

    # ptsz may be nil now, will be fixed up later (in parse_instr_fixup) to match another instruction argument's size
    new adsz, ptsz, s, i, b, imm, seg
  end
end
136
+
137
+
138
# Handles the cpu-specific parser directives '.mode' / '.bits' (case-insensitive), which
# must be followed by 16 or 32 and then end of line; the value becomes the new @size.
# Any other directive is forwarded to the ancestor's implementation.
# Errors are reported with `raise instr, message`.
# XXX changing the cpu size in the middle of the code may have baaad effects...
def parse_parser_instruction(lexer, instr)
  directive = instr.raw.downcase
  return super(lexer, instr) unless directive == '.mode' or directive == '.bits'

  lexer.skip_space
  tok = lexer.readtok
  unless tok and tok.type == :string and (tok.raw == '16' or tok.raw == '32')
    raise instr, 'invalid cpu mode'
  end

  @size = tok.raw.to_i
  lexer.skip_space
  # only peek at the next token: it is left in the lexer
  ntok = lexer.nexttok
  raise instr, 'syntax error' if ntok and ntok.type != :eol
end
154
+
155
# Records the prefix keyword pfx in the prefix hash of instruction i
# (the hash is created if missing, even when pfx is not a prefix).
# Returns the stored value (always true-ish) for a known prefix, nil otherwise.
def parse_prefix(i, pfx)
  # XXX check for redefinition ?
  i.prefix ||= {}
  key, value =
    case pfx
    when 'lock' then [:lock, true]
    when 'rep' then [:rep, 'rep']
    when 'repe', 'repz' then [:rep, 'repz']
    when 'repne', 'repnz' then [:rep, 'repnz']
    when 'code16' then [:sz, 16]
    when 'code32' then [:sz, 32]
    end
  i.prefix[key] = value if key
end
168
+
169
# list of the register classes tried when parsing an argument
# (parse_argument relies on the FP register class being the last entry)
def parse_argregclasslist
  classes = [Reg, SimdReg, SegReg, DbgReg, CtrlReg]
  classes << FpReg
end
172
# tries to parse a memory operand starting at tok, delegates everything to ModRM.parse
def parse_modrm(lex, tok, cpu)
  parsed = ModRM.parse(lex, tok, cpu)
  parsed
end
175
+
176
# Parses an arbitrary ia32 instruction argument from lexer (a String is first wrapped
# in an AsmPreprocessor).
# Returns nil if no token is available, otherwise the first of:
# an FP register written as ST(n), the result of parse_modrm, a register of one of the
# parse_argregclasslist classes, a Farptr (expr:expr), or an Expression
# (nil if no expression could be parsed).
# Errors are reported with `raise tok, message`.
def parse_argument(lexer)
  lexer = AsmPreprocessor.new(lexer) if lexer.kind_of? String

  # reserved names (registers/segments etc), computed on first use
  @args_token ||= begin
    reserved = {}
    parse_argregclasslist.each { |klass|
      klass.s_to_i.keys.each { |name| reserved[name] = true }
    }
    reserved
  end

  lexer.skip_space
  tok = lexer.readtok
  return if not tok

  # ST(n): the parenthesised index is read as separate tokens and glued back into tok.raw
  if tok.type == :string and tok.raw == 'ST'
    lexer.skip_space
    ntok = lexer.readtok
    if ntok and ntok.type == :punct and ntok.raw == '('
      lexer.skip_space
      nntok = lexer.readtok
      raise tok, 'invalid FP register' if not nntok or nntok.type != :string or nntok.raw !~ /^[0-9]$/
      lexer.skip_space
      ntok = lexer.readtok
      raise tok, 'invalid FP register' if not ntok or ntok.type != :punct or ntok.raw != ')'

      tok.raw << '(' << nntok.raw << ')'
      fpr = parse_argregclasslist.last
      raise tok, 'invalid FP register' if not fpr.s_to_i.has_key? tok.raw
      return fpr.new(fpr.s_to_i[tok.raw])
    else
      lexer.unreadtok ntok
    end
  end

  ret = parse_modrm(lexer, tok, self)
  return ret if ret

  if @args_token[tok.raw]
    parse_argregclasslist.each { |klass|
      return klass.from_str(tok.raw) if klass.s_to_i.has_key? tok.raw
    }
    raise tok, 'internal error'
  end

  # not a register: reparse from tok as a generic expression
  lexer.unreadtok tok
  expr = Expression.parse(lexer)
  lexer.skip_space

  # may be a farptr
  ntok = lexer.readtok if expr
  if expr and ntok and ntok.type == :punct and ntok.raw == ':'
    addr = Expression.parse(lexer)
    raise tok, 'invalid farptr' if not addr
    Farptr.new expr, addr
  else
    lexer.unreadtok ntok
    Expression[expr.reduce] if expr
  end
end
229
+
230
# Checks if the argument arg matches the argument specification spec of opcode o.
# Returns a true-ish value on match, false or nil otherwise.
# Raises EncodeError if spec is not a known specification symbol.
def parse_arg_valid?(o, spec, arg)
  is_reg = arg.kind_of?(Reg)
  is_mrm = arg.kind_of?(ModRM)

  # movsx/movzx have their own rules: the source is smaller than the destination
  if o.name == 'movsx' or o.name == 'movzx'
    return unless is_reg or is_mrm
    if not arg.sz
      puts "ambiguous arg size for indirection in #{o.name}" if $VERBOSE
      return
    end
    return(is_reg && arg.sz >= 16) if spec == :reg # reg=dst, modrm=src (smaller)
    return arg.sz == o.props[:argsz] if o.props[:argsz]
    return arg.sz <= 16
  end

  # the address size of a modrm must agree with the one imposed by the opcode
  return false if is_mrm and arg.adsz and o.props[:adsz] and arg.adsz != o.props[:adsz]

  # same for the operand size (unsized args are accepted, :reg_dx is exempted)
  argsz = o.props[:argsz]
  if argsz and (is_reg or is_mrm)
    return false unless !arg.sz or arg.sz == argsz or spec == :reg_dx
  end

  # mmx register, or xmm register when the opcode has the :xmmx property
  mmx_reg = lambda { arg.kind_of?(SimdReg) && (arg.sz == 64 || (arg.sz == 128 && o.props[:xmmx])) }

  case spec
  when :reg      then is_reg && (arg.sz >= 16 || argsz)
  when :modrm    then (is_mrm || is_reg) && (!arg.sz || arg.sz >= 16 || argsz)
  when :i        then arg.kind_of?(Expression)
  when :imm_val1 then arg.kind_of?(Expression) && arg.reduce == 1
  when :imm_val3 then arg.kind_of?(Expression) && arg.reduce == 3
  when :reg_eax  then is_reg && arg.val == 0
  when :reg_cl   then is_reg && arg.val == 1 && arg.sz == 8
  when :reg_dx   then is_reg && arg.val == 2 && arg.sz == 16
  when :seg3     then arg.kind_of?(SegReg)
  when :seg3A    then arg.kind_of?(SegReg) && arg.val > 3
  when :seg2     then arg.kind_of?(SegReg) && arg.val < 4
  when :seg2A    then arg.kind_of?(SegReg) && arg.val < 4 && arg.val != 1
  when :eeec     then arg.kind_of?(CtrlReg)
  when :eeed     then arg.kind_of?(DbgReg)
  when :modrmA   then is_mrm
  when :mrm_imm  then is_mrm && !arg.s && !arg.i && !arg.b
  when :farptr   then arg.kind_of?(Farptr)
  when :regfp    then arg.kind_of?(FpReg)
  when :regfp0   then arg.kind_of?(FpReg) && (arg.val == nil || arg.val == 0)
  when :modrmmmx then is_mrm || mmx_reg.call
  when :regmmx   then mmx_reg.call
  when :modrmxmm then is_mrm || (arg.kind_of?(SimdReg) && arg.sz == 128)
  when :regxmm   then arg.kind_of?(SimdReg) && arg.sz == 128
  when :i8, :u8, :u16
    # in_range? returning true or nil is allowed:
    # jz 0x28282828 may fit in :i8 depending on instr addr
    arg.kind_of?(Expression) && (o.props[:setip] || Expression.in_range?(arg, spec) != false)
  else
    raise EncodeError, "Internal error: unknown argument specification #{spec.inspect}"
  end
end
286
+
287
# Normalizes the argument list of i before handing it to the ancestor's check:
# a two-argument 'imul reg, imm' gets a copy of the register prepended,
# turning it into the three-argument form 'imul reg, reg, imm'.
def parse_instruction_checkproto(i)
  if i.opname == 'imul' and i.args.length == 2
    dst, src = i.args
    i.args.unshift(dst.dup) if dst.kind_of?(Reg) and src.kind_of?(Expression)
  end
  super(i)
end
296
+
297
# Fixes up the sizes left undefined on the first ModRM argument of instruction i.
#
# Operand size (sz), first match wins:
# * 8 for movzx/movsx
# * the size of the first Reg argument
# * the :argsz property, if every opcode with this name defines one
# * the :sz prefix of the instruction, or the current cpu size
# Address size (adsz): the :adsz property if every opcode with this name defines one,
# else the :sz prefix of the instruction, or the current cpu size.
#
# The instruction's prefix hash may be nil (no prefix recorded), it is then ignored.
def parse_instruction_fixup(i)
  m = i.args.grep(ModRM).first
  return if not m

  # evaluated lazily: i.prefix may be nil when the instruction carries no prefix
  default_size = lambda { (i.prefix && i.prefix[:sz]) || @size }

  if not m.sz
    if i.opname == 'movzx' or i.opname == 'movsx'
      m.sz = 8
    elsif r = i.args.grep(Reg).first
      m.sz = r.sz
    elsif opcode_list_byname[i.opname].all? { |o| o.props[:argsz] }
      m.sz = opcode_list_byname[i.opname].first.props[:argsz]
    else
      # this is also the size of ctrlreg/dbgreg etc
      # XXX fpu/simd ?
      m.sz = default_size.call
    end
  end

  if not m.adsz
    if opcode_list_byname[i.opname].all? { |o| o.props[:adsz] }
      m.adsz = opcode_list_byname[i.opname].first.props[:adsz]
    else
      m.adsz = default_size.call
    end
  end
end
322
+
323
# returns the result of parsing an unconditional 'jmp' to target
def instr_uncond_jump_to(target)
  source = "jmp #{target}"
  parse_instruction(source)
end
326
+ end
327
+ end
@@ -0,0 +1,91 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/ia32/opcodes'
8
+ require 'metasm/render'
9
+
10
+ # XXX move context in another file ?
11
+ module Metasm
12
+ class Ia32
13
class Argument
  include Renderable
end

# registers named only by their index
[SegReg, DbgReg, CtrlReg, FpReg].each do |klass|
  klass.class_eval do
    # returns the register name, looked up by value, in an array
    def render
      name = self.class.i_to_s[@val]
      [name]
    end
  end
end

# registers named by their size and index
[Reg, SimdReg].each do |klass|
  klass.class_eval do
    # returns the register name, looked up by size then value, in an array
    def render
      name = self.class.i_to_s[@sz][@val]
      [name]
    end

    # actions available on the register: change its size
    def context
      { 'set sz' => lambda { |s| @sz = s } }
    end
  end
end
24
+
25
class Farptr
  # renders as the segment, a colon, then the address
  def render
    parts = [@seg]
    parts << ':' << @addr
  end
end
30
+
31
class ModRM
  # the instruction this modrm is an argument of (set by render_instruction),
  # used to decide whether the size qualifier can be omitted
  attr_accessor :instruction

  # returns the keyword naming a pointer size in bits ('byte', 'word'...),
  # or '_<sz>bits' for a size with no dedicated keyword
  def qualifier(sz)
    {
      8 => 'byte',
      16 => 'word',
      32 => 'dword',
      64 => 'qword',
      128 => 'oword'
    }.fetch(sz) { |k| "_#{sz}bits" }
  end

  # returns the array of elements to render: optional 'size ptr ', optional 'seg:',
  # then the bracketed address expression (base + immediate + scale*index)
  def render
    r = []
    # the size qualifier is omitted when a Reg argument of the instruction has the same size
    r << "#{qualifier(@sz)} ptr " if @sz and (not instruction or not @instruction.args.find { |a| a.kind_of? Reg and a.sz == @sz })
    r << @seg << ':' if seg

    e = nil
    e = Expression[e, :+, @b] if b
    e = Expression[e, :+, @imm] if imm
    e = Expression[e, :+, (@s == 1 ? @i : [@s, :*, @i])] if s
    r << '[' << e << ']'
  end

  # actions available on the modrm: change the pointer size or the segment override
  def context
    {'set targetsz' => lambda { |s| @sz = s },
     # the segment register class is SegReg (there is no Seg class)
     'set seg' => lambda { |s| @seg = SegReg.new s }}
  end
end
60
+
61
# Returns the array of elements to render for instruction i:
# the 'lock ' and rep prefixes if set, the opname, then the arguments
# (a space before the first one, ', ' before the others).
# ModRM arguments get a back-reference to i on the way.
def render_instruction(i)
  out = []
  out << 'lock ' if i.prefix and i.prefix[:lock]
  out.push(i.prefix[:rep], ' ') if i.prefix and i.prefix[:rep]
  out << i.opname
  i.args.each do |arg|
    arg.instruction = i if arg.kind_of? ModRM
    # the opname is still the last element only before the first argument
    separator = (out.last == i.opname) ? ' ' : ', '
    out.push(separator, arg)
  end
  out
end
72
+
73
# Returns a hash { action name => lambda } of the prefix/segment manipulations
# available on instruction i. The available actions depend on the current prefixes of i
# and on the :strop / :stropz properties of the first opcode named i.opname.
def instruction_context(i)
  # XXX
  h = {}
  op = opcode_list_byname[i.opname].first
  if i.prefix and i.prefix[:rep]
    if op.props[:stropz]
      # parse_prefix may have stored a plain 'rep': it is toggled to 'repnz' like 'repz' is,
      # instead of being replaced by nil (which silently dropped the prefix).
      # Any other value is left unchanged.
      flip = {'repnz' => 'repz', 'repz' => 'repnz', 'rep' => 'repnz'}
      h['toogle repz'] = lambda { i.prefix[:rep] = flip.fetch(i.prefix[:rep], i.prefix[:rep]) }
    end
    h['rm rep'] = lambda { i.prefix.delete :rep }
  else
    # when the opcode has both properties, the :stropz variant wins
    h['set rep'] = lambda { (i.prefix ||= {})[:rep] = 'rep' } if op.props[:strop]
    h['set rep'] = lambda { (i.prefix ||= {})[:rep] = 'repz' } if op.props[:stropz]
  end
  if i.args.find { |a| a.kind_of? ModRM and a.seg }
    h['rm seg'] = lambda { i.args.find { |a| a.kind_of? ModRM and a.seg }.seg = nil }
  end
  h['toggle lock'] = lambda { (i.prefix ||= {})[:lock] = !i.prefix[:lock] }
  h
end
90
+ end
91
+ end
@@ -0,0 +1,1193 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ module Metasm
8
+
9
+ VERSION = 0x0001 # major major minor minor
10
+
11
# root of the metasm exception hierarchy
class Exception < RuntimeError
end

# raised on a parse error
class ParseError < Exception
end

# raised on an invalid exeformat signature
class InvalidExeFormat < Exception
end

# raised when encoding fails: cannot honor .offset specification, reloc fixup overflow
class EncodeError < Exception
end
19
+
20
# context of a processor:
# endianness, current mode (size), opcode list...
class CPU
  attr_accessor :valid_args, :valid_props, :fields_mask
  attr_accessor :endianness, :size
  attr_accessor :generate_PIC
  # replaces the opcode list (the by-name index is not rebuilt if already computed)
  attr_writer :opcode_list

  def initialize
    @fields_mask  = {}
    @fields_shift = {}
    @valid_args   = []
    @valid_props  = [:setip, :saveip, :stopexec]
    @generate_PIC = true
  end

  # the list of opcodes, built by init_opcode_list on first use
  def opcode_list
    @opcode_list ||= init_opcode_list
  end

  # returns a hash opcode_name => array of opcodes with this name (computed once)
  def opcode_list_byname
    return @opcode_list_byname if @opcode_list_byname

    index = {}
    opcode_list.each { |op| (index[op.name] ||= []) << op }
    @opcode_list_byname = index
  end

  # sets up the C parser : standard macro definitions, type model (size of int etc)
  def tune_cparser(cp)
    if    @size == 64; cp.lp64
    elsif @size == 32; cp.ilp32
    elsif @size == 16; cp.ilp16
    end
    cp.endianness = @endianness
    cp.lexer.define_weak('_STDC', 1)
    # TODO gcc -dM -E - </dev/null
    tune_prepro(cp.lexer)
  end

  # hook to set up a preprocessor, does nothing here
  def tune_prepro(pp)
    # TODO pp.define('BIGENDIAN')
  end

  # return a new AsmPreprocessor, tuned by this cpu then by exe (if given)
  def new_asmprepro(str='', exe=nil)
    prepro = AsmPreprocessor.new(str, exe)
    tune_prepro(prepro)
    exe.tune_prepro(prepro) if exe
    prepro
  end

  # returns a new C::Parser built for this cpu
  def new_cparser
    C::Parser.new(self)
  end

  # returns a new C::Compiler, exe is bound to this cpu if it has none yet
  def new_ccompiler(parser, exe=ExeFormat.new)
    has_cpu = exe.instance_variable_get("@cpu")
    exe.cpu = self if not has_cpu
    C::Compiler.new(parser, exe)
  end

  # the lowercased class name, without its namespace
  def shortname
    qualified = self.class.name
    qualified.sub(/.*::/, '').downcase
  end
end
85
+
86
# generic CPU, with no instructions, just size/endianness
class UnknownCPU < CPU
  def initialize(size, endianness)
    super()
    @size = size
    @endianness = endianness
  end
end
93
+
94
# the 'formal' description of a cpu instruction
#
# attributes:
# name::     the name of the instruction
# args::     formal description of arguments (array of cpu-specific symbols)
# bin::      binary encoding of the opcode (integer for risc, array of bytes for cisc)
# fields::   list of bit fields in the binary encoding, as a hash position => field ;
#            position is bit shift for risc, [byte index, bit shift] for cisc ;
#            field is cpu-specific
# props::    hash of opcode generic properties/restrictions (mostly property => true/false)
# bin_mask:: binary mask for decoding
class Opcode
  attr_accessor :name, :args, :bin, :fields, :props, :bin_mask

  def initialize(name, bin=nil)
    @name, @bin = name, bin
    @args = []
    @fields, @props = {}, {}
  end

  # the name up to (and excluding) the first dot
  def basename
    stem = @name.sub(/\..*/, '')
    stem
  end
end
124
+
125
# mixin giving an object a source location:
# an attribute self.backtrace (array of filename/lineno),
# a method backtrace_str which dumps this array to a human-readable form,
# and a method exception, which lets the object be passed directly to `raise`
module Backtrace
  # array [file, lineno, file, lineno]
  # if file 'A' does #include 'B' you'll get ['A', linenoA, 'B', linenoB]
  attr_accessor :backtrace

  # builds a readable string from self.backtrace
  def backtrace_str
    Backtrace.backtrace_str(@backtrace)
  end

  # builds a readable backtrace string from an array of [file, lineno, file, lineno, ..]
  # the pairs are listed from the last one to the first one ; returns '' for nil
  def self.backtrace_str(ary)
    return '' if not ary

    out = ''
    idx = ary.length
    until idx <= 0
      # ary[idx] is nil on the first round (one past the end): no separator before the first pair
      out << ",\n\tincluded from " if ary[idx]
      idx -= 2
      out << "#{ary[idx].inspect} line #{ary[idx+1]}"
    end
    out
  end

  # returns a ParseError whose message is prefixed with the location
  def exception(msg='syntax error')
    location = backtrace_str
    ParseError.new "at #{location}: #{msg}"
  end
end
154
+
155
# an instruction: opcode name + arguments
#
# attributes:
# args::   arguments (cpu-specific objects)
# prefix:: hash of prefixes (unused in simple cpus)
# opname:: name of the associated opcode
# cpu::    reference to the cpu which issued this instruction (used for rendering)
class Instruction
  include Backtrace

  attr_accessor :args, :prefix, :opname, :cpu

  # the prefix is left unset when pfx is nil/false
  def initialize(cpu, opname=nil, args=[], pfx=nil, backtrace=nil)
    @cpu, @opname, @args, @backtrace = cpu, opname, args, backtrace
    @prefix = pfx if pfx
  end

  # returns a new Instruction for the same cpu, with duplicates of the
  # opname, argument list, prefix hash and backtrace (when they are set)
  def dup
    name_copy   = @opname.dup if opname
    prefix_copy = @prefix.dup if prefix
    bt_copy     = @backtrace.dup if backtrace
    Instruction.new(@cpu, name_copy, @args.dup, prefix_copy, bt_copy)
  end
end
181
+
182
# all kind of data description (including repeated/uninitialized)
class Data
  include Backtrace

  # maps data type to Expression parameters (signedness/bit size)
  INT_TYPE = {'db' => :a8, 'dw' => :a16, 'dd' => :a32, 'dq' => :a64}

  # an Expression, an Array of Data, a String, or :uninitialized
  attr_accessor :data
  # the data type, from INT_TYPE (TODO store directly Expression parameters ?)
  attr_accessor :type
  # the repetition count of the data parameter (dup constructs)
  attr_accessor :count

  # note the argument order: type comes first, then the data
  def initialize(type, data, count=1, backtrace=nil)
    @type = type
    @data = data
    @count = count
    @backtrace = backtrace
  end
end
200
+
201
# a name for a location
class Label
  include Backtrace

  # the label name
  attr_accessor :name

  def initialize(name, backtrace=nil)
    @name = name
    @backtrace = backtrace
  end
end
211
+
212
# alignment directive
class Align
  include Backtrace

  # the size to align to
  attr_accessor :val
  # the Data used to pad
  attr_accessor :fillwith

  def initialize(val, fillwith=nil, backtrace=nil)
    @val = val
    @fillwith = fillwith
    @backtrace = backtrace
  end
end
225
+
226
# padding directive
class Padding
  include Backtrace

  # Data used to pad
  attr_accessor :fillwith

  def initialize(fillwith=nil, backtrace=nil)
    @fillwith = fillwith
    @backtrace = backtrace
  end
end
237
+
238
# offset directive
# can be used to fix padding length or to assert some code/data compiled length
class Offset
  include Backtrace

  # the assembler will arrange to make this pseudo-instruction
  # be at this offset from beginning of current section
  attr_accessor :val

  def initialize(val, backtrace=nil)
    @val = val
    @backtrace = backtrace
  end
end
251
+
252
# the superclass of all real executable formats
# main methods:
#  self.decode(str) => decodes the file format (imports/relocs/etc), no asm disassembly
#  parse(source) => parses assembler source, fills self.source
#  assemble => assembles self.source in binary sections/segments/whatever
#  encode => builds imports/relocs tables, put all this together, links everything in self.encoded
class ExeFormat
  # array of Data/Instruction/Align/Padding/Offset/Label, populated in parse
  attr_accessor :cursource
  # contains the binary version of the compiled program (EncodedData)
  attr_accessor :encoded
  # hash of labels generated by new_label
  attr_accessor :unique_labels_cache
  # the reader is custom, see #cpu
  attr_writer :cpu

  # initializes self.cpu, creates an empty self.encoded
  def initialize(cpu=nil)
    @cpu = cpu
    @encoded = EncodedData.new
    @unique_labels_cache = {}
  end

  # the cpu, found through cpu_from_headers when none was set
  def cpu
    @cpu ||= cpu_from_headers
  end

  # return the label name corresponding to the specified offset of the encodeddata, creates it if necessary
  def label_at(edata, offset, base = '')
    existing = edata.inv_export[offset]
    return existing if existing

    fresh = new_label(base)
    edata.add_export(fresh, offset)
    fresh
  end

  # creates a new label, that is guaranteed to never be returned again as long as this object (ExeFormat) exists
  # characters of base other than letters, digits and underscore are replaced by underscores ;
  # an empty or already used name gets a '_uuid<hex>' suffix and is returned frozen
  def new_label(base = '')
    name = base.dup.tr('^a-zA-Z0-9_', '_')
    if name.empty? or @unique_labels_cache[name]
      # use %x instead of to_s(16) for negative values
      name << '_uuid' << ('%08x' % name.object_id)
      name.freeze
    end
    @unique_labels_cache[name] = true
    name
  end

  # share self.unique_labels_cache with other, checks for conflicts, returns self
  # raises if a label name is known to both caches
  def share_namespace(other)
    theirs = other.unique_labels_cache
    return self if theirs.equal? @unique_labels_cache

    clashes = theirs.keys & @unique_labels_cache.keys
    raise "share_ns #{clashes.inspect}" if !clashes.empty?

    @unique_labels_cache.update theirs
    other.unique_labels_cache = @unique_labels_cache
    self
  end
end
304
+
305
# superclass for classes similar to Expression
# must define #bind, #reduce_rec, #match_rec, #externals
class ExpressionType
  # returns the reduced Expression self + o
  def +(o)
    sum = Expression[self, :+, o]
    sum.reduce
  end

  # returns the reduced Expression self - o
  def -(o)
    difference = Expression[self, :-, o]
    difference.reduce
  end
end
311
+
312
+ # handle immediate values, and arbitrary arithmetic/logic expression involving variables
313
+ # boolean values are treated as in C : true is 1, false is 0
314
+ # TODO replace #type with #size => bits + #type => [:signed/:unsigned/:any/:floating]
315
+ # TODO handle floats
316
+ class Expression < ExpressionType
317
# per-type tables, keyed by :i<sz> (signed), :u<sz> (unsigned) and :a<sz> (any), for sz in 8/16/32/64
INT_SIZE = {}	# type => size in bits
INT_MIN = {}	# type => smallest accepted value
INT_MAX = {}	# type => largest accepted value

[8, 16, 32, 64].each { |sz|
  signed   = "i#{sz}".to_sym
  unsigned = "u#{sz}".to_sym
  any      = "a#{sz}".to_sym

  [any, unsigned, signed].each { |type| INT_SIZE[type] = sz }

  lowest = -(1 << (sz-1))	# -0x8000
  INT_MIN[signed] = lowest
  INT_MIN[any] = lowest
  INT_MIN[unsigned] = 0

  INT_MAX[signed] = (1 << (sz-1)) - 1	# 0x7fff
  highest = (1 << sz) - 1	# 0xffff
  INT_MAX[unsigned] = highest
  INT_MAX[any] = highest
}
334
+
335
# alternative constructor, taking the arguments in reading order
# sub-arrays are turned into nested Expressions
# ex: Expression[[:-, 42], :*, [1, :+, [4, :*, 7]]]
# with 2 arguments, they are taken as [op, operand] (unary expression)
# with 1 argument, returns it if it is already an Expression, otherwise wraps it with an unary +
# (or an unary - applied to the opposite, for a negative Numeric)
def self.[](l, op=nil, r=nil)
  if r
    l = self[*l] if l.kind_of? ::Array
  elsif op
    # [op, operand]: shift everything one slot to the right
    l, op, r = nil, l, op
  else
    raise ArgumentError, 'invalid Expression[nil]' unless l
    return l if l.kind_of? Expression
    negative = (l.kind_of?(::Numeric) and l < 0)
    l, op, r = nil, (negative ? :'-' : :'+'), (negative ? -l : l)
  end
  r = self[*r] if r.kind_of? ::Array
  new(op, r, l)
end
362
+
363
# tells whether an Expression/Numeric fits in the range of the integer type +type+
# returns true if it does, false if it does not (out of bounds or not integral),
# and nil when this cannot be decided (value does not reduce to a Numeric, or +type+ has no INT_MIN entry)
def self.in_range?(val, type)
  val = val.reduce if val.kind_of? self
  return unless val.kind_of? ::Numeric

  lowest = INT_MIN[type]
  return unless lowest

  val == val.to_i and val >= lowest and val <= INT_MAX[type]
end
374
+
375
# reinterprets an unsigned Integer as a two-complement signed value of +bitlength+ bits:
# when bit (bitlength-1) is the topmost set bit, 1 << bitlength is subtracted
# non-Integer values are returned untouched
def self.make_signed(val, bitlength)
  return val unless val.kind_of? Integer
  sign_set = (val >> (bitlength - 1) == 1)
  sign_set ? val - (1 << bitlength) : val
end
382
+
383
# op:    the operator (a Symbol)
# lexpr: the left-hand side operand (nil for unary expressions)
# rexpr: the right-hand side operand
attr_accessor :op, :lexpr, :rexpr
389
+
390
# low-level constructor
# XXX beware of the argument order (op, rexpr, lexpr), +Expression[]+ is the friendlier entry point
# raises ArgumentError when +op+ is not a Symbol
def initialize(op, rexpr, lexpr)
  unless op.kind_of? ::Symbol
    raise ArgumentError, "Expression: invalid arg order: #{[lexpr, op, rexpr].inspect}"
  end
  @lexpr = lexpr
  @op = op
  @rexpr = rexpr
end
396
+
397
# structural equality: same operator, and operands equal according to #== (recursively)
# operands are not reordered, so 1+2 does not equal 2+1
def ==(o)
  # same object: no need to walk the tree
  return true if o.object_id == object_id
  o.kind_of?(Expression) and @op == o.op and @lexpr == o.lexpr and @rexpr == o.rexpr
end
403
+
404
# hash value derived from both operands and the operator, so that
# Expressions can serve as Hash keys (see +==+)
def hash
  [@lexpr, @op, @rexpr].inject(0) { |sum, part| sum + part.hash } & 0x7fff_ffff
end
alias eql? ==
409
+
410
# returns a new Expression where every operand found in +binding+ is replaced by its bound value
# if self itself is a key of the binding, a dup of the bound value is returned
# operands that are not bound but are ExpressionTypes are bound recursively
# raises if a Numeric operand appears as a key of the binding
def bind(binding = {})
  whole = binding[self]
  return whole.dup if whole

  left, right = [@lexpr, @rexpr].map { |side|
    if side and (bound = binding[side])
      raise "internal error - bound #{side.inspect}" if side.kind_of? ::Numeric
      bound
    elsif side.kind_of? ExpressionType
      side.bind(binding)
    else
      side
    end
  }
  Expression[left, @op, right]
end
433
+
434
# in-place version of #bind: self.lexpr/self.rexpr are overwritten with their bound value
# recursion only happens through operands that are Expressions (no respond_to? check)
# returns self
def bind!(binding = {})
  [:@lexpr, :@rexpr].each { |ivar|
    cur = instance_variable_get(ivar)
    case cur
    when Expression; cur.bind!(binding)
    when nil, false	# nothing to bind
    else instance_variable_set(ivar, binding[cur] || cur)
    end
  }
  self
end
449
+
450
# reduce_lambda is a hook run after the standard reduction procedure, for custom algorithms
# it may return a replacement expression, or nil to keep the one it was given
# example: lambda { |e| e.lexpr if e.kind_of? Expression and e.op == :& and e.rexpr == 0xffff_ffff }
# when called with a block, installs it as the new hook; always returns the previous hook
def self.reduce_lambda(&b)
  previous = @@reduce_lambda
  @@reduce_lambda = b if b
  previous
end

# installs +p+ as the hook (nil removes it)
def self.reduce_lambda=(p)
  @@reduce_lambda = p
end

# no hook by default
@@reduce_lambda = nil
463
+
464
# returns a simplified version of self: an +Expression+ or a +Numeric+ (possibly self itself)
# the simplifications are described in +reduce_rec+; any other result is wrapped with Expression[]
# when a block is given, it replaces the global @@reduce_lambda for the duration of the call
# XXX THIS IS NOT THREADSAFE
def reduce(&b)
  if b
    old_rp = @@reduce_lambda
    @@reduce_lambda = b
  end
  res = reduce_rec
  (res.kind_of?(Expression) or res.kind_of?(Numeric)) ? res : Expression[res]
ensure
  # put the previous hook back, whatever happened
  @@reduce_lambda = old_rp if b
end
477
+
478
# recursive worker of #reduce
# both operands are reduced first (and passed through the reduce_lambda hook when they are
# not Expressions), then the operator-specific rules below are tried:
# resolves logic operations (true || false, etc)
# computes numeric operations (1 + 3)
# expands subtractions to addition of the opposite
# reduces double-oppositions (-(-1) => 1)
# reduces addition of 0 and unary +
# canonicalize additions: put variables in the lhs, descend addition tree in the rhs => (a + (b + (c + 12)))
# make formal reduction if finds somewhere in addition tree (a) and (-a)
# returns self when nothing changed, :unknown when an operand is :unknown and no rule applied,
# otherwise the simplified value (Expression, Numeric, or whatever a rule/the hook produced)
def reduce_rec
  l = @lexpr.kind_of?(ExpressionType) ? @lexpr.reduce_rec : @lexpr
  r = @rexpr.kind_of?(ExpressionType) ? @rexpr.reduce_rec : @rexpr

  if @@reduce_lambda
    l = @@reduce_lambda[l] || l if not @lexpr.kind_of? Expression
    r = @@reduce_lambda[r] || r if not @rexpr.kind_of? Expression
  end

  # v is the result of the first matching rule, nil when no rule applies
  v =
  if r.kind_of?(::Numeric) and (l == nil or l.kind_of?(::Numeric))
    # calculate numerics
    if [:'&&', :'||', :'>', :'<', :'>=', :'<=', :'==', :'!='].include?(@op)
      # bool expr
      raise 'internal error' if not l
      case @op
      when :'&&'; (l != 0) && (r != 0)
      when :'||'; (l != 0) || (r != 0)
      when :'>' ; l > r
      when :'>='; l >= r
      when :'<' ; l < r
      when :'<='; l <= r
      when :'=='; l == r
      when :'!='; l != r
      end ? 1 : 0
    elsif not l
      case @op
      when :'!'; (r == 0) ? 1 : 0
      when :+;  r
      when :-; -r
      when :~; ~r
      end
    else
      # use ruby evaluator
      l.send(@op, r)
    end

  elsif @op == :'&&'
    if l == 0	# shortcircuit eval
      0
    elsif l == 1
      Expression[r, :'!=', 0].reduce_rec
    elsif r == 0
      0	# XXX l could be a special ExprType with sideeffects ?
    end
  elsif @op == :'||'
    if l.kind_of? ::Numeric and l != 0	# shortcircuit eval
      1
    elsif l == 0
      Expression[r, :'!=', 0].reduce_rec
    elsif r == 0
      Expression[l, :'!=', 0].reduce_rec
    end
  elsif @op == :>> or @op == :<<
    if l == 0; 0
    elsif r == 0; l
    elsif l.kind_of? Expression and l.op == @op
      Expression[l.lexpr, @op, [l.rexpr, :+, r]].reduce_rec
    # XXX (a >> 1) << 1  !=  a (lose low bit)
    # XXX (a << 1) >> 1  !=  a (with real cpus, lose high bit)
    # (a | b) << i
    elsif r.kind_of? Integer and l.kind_of? Expression and [:&, :|, :^].include? l.op
      Expression[[l.lexpr, @op, r], l.op, [l.rexpr, @op, r]].reduce_rec
    end
  elsif @op == :'!'
    if r.kind_of? Expression and op = {:'==' => :'!=', :'!=' => :'==', :< => :>=, :> => :<=, :<= => :>, :>= => :<}[r.op]
      Expression[r.lexpr, op, r.rexpr].reduce_rec
    end
  elsif @op == :==
    if l == r; 1
    elsif r == 0 and l.kind_of? Expression and op = {:'==' => :'!=', :'!=' => :'==', :< => :>=, :> => :<=, :<= => :>, :>= => :<}[l.op]
      Expression[l.lexpr, op, l.rexpr].reduce_rec
    elsif r == 1 and l.kind_of? Expression and op = {:'==' => :'!=', :'!=' => :'==', :< => :>=, :> => :<=, :<= => :>, :>= => :<}[l.op]
      l
    elsif r == 0 and l.kind_of? Expression and l.op == :+
      if l.rexpr.kind_of? Expression and l.rexpr.op == :- and not l.rexpr.lexpr
        Expression[l.lexpr, @op, l.rexpr.rexpr].reduce_rec
      elsif l.rexpr.kind_of? ::Integer
        Expression[l.lexpr, @op, -l.rexpr].reduce_rec
      end
    end
  elsif @op == :'!='
    if l == r; 0
    end
  elsif @op == :^
    if l == :unknown or r == :unknown; :unknown
    elsif l == 0; r
    elsif r == 0; l
    elsif l == r; 0
    elsif r == 1 and l.kind_of? Expression and [:'==', :'!=', :<, :>, :<=, :>=].include? l.op
      Expression[nil, :'!', l].reduce_rec
    elsif l.kind_of?(::Numeric)
      if r.kind_of? Expression and r.op == :^
        # 1^(x^y) => x^(y^1)
        Expression[r.lexpr, :^, [r.rexpr, :^, l]].reduce_rec
      else
        # 1^a => a^1
        Expression[r, :^, l].reduce_rec
      end
    elsif l.kind_of? Expression and l.op == :^
      # (a^b)^c => a^(b^c)
      Expression[l.lexpr, :^, [l.rexpr, :^, r]].reduce_rec
    elsif r.kind_of? Expression and r.op == :^
      if r.rexpr == l
        # a^(a^b) => b
        r.lexpr
      elsif r.lexpr == l
        # a^(b^a) => b
        r.rexpr
      else
        # a^(b^(c^(a^d)))  =>  b^(a^(c^(a^d)))
        # XXX ugly..
        tr = r
        found = false
        while not found and tr.kind_of?(Expression) and tr.op == :^
          found = true if tr.lexpr == l or tr.rexpr == l
          tr = tr.rexpr
        end
        if found
          Expression[r.lexpr, :^, [l, :^, r.rexpr]].reduce_rec
        end
      end
    elsif l.kind_of?(Expression) and l.op == :& and l.rexpr.kind_of?(::Integer) and (l.rexpr & (l.rexpr+1)) == 0
      if r.kind_of?(::Integer) and r & l.rexpr == r
        # (a&0xfff)^12 => (a^12)&0xfff
        Expression[[l.lexpr, :^, r], :&, l.rexpr].reduce_rec
      elsif r.kind_of?(Expression) and r.op == :& and r.rexpr.kind_of?(::Integer) and r.rexpr == l.rexpr
        # (a&0xfff)^(b&0xfff) => (a^b)&0xfff
        Expression[[l.lexpr, :^, r.lexpr], :&, l.rexpr].reduce_rec
      end
    end
  elsif @op == :&
    if l == 0 or r == 0; 0
    elsif r == 1 and l.kind_of?(Expression) and [:'==', :'!=', :<, :>, :<=, :>=].include?(l.op)
      l
    elsif l == r; l
    elsif l.kind_of?(Integer); Expression[r, @op, l].reduce_rec
    elsif l.kind_of?(Expression) and l.op == @op; Expression[l.lexpr, @op, [l.rexpr, @op, r]].reduce_rec
    elsif l.kind_of?(Expression) and [:|, :^].include?(l.op) and r.kind_of?(Integer) and (l.op == :| or (r & (r+1)) != 0)
      # (a ^| b) & i => (a&i ^| b&i)
      Expression[[l.lexpr, :&, r], l.op, [l.rexpr, :&, r]].reduce_rec
    elsif r.kind_of?(::Integer) and l.kind_of?(Expression) and (r & (r+1)) == 0
      # foo & 0xffff
      reduce_rec_mod2(l, r)
    end
  elsif @op == :|
    if    l == 0; r
    elsif r == 0; l
    elsif l == -1 or r == -1; -1
    elsif l == r; l
    elsif l.kind_of? Integer; Expression[r, @op, l].reduce_rec
    elsif l.kind_of? Expression and l.op == @op; Expression[l.lexpr, @op, [l.rexpr, @op, r]].reduce_rec
    end
  elsif @op == :*
    if    l == 0 or r == 0; 0
    elsif l == 1; r
    elsif r == 1; l
    elsif r.kind_of? Integer; Expression[r, @op, l].reduce_rec
    elsif r.kind_of? Expression and r.op == @op; Expression[[l, @op, r.lexpr], @op, r.rexpr].reduce_rec
    elsif l.kind_of? Integer and r.kind_of? Expression and r.op == :* and r.lexpr.kind_of? Integer; Expression[l*r.lexpr, :*, r.rexpr].reduce_rec	# XXX need & regsize..
    elsif l.kind_of? Integer and r.kind_of? Expression and r.op == :+ and r.rexpr.kind_of? Integer; Expression[[l, :*, r.lexpr], :+, l*r.rexpr].reduce_rec
    end
  elsif @op == :/
    if r == 0
      # division by 0: leave the expression alone
    elsif r.kind_of? Integer and l.kind_of? Expression and l.op == :+ and l.rexpr.kind_of? Integer and l.rexpr % r == 0
      Expression[[l.lexpr, :/, r], :+, l.rexpr/r].reduce_rec
    elsif r.kind_of? Integer and l.kind_of? Expression and l.op == :* and l.lexpr.kind_of? Integer and l.lexpr % r == 0
      # (k*a)/r => (k/r)*a, only when k is an Integer multiple of r
      # (the Integer check keeps '%' from being sent to a label or a sub-expression)
      Expression[l.lexpr/r, :*, l.rexpr].reduce_rec
    end
  elsif @op == :-
    if l == :unknown or r == :unknown; :unknown
    elsif not l and r.kind_of? Expression and (r.op == :- or r.op == :+)
      if r.op == :-	# no lexpr (reduced)
        # -(-x) => x
        r.rexpr
      else	# :+ and lexpr (r is reduced)
        # -(a+b) => (-a)+(-b)
        Expression[[:-, r.lexpr], :+, [:-, r.rexpr]].reduce_rec
      end
    elsif l.kind_of? Expression and l.op == :+ and l.lexpr == r
      # shortcircuit for a common occurrence [citation needed]
      # (a+b)-a
      l.rexpr
    elsif l
      # a-b => a+(-b)
      Expression[l, :+, [:-, r]].reduce_rec
    end
  elsif @op == :+
    if l == :unknown or r == :unknown; :unknown
    elsif not l; r	# +x  => x
    elsif r == 0; l	# x+0 => x
    elsif l.kind_of?(::Numeric)
      if r.kind_of? Expression and r.op == :+
        # 1+(x+y) => x+(y+1)
        Expression[r.lexpr, :+, [r.rexpr, :+, l]].reduce_rec
      else
        # 1+a => a+1
        Expression[r, :+, l].reduce_rec
      end
    # (a+b)+foo => a+(b+foo)
    elsif l.kind_of? Expression and l.op == @op; Expression[l.lexpr, @op, [l.rexpr, @op, r]].reduce_rec
    elsif l.kind_of? Expression and r.kind_of? Expression and l.op == :% and r.op == :% and l.rexpr.kind_of?(::Integer) and l.rexpr == r.rexpr
      Expression[[l.lexpr, :+, r.lexpr], :%, l.rexpr].reduce_rec
    else
      reduce_rec_add(l, r)
    end
  end

  ret = case v
  when nil
    # no dup if no new value
    (r == :unknown or l == :unknown) ? :unknown :
    ((r == @rexpr and l == @lexpr) ? self : Expression[l, @op, r])
  when Expression
    (v.lexpr == :unknown or v.rexpr == :unknown) ? :unknown : v
  else v
  end
  # give the hook a chance on the final result, and reduce again what it returns
  if @@reduce_lambda and ret.kind_of? ExpressionType and newret = @@reduce_lambda[ret] and newret != ret
    if newret.kind_of? ExpressionType
      ret = newret.reduce_rec
    else
      ret = newret
    end
  end
  ret
end
711
+
712
+
713
# looks for the opposite of +l+ somewhere in the addition tree +r+ and cancels it out
# a+(b+(c+(-a))) => b+c+0
# a+((-a)+(b+c)) => 0+b+c
# returns the simplified tree (with 0 in place of -l), or nil when -l was not found
def reduce_rec_add(l, r)
  # what we are looking for: -l (if l is already an opposition, its operand)
  neg_l = (l.kind_of?(Expression) and l.op == :- and not l.lexpr) ? l.rexpr : Expression[:-, l]

  # recursive search & replace of -l by 0, walking only through additions
  simplifier = lambda { |cur|
    next 0 if neg_l == cur
    next unless (cur.kind_of?(Expression) and cur.op == :+)

    if newl = simplifier[cur.lexpr]
      Expression[newl, cur.op, cur.rexpr].reduce_rec
    elsif newr = simplifier[cur.rexpr]
      Expression[cur.lexpr, cur.op, newr].reduce_rec
    end
  }

  simplifier[r]
end
739
+
740
# simplification of (e & mask), as in expr & 0xffff
# for + and ^, an inner mask on one operand that covers +mask+ is dropped:
#  ((a&m) + b) & m => (a+b) & m
#  (a + (b&m)) & m => (a+b) & m
# for |, tries the rol/ror composition (see reduce_rec_composerol)
# in every other case returns Expression[e, :&, mask]
def reduce_rec_mod2(e, mask)
  return reduce_rec_composerol(e, mask) if e.op == :|

  if e.op == :+ or e.op == :^
    # true for operands of the form (x & m) where the Integer m covers every bit of mask
    covered = lambda { |x|
      x.kind_of?(Expression) and x.op == :& and
        x.rexpr.kind_of?(::Integer) and x.rexpr & mask == mask
    }
    if covered[e.lexpr]
      return Expression[[e.lexpr.lexpr, e.op, e.rexpr], :&, mask].reduce_rec
    elsif covered[e.rexpr]
      return Expression[[e.lexpr, e.op, e.rexpr.lexpr], :&, mask].reduce_rec
    end
  end

  Expression[e, :&, mask]
end
762
+
763
# a check to see if an Expr is the composition of two rotations (rol eax, 4 ; rol eax, 6 => rol eax, 10)
# this is a bit too ugly to stay in the main reduce_rec body.
#
# e is expected to look like (var sh_op amt) | (var inv_sh_op inv_amt), where the two shift
# operators are opposite (<< and >>). The rotation width (ampl) is either the sum of the two
# Integer amounts, or the common Integer modulus when both amounts are (x % n) Expressions.
# The match is only accepted when mask == (1<<ampl)-1 and var is itself (inner & mask) with
# inner matching the same shape.
# On success, returns the reduction of a single rotation of the innermost var by the summed
# amounts (modulo ampl), masked; otherwise returns Expression[e, :&, mask].
def reduce_rec_composerol(e, mask)
  # pattern for one rotation; 'var', 'amt', 'inv_amt', :sh_op and :inv_sh_op are match placeholders
  m = Expression[['var', :sh_op, 'amt'], :|, ['var', :inv_sh_op, 'inv_amt']]
  if vars = e.match(m, 'var', :sh_op, 'amt', :inv_sh_op, 'inv_amt') and vars[:sh_op] == {:>> => :<<, :<< => :>>}[vars[:inv_sh_op]] and
     ((vars['amt'].kind_of?(::Integer) and vars['inv_amt'].kind_of?(::Integer) and ampl = vars['amt'] + vars['inv_amt']) or
      (vars['amt'].kind_of? Expression and vars['amt'].op == :% and vars['amt'].rexpr.kind_of? ::Integer and
       vars['inv_amt'].kind_of? Expression and vars['inv_amt'].op == :% and vars['amt'].rexpr == vars['inv_amt'].rexpr and ampl = vars['amt'].rexpr)) and
     mask == (1<<ampl)-1 and vars['var'].kind_of? Expression and	# it's a rotation

     vars['var'].op == :& and vars['var'].rexpr == mask and
     ivars = vars['var'].lexpr.match(m, 'var', :sh_op, 'amt', :inv_sh_op, 'inv_amt') and ivars[:sh_op] == {:>> => :<<, :<< => :>>}[ivars[:inv_sh_op]] and
     ((ivars['amt'].kind_of?(::Integer) and ivars['inv_amt'].kind_of?(::Integer) and ampl = ivars['amt'] + ivars['inv_amt']) or
      (ivars['amt'].kind_of? Expression and ivars['amt'].op == :% and ivars['amt'].rexpr.kind_of? ::Integer and
       ivars['inv_amt'].kind_of? Expression and ivars['inv_amt'].op == :% and ivars['amt'].rexpr == ivars['inv_amt'].rexpr and ampl = ivars['amt'].rexpr))
    if ivars[:sh_op] != vars[:sh_op]
      # ensure the rotations are the same orientation
      ivars[:sh_op], ivars[:inv_sh_op] = ivars[:inv_sh_op], ivars[:sh_op]
      ivars['amt'],  ivars['inv_amt']  = ivars['inv_amt'],  ivars['amt']
    end
    # add up the outer and inner rotation amounts, wrapping at the rotation width
    amt = Expression[[vars['amt'], :+, ivars['amt']], :%, ampl]
    invamt = Expression[[vars['inv_amt'], :+, ivars['inv_amt']], :%, ampl]
    Expression[[[[ivars['var'], :&, mask], vars[:sh_op], amt], :|, [[ivars['var'], :&, mask], vars[:inv_sh_op], invamt]], :&, mask].reduce_rec
  else
    # not a composition of rotations: plain masking
    Expression[e, :&, mask]
  end
end
790
+
791
# pattern matching: compares self to +target+, where the values listed in +vars+ act as wildcards
# returns a hash wildcard => matched value, or false
#  Expression[42, :+, 28].match(Expression['any', :+, 28], 'any') => {'any' => 42}
#  Expression[42, :+, 28].match(Expression['any', :+, 'any'], 'any') => false
#  Expression[42, :+, 42].match(Expression['any', :+, 'any'], 'any') => {'any' => 42}
# a wildcard matches anything but nil
def match(target, *vars)
  slots = {}
  vars.each { |v| slots[v] = nil }
  match_rec(target, slots)
end
799
+
800
# worker of #match
# +vars+ maps each wildcard to its value (nil while unassigned) and is filled in along the way
# compares lexpr, op and rexpr of +target+ against ours, in that order
# returns +vars+ on success, false as soon as something differs
def match_rec(target, vars)
  return false unless target.kind_of? Expression

  pairs = [[target.lexpr, @lexpr], [target.op, @op], [target.rexpr, @rexpr]]
  pairs.each { |targ, exp|
    if targ and vars[targ]
      # wildcard already assigned: must see the same value again
      return false if exp != vars[targ]
    elsif targ and vars.has_key? targ
      # unassigned wildcard: takes our value, which must not be nil/false
      vars[targ] = exp
      return false unless exp
    elsif targ.kind_of? ExpressionType
      return false unless exp.kind_of?(ExpressionType) and exp.match_rec(targ, vars)
    else
      return false if targ != exp
    end
  }
  vars
end
815
+
816
# lists the non-numeric leaves of the expression (rexpr side first), descending into every ExpressionType
# a variable appearing 3 times is listed 3 times
def externals
  found = []
  [@rexpr, @lexpr].each { |e|
    if e.kind_of? ExpressionType
      found.concat e.externals
    elsif not (e.nil? or e.kind_of?(::Numeric))
      found << e
    end
  }
  found
end
827
+
828
# same as #externals, but only descends into Expressions: other ExpressionTypes are skipped altogether
def expr_externals
  found = []
  [@rexpr, @lexpr].each { |e|
    if e.kind_of? Expression
      found.concat e.expr_externals
    elsif e.nil? or e.kind_of?(::Numeric) or e.kind_of?(ExpressionType)
      # not an external
    else
      found << e
    end
  }
  found
end
838
+
839
+ def inspect
840
+ "Expression[#{@lexpr.inspect.sub(/^Expression/, '') + ', ' if @lexpr}#{@op.inspect + ', ' if @lexpr or @op != :+}#{@rexpr.inspect.sub(/^Expression/, '')}]"
841
+ end
842
+
843
+ Unknown = self[:unknown]
844
+ end
845
+
846
# a relocation of an EncodedData: describes a value that has to be patched in later
class Relocation
  # target:     the relocation value (an Expression)
  # type:       the relocation expression type (a Symbol)
  # endianness: the endianness of the relocation (a Symbol)
  attr_accessor :target, :type, :endianness

  include Backtrace

  # raises ArgumentError unless target is an Expression and type/endianness are Symbols
  def initialize(target, type, endianness, backtrace = nil)
    valid = (target.kind_of?(Expression) and type.kind_of?(::Symbol) and endianness.kind_of?(::Symbol))
    raise ArgumentError, "bad args #{[target, type, endianness].inspect}" unless valid
    @target = target
    @type = type
    @endianness = endianness
    @backtrace = backtrace
  end

  # writes the encoding of +value+ into +edata+ at offset +off+ (where the reloc starts),
  # after making sure edata is filled up to +off+
  def fixup(edata, off, value)
    encoded = Expression.encode_imm(value, @type, @endianness, @backtrace)
    edata.fill(off)
    edata.data[off, encoded.length] = encoded
  end

  # size of the relocation field, in bytes
  def length
    bits = Expression::INT_SIZE[@type]
    bits / 8
  end
end
874
+
875
+ # a String-like, with export/relocation informations added
876
+ class EncodedData
877
# data:       string with raw data
# reloc:      hash, key = offset within data, value = +Relocation+
# export:     hash, key = export name, value = offset within data - use add_export to update
# inv_export: hash, key = offset, value = 1st export name
# virtsize:   virtual size of data (all 0 by default, see +fill+)
attr_accessor :data, :reloc, :export, :inv_export, :virtsize

# arbitrary pointer, often used when decoding immediates
attr_reader :ptr

# sets the pointer; an export name is translated to its offset
def ptr=(p)
  resolved = @export[p]
  @ptr = resolved ? resolved : p
end
891
+
892
# +opts+ may hold :reloc, :export and :virtsize (defaults: empty, empty, data.length)
# inv_export is derived from the exports, and the pointer starts at 0
def initialize(data = '', opts={})
  @ptr = 0
  @data = data
  @virtsize = opts[:virtsize] || @data.length
  @reloc = opts[:reloc] || {}
  @export = opts[:export] || {}
  @inv_export = @export.invert
end
901
+
902
# registers +label+ at offset +off+ (defaults to the current pointer)
# inv_export only changes when the offset had no label yet, unless +set_inv+ is true
def add_export(label, off=@ptr, set_inv=false)
  @export[label] = off
  @inv_export[off] = label unless @inv_export[off] and not set_inv
end
908
+
909
# removes +label+ from the exports, then refreshes inv_export for offset +off+
# (defaults to the current pointer): it is pointed to another label still exported
# at that offset if there is one, and removed otherwise
def del_export(label, off=@ptr)
  @export.delete label
  # Hash#key replaces Hash#index, which no longer exists in Ruby 3
  if e = @export.key(off)
    @inv_export[off] = e
  else
    @inv_export.delete off
  end
end
917
+
918
# size of the raw data: data.length, or the end of the furthest relocation if that lies beyond
def rawsize
  @reloc.inject(@data.length) { |size, (off, rel)|
    reloc_end = off + rel.length
    reloc_end > size ? reloc_end : size
  }
end
922
# String-like: #length reports the virtual size (same value as #virtsize)
alias length virtsize
# String-like: #size reports the virtual size (same value as #virtsize)
alias size virtsize
926
+
927
# true when the virtual size is 0
def empty?
  0 == @virtsize
end
930
+
931
# true when the pointer (converted with to_i) is at or past the virtual size
def eos?
  @virtsize <= ptr.to_i
end
934
+
935
# returns a new object of the same class, built from dups of data, reloc and export
# (shallow: the Relocations and labels themselves are shared) and the same virtsize
def dup
  opts = { :reloc => @reloc.dup, :export => @export.dup, :virtsize => @virtsize }
  self.class.new(@data.dup, opts)
end
939
+
940
# resolves the relocations:
# each reloc target is evaluated with Expression#bind(binding) then reduced
# when the result is an Integer, the reloc is applied (Relocation#fixup) and removed
# otherwise, when +replace_target+ is true, the reloc target is replaced with the bound value
def fixup_choice(binding, replace_target)
  @reloc.keys.each { |off|
    rel = @reloc[off]
    val = rel.target.bind(binding).reduce
    if val.kind_of? Integer
      rel.fixup(self, off, val)
      # only reached when fixup did not raise
      @reloc.delete(off)
    elsif replace_target
      rel.target = val
    end
  }
end
956
+
957
# resolves what can be resolved with +binding+, leaving the other reloc targets as they are
# (+fixup_choice+ binding, false)
def fixup(binding)
  fixup_choice binding, false
end
961
+
962
# resolves what can be resolved with +binding+, and stores the bound expression in the other reloc targets
# (+fixup_choice+ binding, true)
def fixup!(binding)
  fixup_choice binding, true
end
966
+
967
# returns a default binding suitable for use in +fixup+
# every export is expressed as base + offset
# base defaults to the name of the export with the lowest offset (minus that offset when it is not 0)
# returns {} when there is no export and no base is given
def binding(base = nil)
  if not base
    # Hash#key replaces Hash#index, which no longer exists in Ruby 3
    key = @export.key(@export.values.min)
    return {} if not key
    base = (@export[key] == 0 ? key : Expression[key, :-, @export[key]])
  end
  @export.inject({}) { |h, (n, o)| h.update n => Expression.new(:+, o, base) }
end
978
+
979
# lists what still has to be defined for a complete #fixup:
# the externals of all the reloc targets (without duplicates), minus our own exports
def reloc_externals
  wanted = @reloc.values.map { |rel| rel.target.externals }
  wanted = wanted.flatten.uniq
  wanted - @export.keys
end
984
+
985
# returns the offset of the first relocation whose target equals Expression[t], nil if there is none
def offset_of_reloc(t)
  wanted = Expression[t]
  @reloc.each { |off, rel| return off if rel.target == wanted }
  nil
end
990
+
991
# makes the raw data at least +len+ bytes long by padding it with +pattern+ (a String, repeated)
# +len+ defaults to the virtual size; the virtual size grows if +len+ is larger
def fill(len = @virtsize, pattern = [0].pack('C'))
  @virtsize = len if @virtsize < len
  return unless @data.length < len
  @data = @data.to_str.ljust(len, pattern)
end
997
+
998
# rounds the virtual size up to the next multiple of +len+
# when a +pattern+ is given, the raw data is also filled with it up to the new size
def align(len, pattern=nil)
  aligned = EncodedData.align_size(@virtsize, len)
  @virtsize = aligned
  fill(aligned, pattern) if pattern
end
1003
+
1004
# returns +val+ rounded up to the next multiple of +len+ (+val+ itself when +len+ is 0)
def self.align_size(val, len)
  len == 0 ? val : ((val + len - 1) / len).to_i * len
end
1009
+
1010
# concatenation of another +EncodedData+ (or nil/Integer/anything supporting String#<<)
# - nil: nothing is appended
# - Integer: appended to the raw data with String#<<, the virtual size grows by 1
# - EncodedData: its relocs/exports are imported, shifted by our virtual size;
#   raises if one of its labels is already exported here at a different address
# - anything else: appended to the raw data, the virtual size grows by its length
# the virtual space is filled first whenever raw data is going to be appended
# returns self
def << other
  case other
  when nil
  when ::Integer	# was ::Fixnum, a constant that no longer exists in Ruby 3.2
    fill
    @data = @data.to_str if not @data.kind_of? String
    @data << other
    @virtsize += 1
  when EncodedData
    fill if not other.data.empty?
    other.reloc.each  { |k, v| @reloc[k + @virtsize] = v  } if not other.reloc.empty?
    if not other.export.empty?
      other.export.each { |k, v|
        if @export[k] and @export[k] != v + @virtsize
          cf = (other.export.keys & @export.keys).find_all { |k_| other.export[k_] != @export[k_] - @virtsize }
          raise "edata merge: label conflict #{cf.inspect}"
        end
        @export[k] = v + @virtsize
      }
      other.inv_export.each { |k, v| @inv_export[@virtsize + k] = v }
    end
    if @data.empty?; @data = other.data.dup
    elsif not @data.kind_of?(String); @data = @data.to_str << other.data
    else @data << other.data
    end
    @virtsize += other.virtsize
  else
    fill
    if @data.empty?; @data = other.dup
    elsif not @data.kind_of?(String); @data = @data.to_str << other
    else @data << other
    end
    @virtsize += other.length
  end

  self
end
1048
+
1049
# same as dup << other, but refuses nil (or false) and Integers with an ArgumentError
def + other
  rejected = (not other or other.kind_of?(Integer))
  raise ArgumentError if rejected
  dup << other
end
1054
+
1055
# slice: edata[from, len], edata[range] or edata[from]
# offsets may be export names, and negative offsets count from the end of the virtual space
# returns nil when +from+ is out of bounds
# without a length, returns @data[from]
# otherwise returns a new EncodedData of +len+ bytes (clipped to the virtual size) carrying the
# relocs fully contained in the slice and the exports located in it (both bounds included)
def [](from, len=nil)
  if not len and from.kind_of? Range
    first, last = [from.begin, from.end].map { |bound|
      bound = @export[bound] || bound
      bound < 0 ? bound + @virtsize : bound
    }
    len = last - first
    len += 1 unless from.exclude_end?
    from = first
  end
  from = @export[from] || from
  from += @virtsize if from < 0
  return if from > @virtsize or from < 0

  return @data[from] unless len

  len = @virtsize - from if from+len > @virtsize
  stop = from + len
  ret = EncodedData.new @data[from, len]
  ret.virtsize = len
  @reloc.each { |o, r|
    next unless o >= from and o + r.length <= stop
    ret.reloc[o - from] = r
  }
  @export.each { |name, o|
    next unless o >= from and o <= stop	# XXX include end ?
    ret.export[name] = o - from
  }
  @inv_export.each { |o, name|
    next unless o >= from and o <= stop
    ret.inv_export[o - from] = name
  }
  ret
end
1087
+
1088
# slice replacement: edata[from, len] = val, edata[range] = val or edata[from] = val
# supports size change (the relocs/exports located after +from+ are shifted accordingly)
# discards the old exports strictly inside the overwritten space and the old relocs overlapping it
# +from+ may be an export name, and a negative offset counts from the end of the virtual space
# +val+ may be anything EncodedData#<< accepts; its own exports/relocs are imported at +from+
# raises on a non-Integer offset or an invalid length
def []=(from, len, val=nil)
  if not val
    # 2-argument form: the second argument is the value
    val = len
    len = nil
  end
  if not len and from.kind_of? ::Range
    b = from.begin
    e = from.end
    b = @export[b] if @export[b]
    e = @export[e] if @export[e]
    b = b + @virtsize if b < 0
    e = e + @virtsize if e < 0
    len = e - b
    len += 1 if not from.exclude_end?
    from = b
  end
  from = @export[from] || from
  raise "invalid offset #{from}" if not from.kind_of? ::Integer
  from = from + @virtsize if from < 0

  if not len
    val = val.chr if val.kind_of? ::Integer
    len = val.length
  end
  raise "invalid slice length #{len}" if not len.kind_of? ::Integer or len < 0

  # clip the replaced span to the virtual size
  if from >= @virtsize
    len = 0
  elsif from+len > @virtsize
    len = @virtsize-from
  end

  val = EncodedData.new << val

  # remove overwritten metadata
  # exports: only those strictly inside the span (a label sitting at +from+ survives),
  # and inv_export is kept in sync with them
  @export.delete_if { |name, off| off > from and off < from + len }
  @inv_export.delete_if { |off, name| off > from and off < from + len }
  # relocs: every reloc whose field [off, off+length) overlaps the span
  @reloc.delete_if { |off, rel| off + rel.length > from and off < from + len }
  # shrink/grow
  if val.length != len
    diff = val.length - len
    @export.keys.each { |name| @export[name] = @export[name] + diff if @export[name] > from }
    # offset-keyed hashes: take out all the entries to move before putting any back,
    # so that a moved entry cannot land on one that still has to be moved
    [@inv_export, @reloc].each { |h|
      moved = h.keys.find_all { |off| off > from }.map { |off| [off + diff, h.delete(off)] }
      moved.each { |off, v| h[off] = v }
    }
    if @virtsize >= from+len
      @virtsize += diff
    end
  end

  @virtsize = from + val.length if @virtsize < from + val.length

  if from + len < @data.length	# patch real data
    val.fill
    @data[from, len] = val.data
  elsif not val.data.empty?	# patch end of real data
    @data << ([0].pack('C')*(from-@data.length)) if @data.length < from
    @data[from..-1] = val.data
  else				# patch end of real data with fully virtual
    @data = @data[0, from]
  end
  # import the metadata of the new content
  val.export.each { |name, off| @export[name] = from + off }
  val.inv_export.each { |off, name| @inv_export[from+off] = name }
  val.reloc.each { |off, rel| @reloc[from + off] = rel }
end
1153
+
1154
# replaces a portion of self
# from/to may be Integers (offsets) or labels (from self.export)
# content is a String or an EncodedData; it is written at +from+ over content.length bytes
# raises if from/to do not resolve to Integers, or (EncodeError) if content is longer than to - from
def patch(from, to, content)
  from, to = [from, to].map { |pos|
    off = @export[pos] || pos
    raise "invalid offset specification #{off}" unless off.kind_of? Integer
    off
  }
  room = to - from
  raise EncodeError, 'cannot patch data: new content too long' if room < content.length
  self[from, content.length] = content
end
1166
+
1167
# returns the list of offsets where +pat+ (a Regexp, or a String taken literally) is found inside @data
# the scan works on chunks of +chunksz+ bytes (default 4MB), each read with +margin+ extra bytes
# (default 64k) so that matches straddling a chunk boundary are seen
# when a block is given, it is called with each offset found, and the offset is only kept
# if the block returns something true
def pattern_scan(pat, chunksz=nil, margin=nil)
  chunksz ||= 4*1024*1024
  margin ||= 65536
  pat = Regexp.new(Regexp.escape(pat)) if pat.kind_of? ::String

  found = []
  chunkoff = 0
  while chunkoff < @data.length
    chunk = @data[chunkoff, chunksz+margin].to_str
    off = 0
    until (rel = (chunk[off..-1] =~ pat)).nil?
      hit = off + rel
      # a match starting in the margin belongs to the next chunk
      break if hit >= chunksz
      addr = chunkoff + hit
      found << addr if not block_given? or yield(addr)
      # resume right after the start of this match
      # XXX +1 or +lastmatch.length ?
      # 'aaaabc'.pattern_scan(/a*bc/) will match 5 times here
      off = hit + 1
    end
    chunkoff += chunksz
  end
  found
end
1192
+ end
1193
+ end