metasm 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (192) hide show
  1. data/BUGS +11 -0
  2. data/CREDITS +17 -0
  3. data/README +270 -0
  4. data/TODO +114 -0
  5. data/doc/code_organisation.txt +146 -0
  6. data/doc/const_missing.txt +16 -0
  7. data/doc/core_classes.txt +75 -0
  8. data/doc/feature_list.txt +53 -0
  9. data/doc/index.txt +59 -0
  10. data/doc/install_notes.txt +170 -0
  11. data/doc/style.css +3 -0
  12. data/doc/use_cases.txt +18 -0
  13. data/lib/metasm.rb +80 -0
  14. data/lib/metasm/arm.rb +12 -0
  15. data/lib/metasm/arm/debug.rb +39 -0
  16. data/lib/metasm/arm/decode.rb +167 -0
  17. data/lib/metasm/arm/encode.rb +77 -0
  18. data/lib/metasm/arm/main.rb +75 -0
  19. data/lib/metasm/arm/opcodes.rb +177 -0
  20. data/lib/metasm/arm/parse.rb +130 -0
  21. data/lib/metasm/arm/render.rb +55 -0
  22. data/lib/metasm/compile_c.rb +1457 -0
  23. data/lib/metasm/dalvik.rb +8 -0
  24. data/lib/metasm/dalvik/decode.rb +196 -0
  25. data/lib/metasm/dalvik/main.rb +60 -0
  26. data/lib/metasm/dalvik/opcodes.rb +366 -0
  27. data/lib/metasm/decode.rb +213 -0
  28. data/lib/metasm/decompile.rb +2659 -0
  29. data/lib/metasm/disassemble.rb +2068 -0
  30. data/lib/metasm/disassemble_api.rb +1280 -0
  31. data/lib/metasm/dynldr.rb +1329 -0
  32. data/lib/metasm/encode.rb +333 -0
  33. data/lib/metasm/exe_format/a_out.rb +194 -0
  34. data/lib/metasm/exe_format/autoexe.rb +82 -0
  35. data/lib/metasm/exe_format/bflt.rb +189 -0
  36. data/lib/metasm/exe_format/coff.rb +455 -0
  37. data/lib/metasm/exe_format/coff_decode.rb +901 -0
  38. data/lib/metasm/exe_format/coff_encode.rb +1078 -0
  39. data/lib/metasm/exe_format/dex.rb +457 -0
  40. data/lib/metasm/exe_format/dol.rb +145 -0
  41. data/lib/metasm/exe_format/elf.rb +923 -0
  42. data/lib/metasm/exe_format/elf_decode.rb +979 -0
  43. data/lib/metasm/exe_format/elf_encode.rb +1375 -0
  44. data/lib/metasm/exe_format/macho.rb +827 -0
  45. data/lib/metasm/exe_format/main.rb +228 -0
  46. data/lib/metasm/exe_format/mz.rb +164 -0
  47. data/lib/metasm/exe_format/nds.rb +172 -0
  48. data/lib/metasm/exe_format/pe.rb +437 -0
  49. data/lib/metasm/exe_format/serialstruct.rb +246 -0
  50. data/lib/metasm/exe_format/shellcode.rb +114 -0
  51. data/lib/metasm/exe_format/xcoff.rb +167 -0
  52. data/lib/metasm/gui.rb +23 -0
  53. data/lib/metasm/gui/cstruct.rb +373 -0
  54. data/lib/metasm/gui/dasm_coverage.rb +199 -0
  55. data/lib/metasm/gui/dasm_decomp.rb +369 -0
  56. data/lib/metasm/gui/dasm_funcgraph.rb +103 -0
  57. data/lib/metasm/gui/dasm_graph.rb +1354 -0
  58. data/lib/metasm/gui/dasm_hex.rb +543 -0
  59. data/lib/metasm/gui/dasm_listing.rb +599 -0
  60. data/lib/metasm/gui/dasm_main.rb +906 -0
  61. data/lib/metasm/gui/dasm_opcodes.rb +291 -0
  62. data/lib/metasm/gui/debug.rb +1228 -0
  63. data/lib/metasm/gui/gtk.rb +884 -0
  64. data/lib/metasm/gui/qt.rb +495 -0
  65. data/lib/metasm/gui/win32.rb +3004 -0
  66. data/lib/metasm/gui/x11.rb +621 -0
  67. data/lib/metasm/ia32.rb +14 -0
  68. data/lib/metasm/ia32/compile_c.rb +1523 -0
  69. data/lib/metasm/ia32/debug.rb +193 -0
  70. data/lib/metasm/ia32/decode.rb +1167 -0
  71. data/lib/metasm/ia32/decompile.rb +564 -0
  72. data/lib/metasm/ia32/encode.rb +314 -0
  73. data/lib/metasm/ia32/main.rb +233 -0
  74. data/lib/metasm/ia32/opcodes.rb +872 -0
  75. data/lib/metasm/ia32/parse.rb +327 -0
  76. data/lib/metasm/ia32/render.rb +91 -0
  77. data/lib/metasm/main.rb +1193 -0
  78. data/lib/metasm/mips.rb +11 -0
  79. data/lib/metasm/mips/compile_c.rb +7 -0
  80. data/lib/metasm/mips/decode.rb +253 -0
  81. data/lib/metasm/mips/encode.rb +51 -0
  82. data/lib/metasm/mips/main.rb +72 -0
  83. data/lib/metasm/mips/opcodes.rb +443 -0
  84. data/lib/metasm/mips/parse.rb +51 -0
  85. data/lib/metasm/mips/render.rb +43 -0
  86. data/lib/metasm/os/gnu_exports.rb +270 -0
  87. data/lib/metasm/os/linux.rb +1112 -0
  88. data/lib/metasm/os/main.rb +1686 -0
  89. data/lib/metasm/os/remote.rb +527 -0
  90. data/lib/metasm/os/windows.rb +2027 -0
  91. data/lib/metasm/os/windows_exports.rb +745 -0
  92. data/lib/metasm/parse.rb +876 -0
  93. data/lib/metasm/parse_c.rb +3938 -0
  94. data/lib/metasm/pic16c/decode.rb +42 -0
  95. data/lib/metasm/pic16c/main.rb +17 -0
  96. data/lib/metasm/pic16c/opcodes.rb +68 -0
  97. data/lib/metasm/ppc.rb +11 -0
  98. data/lib/metasm/ppc/decode.rb +264 -0
  99. data/lib/metasm/ppc/decompile.rb +251 -0
  100. data/lib/metasm/ppc/encode.rb +51 -0
  101. data/lib/metasm/ppc/main.rb +129 -0
  102. data/lib/metasm/ppc/opcodes.rb +410 -0
  103. data/lib/metasm/ppc/parse.rb +52 -0
  104. data/lib/metasm/preprocessor.rb +1277 -0
  105. data/lib/metasm/render.rb +130 -0
  106. data/lib/metasm/sh4.rb +8 -0
  107. data/lib/metasm/sh4/decode.rb +336 -0
  108. data/lib/metasm/sh4/main.rb +292 -0
  109. data/lib/metasm/sh4/opcodes.rb +381 -0
  110. data/lib/metasm/x86_64.rb +12 -0
  111. data/lib/metasm/x86_64/compile_c.rb +1025 -0
  112. data/lib/metasm/x86_64/debug.rb +59 -0
  113. data/lib/metasm/x86_64/decode.rb +268 -0
  114. data/lib/metasm/x86_64/encode.rb +264 -0
  115. data/lib/metasm/x86_64/main.rb +135 -0
  116. data/lib/metasm/x86_64/opcodes.rb +118 -0
  117. data/lib/metasm/x86_64/parse.rb +68 -0
  118. data/misc/bottleneck.rb +61 -0
  119. data/misc/cheader-findpppath.rb +58 -0
  120. data/misc/hexdiff.rb +74 -0
  121. data/misc/hexdump.rb +55 -0
  122. data/misc/metasm-all.rb +13 -0
  123. data/misc/objdiff.rb +47 -0
  124. data/misc/objscan.rb +40 -0
  125. data/misc/pdfparse.rb +661 -0
  126. data/misc/ppc_pdf2oplist.rb +192 -0
  127. data/misc/tcp_proxy_hex.rb +84 -0
  128. data/misc/txt2html.rb +440 -0
  129. data/samples/a.out.rb +31 -0
  130. data/samples/asmsyntax.rb +77 -0
  131. data/samples/bindiff.rb +555 -0
  132. data/samples/compilation-steps.rb +49 -0
  133. data/samples/cparser_makestackoffset.rb +55 -0
  134. data/samples/dasm-backtrack.rb +38 -0
  135. data/samples/dasmnavig.rb +318 -0
  136. data/samples/dbg-apihook.rb +228 -0
  137. data/samples/dbghelp.rb +143 -0
  138. data/samples/disassemble-gui.rb +102 -0
  139. data/samples/disassemble.rb +133 -0
  140. data/samples/dump_upx.rb +95 -0
  141. data/samples/dynamic_ruby.rb +1929 -0
  142. data/samples/elf_list_needed.rb +46 -0
  143. data/samples/elf_listexports.rb +33 -0
  144. data/samples/elfencode.rb +25 -0
  145. data/samples/exeencode.rb +128 -0
  146. data/samples/factorize-headers-elfimports.rb +77 -0
  147. data/samples/factorize-headers-peimports.rb +109 -0
  148. data/samples/factorize-headers.rb +43 -0
  149. data/samples/gdbclient.rb +583 -0
  150. data/samples/generate_libsigs.rb +102 -0
  151. data/samples/hotfix_gtk_dbg.rb +59 -0
  152. data/samples/install_win_env.rb +78 -0
  153. data/samples/lindebug.rb +924 -0
  154. data/samples/linux_injectsyscall.rb +95 -0
  155. data/samples/machoencode.rb +31 -0
  156. data/samples/metasm-shell.rb +91 -0
  157. data/samples/pe-hook.rb +69 -0
  158. data/samples/pe-ia32-cpuid.rb +203 -0
  159. data/samples/pe-mips.rb +35 -0
  160. data/samples/pe-shutdown.rb +78 -0
  161. data/samples/pe-testrelocs.rb +51 -0
  162. data/samples/pe-testrsrc.rb +24 -0
  163. data/samples/pe_listexports.rb +31 -0
  164. data/samples/peencode.rb +19 -0
  165. data/samples/peldr.rb +494 -0
  166. data/samples/preprocess-flatten.rb +19 -0
  167. data/samples/r0trace.rb +308 -0
  168. data/samples/rubstop.rb +399 -0
  169. data/samples/scan_pt_gnu_stack.rb +54 -0
  170. data/samples/scanpeexports.rb +62 -0
  171. data/samples/shellcode-c.rb +40 -0
  172. data/samples/shellcode-dynlink.rb +146 -0
  173. data/samples/source.asm +34 -0
  174. data/samples/struct_offset.rb +47 -0
  175. data/samples/testpe.rb +32 -0
  176. data/samples/testraw.rb +45 -0
  177. data/samples/win32genloader.rb +132 -0
  178. data/samples/win32hooker-advanced.rb +169 -0
  179. data/samples/win32hooker.rb +96 -0
  180. data/samples/win32livedasm.rb +33 -0
  181. data/samples/win32remotescan.rb +133 -0
  182. data/samples/wintrace.rb +92 -0
  183. data/tests/all.rb +8 -0
  184. data/tests/dasm.rb +39 -0
  185. data/tests/dynldr.rb +35 -0
  186. data/tests/encodeddata.rb +132 -0
  187. data/tests/ia32.rb +82 -0
  188. data/tests/mips.rb +116 -0
  189. data/tests/parse_c.rb +239 -0
  190. data/tests/preprocessor.rb +269 -0
  191. data/tests/x86_64.rb +62 -0
  192. metadata +255 -0
@@ -0,0 +1,876 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/main'
8
+ require 'metasm/preprocessor'
9
+
10
+ module Metasm
11
class Data
  # Mnemonics that introduce a data definition.
  # The parsers also rely on this list to recognise a label written without
  # a colon in front of a data statement (`name db 42`).
  DataSpec = ['db', 'dw', 'dd', 'dq']
end
15
+
16
class CPU
  # Reads one instruction (prefixes, opcode name, comma-separated arguments)
  # from lexer and returns it as an Instruction.
  # Returns nil when the lexer has no more tokens; raises a parse error
  # (through the offending token) on an unknown opcode or on arguments that
  # match none of the opcode's prototypes.
  # lexer may also be a String, in which case a preprocessor is built for it
  # with new_asmprepro (XXX such a lexer cannot create labels, eg jmp 1b / jmp $).
  def parse_instruction(lexer)
    lexer = new_asmprepro(lexer) if lexer.kind_of?(String)

    instr = Instruction.new(self)

    # eat leading prefixes; the first token parse_prefix refuses is the opcode name
    name_tok = nil
    loop do
      name_tok = lexer.readtok
      break unless name_tok and parse_prefix(instr, name_tok.raw)
      lexer.skip_space_eol
    end
    return unless name_tok

    # an opcode name may contain dots: glue every following '.' <string> pair onto it
    name_tok = name_tok.dup
    loop do
      peek = lexer.nexttok
      break unless peek and peek.type == :punct and peek.raw == '.'
      name_tok.raw << lexer.readtok.raw
      part = lexer.readtok
      raise name_tok, 'invalid opcode name' unless part and part.type == :string
      name_tok.raw << part.raw
    end

    raise name_tok, 'invalid opcode' unless opcode_list_byname[name_tok.raw]

    instr.opname = name_tok.raw
    instr.backtrace = name_tok.backtrace
    lexer.skip_space

    # collect the argument list
    loop do
      peek = lexer.nexttok
      break unless peek
      # the next word is itself an opcode and the current opcode has an
      # argument-less form: treat it as the start of another instruction
      if instr.args.empty? and opcode_list_byname[peek.raw] and
         opcode_list_byname[instr.opname].find { |op| op.args.empty? }
        break
      end
      arg = parse_argument(lexer)
      break unless arg
      instr.args << arg
      lexer.skip_space
      sep = lexer.nexttok
      break unless sep and sep.type == :punct and sep.raw == ','
      lexer.readtok
      lexer.skip_space_eol
    end

    unless parse_instruction_checkproto(instr)
      raise name_tok, "invalid opcode arguments #{instr.to_s.inspect}, allowed : #{opcode_list_byname[instr.opname].to_a.map { |o| o.args }.inspect}"
    end
    parse_instruction_fixup(instr)

    instr
  end

  # Returns the first opcode registered under i.opname whose argument list has
  # the same length as i.args and accepts every argument (parse_arg_valid?),
  # or nil when there is none.
  def parse_instruction_checkproto(i)
    candidates = opcode_list_byname[i.opname].to_a
    candidates.find do |op|
      next false unless op.args.length == i.args.length
      op.args.zip(i.args).all? { |spec, arg| parse_arg_valid?(op, spec, arg) }
    end
  end

  # Hook invoked once the instruction is fully parsed and validated.
  # Does nothing here.
  def parse_instruction_fixup(i)
    nil
  end

  # Hook deciding whether word is an instruction prefix; a false/nil result
  # means "not a prefix". This default accepts no prefix at all.
  def parse_prefix(i, word)
    nil
  end

  # Reads one argument from lexer and returns it.
  # This default only understands plain expressions; add your own argument
  # parsers here (registers, memory references..)
  def parse_argument(lexer)
    Expression.parse(lexer)
  end

  # Handles the '.directive' instructions not known to the generic parser.
  # This default knows none and always raises through instr.
  # XXX handle HLA here ?
  def parse_parser_instruction(lexer, instr)
    raise instr, 'unknown parser instruction'
  end
end
92
+
93
# Assembler-specific preprocessor, layered on top of Preprocessor.
# On top of what the parent lexer returns, #readtok
#  - turns a ';' comment (up to and including the end of line) into one :eol token
#  - merges consecutive space / end-of-line tokens
#  - records asm macros (name macro args\nbody endm) and constants (name equ val)
#  - expands the macros recorded so far
class AsmPreprocessor < Preprocessor
  # An assembler macro, similar to a preprocessor macro, with support for
  # macro-local labels.
  #  name   : the token naming the macro
  #  args   : array of tokens, the formal argument names
  #  body   : array of tokens making up the macro body
  #  labels : names (strings) of the labels defined inside the body
  class Macro
    attr_accessor :name, :args, :body, :labels

    def initialize(name)
      @name = name
      @args, @body, @labels = [], [], []
    end

    # Returns the array of tokens resulting from the application of the macro.
    #  macro   : the token that triggered the expansion (its backtrace is
    #            appended to the backtrace of every body token)
    #  lexer   : where the actual arguments are read from
    #  program : used to generate a fresh name for each macro-local label
    # Raises through the macro name when the argument count does not match.
    def apply(macro, lexer, program)
      args = Preprocessor::Macro.parse_arglist(lexer).to_a
      raise @name, 'invalid argument count' if args.length != @args.length

      # local label name => unique label name for this expansion
      labels = @labels.inject({}) { |h, l| h.update l => program.new_label(l) }
      # formal argument name => actual argument
      args = @args.zip(args).inject({}) { |h, (fa, a)| h.update fa.raw => a }

      # apply macro
      @body.map { |t|
        t = t.dup
        t.backtrace += macro.backtrace[-2..-1] if not macro.backtrace.empty?
        if labels[t.raw]
          t.raw = labels[t.raw]
          t
        elsif args[t.raw]
          # XXX update toks backtrace ?
          args[t.raw]
        else
          t
        end
      }.flatten	# an actual argument may itself be a list of tokens
    end

    # Parses the formal argument list and the body (up to 'endm') from lexer,
    # and records the labels local to the macro. A label is recognised at the
    # start of a line, either as `name:` (purely numeric names excluded) or
    # as `name <space> <data keyword>`.
    # XXX add eax,
    #     toto db 42	; zomg h4x
    def parse_definition(lexer)
      lexer.skip_space
      while tok = lexer.nexttok and tok.type != :eol
        # no preprocess argument list
        raise @name, 'invalid arg definition' if not tok = lexer.readtok or tok.type != :string
        @args << tok
        lexer.skip_space
        raise @name, 'invalid arg separator' if not tok = lexer.readtok or ((tok.type != :punct or tok.raw != ',') and tok.type != :eol)
        break if tok.type == :eol
        lexer.skip_space
      end

      lexer.skip_space_eol
      while tok = lexer.readtok and (tok.type != :string or tok.raw != 'endm')
        @body << tok
        if @body[-2] and @body[-2].type == :string and @body[-1].raw == ':' and (not @body[-3] or @body[-3].type == :eol) and @body[-2].raw !~ /^[1-9][0-9]*$/
          @labels << @body[-2].raw
        elsif @body[-3] and @body[-3].type == :string and @body[-2].type == :space and Data::DataSpec.include?(@body[-1].raw) and (not @body[-4] or @body[-4].type == :eol)
          @labels << @body[-3].raw
        end
      end
    end
  end

  # the program (used to create new label names)
  attr_accessor :program
  # hash macro name => Macro
  attr_accessor :macro

  def initialize(text='', program=nil)
    @program = program
    @macro = {}
    super(text)
  end

  # Discards the upcoming whitespace and end-of-line tokens.
  def skip_space_eol
    readtok while t = nexttok and (t.type == :space or t.type == :eol)
  end

  # Discards the upcoming whitespace tokens.
  def skip_space
    readtok while t = nexttok and t.type == :space
  end

  # Returns the next token without consuming it (nil at end of input).
  def nexttok
    t = readtok
    unreadtok t
    t
  end

  # Reads a token, handling comments, whitespace merging and macros.
  # rec is for internal use: when true, macro handling is skipped. It is set
  # on the look-ahead reads done here, which avoids reading the whole text at
  # once while checking for the 'macro' keyword.
  # Returns the token, or nil at end of input.
  def readtok(rec = false)
    tok = super()

    # handle ; comments: the comment text and the line end become one :eol token
    if tok and tok.type == :punct and tok.raw == ';'
      # work on a copy, so that the token handed out by the parent lexer is not altered
      tok = tok.dup
      tok.type = :eol
      begin
        while ntok = super() and ntok.type != :eol
          tok.raw << ntok.raw
        end
        tok.raw << ntok.raw if ntok
      rescue ParseError
        # unterminated string inside the comment: keep what was read so far
      end
    end

    # aggregate space/eol
    # (the recursive read has already aggregated whatever follows ntok)
    if tok and (tok.type == :space or tok.type == :eol)
      if ntok = readtok(true) and ntok.type == :space
        tok = tok.dup
        tok.raw << ntok.raw
      elsif ntok and ntok.type == :eol
        tok = tok.dup
        tok.raw << ntok.raw
        tok.type = :eol
      else
        unreadtok ntok
      end
    end

    # handle macros
    if not rec and tok and tok.type == :string
      if @macro[tok.raw]
        # known macro: push its expansion back and read again
        @macro[tok.raw].apply(tok, self, @program).reverse_each { |t| unreadtok t }
        tok = readtok

      else
        # look ahead for `<name> macro` / `<name> equ`
        # (a name that is already a macro was expanded by the branch above, so
        # a definition seen here is never a redefinition of tok.raw)
        if ntok = readtok(true) and ntok.type == :space and nntok = readtok(true) and nntok.type == :string and (nntok.raw == 'macro' or nntok.raw == 'equ')
          m = Macro.new tok
          # XXX this allows nested macro definition..
          if nntok.raw == 'macro'
            m.parse_definition self
          else
            # equ: the body is a single token holding the parsed expression
            raise nntok if not etok = readtok
            unreadtok etok
            raise nntok if not v = Expression.parse(self)
            etok = etok.dup
            etok.type = :string
            etok.value = v
            etok.raw = v.to_s
            m.body << etok
          end
          @macro[tok.raw] = m
          # the definition itself yields no token: return whatever follows it
          tok = readtok
        else
          unreadtok nntok
          unreadtok ntok
        end
      end
    end

    tok
  end
end
258
+
259
class ExeFormat
  # Prepares the parser state (the anonymous local label tables).
  # setup self.cursource here
  def parse_init
    @locallabels_bkw ||= {}
    @locallabels_fwd ||= {}
  end

  # hash mapping local anonymous label number => unique name
  # defined only while parsing
  # usage:
  #   jmp 1f
  #  1:
  #   jmp 1f
  #   jmp 1b
  #  1:
  # defined in #parse, replaced in use by Expression#parse
  # no macro-scope (macro are gsub-like, and no special handling for those labels is done)

  # Returns the unique name of the last definition of anonymous label id,
  # or nil when it has not been defined yet.
  def locallabels_bkw(id)
    @locallabels_bkw[id]
  end

  # Returns the unique name reserved for the next definition of anonymous
  # label id, creating it on first use.
  def locallabels_fwd(id)
    @locallabels_fwd[id] ||= new_label("local_#{id}")
  end

  # Parses an asm source text, appending Instruction/Data/Align/Offset/Padding
  # and Label objects to @cursource.
  # file and lineno are handed to the lexer together with the text.
  # Returns self; syntax errors are raised through the offending token.
  def parse(text, file='<ruby>', lineno=0)
    parse_init
    @lexer ||= cpu.new_asmprepro('', self)
    @lexer.feed text, file, lineno
    # true while nothing but whitespace was seen since the start of the line:
    # a label is only recognised there
    lasteol = true

    while not @lexer.eos?
      tok = @lexer.readtok
      next if not tok
      case tok.type
      when :space
      when :eol
        lasteol = true
      when :punct
        case tok.raw
        when '.'
          # parser directive: rebuild its (possibly dotted) name
          tok = tok.dup
          while ntok = @lexer.nexttok and ((ntok.type == :string) or (ntok.type == :punct and ntok.raw == '.'))
            tok.raw << @lexer.readtok.raw
          end
          parse_parser_instruction tok
        else raise tok, 'syntax error'
        end
        lasteol = false
      when :string
        ntok = nntok = nil
        # label: `name:` or `name <space> <data keyword>` at the start of a line
        if lasteol and ((ntok = @lexer.readtok and ntok.type == :punct and ntok.raw == ':') or
           (ntok and ntok.type == :space and nntok = @lexer.nexttok and nntok.type == :string and Data::DataSpec.include?(nntok.raw)))
          if tok.raw =~ /^[1-9][0-9]*$/
            # handle anonymous local labels: reuse the name promised to earlier
            # forward references, and remember it for backward references
            lname = @locallabels_bkw[tok.raw] = @locallabels_fwd.delete(tok.raw) || new_label('local_'+tok.raw)
          else
            lname = tok.raw
            raise tok, "label redefinition" if new_label(lname) != lname
          end
          l = Label.new(lname)
          l.backtrace = tok.backtrace
          @cursource << l
          lasteol = false
        else
          lasteol = false
          # not a label: push everything back and parse a full statement
          @lexer.unreadtok ntok
          @lexer.unreadtok tok
          if Data::DataSpec.include?(tok.raw)
            @cursource << parse_data
          else
            @cursource << @cpu.parse_instruction(@lexer)
          end
          # the statement referenced '$_': define that label right after it
          if lname = @locallabels_fwd.delete('endinstr')
            l = Label.new(lname)
            l.backtrace = tok.backtrace
            @cursource << l
          end
        end
      else
        raise tok, 'syntax error'
      end
    end

    puts "Undefined forward reference to anonymous labels #{@locallabels_fwd.keys.inspect}" if $VERBOSE and not @locallabels_fwd.empty?

    self
  end

  # Creates a new label from base, appends it to the current source, then
  # parses the optional additional source src after it.
  # Returns the new label name.
  def parse_new_label(base='', src=nil)
    parse_init
    label = new_label(base)
    @cursource << Label.new(label)
    # src is optional: without it there is nothing to hand to the lexer
    parse src if src
    label
  end

  # Handles special directives (alignment, padding, offset, ...).
  # Special directives start with a dot; tok holds the full directive name.
  # Directives unknown here are forwarded to the cpu.
  def parse_parser_instruction(tok)
    case tok.raw.downcase
    when '.align'
      # .align <immediate> [, <fill>]
      e = Expression.parse(@lexer)
      e = e.reduce if e
      raise tok, 'need immediate alignment size' unless e.kind_of? ::Integer
      @lexer.skip_space
      if ntok = @lexer.readtok and ntok.type == :punct and ntok.raw == ','
        @lexer.skip_space_eol
        # allow single byte value or full data statement
        if not ntok = @lexer.readtok or not ntok.type == :string or not Data::DataSpec.include?(ntok.raw)
          @lexer.unreadtok ntok
          type = 'db'
        else
          type = ntok.raw
        end
        fillwith = parse_data_data type
      else
        @lexer.unreadtok ntok
      end
      raise tok, 'syntax error' if ntok = @lexer.nexttok and ntok.type != :eol
      @cursource << Align.new(e, fillwith, tok.backtrace)

    when '.pad'
      # .pad [<fill>]
      @lexer.skip_space
      if ntok = @lexer.readtok and ntok.type != :eol
        # allow single byte value or full data statement
        if not ntok.type == :string or not Data::DataSpec.include?(ntok.raw)
          @lexer.unreadtok ntok
          type = 'db'
        else
          type = ntok.raw
        end
        fillwith = parse_data_data(type)
      else
        @lexer.unreadtok ntok
      end
      raise tok, 'syntax error' if ntok = @lexer.nexttok and ntok.type != :eol
      @cursource << Padding.new(fillwith, tok.backtrace)

    when '.offset'
      # .offset <expression>
      e = Expression.parse(@lexer)
      raise tok, 'syntax error' if ntok = @lexer.nexttok and ntok.type != :eol
      @cursource << Offset.new(e, tok.backtrace)

    when '.padto'
      # .padto <expression> [, <fill>] : a Padding followed by an Offset
      e = Expression.parse(@lexer)
      @lexer.skip_space
      if ntok = @lexer.readtok and ntok.type == :punct and ntok.raw == ','
        @lexer.skip_space
        # allow single byte value or full data statement
        if not ntok = @lexer.readtok or not ntok.type == :string or not Data::DataSpec.include?(ntok.raw)
          @lexer.unreadtok ntok
          type = 'db'
        else
          type = ntok.raw
        end
        fillwith = parse_data_data type
      else
        @lexer.unreadtok ntok
      end
      raise tok, 'syntax error' if ntok = @lexer.nexttok and ntok.type != :eol
      @cursource << Padding.new(fillwith, tok.backtrace) << Offset.new(e, tok.backtrace)

    else
      @cpu.parse_parser_instruction(self, tok)
    end
  end

  # Parses a data statement (`db 1, 2, "foo"`) from @lexer.
  # Returns a Data whose content is the array of parsed elements.
  def parse_data
    raise ParseError, 'internal error' if not tok = @lexer.readtok
    raise tok, 'invalid data type' if tok.type != :string or not Data::DataSpec.include?(tok.raw)
    type = tok.raw
    @lexer.skip_space_eol
    arr = []
    loop do
      arr << parse_data_data(type)
      @lexer.skip_space
      if ntok = @lexer.readtok and ntok.type == :punct and ntok.raw == ','
        @lexer.skip_space_eol
      else
        @lexer.unreadtok ntok
        break
      end
    end
    Data.new(type, arr, 1, tok.backtrace)
  end

  # Parses one element of a data statement and returns it as a Data of the
  # given type. An element is one of
  #   ?                      (uninitialized)
  #   a quoted string
  #   an expression
  #   <count> dup(<element>, ...)   where count must reduce to an Integer
  def parse_data_data(type)
    raise ParseError, 'need data content' if not tok = @lexer.readtok
    if tok.type == :punct and tok.raw == '?'
      Data.new type, :uninitialized, 1, tok.backtrace
    elsif tok.type == :quoted
      Data.new type, tok.value, 1, tok.backtrace
    else
      @lexer.unreadtok tok
      raise tok, 'invalid data' if not i = Expression.parse(@lexer)
      @lexer.skip_space
      if ntok = @lexer.readtok and ntok.type == :string and ntok.raw.downcase == 'dup'
        raise ntok, 'need immediate count expression' unless (count = i.reduce).kind_of? ::Integer
        @lexer.skip_space
        raise ntok, 'syntax error, ( expected' if not ntok = @lexer.readtok or ntok.type != :punct or ntok.raw != '('
        content = []
        loop do
          content << parse_data_data(type)
          @lexer.skip_space
          if ntok = @lexer.readtok and ntok.type == :punct and ntok.raw == ','
            @lexer.skip_space_eol
          else
            @lexer.unreadtok ntok
            break
          end
        end
        raise ntok, 'syntax error, ) expected' if not ntok = @lexer.readtok or ntok.type != :punct or ntok.raw != ')'
        Data.new type, content, count, tok.backtrace
      else
        @lexer.unreadtok ntok
        Data.new type, i, 1, tok.backtrace
      end
    end
  end
end
481
+
482
+ class Expression
483
# Binary operator precedence table.
# key = operator, value = hash { operator of strictly lower precedence => true }
# The levels below are listed from the lowest precedence to the highest;
# operators of the same level share the same value hash.
OP_PRIO = {}
[
  [:'||'], [:'&&'], [:|], [:^], [:&], [:'==', :'!='],
  [:'<', :'>', :'<=', :'>='], [:<<, :>>], [:+, :-], [:*, :/, :%]
].each do |same_level|
  # everything registered so far binds less tightly than this level
  weaker = {}
  OP_PRIO.each_key { |op| weaker[op] = true }
  same_level.each { |op| OP_PRIO[op] = weaker }
end
490
+
491
+
492
+ class << self
493
# Reads a binary operator from the lexer.
# Returns the operator token, with its value set to the operator symbol
# (:+, :<<, :'!=', ...), or nil when the next token is not an operator.
# In the latter case every token read is pushed back on the lexer.
def readop(lexer)
  if not tok = lexer.readtok or tok.type != :punct
    lexer.unreadtok tok
    return
  end

  # token already analysed: accept it only if its value is a known operator
  if tok.value
    if OP_PRIO[tok.value]
      return tok
    else
      lexer.unreadtok tok
      return
    end
  end

  op = tok
  case op.raw
  # may be followed by itself or '='
  when '>', '<'
    if ntok = lexer.readtok and ntok.type == :punct and (ntok.raw == op.raw or ntok.raw == '=')
      op = op.dup
      op.raw << ntok.raw
    else
      lexer.unreadtok ntok
    end
  # may be followed by itself
  when '|', '&'
    if ntok = lexer.readtok and ntok.type == :punct and ntok.raw == op.raw
      op = op.dup
      op.raw << ntok.raw
    else
      lexer.unreadtok ntok
    end
  # must be followed by '='
  when '!', '='
    # all three tests are alternatives: `and` and `or` share the same
    # precedence in Ruby, so they must not be mixed here without parentheses
    if not ntok = lexer.readtok or ntok.type != :punct or ntok.raw != '='
      lexer.unreadtok ntok
      lexer.unreadtok tok
      return
    end
    op = op.dup
    op.raw << ntok.raw
  # ok
  when '^', '+', '-', '*', '/', '%'
  # unknown
  else
    lexer.unreadtok tok
    return
  end
  op.value = op.raw.to_sym
  op
end
546
+
547
# Parses a numeric literal into tok.value; the return value is not meaningful.
# Does nothing when tok.value is already set or when tok.raw does not start
# with a digit or a dot. Does not parse unary operators (-/+/~).
# Understood forms (underscores may separate digits):
#   0b101 / 101b          binary
#   017                   octal
#   0ffh / 0xff           hexadecimal
#   0x1.8p+1              C99 hexadecimal float
#   12 / 1.5 / 1e-3 / .5  decimal integer / float
# Floats are split over several lexer tokens: the needed following tokens are
# read from lexer and appended to tok.raw; a token read in excess is pushed back.
# Raises through tok on a malformed constant.
def parse_num_value(lexer, tok)
  if not tok.value and tok.raw =~ /^[a-f][0-9a-f]*h$/i
    # warn on variable name like ffffh
    puts "W: Parser: you may want to add a leading 0 to #{tok.raw.inspect} at #{tok.backtrace[-2]}:#{tok.backtrace[-1]}" if $VERBOSE
  end

  return if tok.value
  return if tok.raw[0] != ?. and !(?0..?9).include?(tok.raw[0])

  case tr = tok.raw.downcase
  when /^0b([01][01_]*)$/, /^([01][01_]*)b$/
    tok.value = $1.to_i(2)

  when /^(0[0-7][0-7_]*)$/
    tok.value = $1.to_i(8)

  when /^([0-9][a-f0-9_]*)h$/
    tok.value = $1.to_i(16)

  when /^0x([a-f0-9][a-f0-9_]*)(u?l?l?|l?l?u?|p([0-9][0-9_]*[fl]?)?)$/, '0x'
    tok.value = $1.to_i(16) if $1
    ntok = lexer.readtok

    # check for C99 hex float
    if not tr.include? 'p' and ntok and ntok.type == :punct and ntok.raw == '.'
      if not nntok = lexer.readtok or nntok.type != :string
        lexer.unreadtok nntok
        lexer.unreadtok ntok
        return
      end
      # append the '.' and the token holding the fraction digits and the 'p'
      tok.raw << ntok.raw
      ntok = nntok
      tok.raw << ntok.raw if ntok
      raise tok, 'invalid hex float' if not ntok or ntok.type != :string or ntok.raw !~ /^[0-9a-f_]*p([0-9][0-9_]*[fl]?)?$/i
      raise tok, 'invalid hex float' if tok.raw.delete('_').downcase[0,4] == '0x.p'	# no digits
      ntok = lexer.readtok
    end

    if not tok.raw.downcase.include? 'p'
      # standard hex
      lexer.unreadtok ntok
    else
      if tok.raw.downcase[-1] == ?p
        # read signed exponent: a '+' or '-' punctuation, then the digits
        tok.raw << ntok.raw if ntok
        raise tok, 'invalid hex float' if not ntok or ntok.type != :punct or (ntok.raw != '+' and ntok.raw != '-')
        ntok = lexer.readtok
        tok.raw << ntok.raw if ntok
        raise tok, 'invalid hex float' if not ntok or ntok.type != :string or ntok.raw !~ /^[0-9][0-9_]*[fl]?$/i
      else
        # the exponent was already part of the token: ntok is only a
        # look-ahead and belongs to whatever comes next
        lexer.unreadtok ntok
      end

      m = /^0x([0-9a-f]*)(?:\.([0-9a-f]*))?p([+-]?[0-9]+)[fl]?$/.match(tok.raw.delete('_').downcase)
      raise tok, 'internal error' if not m
      int_part, frac_digits, exponent = m[1].to_i(16), m[2], m[3].to_i
      tok.value = int_part.to_f
      # each hex digit after the point weighs 1/16 of the previous one
      tok.value += frac_digits.to_i(16).to_f / 16**frac_digits.length if frac_digits and not frac_digits.empty?
      tok.value *= 2.0**exponent
      puts "hex float: #{tok.raw} => #{tok.value}" if $DEBUG
    end

  when /^([0-9][0-9_]*)(u?l?l?|l?l?u?|e([0-9][0-9_]*[fl]?)?)$/, '.'
    tok.value = $1.to_i if $1
    ntok = lexer.readtok
    if tok.raw == '.' and (not ntok or ntok.type != :string)
      lexer.unreadtok ntok
      return
    end

    if not tr.include? 'e' and tr != '.' and ntok and ntok.type == :punct and ntok.raw == '.'
      if not nntok = lexer.readtok or nntok.type != :string
        lexer.unreadtok nntok
        lexer.unreadtok ntok
        return
      end
      # read upto '.'
      tok.raw << ntok.raw
      ntok = nntok
    end

    if not tok.raw.downcase.include? 'e' and tok.raw[-1] == ?.
      # read fractional part
      tok.raw << ntok.raw if ntok
      raise tok, 'bad float' if not ntok or ntok.type != :string or ntok.raw !~ /^[0-9_]*(e[0-9_]*)?[fl]?$/i
      ntok = lexer.readtok
    end

    if tok.raw.downcase[-1] == ?e
      # read signed exponent
      tok.raw << ntok.raw if ntok
      raise tok, 'bad float' if not ntok or ntok.type != :punct or (ntok.raw != '+' and ntok.raw != '-')
      ntok = lexer.readtok
      tok.raw << ntok.raw if ntok
      raise tok, 'bad float' if not ntok or ntok.type != :string or ntok.raw !~ /^[0-9][0-9_]*[fl]?$/i
      ntok = lexer.readtok
    end

    # ntok is now the first token after the constant
    lexer.unreadtok ntok

    if tok.raw.delete('_').downcase =~ /^(?:(?:[0-9]+\.[0-9]*|\.[0-9]+)(?:e[+-]?[0-9]+)?|[0-9]+e[+-]?[0-9]+)[fl]?$/i
      tok.value = tok.raw.to_f
    else
      raise tok, 'internal error' if tok.raw =~ /[e.]/i
    end

  else raise tok, 'invalid numeric constant'
  end
end
658
+
659
# Sets tok.value for the special asm label names, then lets parse_num_value
# handle numeric constants (which may consume and aggregate following tokens).
# Special names, only looked at when tok.value is not already set:
#   $    name of the label ending the current source (one named
#        'instr_start' is appended if the last element is not a label)
#   $$   name of the label starting the current source (one named
#        'section_start' is prepended if the first element is not a label)
#   $_   forward local label 'endinstr'
#   1b   last definition of the anonymous label 1 (may fallback to binary parser)
#   1f   next definition of the anonymous label 1
# XXX for binary, use _ delimiter or 0b prefix, or start with 0 : 1b may conflict with backward local anonymous label reference
def parse_intfloat(lexer, tok)
  unless tok.value
    case tok.raw
    when '$'
      lbl = lexer.program.cursource.last
      unless lbl.kind_of?(Label)
        lbl = Label.new(lexer.program.new_label('instr_start'))
        lbl.backtrace = tok.backtrace
        lexer.program.cursource << lbl
      end
      tok.value = lbl.name
    when '$$'
      lbl = lexer.program.cursource.first
      unless lbl.kind_of?(Label)
        lbl = Label.new(lexer.program.new_label('section_start'))
        lbl.backtrace = tok.backtrace
        lexer.program.cursource.unshift lbl
      end
      tok.value = lbl.name
    when '$_'
      tok.value = lexer.program.locallabels_fwd('endinstr')
    when /^([1-9][0-9]*)([fb])$/
      id, direction = $1, $2
      if direction == 'b'
        tok.value = lexer.program.locallabels_bkw(id)
      else
        tok.value = lexer.program.locallabels_fwd(id)
      end
    end
  end

  parse_num_value(lexer, tok)
end
690
+
691
# Returns the next value from lexer: an immediate, a variable name, a
# single-quoted character constant, a parenthesised expression, or a unary
# operator (! + - ~) applied to a value.
# Leading and trailing spaces are skipped.
# Returns nil when no value starts at the current position; the token that
# was read is then pushed back, except at end of input.
# Raises through the offending token on an unterminated parenthesis or on a
# unary operator without operand.
def parse_value(lexer)
  nil while tok = lexer.readtok and tok.type == :space
  return if not tok
  case tok.type
  when :string
    # ignores the 'offset' word if followed by a string
    if not tok.value and tok.raw.downcase == 'offset'
      nil while ntok = lexer.readtok and ntok.type == :space
      # ntok is nil when 'offset' is the last word of the input
      if ntok and ntok.type == :string
        tok = ntok
      else
        lexer.unreadtok ntok
      end
    end
    parse_intfloat(lexer, tok)
    val = tok.value || tok.raw
  when :quoted
    # only single-quoted strings are values
    if tok.raw[0] != ?'
      lexer.unreadtok tok
      return
    end
    s = tok.value || tok.raw[1..-2]	# raise tok, 'need ppcessing !'
    s = s.reverse if lexer.respond_to? :program and lexer.program and lexer.program.cpu and lexer.program.cpu.endianness == :little
    # pack the characters into one integer, first character in the high bits
    val = s.unpack('C*').inject(0) { |sum, c| (sum << 8) | c }
  when :punct
    case tok.raw
    when '('
      nil while ntok = lexer.readtok and (ntok.type == :space or ntok.type == :eol)
      lexer.unreadtok ntok
      val = parse(lexer)
      nil while ntok = lexer.readtok and (ntok.type == :space or ntok.type == :eol)
      raise tok, "syntax error, no ) found after #{val.inspect}, got #{ntok.inspect}" if not ntok or ntok.type != :punct or ntok.raw != ')'
    when '!', '+', '-', '~'
      nil while ntok = lexer.readtok and (ntok.type == :space or ntok.type == :eol)
      lexer.unreadtok ntok
      raise tok, 'need expression after unary operator' if not val = parse_value(lexer)
      val = Expression[tok.raw.to_sym, val]
    when '.'
      # possibly a float written without integer part
      parse_intfloat(lexer, tok)
      if not tok.value
        lexer.unreadtok tok
        return
      end
      val = tok.value
    else
      lexer.unreadtok tok
      return
    end
  else
    lexer.unreadtok tok
    return
  end
  nil while tok = lexer.readtok and tok.type == :space
  lexer.unreadtok tok
  val
end
746
+
747
+ # for boolean operators, true is 1 (or anything != 0), false is 0
748
def parse(lexer)
  # Operator-precedence parser: reads 'value (operator value)*' from +lexer+
  # and returns the resulting Expression, or nil if no value could be read.
  # Raises the operator token when its right-hand side is missing.
  operands  = []
  operators = []

  # combine the most recent operator with the two most recent operands
  # (the right-hand side is popped first)
  fold = lambda {
    rhs = operands.pop
    lhs = operands.pop
    operands << new(operators.pop, rhs, lhs)
  }

  first = parse_value(lexer)
  return if not first
  operands << first

  while op = readop(lexer)
    # skip blanks and line breaks following the operator
    nxt = lexer.readtok
    nxt = lexer.readtok while nxt and (nxt.type == :space or nxt.type == :eol)
    lexer.unreadtok nxt

    # reduce pending operators until OP_PRIO allows pushing the new one
    fold.call while not operators.empty? and not OP_PRIO[op.value][operators.last]
    operators << op.value

    rhs = parse_value(lexer)
    raise op, 'need rhs' if not rhs
    operands << rhs
  end

  fold.call until operators.empty?

  Expression[operands.first]
end
776
+
777
+ # parse an expression in a string
778
+ # updates the string to point after the parsed expression
779
def parse_string!(str, &b)
  # Parses an expression from the start of +str+ and removes the parsed
  # text from +str+ in place. Returns what parse returned.
  lexer = Preprocessor.new(str)
  expr = parse(lexer, &b)

  # consumed length = lexer position minus the tokens that were read ahead
  # and are still waiting in the lexer queue
  consumed = lexer.queue.inject(lexer.pos) { |left, t| left - t.raw.length }
  str[0, consumed] = ''

  expr
end
791
+
792
+ # parse an expression in a string
793
def parse_string(str, &b)
  # Non-destructive variant: +str+ is left untouched, the block (if any)
  # is forwarded to parse.
  lexer = Preprocessor.new(str)
  parse(lexer, &b)
end
796
+ end
797
+ end
798
+
799
+ # an Expression whose ::parse handles indirection (byte ptr [foobar])
800
class IndExpression < Expression
  class << self
    # Reads one operand, accepting memory indirections on top of what
    # Expression.parse_value handles:
    #   [expr]                       indirection with no explicit size
    #   byte/word/dword/qword [ptr] [expr]   sized indirection
    #   *value                       indirection through a single value
    #   :name                        returned as the Symbol :name
    # Returns an Indirection, a Symbol, the superclass result, or nil at
    # end of input. Raises on a missing ']' / symbol name, or when a size
    # keyword is not followed by a pointer.
    def parse_value(lexer)
      sz = nil
      ptr = nil
      loop do
        nil while tok = lexer.readtok and tok.type == :space
        return if not tok
        case tok.raw
        when 'qword'; sz=8
        when 'dword'; sz=4
        when 'word'; sz=2
        when 'byte'; sz=1
        when 'ptr'	# noise word, just skipped
        when '['
          ptr = parse(lexer)
          nil while tok = lexer.readtok and tok.type == :space
          # tok is nil at end of input: report on the lexer in that case
          raise tok || lexer, '] expected' if not tok or tok.raw != ']'
          break
        when '*'
          ptr = parse_value(lexer)
          break
        when ':'	# symbols, eg ':eax'
          n = lexer.readtok
          # a lone ':' at end of input has no name to convert
          raise tok, 'symbol name expected' if not n
          return n.raw.to_sym
        else
          lexer.unreadtok tok
          break
        end
      end
      raise lexer, 'invalid indirection' if sz and not ptr
      if ptr; Indirection[ptr, sz]	# if sz is nil, default cpu pointersz is set in resolve_expr
      else super(lexer)
      end
    end

    # Same as Expression.parse; an optional block is installed as parse_cb
    # (custom decimal converter) for the duration of the call.
    def parse(*a, &b)
      @parse_cb = b if b
      super(*a)
    ensure
      # reset even when parsing raises, so a failed parse cannot leak its
      # callback into later, unrelated parses
      @parse_cb = nil if b
    end

    # callback used to customize the parsing of /^([0-9]+)$/ tokens
    # implicitly set by parse(expr) { cb }
    # allows eg parsing '40000' as 0x40000 when relevant
    attr_accessor :parse_cb

    # Sets tok.value for integer literals: plain digits (through parse_cb
    # when one is set), 0x.. / ..h hexadecimal, 0b.. binary.
    # Other tokens are left untouched.
    def parse_intfloat(lexer, tok)
      case tok.raw
      when /^([0-9]+)$/; tok.value = parse_cb ? @parse_cb[$1] : $1.to_i
      when /^0x([0-9a-f]+)$/i, /^([0-9a-f]+)h$/i; tok.value = $1.to_i(16)
      when /^0b([01]+)$/i; tok.value = $1.to_i(2)
      end
    end

    # Recognises the two-token operator '->' and returns it as a single
    # token whose value is :'->'; anything else is pushed back and handed
    # to the superclass.
    def readop(lexer)
      if t0 = lexer.readtok and t0.raw == '-' and t1 = lexer.readtok and t1.raw == '>'
        op = t0.dup
        op.raw << t1.raw
        op.value = op.raw.to_sym
        op
      else
        # t1 is nil when the condition short-circuited before reading it
        lexer.unreadtok t1
        lexer.unreadtok t0
        super(lexer)
      end
    end

    # Builds the node for a binary operator; 'l -> r' becomes an
    # Indirection on l + r with no explicit size.
    def new(op, r, l)
      return Indirection[[l, :+, r], nil] if op == :'->'
      super(op, r, l)
    end
  end
end
876
+ end