metasm 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192) hide show
  1. data/BUGS +11 -0
  2. data/CREDITS +17 -0
  3. data/README +270 -0
  4. data/TODO +114 -0
  5. data/doc/code_organisation.txt +146 -0
  6. data/doc/const_missing.txt +16 -0
  7. data/doc/core_classes.txt +75 -0
  8. data/doc/feature_list.txt +53 -0
  9. data/doc/index.txt +59 -0
  10. data/doc/install_notes.txt +170 -0
  11. data/doc/style.css +3 -0
  12. data/doc/use_cases.txt +18 -0
  13. data/lib/metasm.rb +80 -0
  14. data/lib/metasm/arm.rb +12 -0
  15. data/lib/metasm/arm/debug.rb +39 -0
  16. data/lib/metasm/arm/decode.rb +167 -0
  17. data/lib/metasm/arm/encode.rb +77 -0
  18. data/lib/metasm/arm/main.rb +75 -0
  19. data/lib/metasm/arm/opcodes.rb +177 -0
  20. data/lib/metasm/arm/parse.rb +130 -0
  21. data/lib/metasm/arm/render.rb +55 -0
  22. data/lib/metasm/compile_c.rb +1457 -0
  23. data/lib/metasm/dalvik.rb +8 -0
  24. data/lib/metasm/dalvik/decode.rb +196 -0
  25. data/lib/metasm/dalvik/main.rb +60 -0
  26. data/lib/metasm/dalvik/opcodes.rb +366 -0
  27. data/lib/metasm/decode.rb +213 -0
  28. data/lib/metasm/decompile.rb +2659 -0
  29. data/lib/metasm/disassemble.rb +2068 -0
  30. data/lib/metasm/disassemble_api.rb +1280 -0
  31. data/lib/metasm/dynldr.rb +1329 -0
  32. data/lib/metasm/encode.rb +333 -0
  33. data/lib/metasm/exe_format/a_out.rb +194 -0
  34. data/lib/metasm/exe_format/autoexe.rb +82 -0
  35. data/lib/metasm/exe_format/bflt.rb +189 -0
  36. data/lib/metasm/exe_format/coff.rb +455 -0
  37. data/lib/metasm/exe_format/coff_decode.rb +901 -0
  38. data/lib/metasm/exe_format/coff_encode.rb +1078 -0
  39. data/lib/metasm/exe_format/dex.rb +457 -0
  40. data/lib/metasm/exe_format/dol.rb +145 -0
  41. data/lib/metasm/exe_format/elf.rb +923 -0
  42. data/lib/metasm/exe_format/elf_decode.rb +979 -0
  43. data/lib/metasm/exe_format/elf_encode.rb +1375 -0
  44. data/lib/metasm/exe_format/macho.rb +827 -0
  45. data/lib/metasm/exe_format/main.rb +228 -0
  46. data/lib/metasm/exe_format/mz.rb +164 -0
  47. data/lib/metasm/exe_format/nds.rb +172 -0
  48. data/lib/metasm/exe_format/pe.rb +437 -0
  49. data/lib/metasm/exe_format/serialstruct.rb +246 -0
  50. data/lib/metasm/exe_format/shellcode.rb +114 -0
  51. data/lib/metasm/exe_format/xcoff.rb +167 -0
  52. data/lib/metasm/gui.rb +23 -0
  53. data/lib/metasm/gui/cstruct.rb +373 -0
  54. data/lib/metasm/gui/dasm_coverage.rb +199 -0
  55. data/lib/metasm/gui/dasm_decomp.rb +369 -0
  56. data/lib/metasm/gui/dasm_funcgraph.rb +103 -0
  57. data/lib/metasm/gui/dasm_graph.rb +1354 -0
  58. data/lib/metasm/gui/dasm_hex.rb +543 -0
  59. data/lib/metasm/gui/dasm_listing.rb +599 -0
  60. data/lib/metasm/gui/dasm_main.rb +906 -0
  61. data/lib/metasm/gui/dasm_opcodes.rb +291 -0
  62. data/lib/metasm/gui/debug.rb +1228 -0
  63. data/lib/metasm/gui/gtk.rb +884 -0
  64. data/lib/metasm/gui/qt.rb +495 -0
  65. data/lib/metasm/gui/win32.rb +3004 -0
  66. data/lib/metasm/gui/x11.rb +621 -0
  67. data/lib/metasm/ia32.rb +14 -0
  68. data/lib/metasm/ia32/compile_c.rb +1523 -0
  69. data/lib/metasm/ia32/debug.rb +193 -0
  70. data/lib/metasm/ia32/decode.rb +1167 -0
  71. data/lib/metasm/ia32/decompile.rb +564 -0
  72. data/lib/metasm/ia32/encode.rb +314 -0
  73. data/lib/metasm/ia32/main.rb +233 -0
  74. data/lib/metasm/ia32/opcodes.rb +872 -0
  75. data/lib/metasm/ia32/parse.rb +327 -0
  76. data/lib/metasm/ia32/render.rb +91 -0
  77. data/lib/metasm/main.rb +1193 -0
  78. data/lib/metasm/mips.rb +11 -0
  79. data/lib/metasm/mips/compile_c.rb +7 -0
  80. data/lib/metasm/mips/decode.rb +253 -0
  81. data/lib/metasm/mips/encode.rb +51 -0
  82. data/lib/metasm/mips/main.rb +72 -0
  83. data/lib/metasm/mips/opcodes.rb +443 -0
  84. data/lib/metasm/mips/parse.rb +51 -0
  85. data/lib/metasm/mips/render.rb +43 -0
  86. data/lib/metasm/os/gnu_exports.rb +270 -0
  87. data/lib/metasm/os/linux.rb +1112 -0
  88. data/lib/metasm/os/main.rb +1686 -0
  89. data/lib/metasm/os/remote.rb +527 -0
  90. data/lib/metasm/os/windows.rb +2027 -0
  91. data/lib/metasm/os/windows_exports.rb +745 -0
  92. data/lib/metasm/parse.rb +876 -0
  93. data/lib/metasm/parse_c.rb +3938 -0
  94. data/lib/metasm/pic16c/decode.rb +42 -0
  95. data/lib/metasm/pic16c/main.rb +17 -0
  96. data/lib/metasm/pic16c/opcodes.rb +68 -0
  97. data/lib/metasm/ppc.rb +11 -0
  98. data/lib/metasm/ppc/decode.rb +264 -0
  99. data/lib/metasm/ppc/decompile.rb +251 -0
  100. data/lib/metasm/ppc/encode.rb +51 -0
  101. data/lib/metasm/ppc/main.rb +129 -0
  102. data/lib/metasm/ppc/opcodes.rb +410 -0
  103. data/lib/metasm/ppc/parse.rb +52 -0
  104. data/lib/metasm/preprocessor.rb +1277 -0
  105. data/lib/metasm/render.rb +130 -0
  106. data/lib/metasm/sh4.rb +8 -0
  107. data/lib/metasm/sh4/decode.rb +336 -0
  108. data/lib/metasm/sh4/main.rb +292 -0
  109. data/lib/metasm/sh4/opcodes.rb +381 -0
  110. data/lib/metasm/x86_64.rb +12 -0
  111. data/lib/metasm/x86_64/compile_c.rb +1025 -0
  112. data/lib/metasm/x86_64/debug.rb +59 -0
  113. data/lib/metasm/x86_64/decode.rb +268 -0
  114. data/lib/metasm/x86_64/encode.rb +264 -0
  115. data/lib/metasm/x86_64/main.rb +135 -0
  116. data/lib/metasm/x86_64/opcodes.rb +118 -0
  117. data/lib/metasm/x86_64/parse.rb +68 -0
  118. data/misc/bottleneck.rb +61 -0
  119. data/misc/cheader-findpppath.rb +58 -0
  120. data/misc/hexdiff.rb +74 -0
  121. data/misc/hexdump.rb +55 -0
  122. data/misc/metasm-all.rb +13 -0
  123. data/misc/objdiff.rb +47 -0
  124. data/misc/objscan.rb +40 -0
  125. data/misc/pdfparse.rb +661 -0
  126. data/misc/ppc_pdf2oplist.rb +192 -0
  127. data/misc/tcp_proxy_hex.rb +84 -0
  128. data/misc/txt2html.rb +440 -0
  129. data/samples/a.out.rb +31 -0
  130. data/samples/asmsyntax.rb +77 -0
  131. data/samples/bindiff.rb +555 -0
  132. data/samples/compilation-steps.rb +49 -0
  133. data/samples/cparser_makestackoffset.rb +55 -0
  134. data/samples/dasm-backtrack.rb +38 -0
  135. data/samples/dasmnavig.rb +318 -0
  136. data/samples/dbg-apihook.rb +228 -0
  137. data/samples/dbghelp.rb +143 -0
  138. data/samples/disassemble-gui.rb +102 -0
  139. data/samples/disassemble.rb +133 -0
  140. data/samples/dump_upx.rb +95 -0
  141. data/samples/dynamic_ruby.rb +1929 -0
  142. data/samples/elf_list_needed.rb +46 -0
  143. data/samples/elf_listexports.rb +33 -0
  144. data/samples/elfencode.rb +25 -0
  145. data/samples/exeencode.rb +128 -0
  146. data/samples/factorize-headers-elfimports.rb +77 -0
  147. data/samples/factorize-headers-peimports.rb +109 -0
  148. data/samples/factorize-headers.rb +43 -0
  149. data/samples/gdbclient.rb +583 -0
  150. data/samples/generate_libsigs.rb +102 -0
  151. data/samples/hotfix_gtk_dbg.rb +59 -0
  152. data/samples/install_win_env.rb +78 -0
  153. data/samples/lindebug.rb +924 -0
  154. data/samples/linux_injectsyscall.rb +95 -0
  155. data/samples/machoencode.rb +31 -0
  156. data/samples/metasm-shell.rb +91 -0
  157. data/samples/pe-hook.rb +69 -0
  158. data/samples/pe-ia32-cpuid.rb +203 -0
  159. data/samples/pe-mips.rb +35 -0
  160. data/samples/pe-shutdown.rb +78 -0
  161. data/samples/pe-testrelocs.rb +51 -0
  162. data/samples/pe-testrsrc.rb +24 -0
  163. data/samples/pe_listexports.rb +31 -0
  164. data/samples/peencode.rb +19 -0
  165. data/samples/peldr.rb +494 -0
  166. data/samples/preprocess-flatten.rb +19 -0
  167. data/samples/r0trace.rb +308 -0
  168. data/samples/rubstop.rb +399 -0
  169. data/samples/scan_pt_gnu_stack.rb +54 -0
  170. data/samples/scanpeexports.rb +62 -0
  171. data/samples/shellcode-c.rb +40 -0
  172. data/samples/shellcode-dynlink.rb +146 -0
  173. data/samples/source.asm +34 -0
  174. data/samples/struct_offset.rb +47 -0
  175. data/samples/testpe.rb +32 -0
  176. data/samples/testraw.rb +45 -0
  177. data/samples/win32genloader.rb +132 -0
  178. data/samples/win32hooker-advanced.rb +169 -0
  179. data/samples/win32hooker.rb +96 -0
  180. data/samples/win32livedasm.rb +33 -0
  181. data/samples/win32remotescan.rb +133 -0
  182. data/samples/wintrace.rb +92 -0
  183. data/tests/all.rb +8 -0
  184. data/tests/dasm.rb +39 -0
  185. data/tests/dynldr.rb +35 -0
  186. data/tests/encodeddata.rb +132 -0
  187. data/tests/ia32.rb +82 -0
  188. data/tests/mips.rb +116 -0
  189. data/tests/parse_c.rb +239 -0
  190. data/tests/preprocessor.rb +269 -0
  191. data/tests/x86_64.rb +62 -0
  192. metadata +255 -0
@@ -0,0 +1,876 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/main'
8
+ require 'metasm/preprocessor'
9
+
10
+ module Metasm
11
class Data
  # Data-definition directive keywords.
  # The asm parser treats a word that is directly followed by one of these
  # keywords as a label name.
  DataSpec = ['db', 'dw', 'dd', 'dq']
end
15
+
16
+ class CPU
17
# Parses one instruction (prefixes, opcode name, argument list) from +lexer+.
# +lexer+ may be a String, in which case a fresh asm preprocessor is built for
# it with new_asmprepro - XXX such a lexer will not be able to create labels
# (eg jmp 1b / jmp $).
# Returns the new Instruction, or nil when the lexer has no token left.
# Raises through the opcode token on an unknown opcode name, a malformed dotted
# name or an argument list matching none of the opcode prototypes.
def parse_instruction(lexer)
  lexer = new_asmprepro(lexer) if lexer.kind_of?(String)

  instr = Instruction.new(self)

  # consume leading prefixes; the first token parse_prefix rejects is the opcode name
  name_tok = nil
  loop do
    name_tok = lexer.readtok
    break unless name_tok && parse_prefix(instr, name_tok.raw)
    lexer.skip_space_eol
  end
  return unless name_tok

  # an opcode name may be made of several words joined by '.'
  name_tok = name_tok.dup
  loop do
    peek = lexer.nexttok
    break unless peek && peek.type == :punct && peek.raw == '.'
    name_tok.raw << lexer.readtok.raw
    part = lexer.readtok
    raise name_tok, 'invalid opcode name' if !part || part.type != :string
    name_tok.raw << part.raw
  end

  raise name_tok, 'invalid opcode' unless opcode_list_byname[name_tok.raw]

  instr.opname = name_tok.raw
  instr.backtrace = name_tok.backtrace
  lexer.skip_space

  # comma-separated argument list
  loop do
    peek = lexer.nexttok
    break unless peek
    # the next word is itself an opcode and the current one can stand without
    # arguments: consider the current instruction finished
    if instr.args.empty? && opcode_list_byname[peek.raw] &&
       opcode_list_byname[instr.opname].find { |op| op.args.empty? }
      break
    end
    arg = parse_argument(lexer)
    break unless arg
    instr.args << arg
    lexer.skip_space
    sep = lexer.nexttok
    break unless sep && sep.type == :punct && sep.raw == ','
    lexer.readtok
    lexer.skip_space_eol
  end

  unless parse_instruction_checkproto(instr)
    raise name_tok, "invalid opcode arguments #{instr.to_s.inspect}, allowed : #{opcode_list_byname[instr.opname].to_a.map { |o| o.args }.inspect}"
  end
  parse_instruction_fixup(instr)

  instr
end
65
+
66
# Returns the first opcode registered under i.opname whose argument prototype
# accepts the arguments of instruction +i+ (same count, every argument passing
# parse_arg_valid?), or nil when none does.
def parse_instruction_checkproto(i)
  candidates = opcode_list_byname[i.opname].to_a
  candidates.find do |opcode|
    next false unless opcode.args.length == i.args.length
    opcode.args.zip(i.args).all? { |spec, arg| parse_arg_valid?(opcode, spec, arg) }
  end
end
71
+
72
# Hook called by parse_instruction once the instruction +i+ is fully parsed
# and its prototype was checked. The base implementation does nothing.
def parse_instruction_fixup(i)
  nil
end
75
+
76
# Hook deciding whether +word+ is an instruction prefix for instruction +i+.
# Must return a false value when +word+ is not a prefix; the base
# implementation knows no prefix and always returns nil.
def parse_prefix(i, word)
  nil
end
79
+
80
+ # Parses one instruction argument from +lexer+ and returns it; this default implementation only delegates to Expression.parse
81
+ # add your own arguments parser here (registers, memory references..)
82
+ def parse_argument(lexer)
83
+ Expression.parse(lexer)
84
+ end
85
+
86
+ # handles the '.'-prefixed parser instructions that ExeFormat#parse_parser_instruction does not know; that caller passes the ExeFormat itself as +lexer+ and the directive token as +instr+
87
+ # this default rejects every directive by raising through +instr+ - XXX handle HLA here ?
88
+ def parse_parser_instruction(lexer, instr)
89
+ raise instr, 'unknown parser instruction'
90
+ end
91
+ end
92
+
93
+ # asm-specific preprocessor
94
+ # handles asm arguments (; ... eol)
95
+ # asm macros (name macro args\nbody endm, name equ val)
96
+ # initializes token.value (reads integers in hex etc)
97
+ # merges consecutive space/eol
98
+ class AsmPreprocessor < Preprocessor
99
# An assembler macro, similar to a preprocessor macro.
# Keeps the formal argument tokens, the body tokens and the names of the labels
# defined inside the body, which are renamed at every expansion.
class Macro
  attr_accessor :name, :args, :body, :labels

  # +name+ is stored as is; it is also used as the object raised on errors
  def initialize(name)
    @name   = name
    @args   = []
    @body   = []
    @labels = []
  end

  # Returns the array of tokens resulting from one expansion of the macro.
  # The actual arguments are read from +lexer+, +macro+ is the token that
  # triggered the expansion (its backtrace tail is appended to every body
  # token) and +program+ supplies a fresh name for each macro-local label.
  # Raises through @name when the argument count does not match.
  def apply(macro, lexer, program)
    actuals = Preprocessor::Macro.parse_arglist(lexer).to_a
    raise @name, 'invalid argument count' if actuals.length != @args.length

    # local label name => unique name for this expansion
    label_map = {}
    @labels.each { |l| label_map[l] = program.new_label(l) }
    # formal argument name => actual argument
    arg_map = {}
    @args.zip(actuals) { |formal, actual| arg_map[formal.raw] = actual }

    expanded = @body.map do |body_tok|
      copy = body_tok.dup
      copy.backtrace += macro.backtrace[-2..-1] unless macro.backtrace.empty?
      if label_map[copy.raw]
        copy.raw = label_map[copy.raw]
        copy
      elsif arg_map[copy.raw]
        # XXX update toks backtrace ?
        arg_map[copy.raw]
      else
        copy
      end
    end
    expanded.flatten
  end

  # Reads the formal argument list (up to the end of line) and then the body
  # (up to the 'endm' word) from +lexer+.
  # Records as local labels the words found at the beginning of a line that are
  # followed by ':' (unless purely numeric) or by a space and a data keyword.
  # XXX add eax,
  #      toto db 42 ; zomg h4x
  def parse_definition(lexer)
    lexer.skip_space
    loop do
      peek = lexer.nexttok
      break if !peek || peek.type == :eol
      # the argument list is not preprocessed
      formal = lexer.readtok
      raise @name, 'invalid arg definition' if !formal || formal.type != :string
      @args << formal
      lexer.skip_space
      sep = lexer.readtok
      is_comma = sep && sep.type == :punct && sep.raw == ','
      raise @name, 'invalid arg separator' unless sep && (is_comma || sep.type == :eol)
      break if sep.type == :eol
      lexer.skip_space
    end

    lexer.skip_space_eol
    while (tok = lexer.readtok) && !(tok.type == :string && tok.raw == 'endm')
      @body << tok
      last, prev, prev2, prev3 = @body[-1], @body[-2], @body[-3], @body[-4]
      if prev && prev.type == :string && last.raw == ':' && (!prev2 || prev2.type == :eol) && prev.raw !~ /^[1-9][0-9]*$/
        @labels << prev.raw
      elsif prev2 && prev2.type == :string && prev.type == :space && Data::DataSpec.include?(last.raw) && (!prev3 || prev3.type == :eol)
        @labels << prev2.raw
      end
    end
    nil
  end
end
161
+
162
+ # the program (used to create new label names)
163
+ attr_accessor :program
164
+ # hash macro name => Macro
165
+ attr_accessor :macro
166
+
167
# Creates an asm preprocessor over +text+.
# +program+ is kept for later use (it is the object asked for new label names
# during macro expansion); the macro table starts empty.
def initialize(text='', program=nil)
  @macro = {}
  @program = program
  super(text)
end
172
+
173
# Discards every upcoming token of type :space or :eol.
def skip_space_eol
  while (peek = nexttok) && (peek.type == :space || peek.type == :eol)
    readtok
  end
end
176
+
177
# Discards every upcoming token of type :space (end of lines are kept).
def skip_space
  while (peek = nexttok) && peek.type == :space
    readtok
  end
end
180
+
181
# Returns the next token without consuming it: the token is read with readtok
# (so it went through macro/comment handling) and immediately pushed back.
def nexttok
  peeked = readtok
  unreadtok(peeked)
  peeked
end
186
+
187
# Reads a token from the underlying preprocessor and applies the asm-specific
# processing: ';' comments become one :eol token, consecutive space/eol tokens
# are merged, known macros are expanded and 'name macro' / 'name equ'
# definitions are recorded.
# +rec+ is for internal use: when true the macro handling is skipped, which
# avoids reading the whole text at once when reading ahead to check for the
# 'macro' keyword.
def readtok(rec = false)
  tok = super()

  # ';' comment: swallow everything up to and including the end of line
  if tok && tok.type == :punct && tok.raw == ';'
    tok.type = :eol
    begin
      tok = tok.dup
      while (rest = super()) && rest.type != :eol
        tok.raw << rest.raw
      end
      tok.raw << rest.raw if rest
    rescue ParseError
      # unterminated string
    end
  end

  # aggregate space/eol; the recursive call already merged what follows
  if tok && (tok.type == :space || tok.type == :eol)
    follow = readtok(true)
    if follow && follow.type == :space
      tok = tok.dup
      tok.raw << follow.raw
    elsif follow && follow.type == :eol
      tok = tok.dup
      tok.raw << follow.raw
      tok.type = :eol
    else
      unreadtok follow
    end
  end

  # only words read at the top level are candidates for macro handling
  return tok if rec || !tok || tok.type != :string

  if @macro[tok.raw]
    # expansion: push the resulting tokens back and restart from the first one
    @macro[tok.raw].apply(tok, self, @program).reverse_each { |t| unreadtok t }
    return readtok
  end

  # look ahead for '<name> macro' or '<name> equ'
  spacer = readtok(true)
  keyword = readtok(true) if spacer && spacer.type == :space
  unless keyword && keyword.type == :string && (keyword.raw == 'macro' || keyword.raw == 'equ')
    unreadtok keyword
    unreadtok spacer
    return tok
  end

  puts "W: asm: redefinition of macro #{tok.raw} at #{tok.backtrace_str}, previous definition at #{@macro[tok.raw].name.backtrace_str}" if @macro[tok.raw]
  definition = Macro.new(tok)
  # XXX this allows nested macro definition..
  if keyword.raw == 'macro'
    definition.parse_definition(self)
  else
    # equ: the body is a single token holding the parsed expression
    value_tok = readtok
    raise keyword unless value_tok
    unreadtok value_tok
    value = Expression.parse(self)
    raise keyword unless value
    value_tok = value_tok.dup
    value_tok.type = :string
    value_tok.value = value
    value_tok.raw = value.to_s
    definition.body << value_tok
  end
  @macro[tok.raw] = definition

  # the definition itself yields no token: return whatever comes next
  readtok
end
257
+ end
258
+
259
+ class ExeFormat
260
# Called before parsing: creates the anonymous local label tables unless they
# already exist. Returns the forward-reference table.
# setup self.cursource here
def parse_init
  @locallabels_bkw = @locallabels_bkw || {}
  @locallabels_fwd = @locallabels_fwd || {}
end
265
+
266
+ # returns the unique name recorded for the last definition of the anonymous local label +id+, or nil if the table has no entry for it
267
+ # the table is created by parse_init and filled by #parse
268
+ # usage of anonymous local labels:
269
+ # jmp 1f
270
+ # 1:
271
+ # jmp 1f
272
+ # jmp 1b
273
+ # 1:
274
+ # defined in #parse, replaced in use by Expression#parse
275
+ # no macro-scope (macros are gsub-like, and no special handling for those labels is done)
276
+ def locallabels_bkw(id)
277
+ @locallabels_bkw[id]
278
+ end
279
# Returns the unique name reserved for the next definition of the anonymous
# local label +id+, creating it with new_label("local_<id>") on first use.
def locallabels_fwd(id)
  reserved = @locallabels_fwd[id]
  return reserved if reserved
  @locallabels_fwd[id] = new_label("local_#{id}")
end
282
+
283
# Parses an asm source text; the resulting Instruction/Data/Align/Offset/Padding
# and Label objects are appended to @cursource.
# +file+ and +lineno+ are handed to the lexer together with +text+.
# Raises through the offending token on a syntax error or a label redefinition.
# Returns self.
def parse(text, file='<ruby>', lineno=0)
  parse_init
  @lexer ||= cpu.new_asmprepro('', self)
  @lexer.feed(text, file, lineno)
  # true while nothing but spaces was seen since the last end of line
  at_line_start = true

  until @lexer.eos?
    tok = @lexer.readtok
    next unless tok

    case tok.type
    when :space
      # ignored
    when :eol
      at_line_start = true
    when :punct
      raise tok, 'syntax error' if tok.raw != '.'
      # parser directive: gather the dotted name in a single token
      tok = tok.dup
      while (peek = @lexer.nexttok) && (peek.type == :string || (peek.type == :punct && peek.raw == '.'))
        tok.raw << @lexer.readtok.raw
      end
      parse_parser_instruction(tok)
      at_line_start = false
    when :string
      # a word at the beginning of a line is a label when followed by ':'
      # or by a space and a data keyword ("foo db 42")
      follow = nil
      is_label = false
      if at_line_start
        follow = @lexer.readtok
        if follow && follow.type == :punct && follow.raw == ':'
          is_label = true
        elsif follow && follow.type == :space
          keyword = @lexer.nexttok
          is_label = true if keyword && keyword.type == :string && Data::DataSpec.include?(keyword.raw)
        end
      end
      at_line_start = false

      if is_label
        if tok.raw =~ /^[1-9][0-9]*$/
          # handle anonymous local labels: reuse the name promised to earlier
          # forward references, if any
          lname = @locallabels_bkw[tok.raw] = @locallabels_fwd.delete(tok.raw) || new_label('local_'+tok.raw)
        else
          lname = tok.raw
          raise tok, "label redefinition" if new_label(lname) != lname
        end
        label = Label.new(lname)
        label.backtrace = tok.backtrace
        @cursource << label
      else
        @lexer.unreadtok follow
        @lexer.unreadtok tok
        if Data::DataSpec.include?(tok.raw)
          @cursource << parse_data
        else
          @cursource << @cpu.parse_instruction(@lexer)
        end
        # a forward label named 'endinstr' was requested while parsing:
        # define it right after the statement
        if (pending = @locallabels_fwd.delete('endinstr'))
          label = Label.new(pending)
          label.backtrace = tok.backtrace
          @cursource << label
        end
      end
    else
      raise tok, 'syntax error'
    end
  end

  puts "Undefined forward reference to anonymous labels #{@locallabels_fwd.keys.inspect}" if $VERBOSE and not @locallabels_fwd.empty?

  self
end
347
+
348
# Creates a new label from +base+, appends it to the current source and then
# parses +src+ (optional additional source) after it.
# Returns the new label name.
def parse_new_label(base='', src=nil)
  parse_init
  name = new_label(base)
  @cursource << Label.new(name)
  parse(src)
  name
end
357
+
358
# Handles special directives (alignment, padding, offset, ...).
# Special directives start with a dot; +tok+ is the directive token, with the
# whole dotted name in tok.raw (matched case-insensitively).
#  .align <size> [, <fill>]   appends an Align; <size> must reduce to an Integer
#  .pad [<fill>]              appends a Padding
#  .offset <expr>             appends an Offset
#  .padto <expr> [, <fill>]   appends a Padding followed by an Offset
# <fill> is either a bare value (taken as 'db') or a full data statement.
# Any other directive is forwarded to the cpu.
# Raises through +tok+ on a syntax error.
def parse_parser_instruction(tok)
  case tok.raw.downcase
  when '.align'
    e = Expression.parse(@lexer)
    # a missing expression is reported like a non-immediate one
    e = e.reduce if e
    # errors are raised through the directive token, as everywhere else in this method
    raise tok, 'need immediate alignment size' unless e.kind_of? ::Integer
    @lexer.skip_space
    if ntok = @lexer.readtok and ntok.type == :punct and ntok.raw == ','
      @lexer.skip_space_eol
      # allow single byte value or full data statement
      if not ntok = @lexer.readtok or not ntok.type == :string or not Data::DataSpec.include?(ntok.raw)
        @lexer.unreadtok ntok
        type = 'db'
      else
        type = ntok.raw
      end
      fillwith = parse_data_data type
    else
      @lexer.unreadtok ntok
    end
    raise tok, 'syntax error' if ntok = @lexer.nexttok and ntok.type != :eol
    @cursource << Align.new(e, fillwith, tok.backtrace)

  when '.pad'
    @lexer.skip_space
    if ntok = @lexer.readtok and ntok.type != :eol
      # allow single byte value or full data statement
      if not ntok.type == :string or not Data::DataSpec.include?(ntok.raw)
        @lexer.unreadtok ntok
        type = 'db'
      else
        type = ntok.raw
      end
      fillwith = parse_data_data(type)
    else
      @lexer.unreadtok ntok
    end
    raise tok, 'syntax error' if ntok = @lexer.nexttok and ntok.type != :eol
    @cursource << Padding.new(fillwith, tok.backtrace)

  when '.offset'
    e = Expression.parse(@lexer)
    raise tok, 'syntax error' if ntok = @lexer.nexttok and ntok.type != :eol
    @cursource << Offset.new(e, tok.backtrace)

  when '.padto'
    e = Expression.parse(@lexer)
    @lexer.skip_space
    if ntok = @lexer.readtok and ntok.type == :punct and ntok.raw == ','
      @lexer.skip_space
      # allow single byte value or full data statement
      if not ntok = @lexer.readtok or not ntok.type == :string or not Data::DataSpec.include?(ntok.raw)
        @lexer.unreadtok ntok
        type = 'db'
      else
        type = ntok.raw
      end
      fillwith = parse_data_data type
    else
      @lexer.unreadtok ntok
    end
    raise tok, 'syntax error' if ntok = @lexer.nexttok and ntok.type != :eol
    @cursource << Padding.new(fillwith, tok.backtrace) << Offset.new(e, tok.backtrace)

  else
    @cpu.parse_parser_instruction(self, tok)
  end
end
427
+
428
# Parses a data statement: a data keyword (see Data::DataSpec) followed by a
# comma-separated list of items, each parsed by parse_data_data.
# Returns a Data holding the items, with a count of 1.
# Raises ParseError when no token is available, and through the keyword token
# when it is not a data keyword.
def parse_data
  keyword = @lexer.readtok
  raise ParseError, 'internal error' unless keyword
  unless keyword.type == :string && Data::DataSpec.include?(keyword.raw)
    raise keyword, 'invalid data type'
  end
  type = keyword.raw
  @lexer.skip_space_eol

  items = [parse_data_data(type)]
  @lexer.skip_space
  # a ',' announces another item, possibly on the next line
  while (sep = @lexer.readtok) && sep.type == :punct && sep.raw == ','
    @lexer.skip_space_eol
    items << parse_data_data(type)
    @lexer.skip_space
  end
  @lexer.unreadtok sep

  Data.new(type, items, 1, keyword.backtrace)
end
446
+
447
# Parses one item of a data statement of the given +type+ and returns it as a
# Data:
#   ?                        uninitialized data
#   a quoted string          its value
#   <expr>                   the expression
#   <count> dup (<items>)    the comma-separated items, repeated <count> times
#                            (<count> must reduce to an Integer)
# Raises ParseError when no token is available, and through a token on a
# syntax error.
def parse_data_data(type)
  raise ParseError, 'need data content' if not tok = @lexer.readtok
  if tok.type == :punct and tok.raw == '?'
    Data.new type, :uninitialized, 1, tok.backtrace
  elsif tok.type == :quoted
    Data.new type, tok.value, 1, tok.backtrace
  else
    @lexer.unreadtok tok
    raise tok, 'invalid data' if not i = Expression.parse(@lexer)
    @lexer.skip_space
    if duptok = @lexer.readtok and duptok.type == :string and duptok.raw.downcase == 'dup'
      raise duptok, 'need immediate count expression' unless (count = i.reduce).kind_of? ::Integer
      @lexer.skip_space
      # report the offending token, or the 'dup' keyword when the input ends
      # here (raising nil would be a TypeError, not a parse error)
      ntok = @lexer.readtok
      raise((ntok || duptok), 'syntax error, ( expected') if not ntok or ntok.type != :punct or ntok.raw != '('
      content = []
      loop do
        content << parse_data_data(type)
        @lexer.skip_space
        if ntok = @lexer.readtok and ntok.type == :punct and ntok.raw == ','
          @lexer.skip_space_eol
        else
          @lexer.unreadtok ntok
          break
        end
      end
      ntok = @lexer.readtok
      raise((ntok || duptok), 'syntax error, ) expected') if not ntok or ntok.type != :punct or ntok.raw != ')'
      Data.new type, content, count, tok.backtrace
    else
      @lexer.unreadtok duptok
      Data.new type, i, 1, tok.backtrace
    end
  end
end
480
+ end
481
+
482
+ class Expression
483
# key = operator, value = hash whose keys are the operators of strictly lower
# precedence (operators of the same precedence share the same hash)
OP_PRIO = begin
  table = {}
  # groups are listed from the lowest to the highest precedence
  [[:'||'], [:'&&'], [:|], [:^], [:&], [:'==', :'!='],
   [:'<', :'>', :'<=', :'>='], [:<<, :>>], [:+, :-], [:*, :/, :%]].each do |group|
    weaker = {}
    table.each_key { |op| weaker[op] = true }
    group.each { |op| table[op] = weaker }
  end
  table
end
490
+
491
+
492
+ class << self
493
# Reads a binary operator from the lexer.
# Returns the operator token, with the operator symbol in its +value+ (two-char
# operators like '<<' or '!=' are merged in a copy of the first token), or nil
# if the next tokens do not form an operator. In that case every token read is
# pushed back to the lexer.
def readop(lexer)
  if not tok = lexer.readtok or tok.type != :punct
    lexer.unreadtok tok
    return
  end

  # token already carrying a value: it is an operator only if the value is a known one
  if tok.value
    if OP_PRIO[tok.value]
      return tok
    else
      lexer.unreadtok tok
      return
    end
  end

  op = tok
  case op.raw
  # may be followed by itself or '='
  when '>', '<'
    if ntok = lexer.readtok and ntok.type == :punct and (ntok.raw == op.raw or ntok.raw == '=')
      op = op.dup
      op.raw << ntok.raw
    else
      lexer.unreadtok ntok
    end
  # may be followed by itself
  when '|', '&'
    if ntok = lexer.readtok and ntok.type == :punct and ntok.raw == op.raw
      op = op.dup
      op.raw << ntok.raw
    else
      lexer.unreadtok ntok
    end
  # must be followed by '='
  when '!', '='
    # 'and'/'or' share the same precedence in Ruby: every alternative is
    # joined with 'or' so that a missing or non-'=' token rejects the operator
    if not ntok = lexer.readtok or ntok.type != :punct or ntok.raw != '='
      lexer.unreadtok ntok
      lexer.unreadtok tok
      return
    end
    op = op.dup
    op.raw << ntok.raw
  # ok
  when '^', '+', '-', '*', '/', '%'
  # unknown
  else
    lexer.unreadtok tok
    return
  end
  op.value = op.raw.to_sym
  op
end
546
+
547
# Parses the numeric constant starting at token +tok+ and stores the result in
# tok.value; the return value is not meaningful.
# Handles binary (0b101, 101b), octal (017), hexadecimal (0x1f, 1fh), decimal,
# decimal floats and C99 hexadecimal floats. A float is spread over several
# tokens (digits, '.', exponent sign...): the needed tokens are read from
# +lexer+ and appended to tok.raw, any other token read ahead is pushed back.
# Does nothing if tok.value is already set or if the token does not start with
# a digit or a dot.
# Does not parse unary operators (-/+/~).
# Raises through +tok+ on a malformed constant.
def parse_num_value(lexer, tok)
  if not tok.value and tok.raw =~ /^[a-f][0-9a-f]*h$/i
    # warn on variable name like ffffh
    puts "W: Parser: you may want to add a leading 0 to #{tok.raw.inspect} at #{tok.backtrace[-2]}:#{tok.backtrace[-1]}" if $VERBOSE
  end

  return if tok.value
  return if tok.raw[0] != ?. and !(?0..?9).include? tok.raw[0]

  case tr = tok.raw.downcase
  when /^0b([01][01_]*)$/, /^([01][01_]*)b$/
    tok.value = $1.to_i(2)

  when /^(0[0-7][0-7_]*)$/
    tok.value = $1.to_i(8)

  when /^([0-9][a-f0-9_]*)h$/
    tok.value = $1.to_i(16)

  when /^0x([a-f0-9][a-f0-9_]*)(u?l?l?|l?l?u?|p([0-9][0-9_]*[fl]?)?)$/, '0x'
    tok.value = $1.to_i(16) if $1
    ntok = lexer.readtok

    # check for C99 hex float
    if not tr.include? 'p' and ntok and ntok.type == :punct and ntok.raw == '.'
      if not nntok = lexer.readtok or nntok.type != :string
        lexer.unreadtok nntok
        lexer.unreadtok ntok
        return
      end
      # read all pre-mantissa
      tok.raw << ntok.raw
      ntok = nntok
      tok.raw << ntok.raw if ntok
      raise tok, 'invalid hex float' if not ntok or ntok.type != :string or ntok.raw !~ /^[0-9a-f_]*p([0-9][0-9_]*[fl]?)?$/i
      raise tok, 'invalid hex float' if tok.raw.delete('_').downcase[0,4] == '0x.p'	# no digits
      ntok = lexer.readtok
    end

    if not tok.raw.downcase.include? 'p'
      # standard hex
      lexer.unreadtok ntok
    else
      if tok.raw.downcase[-1] == ?p
        # read signed mantissa: the sign is a :punct token, followed by the digits
        tok.raw << ntok.raw if ntok
        raise tok, 'invalid hex float' if not ntok or ntok.type != :punct or (ntok.raw != '+' and ntok.raw != '-')
        ntok = lexer.readtok
        tok.raw << ntok.raw if ntok
        raise tok, 'invalid hex float' if not ntok or ntok.type != :string or ntok.raw !~ /^[0-9][0-9_]*[fl]?$/i
      else
        # the exponent was already part of the token: the token read ahead
        # does not belong to the constant
        lexer.unreadtok ntok
      end

      raise tok, 'internal error' if not tok.raw.delete('_').downcase =~ /^0x([0-9a-f]*)(?:\.([0-9a-f]*))?p([+-]?[0-9]+)[fl]?$/
      b1, b2, b3 = $1.to_i(16), $2, $3.to_i
      b2 = b2.to_i(16) if b2
      tok.value = b1.to_f
      # tok.value += 1/b2.to_f # TODO
      puts "W: unhandled hex float #{tok.raw}" if $VERBOSE and b2 and b2 != 0
      tok.value *= 2**b3
      puts "hex float: #{tok.raw} => #{tok.value}" if $DEBUG
    end

  when /^([0-9][0-9_]*)(u?l?l?|l?l?u?|e([0-9][0-9_]*[fl]?)?)$/, '.'
    tok.value = $1.to_i if $1
    ntok = lexer.readtok
    if tok.raw == '.' and (not ntok or ntok.type != :string)
      lexer.unreadtok ntok
      return
    end

    if not tr.include? 'e' and tr != '.' and ntok and ntok.type == :punct and ntok.raw == '.'
      if not nntok = lexer.readtok or nntok.type != :string
        lexer.unreadtok nntok
        lexer.unreadtok ntok
        return
      end
      # read upto '.'
      tok.raw << ntok.raw
      ntok = nntok
    end

    if not tok.raw.downcase.include? 'e' and tok.raw[-1] == ?.
      # read fractional part
      tok.raw << ntok.raw if ntok
      raise tok, 'bad float' if not ntok or ntok.type != :string or ntok.raw !~ /^[0-9_]*(e[0-9_]*)?[fl]?$/i
      ntok = lexer.readtok
    end

    if tok.raw.downcase[-1] == ?e
      # read signed exponent
      tok.raw << ntok.raw if ntok
      raise tok, 'bad float' if not ntok or ntok.type != :punct or (ntok.raw != '+' and ntok.raw != '-')
      ntok = lexer.readtok
      tok.raw << ntok.raw if ntok
      raise tok, 'bad float' if not ntok or ntok.type != :string or ntok.raw !~ /^[0-9][0-9_]*[fl]?$/i
      ntok = lexer.readtok
    end

    lexer.unreadtok ntok

    if tok.raw.delete('_').downcase =~ /^(?:(?:[0-9]+\.[0-9]*|\.[0-9]+)(?:e[+-]?[0-9]+)?|[0-9]+e[+-]?[0-9]+)[fl]?$/i
      tok.value = tok.raw.to_f
    else
      raise tok, 'internal error' if tok.raw =~ /[e.]/i
    end

  else raise tok, 'invalid numeric constant'
  end
end
658
+
659
# Parses an integer/a float: sets tok.value, consumes & aggregates the
# necessary following tokens (point, mantissa..) through parse_num_value.
# Before that, when the lexer is attached to a program, handles the special asm
# label names (only if tok.value is not already set):
#   $        label at the current position: the last element of the program's
#            cursource, or a new 'instr_start' label appended to it
#   $$       label at the start: the first element of the program's cursource,
#            or a new 'section_start' label inserted in front of it
#   $_       forward anonymous label 'endinstr'
#   1b / 1f  backward / forward reference to the anonymous local label 1
# When the lexer has no program (it does not respond to #program, or it holds
# nil) these names are left untouched, so that no label is created or looked up.
# XXX for binary, use _ delimiter or 0b prefix, or start with 0 : 1b may conflict with backward local anonymous label reference
def parse_intfloat(lexer, tok)
  program = lexer.program if lexer.respond_to? :program

  if program and not tok.value
    case tok.raw
    when '$'
      l = program.cursource.last
      if not l.kind_of? Label
        l = Label.new(program.new_label('instr_start'))
        l.backtrace = tok.backtrace
        program.cursource << l
      end
      tok.value = l.name
    when '$$'
      l = program.cursource.first
      if not l.kind_of? Label
        l = Label.new(program.new_label('section_start'))
        l.backtrace = tok.backtrace
        program.cursource.unshift l
      end
      tok.value = l.name
    when '$_'
      tok.value = program.locallabels_fwd('endinstr')
    when /^([1-9][0-9]*)([fb])$/
      case $2
      when 'b'; tok.value = program.locallabels_bkw($1)	# may fallback to binary parser
      when 'f'; tok.value = program.locallabels_fwd($1)
      end
    end
  end

  parse_num_value(lexer, tok)
end
690
+
691
# Returns the next value from lexer: a parenthesised expression, an immediate,
# a variable name, a single-quoted character constant, or a unary operator
# (! + - ~) applied to a value.
# Leading and trailing spaces are skipped.
# Returns nil (after pushing the token back) if the next token cannot start a
# value. Raises through the token on an unterminated parenthesis or on a unary
# operator without operand.
def parse_value(lexer)
  nil while tok = lexer.readtok and tok.type == :space
  return if not tok
  case tok.type
  when :string
    # ignores the 'offset' word if followed by a string
    if not tok.value and tok.raw.downcase == 'offset'
      nil while ntok = lexer.readtok and ntok.type == :space
      # ntok is nil when 'offset' is the last token of the input
      if ntok and ntok.type == :string; tok = ntok
      else lexer.unreadtok ntok
      end
    end
    parse_intfloat(lexer, tok)
    val = tok.value || tok.raw
  when :quoted
    # only single-quoted strings are values
    if tok.raw[0] != ?'
      lexer.unreadtok tok
      return
    end
    s = tok.value || tok.raw[1..-2]	# raise tok, 'need ppcessing !'
    # fold the characters into one integer, following the cpu endianness when it is known
    s = s.reverse if lexer.respond_to? :program and lexer.program and lexer.program.cpu and lexer.program.cpu.endianness == :little
    val = s.unpack('C*').inject(0) { |sum, c| (sum << 8) | c }
  when :punct
    case tok.raw
    when '('
      nil while ntok = lexer.readtok and (ntok.type == :space or ntok.type == :eol)
      lexer.unreadtok ntok
      val = parse(lexer)
      nil while ntok = lexer.readtok and (ntok.type == :space or ntok.type == :eol)
      raise tok, "syntax error, no ) found after #{val.inspect}, got #{ntok.inspect}" if not ntok or ntok.type != :punct or ntok.raw != ')'
    when '!', '+', '-', '~'
      nil while ntok = lexer.readtok and (ntok.type == :space or ntok.type == :eol)
      lexer.unreadtok ntok
      raise tok, 'need expression after unary operator' if not val = parse_value(lexer)
      val = Expression[tok.raw.to_sym, val]
    when '.'
      # float without integer part
      parse_intfloat(lexer, tok)
      if not tok.value
        lexer.unreadtok tok
        return
      end
      val = tok.value
    else
      lexer.unreadtok tok
      return
    end
  else
    lexer.unreadtok tok
    return
  end
  nil while tok = lexer.readtok and tok.type == :space
  lexer.unreadtok tok
  val
end
746
+
747
+ # for boolean operators, true is 1 (or anything != 0), false is 0
748
# Parses a full expression from +lexer+ using an operand stack and an
# operator stack: before a new operator is pushed, pending operators are
# folded until the stack is empty or OP_PRIO[new_op][pending_op] is truthy.
# Returns nil when no value can be read, otherwise Expression[result].
# Raises on the operator token when its right-hand side is missing.
def parse(lexer)
  first = parse_value(lexer)
  return unless first

  operands  = [first]
  operators = []

  # folds the topmost operator with its two operands (rhs is popped first)
  fold = lambda {
    oper = operators.pop
    rhs = operands.pop
    lhs = operands.pop
    operands.push new(oper, rhs, lhs)
  }

  while (op = readop(lexer))
    # skip blanks and line breaks before the right-hand operand
    loop do
      ntok = lexer.readtok
      next if ntok and (ntok.type == :space or ntok.type == :eol)
      lexer.unreadtok ntok
      break
    end

    fold.call until operators.empty? or OP_PRIO[op.value][operators.last]

    operators.push op.value

    rhs = parse_value(lexer)
    raise op, 'need rhs' unless rhs

    operands.push rhs
  end

  fold.call until operators.empty?

  Expression[operands.first]
end
776
+
777
+ # parse an expression in a string
778
+ # updates the string to point after the parsed expression
779
# Parses an expression from the beginning of +str+ and removes the consumed
# text from +str+ in place. The optional block is forwarded to parse.
# Returns whatever parse returns.
def parse_string!(str, &b)
  lexer = Preprocessor.new(str)

  result = parse(lexer, &b)

  # consumed length = lexer position minus the tokens still waiting in its
  # pushback queue
  pending = lexer.queue.inject(0) { |total, t| total + t.raw.length }
  str[0, lexer.pos - pending] = ''

  result
end
791
+
792
+ # parse an expression in a string
793
# Parses the expression held in +str+ (left untouched) through a fresh
# Preprocessor; the optional block is forwarded to parse.
def parse_string(str, &b)
  lexer = Preprocessor.new(str)
  parse(lexer, &b)
end
796
+ end
797
+ end
798
+
799
+ # an Expression whose ::parser handles indirection (byte ptr [foobar])
800
class IndExpression < Expression
  class << self
    # Reads one operand, recognising memory indirections:
    #   [expr]                                  pointer, size left nil
    #   byte|word|dword|qword [ptr] [expr]      pointer of 1/2/4/8 bytes
    #   *value                                  dereference of the next value
    #   :name                                   returns the Symbol :name
    # Anything else is pushed back and delegated to Expression.parse_value.
    # Returns nil when the input is exhausted.
    # Raises when ']' is missing, when ':' is not followed by a token, or when
    # a size keyword is not followed by a pointer.
    def parse_value(lexer)
      sz = nil
      ptr = nil
      loop do
        nil while tok = lexer.readtok and tok.type == :space
        return if not tok
        case tok.raw
        when 'qword'; sz = 8
        when 'dword'; sz = 4
        when 'word'; sz = 2
        when 'byte'; sz = 1
        when 'ptr'
          # optional noise word, eg 'byte ptr [foo]'
        when '['
          ptr = parse(lexer)
          nil while tok = lexer.readtok and tok.type == :space
          # tok is nil at end of input: report on the lexer instead
          raise tok || lexer, '] expected' if not tok or tok.raw != ']'
          break
        when '*'
          ptr = parse_value(lexer)
          break
        when ':'	# symbols, eg ':eax'
          n = lexer.readtok
          raise tok, 'symbol name expected after :' if not n
          return n.raw.to_sym
        else
          lexer.unreadtok tok
          break
        end
      end
      raise lexer, 'invalid indirection' if sz and not ptr
      if ptr; Indirection[ptr, sz]	# if sz is nil, default cpu pointersz is set in resolve_expr
      else super(lexer)
      end
    end

    # Same as Expression.parse; when a block is given it is installed as
    # parse_cb (custom decimal converter) for the duration of the call and
    # cleared afterwards, even if parsing raises.
    def parse(*a, &b)
      @parse_cb = b if b
      super(*a)
    ensure
      @parse_cb = nil if b
    end

    # callback used to customize the parsing of /^([0-9]+)$/ tokens
    # implicitly set by parse(expr) { cb }
    # allows eg parsing '40000' as 0x40000 when relevant
    attr_accessor :parse_cb

    # Sets tok.value for integer literals: decimal (through parse_cb when one
    # is set), hexadecimal (0x.. or ..h) and binary (0b..). Other tokens are
    # left untouched.
    def parse_intfloat(lexer, tok)
      case tok.raw
      when /^([0-9]+)$/; tok.value = parse_cb ? @parse_cb[$1] : $1.to_i
      when /^0x([0-9a-f]+)$/i, /^([0-9a-f]+)h$/i; tok.value = $1.to_i(16)
      when /^0b([01]+)$/i; tok.value = $1.to_i(2)
      end
    end

    # Reads the next operator, merging the two tokens '-' '>' into a single
    # :'->' operator; any other input is pushed back and handled by the
    # parent readop.
    def readop(lexer)
      if t0 = lexer.readtok and t0.raw == '-' and t1 = lexer.readtok and t1.raw == '>'
        op = t0.dup
        op.raw << t1.raw
        op.value = op.raw.to_sym
        op
      else
        # push back in reverse order so the tokens are read again in sequence
        lexer.unreadtok t1
        lexer.unreadtok t0
        super(lexer)
      end
    end

    # Builds the node for a binary operation; 'l -> r' becomes an
    # Indirection on l + r with no explicit size.
    def new(op, r, l)
      return Indirection[[l, :+, r], nil] if op == :'->'
      super(op, r, l)
    end
  end
end
876
+ end