metasm 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/BUGS +11 -0
- data/CREDITS +17 -0
- data/README +270 -0
- data/TODO +114 -0
- data/doc/code_organisation.txt +146 -0
- data/doc/const_missing.txt +16 -0
- data/doc/core_classes.txt +75 -0
- data/doc/feature_list.txt +53 -0
- data/doc/index.txt +59 -0
- data/doc/install_notes.txt +170 -0
- data/doc/style.css +3 -0
- data/doc/use_cases.txt +18 -0
- data/lib/metasm.rb +80 -0
- data/lib/metasm/arm.rb +12 -0
- data/lib/metasm/arm/debug.rb +39 -0
- data/lib/metasm/arm/decode.rb +167 -0
- data/lib/metasm/arm/encode.rb +77 -0
- data/lib/metasm/arm/main.rb +75 -0
- data/lib/metasm/arm/opcodes.rb +177 -0
- data/lib/metasm/arm/parse.rb +130 -0
- data/lib/metasm/arm/render.rb +55 -0
- data/lib/metasm/compile_c.rb +1457 -0
- data/lib/metasm/dalvik.rb +8 -0
- data/lib/metasm/dalvik/decode.rb +196 -0
- data/lib/metasm/dalvik/main.rb +60 -0
- data/lib/metasm/dalvik/opcodes.rb +366 -0
- data/lib/metasm/decode.rb +213 -0
- data/lib/metasm/decompile.rb +2659 -0
- data/lib/metasm/disassemble.rb +2068 -0
- data/lib/metasm/disassemble_api.rb +1280 -0
- data/lib/metasm/dynldr.rb +1329 -0
- data/lib/metasm/encode.rb +333 -0
- data/lib/metasm/exe_format/a_out.rb +194 -0
- data/lib/metasm/exe_format/autoexe.rb +82 -0
- data/lib/metasm/exe_format/bflt.rb +189 -0
- data/lib/metasm/exe_format/coff.rb +455 -0
- data/lib/metasm/exe_format/coff_decode.rb +901 -0
- data/lib/metasm/exe_format/coff_encode.rb +1078 -0
- data/lib/metasm/exe_format/dex.rb +457 -0
- data/lib/metasm/exe_format/dol.rb +145 -0
- data/lib/metasm/exe_format/elf.rb +923 -0
- data/lib/metasm/exe_format/elf_decode.rb +979 -0
- data/lib/metasm/exe_format/elf_encode.rb +1375 -0
- data/lib/metasm/exe_format/macho.rb +827 -0
- data/lib/metasm/exe_format/main.rb +228 -0
- data/lib/metasm/exe_format/mz.rb +164 -0
- data/lib/metasm/exe_format/nds.rb +172 -0
- data/lib/metasm/exe_format/pe.rb +437 -0
- data/lib/metasm/exe_format/serialstruct.rb +246 -0
- data/lib/metasm/exe_format/shellcode.rb +114 -0
- data/lib/metasm/exe_format/xcoff.rb +167 -0
- data/lib/metasm/gui.rb +23 -0
- data/lib/metasm/gui/cstruct.rb +373 -0
- data/lib/metasm/gui/dasm_coverage.rb +199 -0
- data/lib/metasm/gui/dasm_decomp.rb +369 -0
- data/lib/metasm/gui/dasm_funcgraph.rb +103 -0
- data/lib/metasm/gui/dasm_graph.rb +1354 -0
- data/lib/metasm/gui/dasm_hex.rb +543 -0
- data/lib/metasm/gui/dasm_listing.rb +599 -0
- data/lib/metasm/gui/dasm_main.rb +906 -0
- data/lib/metasm/gui/dasm_opcodes.rb +291 -0
- data/lib/metasm/gui/debug.rb +1228 -0
- data/lib/metasm/gui/gtk.rb +884 -0
- data/lib/metasm/gui/qt.rb +495 -0
- data/lib/metasm/gui/win32.rb +3004 -0
- data/lib/metasm/gui/x11.rb +621 -0
- data/lib/metasm/ia32.rb +14 -0
- data/lib/metasm/ia32/compile_c.rb +1523 -0
- data/lib/metasm/ia32/debug.rb +193 -0
- data/lib/metasm/ia32/decode.rb +1167 -0
- data/lib/metasm/ia32/decompile.rb +564 -0
- data/lib/metasm/ia32/encode.rb +314 -0
- data/lib/metasm/ia32/main.rb +233 -0
- data/lib/metasm/ia32/opcodes.rb +872 -0
- data/lib/metasm/ia32/parse.rb +327 -0
- data/lib/metasm/ia32/render.rb +91 -0
- data/lib/metasm/main.rb +1193 -0
- data/lib/metasm/mips.rb +11 -0
- data/lib/metasm/mips/compile_c.rb +7 -0
- data/lib/metasm/mips/decode.rb +253 -0
- data/lib/metasm/mips/encode.rb +51 -0
- data/lib/metasm/mips/main.rb +72 -0
- data/lib/metasm/mips/opcodes.rb +443 -0
- data/lib/metasm/mips/parse.rb +51 -0
- data/lib/metasm/mips/render.rb +43 -0
- data/lib/metasm/os/gnu_exports.rb +270 -0
- data/lib/metasm/os/linux.rb +1112 -0
- data/lib/metasm/os/main.rb +1686 -0
- data/lib/metasm/os/remote.rb +527 -0
- data/lib/metasm/os/windows.rb +2027 -0
- data/lib/metasm/os/windows_exports.rb +745 -0
- data/lib/metasm/parse.rb +876 -0
- data/lib/metasm/parse_c.rb +3938 -0
- data/lib/metasm/pic16c/decode.rb +42 -0
- data/lib/metasm/pic16c/main.rb +17 -0
- data/lib/metasm/pic16c/opcodes.rb +68 -0
- data/lib/metasm/ppc.rb +11 -0
- data/lib/metasm/ppc/decode.rb +264 -0
- data/lib/metasm/ppc/decompile.rb +251 -0
- data/lib/metasm/ppc/encode.rb +51 -0
- data/lib/metasm/ppc/main.rb +129 -0
- data/lib/metasm/ppc/opcodes.rb +410 -0
- data/lib/metasm/ppc/parse.rb +52 -0
- data/lib/metasm/preprocessor.rb +1277 -0
- data/lib/metasm/render.rb +130 -0
- data/lib/metasm/sh4.rb +8 -0
- data/lib/metasm/sh4/decode.rb +336 -0
- data/lib/metasm/sh4/main.rb +292 -0
- data/lib/metasm/sh4/opcodes.rb +381 -0
- data/lib/metasm/x86_64.rb +12 -0
- data/lib/metasm/x86_64/compile_c.rb +1025 -0
- data/lib/metasm/x86_64/debug.rb +59 -0
- data/lib/metasm/x86_64/decode.rb +268 -0
- data/lib/metasm/x86_64/encode.rb +264 -0
- data/lib/metasm/x86_64/main.rb +135 -0
- data/lib/metasm/x86_64/opcodes.rb +118 -0
- data/lib/metasm/x86_64/parse.rb +68 -0
- data/misc/bottleneck.rb +61 -0
- data/misc/cheader-findpppath.rb +58 -0
- data/misc/hexdiff.rb +74 -0
- data/misc/hexdump.rb +55 -0
- data/misc/metasm-all.rb +13 -0
- data/misc/objdiff.rb +47 -0
- data/misc/objscan.rb +40 -0
- data/misc/pdfparse.rb +661 -0
- data/misc/ppc_pdf2oplist.rb +192 -0
- data/misc/tcp_proxy_hex.rb +84 -0
- data/misc/txt2html.rb +440 -0
- data/samples/a.out.rb +31 -0
- data/samples/asmsyntax.rb +77 -0
- data/samples/bindiff.rb +555 -0
- data/samples/compilation-steps.rb +49 -0
- data/samples/cparser_makestackoffset.rb +55 -0
- data/samples/dasm-backtrack.rb +38 -0
- data/samples/dasmnavig.rb +318 -0
- data/samples/dbg-apihook.rb +228 -0
- data/samples/dbghelp.rb +143 -0
- data/samples/disassemble-gui.rb +102 -0
- data/samples/disassemble.rb +133 -0
- data/samples/dump_upx.rb +95 -0
- data/samples/dynamic_ruby.rb +1929 -0
- data/samples/elf_list_needed.rb +46 -0
- data/samples/elf_listexports.rb +33 -0
- data/samples/elfencode.rb +25 -0
- data/samples/exeencode.rb +128 -0
- data/samples/factorize-headers-elfimports.rb +77 -0
- data/samples/factorize-headers-peimports.rb +109 -0
- data/samples/factorize-headers.rb +43 -0
- data/samples/gdbclient.rb +583 -0
- data/samples/generate_libsigs.rb +102 -0
- data/samples/hotfix_gtk_dbg.rb +59 -0
- data/samples/install_win_env.rb +78 -0
- data/samples/lindebug.rb +924 -0
- data/samples/linux_injectsyscall.rb +95 -0
- data/samples/machoencode.rb +31 -0
- data/samples/metasm-shell.rb +91 -0
- data/samples/pe-hook.rb +69 -0
- data/samples/pe-ia32-cpuid.rb +203 -0
- data/samples/pe-mips.rb +35 -0
- data/samples/pe-shutdown.rb +78 -0
- data/samples/pe-testrelocs.rb +51 -0
- data/samples/pe-testrsrc.rb +24 -0
- data/samples/pe_listexports.rb +31 -0
- data/samples/peencode.rb +19 -0
- data/samples/peldr.rb +494 -0
- data/samples/preprocess-flatten.rb +19 -0
- data/samples/r0trace.rb +308 -0
- data/samples/rubstop.rb +399 -0
- data/samples/scan_pt_gnu_stack.rb +54 -0
- data/samples/scanpeexports.rb +62 -0
- data/samples/shellcode-c.rb +40 -0
- data/samples/shellcode-dynlink.rb +146 -0
- data/samples/source.asm +34 -0
- data/samples/struct_offset.rb +47 -0
- data/samples/testpe.rb +32 -0
- data/samples/testraw.rb +45 -0
- data/samples/win32genloader.rb +132 -0
- data/samples/win32hooker-advanced.rb +169 -0
- data/samples/win32hooker.rb +96 -0
- data/samples/win32livedasm.rb +33 -0
- data/samples/win32remotescan.rb +133 -0
- data/samples/wintrace.rb +92 -0
- data/tests/all.rb +8 -0
- data/tests/dasm.rb +39 -0
- data/tests/dynldr.rb +35 -0
- data/tests/encodeddata.rb +132 -0
- data/tests/ia32.rb +82 -0
- data/tests/mips.rb +116 -0
- data/tests/parse_c.rb +239 -0
- data/tests/preprocessor.rb +269 -0
- data/tests/x86_64.rb +62 -0
- metadata +255 -0
data/lib/metasm/parse.rb
ADDED
@@ -0,0 +1,876 @@
|
|
1
|
+
# This file is part of Metasm, the Ruby assembly manipulation suite
|
2
|
+
# Copyright (C) 2006-2009 Yoann GUILLOT
|
3
|
+
#
|
4
|
+
# Licence is LGPL, see LICENCE in the top-level directory
|
5
|
+
|
6
|
+
|
7
|
+
require 'metasm/main'
|
8
|
+
require 'metasm/preprocessor'
|
9
|
+
|
10
|
+
module Metasm
|
11
|
+
class Data
	# directive names that introduce a data definition; the assembler parser
	# also relies on this list to recognize `name db ...` style label declarations
	DataSpec = ['db', 'dw', 'dd', 'dq']
end
|
15
|
+
|
16
|
+
class CPU
	# Reads one instruction (prefixes, opcode name, argument list) from lexer.
	# Returns the resulting +Instruction+, or nil when the token stream ends
	# before an opcode name is found. Invalid input is reported by raising
	# through the opcode token.
	# If lexer is a String, a preprocessor is built with new_asmprepro - XXX such
	# a lexer will not be able to create labels (eg jmp 1b / jmp $)
	def parse_instruction(lexer)
		lexer = new_asmprepro(lexer) if lexer.kind_of? String

		instr = Instruction.new(self)

		# eat prefixes: the first token rejected by parse_prefix is the opcode name
		name = nil
		loop do
			name = lexer.readtok
			break unless name and parse_prefix(instr, name.raw)
			lexer.skip_space_eol
		end
		return unless name

		# glue 'name' '.' 'suffix' sequences into one opcode name
		name = name.dup
		loop do
			peek = lexer.nexttok
			break unless peek and peek.type == :punct and peek.raw == '.'
			name.raw << lexer.readtok.raw
			suffix = lexer.readtok
			raise name, 'invalid opcode name' if not suffix or suffix.type != :string
			name.raw << suffix.raw
		end

		raise name, 'invalid opcode' unless opcode_list_byname[name.raw]

		instr.opname = name.raw
		instr.backtrace = name.backtrace
		lexer.skip_space

		# comma-separated argument list
		loop do
			peek = lexer.nexttok
			break unless peek
			# another opcode name right after an opcode that accepts no argument:
			# it starts the next instruction
			if instr.args.empty? and opcode_list_byname[peek.raw] and
					opcode_list_byname[instr.opname].find { |op| op.args.empty? }
				break
			end
			arg = parse_argument(lexer)
			break unless arg
			instr.args << arg
			lexer.skip_space
			sep = lexer.nexttok
			break if not sep or sep.type != :punct or sep.raw != ','
			lexer.readtok
			lexer.skip_space_eol
		end

		unless parse_instruction_checkproto(instr)
			raise name, "invalid opcode arguments #{instr.to_s.inspect}, allowed : #{opcode_list_byname[instr.opname].to_a.map { |o| o.args }.inspect}"
		end
		parse_instruction_fixup(instr)

		instr
	end

	# returns the first opcode named i.opname whose formal arguments all accept
	# the parsed arguments of i (as decided by parse_arg_valid?), nil if none
	def parse_instruction_checkproto(i)
		candidates = opcode_list_byname[i.opname].to_a
		candidates.find { |op|
			next false if op.args.length != i.args.length
			op.args.zip(i.args).all? { |formal, actual| parse_arg_valid?(op, formal, actual) }
		}
	end

	# hook called once the instruction is fully parsed; does nothing here
	def parse_instruction_fixup(i)
	end

	# hook deciding whether word is an instruction prefix
	# must return false/nil if it is not; this default accepts no prefix
	def parse_prefix(i, word)
	end

	# returns one parsed argument read from lexer
	# add your own arguments parser here (registers, memory references..)
	def parse_argument(lexer)
		Expression.parse(lexer)
	end

	# handles .instructions; this default rejects all of them
	# XXX handle HLA here ?
	def parse_parser_instruction(lexer, instr)
		raise instr, 'unknown parser instruction'
	end
end
|
92
|
+
|
93
|
+
# asm-specific preprocessor
# handles asm comments (; ... eol)
# asm macros (name macro args\nbody endm, name equ val)
# merges consecutive space/eol
class AsmPreprocessor < Preprocessor
	# an assembler macro, similar to preprocessor macro
	# handles local labels
	class Macro
		# name: the token naming the macro, args: array of formal argument tokens,
		# body: array of body tokens, labels: names of the labels defined in the body
		attr_accessor :name, :args, :body, :labels

		def initialize(name)
			@name = name
			@args, @body, @labels = [], [], []
		end

		# returns the array of token resulting from the application of the macro
		# parses arguments if needed, handles macro-local labels
		# macro is the token that triggered the expansion, lexer the source of the
		# actual arguments, program is used to generate fresh names for local labels
		def apply(macro, lexer, program)
			args = Preprocessor::Macro.parse_arglist(lexer).to_a
			raise @name, 'invalid argument count' if args.length != @args.length

			# local label name => unique name valid for this expansion only
			labels = @labels.inject({}) { |h, l| h.update l => program.new_label(l) }
			# formal argument name => actual argument
			args = @args.zip(args).inject({}) { |h, (fa, a)| h.update fa.raw => a }

			# apply macro
			@body.map { |t|
				t = t.dup
				t.backtrace += macro.backtrace[-2..-1] if not macro.backtrace.empty?
				if labels[t.raw]
					t.raw = labels[t.raw]
					t
				elsif args[t.raw]
					# XXX update toks backtrace ?
					args[t.raw]
				else
					t
				end
			}.flatten
		end

		# parses the argument list and the body from lexer
		# recognize the local labels
		# XXX add eax,
		#      toto db 42	; zomg h4x
		def parse_definition(lexer)
			lexer.skip_space
			while tok = lexer.nexttok and tok.type != :eol
				# no preprocess argument list
				raise @name, 'invalid arg definition' if not tok = lexer.readtok or tok.type != :string
				@args << tok
				lexer.skip_space
				raise @name, 'invalid arg separator' if not tok = lexer.readtok or ((tok.type != :punct or tok.raw != ',') and tok.type != :eol)
				break if tok.type == :eol
				lexer.skip_space
			end

			lexer.skip_space_eol
			while tok = lexer.readtok and (tok.type != :string or tok.raw != 'endm')
				@body << tok
				if @body[-2] and @body[-2].type == :string and @body[-1].raw == ':' and (not @body[-3] or @body[-3].type == :eol) and @body[-2].raw !~ /^[1-9][0-9]*$/
					# 'name:' at the beginning of a line (numeric anonymous labels excluded)
					@labels << @body[-2].raw
				elsif @body[-3] and @body[-3].type == :string and @body[-2].type == :space and Data::DataSpec.include?(@body[-1].raw) and (not @body[-4] or @body[-4].type == :eol)
					# 'name db ...' at the beginning of a line
					@labels << @body[-3].raw
				end
			end
		end
	end

	# the program (used to create new label names)
	attr_accessor :program
	# hash macro name => Macro
	attr_accessor :macro

	def initialize(text='', program=nil)
		@program = program
		@macro = {}
		super(text)
	end

	# discards the upcoming :space and :eol tokens
	def skip_space_eol
		readtok while t = nexttok and (t.type == :space or t.type == :eol)
	end

	# discards the upcoming :space tokens
	def skip_space
		readtok while t = nexttok and t.type == :space
	end

	# returns the next token without consuming it
	def nexttok
		t = readtok
		unreadtok t
		t
	end

	# reads a token, handles macros/comments/etc
	# argument is for internal use: when true, macro definition and expansion
	# are skipped (used while reading ahead)
	def readtok(rec = false)
		tok = super()

		# handle ; comments: the comment text up to (and including) the end of
		# line is merged into a single :eol token
		if tok and tok.type == :punct and tok.raw == ';'
			tok.type = :eol
			begin
				tok = tok.dup
				while ntok = super() and ntok.type != :eol
					tok.raw << ntok.raw
				end
				tok.raw << ntok.raw if ntok
			rescue ParseError
				# unterminated string
			end
		end

		# aggregate space/eol
		if tok and (tok.type == :space or tok.type == :eol)
			if ntok = readtok(true) and ntok.type == :space
				tok = tok.dup
				tok.raw << ntok.raw
			elsif ntok and ntok.type == :eol
				tok = tok.dup
				tok.raw << ntok.raw
				tok.type = :eol
			else
				unreadtok ntok
			end
		end


		# handle macros
		# the rec parameter is used to avoid reading the whole text at once when reading ahead to check 'macro' keyword
		if not rec and tok and tok.type == :string
			ntok = nntok = nil
			# a definition ('name macro ...' / 'name equ ...') is checked before
			# expanding an existing macro of the same name: otherwise redefining
			# a macro would expand its old body first and the warning below
			# could never be reached
			if ntok = readtok(true) and ntok.type == :space and nntok = readtok(true) and nntok.type == :string and (nntok.raw == 'macro' or nntok.raw == 'equ')
				puts "W: asm: redefinition of macro #{tok.raw} at #{tok.backtrace_str}, previous definition at #{@macro[tok.raw].name.backtrace_str}" if @macro[tok.raw]
				m = Macro.new tok
				# XXX this allows nested macro definition..
				if nntok.raw == 'macro'
					m.parse_definition self
				else
					# equ: the body is a single token holding the parsed expression
					raise nntok if not etok = readtok
					unreadtok etok
					raise nntok if not v = Expression.parse(self)
					etok = etok.dup
					etok.type = :string
					etok.value = v
					etok.raw = v.to_s
					m.body << etok
				end
				@macro[tok.raw] = m
				tok = readtok
			else
				unreadtok nntok
				unreadtok ntok
				if @macro[tok.raw]
					# push the expansion back in front of the stream and restart from it
					@macro[tok.raw].apply(tok, self, @program).reverse_each { |t| unreadtok t }
					tok = readtok
				end
			end
		end

		tok
	end
end
|
258
|
+
|
259
|
+
class ExeFormat
	# setup self.cursource here
	# makes sure the anonymous local label tables exist
	def parse_init
		@locallabels_bkw ||= {}
		@locallabels_fwd ||= {}
	end

	# hash mapping local anonymous label number => unique name
	# defined only while parsing
	# usage:
	#   jmp 1f
	#  1:
	#   jmp 1f
	#   jmp 1b
	#  1:
	# defined in #parse, replaced in use by Expression#parse
	# no macro-scope (macro are gsub-like, and no special handling for those labels is done)
	def locallabels_bkw(id)
		@locallabels_bkw[id]
	end
	# same as locallabels_bkw for forward references: the unique name is
	# allocated on first use
	def locallabels_fwd(id)
		@locallabels_fwd[id] ||= new_label("local_#{id}")
	end

	# parses an asm source file to an array of Instruction/Data/Align/Offset/Padding
	# the parsed elements are appended to @cursource; returns self
	def parse(text, file='<ruby>', lineno=0)
		parse_init
		@lexer ||= cpu.new_asmprepro('', self)
		@lexer.feed text, file, lineno
		# true while nothing but spaces was seen on the current line: a label
		# definition is only recognized there
		lasteol = true

		while not @lexer.eos?
			tok = @lexer.readtok
			next if not tok
			case tok.type
			when :space
			when :eol
				lasteol = true
			when :punct
				case tok.raw
				when '.'
					# parser directive: aggregate the whole dotted name in one token
					tok = tok.dup
					while ntok = @lexer.nexttok and ((ntok.type == :string) or (ntok.type == :punct and ntok.raw == '.'))
						tok.raw << @lexer.readtok.raw
					end
					parse_parser_instruction tok
				else raise tok, 'syntax error'
				end
				lasteol = false
			when :string
				ntok = nntok = nil
				# label definition: 'name:' or 'name db ...' at the beginning of a line
				if lasteol and ((ntok = @lexer.readtok and ntok.type == :punct and ntok.raw == ':') or
						(ntok and ntok.type == :space and nntok = @lexer.nexttok and nntok.type == :string and Data::DataSpec.include?(nntok.raw)))
					if tok.raw =~ /^[1-9][0-9]*$/
						# handle anonymous local labels
						lname = @locallabels_bkw[tok.raw] = @locallabels_fwd.delete(tok.raw) || new_label('local_'+tok.raw)
					else
						lname = tok.raw
						raise tok, "label redefinition" if new_label(lname) != lname
					end
					l = Label.new(lname)
					l.backtrace = tok.backtrace
					@cursource << l
					lasteol = false
				else
					lasteol = false
					@lexer.unreadtok ntok
					@lexer.unreadtok tok
					if Data::DataSpec.include?(tok.raw)
						@cursource << parse_data
					else
						@cursource << @cpu.parse_instruction(@lexer)
					end
					# '$_' was used in the statement: define its label right after it
					if lname = @locallabels_fwd.delete('endinstr')
						l = Label.new(lname)
						l.backtrace = tok.backtrace
						@cursource << l
					end
				end
			else
				raise tok, 'syntax error'
			end
		end

		puts "Undefined forward reference to anonymous labels #{@locallabels_fwd.keys.inspect}" if $VERBOSE and not @locallabels_fwd.empty?

		self
	end

	# create a new label from base, parse it (incl optional additional src)
	# returns the new label name
	def parse_new_label(base='', src=nil)
		parse_init
		label = new_label(base)
		@cursource << Label.new(label)
		# src is optional: do not hand nil to the lexer
		parse src if src
		label
	end

	# handles special directives (alignment, changing section, ...)
	# special directives start with a dot
	# tok is the token holding the full directive name; syntax errors are raised
	# through it. Unknown directives are forwarded to the cpu.
	def parse_parser_instruction(tok)
		fillwith = nil
		case tok.raw.downcase
		when '.align'
			e = Expression.parse(@lexer)
			e = e.reduce if e
			# report through the directive token, like every other branch
			raise tok, 'need immediate alignment size' unless e.kind_of? ::Integer
			@lexer.skip_space
			if ntok = @lexer.readtok and ntok.type == :punct and ntok.raw == ','
				@lexer.skip_space_eol
				# allow single byte value or full data statement
				if not ntok = @lexer.readtok or not ntok.type == :string or not Data::DataSpec.include?(ntok.raw)
					@lexer.unreadtok ntok
					type = 'db'
				else
					type = ntok.raw
				end
				fillwith = parse_data_data type
			else
				@lexer.unreadtok ntok
			end
			raise tok, 'syntax error' if ntok = @lexer.nexttok and ntok.type != :eol
			@cursource << Align.new(e, fillwith, tok.backtrace)

		when '.pad'
			@lexer.skip_space
			if ntok = @lexer.readtok and ntok.type != :eol
				# allow single byte value or full data statement
				if not ntok.type == :string or not Data::DataSpec.include?(ntok.raw)
					@lexer.unreadtok ntok
					type = 'db'
				else
					type = ntok.raw
				end
				fillwith = parse_data_data(type)
			else
				@lexer.unreadtok ntok
			end
			raise tok, 'syntax error' if ntok = @lexer.nexttok and ntok.type != :eol
			@cursource << Padding.new(fillwith, tok.backtrace)

		when '.offset'
			e = Expression.parse(@lexer)
			raise tok, 'syntax error' if ntok = @lexer.nexttok and ntok.type != :eol
			@cursource << Offset.new(e, tok.backtrace)

		when '.padto'
			e = Expression.parse(@lexer)
			@lexer.skip_space
			if ntok = @lexer.readtok and ntok.type == :punct and ntok.raw == ','
				@lexer.skip_space
				# allow single byte value or full data statement
				if not ntok = @lexer.readtok or not ntok.type == :string or not Data::DataSpec.include?(ntok.raw)
					@lexer.unreadtok ntok
					type = 'db'
				else
					type = ntok.raw
				end
				fillwith = parse_data_data type
			else
				@lexer.unreadtok ntok
			end
			raise tok, 'syntax error' if ntok = @lexer.nexttok and ntok.type != :eol
			@cursource << Padding.new(fillwith, tok.backtrace) << Offset.new(e, tok.backtrace)

		else
			@cpu.parse_parser_instruction(self, tok)
		end
	end

	# parses a full data statement: a DataSpec keyword followed by a
	# comma-separated list of values; returns the resulting Data
	def parse_data
		raise ParseError, 'internal error' if not tok = @lexer.readtok
		raise tok, 'invalid data type' if tok.type != :string or not Data::DataSpec.include?(tok.raw)
		type = tok.raw
		@lexer.skip_space_eol
		arr = []
		loop do
			arr << parse_data_data(type)
			@lexer.skip_space
			if ntok = @lexer.readtok and ntok.type == :punct and ntok.raw == ','
				@lexer.skip_space_eol
			else
				@lexer.unreadtok ntok
				break
			end
		end
		Data.new(type, arr, 1, tok.backtrace)
	end

	# parses one data value of the given type: '?' (uninitialized), a quoted
	# string, an expression, or 'count dup(value, ...)'; returns a Data
	def parse_data_data(type)
		raise ParseError, 'need data content' if not tok = @lexer.readtok
		if tok.type == :punct and tok.raw == '?'
			Data.new type, :uninitialized, 1, tok.backtrace
		elsif tok.type == :quoted
			Data.new type, tok.value, 1, tok.backtrace
		else
			@lexer.unreadtok tok
			raise tok, 'invalid data' if not i = Expression.parse(@lexer)
			@lexer.skip_space
			if ntok = @lexer.readtok and ntok.type == :string and ntok.raw.downcase == 'dup'
				# kept for error reporting: the following reads may hit the end of input
				duptok = ntok
				raise duptok, 'need immediate count expression' unless (count = i.reduce).kind_of? ::Integer
				@lexer.skip_space
				ntok = @lexer.readtok
				raise((ntok || duptok), 'syntax error, ( expected') if not ntok or ntok.type != :punct or ntok.raw != '('
				content = []
				loop do
					content << parse_data_data(type)
					@lexer.skip_space
					if ntok = @lexer.readtok and ntok.type == :punct and ntok.raw == ','
						@lexer.skip_space_eol
					else
						@lexer.unreadtok ntok
						break
					end
				end
				ntok = @lexer.readtok
				raise((ntok || duptok), 'syntax error, ) expected') if not ntok or ntok.type != :punct or ntok.raw != ')'
				Data.new type, content, count, tok.backtrace
			else
				@lexer.unreadtok ntok
				Data.new type, i, 1, tok.backtrace
			end
		end
	end
end
|
481
|
+
|
482
|
+
class Expression
|
483
|
+
# operator symbol => hash whose keys are all the operators of strictly lower
# precedence; operators of the same precedence level share the same hash
OP_PRIO = {}
[[:'||'], [:'&&'], [:|], [:^], [:&], [:'==', :'!='],
 [:'<', :'>', :'<=', :'>='], [:<<, :>>], [:+, :-], [:*, :/, :%]
].each { |level|
	# levels are listed by increasing precedence: everything registered so far binds less
	weaker = {}
	OP_PRIO.each_key { |op| weaker[op] = true }
	level.each { |op| OP_PRIO[op] = weaker }
}
|
490
|
+
|
491
|
+
|
492
|
+
class << self
|
493
|
+
# reads a binary operator from the lexer
# returns the operator token, with tok.value set to the operator symbol
# (multi-character operators are aggregated in a copy of the first token)
# returns nil, leaving the lexer untouched, if no operator is found
def readop(lexer)
	if not tok = lexer.readtok or tok.type != :punct
		lexer.unreadtok tok
		return
	end

	# token already evaluated: valid only if it holds a known operator
	if tok.value
		if OP_PRIO[tok.value]
			return tok
		else
			lexer.unreadtok tok
			return
		end
	end

	op = tok
	case op.raw
	# may be followed by itself or '='
	when '>', '<'
		if ntok = lexer.readtok and ntok.type == :punct and (ntok.raw == op.raw or ntok.raw == '=')
			op = op.dup
			op.raw << ntok.raw
		else
			lexer.unreadtok ntok
		end
	# may be followed by itself
	when '|', '&'
		if ntok = lexer.readtok and ntok.type == :punct and ntok.raw == op.raw
			op = op.dup
			op.raw << ntok.raw
		else
			lexer.unreadtok ntok
		end
	# must be followed by '='
	when '!', '='
		# anything but a '=' punctuation (including the end of input) means
		# this is not an operator: restore both tokens
		if not ntok = lexer.readtok or ntok.type != :punct or ntok.raw != '='
			lexer.unreadtok ntok
			lexer.unreadtok tok
			return
		end
		op = op.dup
		op.raw << ntok.raw
	# ok
	when '^', '+', '-', '*', '/', '%'
	# unknown
	else
		lexer.unreadtok tok
		return
	end
	op.value = op.raw.to_sym
	op
end
|
546
|
+
|
547
|
+
# parses integers (binary/octal/decimal/hex) and floats (decimal, C99 hex floats)
# into tok.value, returns nothing
# the tokens making up the rest of a float (point, fraction, signed exponent)
# are consumed from lexer and appended to tok.raw; any other lookahead token is
# put back in the lexer
# does nothing if tok.value is already set or if tok.raw does not start with
# a digit or a dot; raises through tok on a malformed constant
# does not parse unary operators (-/+/~)
def parse_num_value(lexer, tok)
	if not tok.value and tok.raw =~ /^[a-f][0-9a-f]*h$/i
		# warn on variable name like ffffh
		puts "W: Parser: you may want to add a leading 0 to #{tok.raw.inspect} at #{tok.backtrace[-2]}:#{tok.backtrace[-1]}" if $VERBOSE
	end

	return if tok.value
	return if tok.raw !~ /\A[.0-9]/

	case tr = tok.raw.downcase
	when /^0b([01][01_]*)$/, /^([01][01_]*)b$/
		tok.value = $1.to_i(2)

	when /^(0[0-7][0-7_]*)$/
		tok.value = $1.to_i(8)

	when /^([0-9][a-f0-9_]*)h$/
		tok.value = $1.to_i(16)

	when /^0x([a-f0-9][a-f0-9_]*)(u?l?l?|l?l?u?|p([0-9][0-9_]*[fl]?)?)$/, '0x'
		tok.value = $1.to_i(16) if $1
		ntok = lexer.readtok

		# check for C99 hex float
		if not tr.include? 'p' and ntok and ntok.type == :punct and ntok.raw == '.'
			if not nntok = lexer.readtok or nntok.type != :string
				lexer.unreadtok nntok
				lexer.unreadtok ntok
				return
			end
			# read the fractional part, up to the 'p' exponent marker
			tok.raw << ntok.raw
			ntok = nntok
			tok.raw << ntok.raw if ntok
			raise tok, 'invalid hex float' if not ntok or ntok.type != :string or ntok.raw !~ /^[0-9a-f_]*p([0-9][0-9_]*[fl]?)?$/i
			raise tok, 'invalid hex float' if tok.raw.delete('_').downcase[0,4] == '0x.p'	# no digits
			ntok = lexer.readtok
		end

		if not tok.raw.downcase.include? 'p'
			# standard hex
			lexer.unreadtok ntok
		else
			if tok.raw.downcase[-1, 1] == 'p'
				# read signed exponent: a '+' or '-' punctuation, then the digits
				tok.raw << ntok.raw if ntok
				raise tok, 'invalid hex float' if not ntok or ntok.type != :punct or (ntok.raw != '+' and ntok.raw != '-')
				ntok = lexer.readtok
				tok.raw << ntok.raw if ntok
				raise tok, 'invalid hex float' if not ntok or ntok.type != :string or ntok.raw !~ /^[0-9][0-9_]*[fl]?$/i
			else
				# the exponent was already part of the token: ntok is an
				# unrelated lookahead that must not be swallowed
				lexer.unreadtok ntok
			end

			raise tok, 'internal error' if not tok.raw.delete('_').downcase =~ /^0x([0-9a-f]*)(?:\.([0-9a-f]*))?p([+-]?[0-9]+)[fl]?$/
			intpart, frac, exponent = $1.to_i(16), $2, $3.to_i
			tok.value = intpart.to_f
			# each fractional hex digit weighs 1/16 of the previous one
			tok.value += frac.to_i(16).to_f / 16**frac.length if frac and not frac.empty?
			tok.value *= 2.0**exponent
			puts "hex float: #{tok.raw} => #{tok.value}" if $DEBUG
		end

	when /^([0-9][0-9_]*)(u?l?l?|l?l?u?|e([0-9][0-9_]*[fl]?)?)$/, '.'
		tok.value = $1.to_i if $1
		ntok = lexer.readtok
		if tok.raw == '.' and (not ntok or ntok.type != :string)
			lexer.unreadtok ntok
			return
		end

		if not tr.include? 'e' and tr != '.' and ntok and ntok.type == :punct and ntok.raw == '.'
			if not nntok = lexer.readtok or nntok.type != :string
				lexer.unreadtok nntok
				lexer.unreadtok ntok
				return
			end
			# read upto '.'
			tok.raw << ntok.raw
			ntok = nntok
		end

		if not tok.raw.downcase.include? 'e' and tok.raw[-1, 1] == '.'
			# read fractional part
			tok.raw << ntok.raw if ntok
			raise tok, 'bad float' if not ntok or ntok.type != :string or ntok.raw !~ /^[0-9_]*(e[0-9_]*)?[fl]?$/i
			ntok = lexer.readtok
		end

		if tok.raw.downcase[-1, 1] == 'e'
			# read signed exponent
			tok.raw << ntok.raw if ntok
			raise tok, 'bad float' if not ntok or ntok.type != :punct or (ntok.raw != '+' and ntok.raw != '-')
			ntok = lexer.readtok
			tok.raw << ntok.raw if ntok
			raise tok, 'bad float' if not ntok or ntok.type != :string or ntok.raw !~ /^[0-9][0-9_]*[fl]?$/i
			ntok = lexer.readtok
		end

		# ntok is now the first token following the constant
		lexer.unreadtok ntok

		if tok.raw.delete('_').downcase =~ /^(?:(?:[0-9]+\.[0-9]*|\.[0-9]+)(?:e[+-]?[0-9]+)?|[0-9]+e[+-]?[0-9]+)[fl]?$/i
			tok.value = tok.raw.to_f
		else
			raise tok, 'internal error' if tok.raw =~ /[e.]/i
		end

	else raise tok, 'invalid numeric constant'
	end
end
|
658
|
+
|
659
|
+
# parses an integer/a float, sets its tok.value, consumes&aggregate necessary following tokens (point, mantissa..)
|
660
|
+
# handles $/$$ special asm label name
|
661
|
+
# XXX for binary, use _ delimiter or 0b prefix, or start with 0 : 1b may conflict with backward local anonymous label reference
|
662
|
+
# Resolves the special label tokens, then hands the token to parse_num_value.
#
# Only when +tok+ has no value yet, its raw text is matched against:
# '$'::  name of the last element of lexer.program.cursource; if that element
#        is not a Label, a new 'instr_start' Label is appended first
# '$$':: name of the first element of lexer.program.cursource; if that element
#        is not a Label, a new 'section_start' Label is prepended first
# '$_':: lexer.program.locallabels_fwd('endinstr')
# '<n>b' / '<n>f':: lexer.program.locallabels_bkw(n) / locallabels_fwd(n)
#
# lexer.program is only touched inside those branches.
# Returns whatever parse_num_value(lexer, tok) returns.
def parse_intfloat(lexer, tok)
  unless tok.value
    case tok.raw
    when '$'
      label = lexer.program.cursource.last
      unless label.kind_of?(Label)
        label = Label.new(lexer.program.new_label('instr_start'))
        label.backtrace = tok.backtrace
        lexer.program.cursource << label
      end
      tok.value = label.name
    when '$$'
      label = lexer.program.cursource.first
      unless label.kind_of?(Label)
        label = Label.new(lexer.program.new_label('section_start'))
        label.backtrace = tok.backtrace
        lexer.program.cursource.unshift(label)
      end
      tok.value = label.name
    when '$_'
      tok.value = lexer.program.locallabels_fwd('endinstr')
    when /^([1-9][0-9]*)([fb])$/
      index, direction = $1, $2
      if direction == 'b'
        # may fallback to binary parser
        tok.value = lexer.program.locallabels_bkw(index)
      else
        tok.value = lexer.program.locallabels_fwd(index)
      end
    end
  end

  parse_num_value(lexer, tok)
end
|
690
|
+
|
691
|
+
# returns the next value from lexer (parenthesised expression, immediate, variable, unary operators)
|
692
|
+
# Reads a single operand from +lexer+, skipping leading :space tokens.
#
# lexer:: token source responding to readtok / unreadtok
#
# Handled forms:
# * :string token  - passed to parse_intfloat, result is tok.value or tok.raw;
#   a leading 'offset' word is dropped when the next non-space token is a :string
# * :quoted token starting with ' - the characters are packed into an Integer
#   (first char in the most significant byte), after reversing the string when
#   lexer.program.cpu.endianness is :little
# * '(' ... ')'    - result of parse(lexer); :space and :eol are skipped around it
# * '!', '+', '-', '~' - Expression[operator, operand]
# * '.'            - passed to parse_intfloat, accepted only if it sets tok.value
#
# Returns the operand, or nil when the next token cannot start a value (the token
# is pushed back) or when the input is exhausted. Trailing :space tokens after the
# operand are consumed.
# Raises through the offending token when ')' is missing or a unary operator has
# no operand.
def parse_value(lexer)
  nil while tok = lexer.readtok and tok.type == :space
  return if not tok
  case tok.type
  when :string
    # ignores the 'offset' word if followed by a string
    if not tok.value and tok.raw.downcase == 'offset'
      nil while ntok = lexer.readtok and ntok.type == :space
      # ntok is nil when 'offset' is the last token of the input: keep 'offset' itself
      if ntok and ntok.type == :string; tok = ntok
      else lexer.unreadtok ntok
      end
    end
    parse_intfloat(lexer, tok)
    val = tok.value || tok.raw
  when :quoted
    # only single-quoted tokens are character constants
    if tok.raw[0] != ?'
      lexer.unreadtok tok
      return
    end
    s = tok.value || tok.raw[1..-2]	# raise tok, 'need ppcessing !'
    s = s.reverse if lexer.respond_to? :program and lexer.program and lexer.program.cpu and lexer.program.cpu.endianness == :little
    val = s.unpack('C*').inject(0) { |sum, c| (sum << 8) | c }
  when :punct
    case tok.raw
    when '('
      nil while ntok = lexer.readtok and (ntok.type == :space or ntok.type == :eol)
      lexer.unreadtok ntok
      val = parse(lexer)
      nil while ntok = lexer.readtok and (ntok.type == :space or ntok.type == :eol)
      raise tok, "syntax error, no ) found after #{val.inspect}, got #{ntok.inspect}" if not ntok or ntok.type != :punct or ntok.raw != ')'
    when '!', '+', '-', '~'
      nil while ntok = lexer.readtok and (ntok.type == :space or ntok.type == :eol)
      lexer.unreadtok ntok
      raise tok, 'need expression after unary operator' if not val = parse_value(lexer)
      val = Expression[tok.raw.to_sym, val]
    when '.'
      parse_intfloat(lexer, tok)
      if not tok.value
        lexer.unreadtok tok
        return
      end
      val = tok.value
    else
      lexer.unreadtok tok
      return
    end
  else
    lexer.unreadtok tok
    return
  end
  # swallow the spaces following the operand, leave the next real token queued
  nil while tok = lexer.readtok and tok.type == :space
  lexer.unreadtok tok
  val
end
|
746
|
+
|
747
|
+
# for boolean operators, true is 1 (or anything != 0), false is 0
|
748
|
+
# Parses a sequence of operands separated by binary operators.
#
# Operands come from parse_value, operators from readop. Pending operators are
# kept on a stack and reduced through +new+ until the stack is empty or the
# OP_PRIO[incoming][pending] lookup is truthy.
# :space and :eol tokens following an operator are skipped.
#
# Returns nil when no leading operand can be read, otherwise
# Expression[<the reduced operand>].
# Raises through the operator token ('need rhs') when an operator has no
# right-hand operand.
def parse(lexer)
  first = parse_value(lexer)
  return unless first

  operands = [first]
  operators = []

  # pops one operator and its two operands, pushes the combined node back
  reduce_top = lambda do
    operator = operators.pop
    rhs = operands.pop
    lhs = operands.pop
    operands << new(operator, rhs, lhs)
  end

  while (op = readop(lexer))
    blank = lexer.readtok
    blank = lexer.readtok while blank && (blank.type == :space || blank.type == :eol)
    lexer.unreadtok blank

    reduce_top.call until operators.empty? || OP_PRIO[op.value][operators.last]
    operators << op.value

    operand = parse_value(lexer)
    raise op, 'need rhs' unless operand
    operands << operand
  end

  reduce_top.call until operators.empty?

  Expression[operands.first]
end
|
776
|
+
|
777
|
+
# parse an expression in a string
|
778
|
+
# updates the string to point after the parsed expression
|
779
|
+
# Parses an expression from the beginning of +str+ and removes the parsed part
# from +str+ in place.
#
# str:: String to parse; modified by this call
# b::   optional block forwarded to parse
#
# The number of characters removed is the Preprocessor position minus the raw
# length of every token still waiting in its queue.
# Returns whatever parse returns.
def parse_string!(str, &b)
  lexer = Preprocessor.new(str)
  expr = parse(lexer, &b)

  consumed = lexer.pos
  pending = lexer.queue.inject(0) { |total, t| total + t.raw.length }
  str[0, consumed - pending] = ''

  expr
end
|
791
|
+
|
792
|
+
# parse an expression in a string
|
793
|
+
# Parses an expression from +str+ through a fresh Preprocessor; +str+ is not
# trimmed afterwards (see parse_string! for that). The optional block is
# forwarded to parse. Returns whatever parse returns.
def parse_string(str, &b)
  lexer = Preprocessor.new(str)
  parse(lexer, &b)
end
|
796
|
+
end
|
797
|
+
end
|
798
|
+
|
799
|
+
# an Expression whose ::parser handles indirection (byte ptr [foobar])
|
800
|
+
class IndExpression < Expression
  class << self
    # Reads one operand, recognising memory indirections.
    #
    # Accepted prefixes, in any order and number: 'qword' (8), 'dword' (4),
    # 'word' (2), 'byte' (1) set the size; 'ptr' is ignored.
    # '[' expr ']' or '*' value give the pointer; ':' name returns name as a Symbol.
    # Any other token is pushed back.
    #
    # Returns Indirection[ptr, sz] when a pointer was read, the Symbol for the
    # ':' form, nil at end of input, otherwise the result of Expression's parse_value.
    # Raises through the lexer/token on a missing ']', a ':' without a name, or a
    # size keyword without a pointer.
    def parse_value(lexer)
      sz = nil
      ptr = nil
      loop do
        nil while tok = lexer.readtok and tok.type == :space
        return if not tok
        case tok.raw
        when 'qword'; sz=8
        when 'dword'; sz=4
        when 'word'; sz=2
        when 'byte'; sz=1
        when 'ptr'
        when '['
          ptr = parse(lexer)
          nil while tok = lexer.readtok and tok.type == :space
          # tok is nil at end of input: report through the lexer instead of crashing
          raise tok || lexer, '] expected' if not tok or tok.raw != ']'
          break
        when '*'
          ptr = parse_value(lexer)
          break
        when ':'	# symbols, eg ':eax'
          n = lexer.readtok
          raise lexer, 'symbol name expected' if not n
          return n.raw.to_sym
        else
          lexer.unreadtok tok
          break
        end
      end
      raise lexer, 'invalid indirection' if sz and not ptr
      if ptr; Indirection[ptr, sz]	# if sz is nil, default cpu pointersz is set in resolve_expr
      else super(lexer)
      end
    end

    # Same as Expression's parse; an optional block is installed as parse_cb
    # (custom decimal converter) for the duration of the call.
    # The callback is cleared even when parsing raises, so that a failed parse
    # does not alter how later calls read decimal tokens.
    def parse(*a, &b)
      @parse_cb = b if b
      super(*a)
    ensure
      @parse_cb = nil if b
    end

    # callback used to customize the parsing of /^([0-9]+)$/ tokens
    # implicitly set by parse(expr) { cb }
    # allows eg parsing '40000' as 0x40000 when relevant
    attr_accessor :parse_cb

    # Sets tok.value for integer tokens only:
    # plain digits (through parse_cb when set, else base 10),
    # 0x.. / ..h (base 16), 0b.. (base 2). Other tokens are left untouched.
    def parse_intfloat(lexer, tok)
      case tok.raw
      when /^([0-9]+)$/; tok.value = parse_cb ? @parse_cb[$1] : $1.to_i
      when /^0x([0-9a-f]+)$/i, /^([0-9a-f]+)h$/i; tok.value = $1.to_i(16)
      when /^0b([01]+)$/i; tok.value = $1.to_i(2)
      end
    end

    # Reads the next operator, merging the two tokens '-' '>' into a single
    # '->' operator token; anything else is pushed back and left to
    # Expression's readop.
    def readop(lexer)
      if t0 = lexer.readtok and t0.raw == '-' and t1 = lexer.readtok and t1.raw == '>'
        op = t0.dup
        op.raw << t1.raw
        op.value = op.raw.to_sym
        op
      else
        lexer.unreadtok t1
        lexer.unreadtok t0
        super(lexer)
      end
    end

    # Builds the node for operator +op+ with right operand +r+ and left operand +l+.
    # 'l -> r' becomes Indirection[[l, :+, r], nil]; other operators go to Expression.
    def new(op, r, l)
      return Indirection[[l, :+, r], nil] if op == :'->'
      super(op, r, l)
    end
  end
end
|
876
|
+
end
|