metasm 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/BUGS +11 -0
- data/CREDITS +17 -0
- data/README +270 -0
- data/TODO +114 -0
- data/doc/code_organisation.txt +146 -0
- data/doc/const_missing.txt +16 -0
- data/doc/core_classes.txt +75 -0
- data/doc/feature_list.txt +53 -0
- data/doc/index.txt +59 -0
- data/doc/install_notes.txt +170 -0
- data/doc/style.css +3 -0
- data/doc/use_cases.txt +18 -0
- data/lib/metasm.rb +80 -0
- data/lib/metasm/arm.rb +12 -0
- data/lib/metasm/arm/debug.rb +39 -0
- data/lib/metasm/arm/decode.rb +167 -0
- data/lib/metasm/arm/encode.rb +77 -0
- data/lib/metasm/arm/main.rb +75 -0
- data/lib/metasm/arm/opcodes.rb +177 -0
- data/lib/metasm/arm/parse.rb +130 -0
- data/lib/metasm/arm/render.rb +55 -0
- data/lib/metasm/compile_c.rb +1457 -0
- data/lib/metasm/dalvik.rb +8 -0
- data/lib/metasm/dalvik/decode.rb +196 -0
- data/lib/metasm/dalvik/main.rb +60 -0
- data/lib/metasm/dalvik/opcodes.rb +366 -0
- data/lib/metasm/decode.rb +213 -0
- data/lib/metasm/decompile.rb +2659 -0
- data/lib/metasm/disassemble.rb +2068 -0
- data/lib/metasm/disassemble_api.rb +1280 -0
- data/lib/metasm/dynldr.rb +1329 -0
- data/lib/metasm/encode.rb +333 -0
- data/lib/metasm/exe_format/a_out.rb +194 -0
- data/lib/metasm/exe_format/autoexe.rb +82 -0
- data/lib/metasm/exe_format/bflt.rb +189 -0
- data/lib/metasm/exe_format/coff.rb +455 -0
- data/lib/metasm/exe_format/coff_decode.rb +901 -0
- data/lib/metasm/exe_format/coff_encode.rb +1078 -0
- data/lib/metasm/exe_format/dex.rb +457 -0
- data/lib/metasm/exe_format/dol.rb +145 -0
- data/lib/metasm/exe_format/elf.rb +923 -0
- data/lib/metasm/exe_format/elf_decode.rb +979 -0
- data/lib/metasm/exe_format/elf_encode.rb +1375 -0
- data/lib/metasm/exe_format/macho.rb +827 -0
- data/lib/metasm/exe_format/main.rb +228 -0
- data/lib/metasm/exe_format/mz.rb +164 -0
- data/lib/metasm/exe_format/nds.rb +172 -0
- data/lib/metasm/exe_format/pe.rb +437 -0
- data/lib/metasm/exe_format/serialstruct.rb +246 -0
- data/lib/metasm/exe_format/shellcode.rb +114 -0
- data/lib/metasm/exe_format/xcoff.rb +167 -0
- data/lib/metasm/gui.rb +23 -0
- data/lib/metasm/gui/cstruct.rb +373 -0
- data/lib/metasm/gui/dasm_coverage.rb +199 -0
- data/lib/metasm/gui/dasm_decomp.rb +369 -0
- data/lib/metasm/gui/dasm_funcgraph.rb +103 -0
- data/lib/metasm/gui/dasm_graph.rb +1354 -0
- data/lib/metasm/gui/dasm_hex.rb +543 -0
- data/lib/metasm/gui/dasm_listing.rb +599 -0
- data/lib/metasm/gui/dasm_main.rb +906 -0
- data/lib/metasm/gui/dasm_opcodes.rb +291 -0
- data/lib/metasm/gui/debug.rb +1228 -0
- data/lib/metasm/gui/gtk.rb +884 -0
- data/lib/metasm/gui/qt.rb +495 -0
- data/lib/metasm/gui/win32.rb +3004 -0
- data/lib/metasm/gui/x11.rb +621 -0
- data/lib/metasm/ia32.rb +14 -0
- data/lib/metasm/ia32/compile_c.rb +1523 -0
- data/lib/metasm/ia32/debug.rb +193 -0
- data/lib/metasm/ia32/decode.rb +1167 -0
- data/lib/metasm/ia32/decompile.rb +564 -0
- data/lib/metasm/ia32/encode.rb +314 -0
- data/lib/metasm/ia32/main.rb +233 -0
- data/lib/metasm/ia32/opcodes.rb +872 -0
- data/lib/metasm/ia32/parse.rb +327 -0
- data/lib/metasm/ia32/render.rb +91 -0
- data/lib/metasm/main.rb +1193 -0
- data/lib/metasm/mips.rb +11 -0
- data/lib/metasm/mips/compile_c.rb +7 -0
- data/lib/metasm/mips/decode.rb +253 -0
- data/lib/metasm/mips/encode.rb +51 -0
- data/lib/metasm/mips/main.rb +72 -0
- data/lib/metasm/mips/opcodes.rb +443 -0
- data/lib/metasm/mips/parse.rb +51 -0
- data/lib/metasm/mips/render.rb +43 -0
- data/lib/metasm/os/gnu_exports.rb +270 -0
- data/lib/metasm/os/linux.rb +1112 -0
- data/lib/metasm/os/main.rb +1686 -0
- data/lib/metasm/os/remote.rb +527 -0
- data/lib/metasm/os/windows.rb +2027 -0
- data/lib/metasm/os/windows_exports.rb +745 -0
- data/lib/metasm/parse.rb +876 -0
- data/lib/metasm/parse_c.rb +3938 -0
- data/lib/metasm/pic16c/decode.rb +42 -0
- data/lib/metasm/pic16c/main.rb +17 -0
- data/lib/metasm/pic16c/opcodes.rb +68 -0
- data/lib/metasm/ppc.rb +11 -0
- data/lib/metasm/ppc/decode.rb +264 -0
- data/lib/metasm/ppc/decompile.rb +251 -0
- data/lib/metasm/ppc/encode.rb +51 -0
- data/lib/metasm/ppc/main.rb +129 -0
- data/lib/metasm/ppc/opcodes.rb +410 -0
- data/lib/metasm/ppc/parse.rb +52 -0
- data/lib/metasm/preprocessor.rb +1277 -0
- data/lib/metasm/render.rb +130 -0
- data/lib/metasm/sh4.rb +8 -0
- data/lib/metasm/sh4/decode.rb +336 -0
- data/lib/metasm/sh4/main.rb +292 -0
- data/lib/metasm/sh4/opcodes.rb +381 -0
- data/lib/metasm/x86_64.rb +12 -0
- data/lib/metasm/x86_64/compile_c.rb +1025 -0
- data/lib/metasm/x86_64/debug.rb +59 -0
- data/lib/metasm/x86_64/decode.rb +268 -0
- data/lib/metasm/x86_64/encode.rb +264 -0
- data/lib/metasm/x86_64/main.rb +135 -0
- data/lib/metasm/x86_64/opcodes.rb +118 -0
- data/lib/metasm/x86_64/parse.rb +68 -0
- data/misc/bottleneck.rb +61 -0
- data/misc/cheader-findpppath.rb +58 -0
- data/misc/hexdiff.rb +74 -0
- data/misc/hexdump.rb +55 -0
- data/misc/metasm-all.rb +13 -0
- data/misc/objdiff.rb +47 -0
- data/misc/objscan.rb +40 -0
- data/misc/pdfparse.rb +661 -0
- data/misc/ppc_pdf2oplist.rb +192 -0
- data/misc/tcp_proxy_hex.rb +84 -0
- data/misc/txt2html.rb +440 -0
- data/samples/a.out.rb +31 -0
- data/samples/asmsyntax.rb +77 -0
- data/samples/bindiff.rb +555 -0
- data/samples/compilation-steps.rb +49 -0
- data/samples/cparser_makestackoffset.rb +55 -0
- data/samples/dasm-backtrack.rb +38 -0
- data/samples/dasmnavig.rb +318 -0
- data/samples/dbg-apihook.rb +228 -0
- data/samples/dbghelp.rb +143 -0
- data/samples/disassemble-gui.rb +102 -0
- data/samples/disassemble.rb +133 -0
- data/samples/dump_upx.rb +95 -0
- data/samples/dynamic_ruby.rb +1929 -0
- data/samples/elf_list_needed.rb +46 -0
- data/samples/elf_listexports.rb +33 -0
- data/samples/elfencode.rb +25 -0
- data/samples/exeencode.rb +128 -0
- data/samples/factorize-headers-elfimports.rb +77 -0
- data/samples/factorize-headers-peimports.rb +109 -0
- data/samples/factorize-headers.rb +43 -0
- data/samples/gdbclient.rb +583 -0
- data/samples/generate_libsigs.rb +102 -0
- data/samples/hotfix_gtk_dbg.rb +59 -0
- data/samples/install_win_env.rb +78 -0
- data/samples/lindebug.rb +924 -0
- data/samples/linux_injectsyscall.rb +95 -0
- data/samples/machoencode.rb +31 -0
- data/samples/metasm-shell.rb +91 -0
- data/samples/pe-hook.rb +69 -0
- data/samples/pe-ia32-cpuid.rb +203 -0
- data/samples/pe-mips.rb +35 -0
- data/samples/pe-shutdown.rb +78 -0
- data/samples/pe-testrelocs.rb +51 -0
- data/samples/pe-testrsrc.rb +24 -0
- data/samples/pe_listexports.rb +31 -0
- data/samples/peencode.rb +19 -0
- data/samples/peldr.rb +494 -0
- data/samples/preprocess-flatten.rb +19 -0
- data/samples/r0trace.rb +308 -0
- data/samples/rubstop.rb +399 -0
- data/samples/scan_pt_gnu_stack.rb +54 -0
- data/samples/scanpeexports.rb +62 -0
- data/samples/shellcode-c.rb +40 -0
- data/samples/shellcode-dynlink.rb +146 -0
- data/samples/source.asm +34 -0
- data/samples/struct_offset.rb +47 -0
- data/samples/testpe.rb +32 -0
- data/samples/testraw.rb +45 -0
- data/samples/win32genloader.rb +132 -0
- data/samples/win32hooker-advanced.rb +169 -0
- data/samples/win32hooker.rb +96 -0
- data/samples/win32livedasm.rb +33 -0
- data/samples/win32remotescan.rb +133 -0
- data/samples/wintrace.rb +92 -0
- data/tests/all.rb +8 -0
- data/tests/dasm.rb +39 -0
- data/tests/dynldr.rb +35 -0
- data/tests/encodeddata.rb +132 -0
- data/tests/ia32.rb +82 -0
- data/tests/mips.rb +116 -0
- data/tests/parse_c.rb +239 -0
- data/tests/preprocessor.rb +269 -0
- data/tests/x86_64.rb +62 -0
- metadata +255 -0
data/lib/metasm/parse.rb
ADDED
|
@@ -0,0 +1,876 @@
|
|
|
1
|
+
# This file is part of Metasm, the Ruby assembly manipulation suite
|
|
2
|
+
# Copyright (C) 2006-2009 Yoann GUILLOT
|
|
3
|
+
#
|
|
4
|
+
# Licence is LGPL, see LICENCE in the top-level directory
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
require 'metasm/main'
|
|
8
|
+
require 'metasm/preprocessor'
|
|
9
|
+
|
|
10
|
+
module Metasm
|
|
11
|
+
class Data
  # data-definition keywords; the parser treats a word followed by one of
  # these as a label name
  DataSpec = ['db', 'dw', 'dd', 'dq']
end
|
|
15
|
+
|
|
16
|
+
class CPU
  # Parses one instruction (prefixes, opcode name, arguments) from +lexer+.
  # Returns an +Instruction+, or nil when the lexer has no token left;
  # invalid input is reported by raising through the offending token.
  # A String argument is wrapped in a custom AsmPP built by new_asmprepro -
  # XXX such a lexer is not able to create labels (eg jmp 1b / jmp $)
  def parse_instruction(lexer)
    lexer = new_asmprepro(lexer) if lexer.kind_of?(String)

    instr = Instruction.new(self)

    # consume prefixes: the first token refused by parse_prefix is the opcode
    optok = lexer.readtok
    while optok && parse_prefix(instr, optok.raw)
      lexer.skip_space_eol
      optok = lexer.readtok
    end
    return unless optok

    # an opcode name may hold '.': glue "name . name" into a single token
    optok = optok.dup
    while (peek = lexer.nexttok) && peek.type == :punct && peek.raw == '.'
      optok.raw << lexer.readtok.raw
      part = lexer.readtok
      raise optok, 'invalid opcode name' if part.nil? || part.type != :string
      optok.raw << part.raw
    end

    raise optok, 'invalid opcode' unless opcode_list_byname[optok.raw]

    instr.opname = optok.raw
    instr.backtrace = optok.backtrace
    lexer.skip_space

    # comma-separated argument list
    loop do
      peek = lexer.nexttok
      break unless peek
      # no argument read yet and the next word is itself an opcode name:
      # stop here when the current opcode has an argument-less form
      if instr.args.empty? && opcode_list_byname[peek.raw] &&
          opcode_list_byname[instr.opname].find { |op| op.args.empty? }
        break
      end
      arg = parse_argument(lexer)
      break unless arg
      instr.args << arg
      lexer.skip_space
      sep = lexer.nexttok
      break unless sep && sep.type == :punct && sep.raw == ','
      lexer.readtok
      lexer.skip_space_eol
    end

    unless parse_instruction_checkproto(instr)
      raise optok, "invalid opcode arguments #{instr.to_s.inspect}, allowed : #{opcode_list_byname[instr.opname].to_a.map { |o| o.args }.inspect}"
    end
    parse_instruction_fixup(instr)

    instr
  end

  # returns the first opcode named i.opname whose formal argument list has
  # the same length as i.args and accepts each of them (parse_arg_valid?),
  # nil when there is none
  def parse_instruction_checkproto(i)
    candidates = opcode_list_byname[i.opname].to_a
    candidates.find do |op|
      next false unless op.args.length == i.args.length
      op.args.zip(i.args).all? { |formal, actual| parse_arg_valid?(op, formal, actual) }
    end
  end

  # hook called once the instruction is fully parsed; does nothing here
  def parse_instruction_fixup(i)
  end

  # hook: must return false/nil when +word+ is not a prefix
  # this default version recognizes no prefix
  def parse_prefix(i, word)
  end

  # returns one parsed argument
  # add your own argument parsers here (registers, memory references..)
  def parse_argument(lexer)
    Expression.parse(lexer)
  end

  # handles the .instructions the generic parser does not know
  # this default version rejects all of them
  # XXX handle HLA here ?
  def parse_parser_instruction(lexer, instr)
    raise instr, 'unknown parser instruction'
  end
end
|
|
92
|
+
|
|
93
|
+
# asm-specific preprocessor
|
|
94
|
+
# handles asm arguments (; ... eol)
|
|
95
|
+
# asm macros (name macro args\nbody endm, name equ val)
|
|
96
|
+
# initializes token.value (reads integers in hex etc)
|
|
97
|
+
# merges consecutive space/eol
|
|
98
|
+
class AsmPreprocessor < Preprocessor
|
|
99
|
+
# an assembler macro, similar to a preprocessor macro
# additionally keeps track of the labels defined in its body, so that each
# expansion can be given fresh label names
class Macro
  attr_accessor :name, :args, :body, :labels

  def initialize(name)
    @name = name
    @args = []
    @body = []
    @labels = []
  end

  # returns the array of tokens resulting from the application of the macro
  # the actual arguments are read from lexer with
  # Preprocessor::Macro.parse_arglist; macro-local labels are renamed using
  # program.new_label
  # macro is the token that triggered the expansion: the last two entries of
  # its backtrace are appended to the backtrace of the body tokens
  def apply(macro, lexer, program)
    actuals = Preprocessor::Macro.parse_arglist(lexer).to_a
    raise @name, 'invalid argument count' if actuals.length != @args.length

    # local label name => name to use in this expansion
    renamed = {}
    @labels.each { |lbl| renamed[lbl] = program.new_label(lbl) }
    # formal argument name => actual argument
    bound = {}
    @args.zip(actuals).each { |formal, actual| bound[formal.raw] = actual }

    expanded = @body.map do |orig|
      copy = orig.dup
      copy.backtrace += macro.backtrace[-2..-1] unless macro.backtrace.empty?
      if (newname = renamed[copy.raw])
        copy.raw = newname
        copy
      elsif (subst = bound[copy.raw])
        # XXX update toks backtrace ?
        subst
      else
        copy
      end
    end
    expanded.flatten
  end

  # parses the argument list and the body from lexer, up to the 'endm' word
  # recognizes the local labels: a word starting a line and followed either
  # by ':' (unless it is a number, ie an anonymous label) or by a space and
  # a data keyword
  # XXX add eax,
  #     toto db 42 ; zomg h4x
  def parse_definition(lexer)
    lexer.skip_space
    while (peek = lexer.nexttok) && peek.type != :eol
      # the argument list is not preprocessed
      argtok = lexer.readtok
      raise @name, 'invalid arg definition' if argtok.nil? || argtok.type != :string
      @args << argtok
      lexer.skip_space
      sep = lexer.readtok
      comma = sep && sep.type == :punct && sep.raw == ','
      raise @name, 'invalid arg separator' unless sep && (comma || sep.type == :eol)
      break if sep.type == :eol
      lexer.skip_space
    end

    lexer.skip_space_eol
    while (tok = lexer.readtok) && !(tok.type == :string && tok.raw == 'endm')
      @body << tok
      cur, prev, prev2, prev3 = @body[-1], @body[-2], @body[-3], @body[-4]
      if prev && prev.type == :string && cur.raw == ':' &&
          (prev2.nil? || prev2.type == :eol) && prev.raw !~ /^[1-9][0-9]*$/
        @labels << prev.raw
      elsif prev2 && prev2.type == :string && prev.type == :space &&
          Data::DataSpec.include?(cur.raw) && (prev3.nil? || prev3.type == :eol)
        @labels << prev2.raw
      end
    end
  end
end
|
|
161
|
+
|
|
162
|
+
# the program, used to create new label names
attr_accessor :program
# hash macro name => Macro
attr_accessor :macro

# text is handed to the parent constructor; program is optional
def initialize(text='', program=nil)
  @program, @macro = program, {}
  super(text)
end
|
|
172
|
+
|
|
173
|
+
# discards the upcoming tokens as long as they are :space or :eol
def skip_space_eol
  loop do
    peek = nexttok
    break unless peek && (peek.type == :space || peek.type == :eol)
    readtok
  end
end
|
|
176
|
+
|
|
177
|
+
# discards the upcoming tokens as long as they are :space
def skip_space
  loop do
    peek = nexttok
    break unless peek && peek.type == :space
    readtok
  end
end
|
|
180
|
+
|
|
181
|
+
# peeks at the next token: reads it, pushes it back, returns it
def nexttok
  peeked = readtok
  unreadtok(peeked)
  peeked
end
|
|
186
|
+
|
|
187
|
+
# reads a token on top of the parent readtok:
# turns a ';' comment into a single :eol token, merges consecutive
# space/eol tokens, expands known macros and records new 'macro'/'equ'
# definitions
# rec is for internal use: it is set on the read-ahead calls and disables
# the macro handling
def readtok(rec = false)
  tok = super()

  # ';' comment: swallow the raw tokens up to and including the end of line
  if tok && tok.type == :punct && tok.raw == ';'
    tok.type = :eol
    begin
      tok = tok.dup
      while (more = super()) && more.type != :eol
        tok.raw << more.raw
      end
      tok.raw << more.raw if more
    rescue ParseError
      # unterminated string
    end
  end

  # aggregate space/eol: the result is an :eol as soon as one part is
  if tok && (tok.type == :space || tok.type == :eol)
    follow = readtok(true)
    if follow && (follow.type == :space || follow.type == :eol)
      tok = tok.dup
      tok.raw << follow.raw
      tok.type = :eol if follow.type == :eol
    else
      unreadtok follow
    end
  end

  # macros are looked for only on words, and never while reading ahead
  # (this avoids reading the whole text at once when checking for the
  # 'macro' keyword)
  return tok if rec || tok.nil? || tok.type != :string

  if (known = @macro[tok.raw])
    # push the expansion back, first token on top, and restart from it
    known.apply(tok, self, @program).reverse_each { |t| unreadtok t }
    return readtok
  end

  # definition syntax: name, space, 'macro' or 'equ'
  sp = readtok(true)
  kw = readtok(true) if sp && sp.type == :space
  unless kw && kw.type == :string && (kw.raw == 'macro' || kw.raw == 'equ')
    unreadtok kw
    unreadtok sp
    return tok
  end

  puts "W: asm: redefinition of macro #{tok.raw} at #{tok.backtrace_str}, previous definition at #{@macro[tok.raw].name.backtrace_str}" if @macro[tok.raw]
  m = Macro.new tok
  # XXX this allows nested macro definition..
  if kw.raw == 'macro'
    m.parse_definition self
  else
    # equ: the body is one :string token carrying the parsed expression
    first = readtok
    raise kw unless first
    unreadtok first
    val = Expression.parse(self)
    raise kw unless val
    valtok = first.dup
    valtok.type = :string
    valtok.value = val
    valtok.raw = val.to_s
    m.body << valtok
  end
  @macro[tok.raw] = m

  # the definition yields no token itself: return what follows
  readtok
end
|
|
257
|
+
end
|
|
258
|
+
|
|
259
|
+
class ExeFormat
|
|
260
|
+
# setup self.cursource here
# this version only creates the two anonymous local label tables, keeping
# the existing ones if any
def parse_init
  @locallabels_bkw = {} unless @locallabels_bkw
  @locallabels_fwd = {} unless @locallabels_fwd
  @locallabels_fwd
end
|
|
265
|
+
|
|
266
|
+
# hash mapping local anonymous label number => unique name
# defined only while parsing
# usage:
#   jmp 1f
#  1:
#   jmp 1f
#   jmp 1b
#  1:
# defined in #parse, replaced in use by Expression#parse
# no macro-scope (macro are gsub-like, and no special handling for those labels is done)
#
# returns the name recorded for the backward reference id, nil if none
def locallabels_bkw(id)
  backward = @locallabels_bkw
  backward[id]
end
|
|
279
|
+
# returns the name recorded for the forward reference id; on the first
# request a new one is created with new_label("local_<id>") and recorded
def locallabels_fwd(id)
  pending = @locallabels_fwd[id]
  return pending if pending
  @locallabels_fwd[id] = new_label("local_#{id}")
end
|
|
282
|
+
|
|
283
|
+
# parses an asm source text: the labels, data definitions, instructions and
# directive results are appended to @cursource
# file and lineno are handed to the lexer along with the text
# returns self
def parse(text, file='<ruby>', lineno=0)
  parse_init
  @lexer ||= cpu.new_asmprepro('', self)
  @lexer.feed text, file, lineno
  # true as long as nothing but spaces was read since the last end of line
  lasteol = true

  until @lexer.eos?
    tok = @lexer.readtok
    next unless tok

    case tok.type
    when :space
      # ignored
    when :eol
      lasteol = true
    when :punct
      # the only statement starting with a punctuation is a .directive
      raise tok, 'syntax error' if tok.raw != '.'
      tok = tok.dup
      while (peek = @lexer.nexttok) && (peek.type == :string || (peek.type == :punct && peek.raw == '.'))
        tok.raw << @lexer.readtok.raw
      end
      parse_parser_instruction tok
      lasteol = false
    when :string
      # a word starting a line is a label when followed by ':', or by a
      # space and a data keyword
      follow = nil
      is_label = false
      if lasteol
        follow = @lexer.readtok
        if follow && follow.type == :punct && follow.raw == ':'
          is_label = true
        elsif follow && follow.type == :space
          peek = @lexer.nexttok
          is_label = true if peek && peek.type == :string && Data::DataSpec.include?(peek.raw)
        end
      end
      lasteol = false

      if is_label
        if tok.raw =~ /^[1-9][0-9]*$/
          # anonymous local label: use the name promised to the pending
          # forward references if any, and record it for backward ones
          lname = @locallabels_fwd.delete(tok.raw) || new_label('local_'+tok.raw)
          @locallabels_bkw[tok.raw] = lname
        else
          lname = tok.raw
          raise tok, "label redefinition" if new_label(lname) != lname
        end
        label = Label.new(lname)
        label.backtrace = tok.backtrace
        @cursource << label
      else
        # data or instruction: give the tokens back to the sub-parser
        @lexer.unreadtok follow
        @lexer.unreadtok tok
        @cursource << (Data::DataSpec.include?(tok.raw) ? parse_data : @cpu.parse_instruction(@lexer))
        # a '$_' met while parsing the statement is defined right after it
        if (endname = @locallabels_fwd.delete('endinstr'))
          endlabel = Label.new(endname)
          endlabel.backtrace = tok.backtrace
          @cursource << endlabel
        end
      end
    else
      raise tok, 'syntax error'
    end
  end

  if $VERBOSE && !@locallabels_fwd.empty?
    puts "Undefined forward reference to anonymous labels #{@locallabels_fwd.keys.inspect}"
  end

  self
end
|
|
347
|
+
|
|
348
|
+
# creates a new label from base, appends it to @cursource, then parses src
# returns the new label name
# NOTE(review): src defaults to nil and is handed to #parse as is - confirm
# that the lexer accepts a nil text
def parse_new_label(base='', src=nil)
  parse_init
  name = new_label(base)
  @cursource << Label.new(name)
  parse(src)
  name
end
|
|
357
|
+
|
|
358
|
+
# handles special directives (alignment, changing section, ...)
# special directives start with a dot
# tok is the directive token, holding the whole dotted name in #raw
# .align/.pad/.offset/.padto append an Align/Padding/Offset to @cursource,
# any other directive is forwarded to the cpu
# errors are raised through tok
def parse_parser_instruction(tok)
  case tok.raw.downcase
  when '.align'
    # the size must reduce to an integer; a missing expression is reported
    # the same way
    e = Expression.parse(@lexer)
    e = e.reduce if e
    raise tok, 'need immediate alignment size' unless e.kind_of? ::Integer
    @lexer.skip_space
    if ntok = @lexer.readtok and ntok.type == :punct and ntok.raw == ','
      @lexer.skip_space_eol
      fillwith = parse_fillwith(@lexer.readtok)
    else
      @lexer.unreadtok ntok
    end
    raise tok, 'syntax error' if ntok = @lexer.nexttok and ntok.type != :eol
    @cursource << Align.new(e, fillwith, tok.backtrace)

  when '.pad'
    @lexer.skip_space
    if ntok = @lexer.readtok and ntok.type != :eol
      fillwith = parse_fillwith(ntok)
    else
      @lexer.unreadtok ntok
    end
    raise tok, 'syntax error' if ntok = @lexer.nexttok and ntok.type != :eol
    @cursource << Padding.new(fillwith, tok.backtrace)

  when '.offset'
    e = Expression.parse(@lexer)
    raise tok, 'syntax error' if ntok = @lexer.nexttok and ntok.type != :eol
    @cursource << Offset.new(e, tok.backtrace)

  when '.padto'
    e = Expression.parse(@lexer)
    @lexer.skip_space
    if ntok = @lexer.readtok and ntok.type == :punct and ntok.raw == ','
      @lexer.skip_space
      fillwith = parse_fillwith(@lexer.readtok)
    else
      @lexer.unreadtok ntok
    end
    raise tok, 'syntax error' if ntok = @lexer.nexttok and ntok.type != :eol
    @cursource << Padding.new(fillwith, tok.backtrace) << Offset.new(e, tok.backtrace)

  else
    @cpu.parse_parser_instruction(self, tok)
  end
end

# parses the filler of an alignment/padding directive
# ntok is the first token of the filler, already read from @lexer: either
# a data keyword starting a full data statement, or the beginning of a
# single value, which is then pushed back and parsed as a 'db'
def parse_fillwith(ntok)
  if ntok and ntok.type == :string and Data::DataSpec.include?(ntok.raw)
    type = ntok.raw
  else
    @lexer.unreadtok ntok
    type = 'db'
  end
  parse_data_data type
end
|
|
427
|
+
|
|
428
|
+
# parses a data statement: a data keyword followed by a comma-separated
# list of items (see parse_data_data)
# returns a Data of that type holding the array of items
def parse_data
  kw = @lexer.readtok
  raise ParseError, 'internal error' unless kw
  unless kw.type == :string && Data::DataSpec.include?(kw.raw)
    raise kw, 'invalid data type'
  end
  type = kw.raw
  @lexer.skip_space_eol

  items = []
  more = true
  while more
    items << parse_data_data(type)
    @lexer.skip_space
    sep = @lexer.readtok
    more = sep && sep.type == :punct && sep.raw == ','
    if more
      # the list may go on on the next line after a comma
      @lexer.skip_space_eol
    else
      @lexer.unreadtok sep
    end
  end
  Data.new(type, items, 1, kw.backtrace)
end
|
|
446
|
+
|
|
447
|
+
# parses one item of a data statement, returns a Data of the given type:
#  ?                        uninitialized data
#  a quoted string          its value
#  expr                     the expression
#  count dup(item, item..)  the item list repeated count times; count must
#                           reduce to an integer
# raises ParseError when there is no token left, other errors are raised
# through the offending token
def parse_data_data(type)
  raise ParseError, 'need data content' if not tok = @lexer.readtok
  if tok.type == :punct and tok.raw == '?'
    Data.new type, :uninitialized, 1, tok.backtrace
  elsif tok.type == :quoted
    Data.new type, tok.value, 1, tok.backtrace
  else
    @lexer.unreadtok tok
    raise tok, 'invalid data' if not i = Expression.parse(@lexer)
    @lexer.skip_space
    if ntok = @lexer.readtok and ntok.type == :string and ntok.raw.downcase == 'dup'
      # kept to report the errors when the input ends inside the dup()
      duptok = ntok
      raise duptok, 'need immediate count expression' unless (count = i.reduce).kind_of? ::Integer
      @lexer.skip_space
      ntok = @lexer.readtok
      raise(ntok || duptok, 'syntax error, ( expected') if not ntok or ntok.type != :punct or ntok.raw != '('
      content = []
      loop do
        content << parse_data_data(type)
        @lexer.skip_space
        if ntok = @lexer.readtok and ntok.type == :punct and ntok.raw == ','
          @lexer.skip_space_eol
        else
          @lexer.unreadtok ntok
          break
        end
      end
      ntok = @lexer.readtok
      raise(ntok || duptok, 'syntax error, ) expected') if not ntok or ntok.type != :punct or ntok.raw != ')'
      Data.new type, content, count, tok.backtrace
    else
      @lexer.unreadtok ntok
      Data.new type, i, 1, tok.backtrace
    end
  end
end
|
|
480
|
+
end
|
|
481
|
+
|
|
482
|
+
class Expression
|
|
483
|
+
# key = operator, value = hash regrouping operators of lower precedence
|
|
484
|
+
OP_PRIO = [[:'||'], [:'&&'], [:|], [:^], [:&], [:'==', :'!='],
|
|
485
|
+
[:'<', :'>', :'<=', :'>='], [:<<, :>>], [:+, :-], [:*, :/, :%]
|
|
486
|
+
].inject({}) { |h, oplist|
|
|
487
|
+
lessprio = h.keys.inject({}) { |hh, op| hh.update op => true }
|
|
488
|
+
oplist.each { |op| h[op] = lessprio }
|
|
489
|
+
h }
|
|
490
|
+
|
|
491
|
+
|
|
492
|
+
class << self
|
|
493
|
+
# reads a binary operator from the lexer
# returns the operator token, with the operator symbol in #value, or nil
# when the next tokens are not an operator listed in OP_PRIO (everything
# read is then pushed back to the lexer)
# the token handed by the lexer is never modified: two-characters operators
# are returned as a copy
def readop(lexer)
  if not tok = lexer.readtok or tok.type != :punct
    lexer.unreadtok tok
    return
  end

  # already decoded token
  if tok.value
    if OP_PRIO[tok.value]
      return tok
    else
      lexer.unreadtok tok
      return
    end
  end

  op = tok
  case op.raw
  # may be followed by itself or '='
  when '>', '<'
    if ntok = lexer.readtok and ntok.type == :punct and (ntok.raw == op.raw or ntok.raw == '=')
      op = op.dup
      # dup is shallow: build a new string instead of appending to the
      # one shared with the original token
      op.raw = op.raw + ntok.raw
    else
      lexer.unreadtok ntok
    end
  # may be followed by itself
  when '|', '&'
    if ntok = lexer.readtok and ntok.type == :punct and ntok.raw == op.raw
      op = op.dup
      op.raw = op.raw + ntok.raw
    else
      lexer.unreadtok ntok
    end
  # must be followed by '='
  when '!', '='
    ntok = lexer.readtok
    if not ntok or ntok.type != :punct or ntok.raw != '='
      lexer.unreadtok ntok
      lexer.unreadtok tok
      return
    end
    op = op.dup
    op.raw = op.raw + ntok.raw
  # ok
  when '^', '+', '-', '*', '/', '%'
  # unknown
  else
    lexer.unreadtok tok
    return
  end
  op.value = op.raw.to_sym
  op
end
|
|
546
|
+
|
|
547
|
+
# parses floats/hex into tok.value, returns nothing
# does not parse unary operators (-/+/~)
# nothing is done when tok.value is already set, or when tok.raw does not
# start with a digit or a dot
# recognized: 0b101 / 101b, leading-0 octal, 0ffh, 0x hex (with C integer
# suffixes), decimal integers, decimal floats and C99 hex floats
# the tokens making up a float (dot, fractional part, signed exponent) are
# read from lexer and appended to tok.raw; a token read in excess is
# pushed back
def parse_num_value(lexer, tok)
  if not tok.value and tok.raw =~ /^[a-f][0-9a-f]*h$/i
    # warn on variable name like ffffh
    puts "W: Parser: you may want to add a leading 0 to #{tok.raw.inspect} at #{tok.backtrace[-2]}:#{tok.backtrace[-1]}" if $VERBOSE
  end

  return if tok.value
  return if tok.raw[0] != ?. and !(?0..?9).include? tok.raw[0]

  case tr = tok.raw.downcase
  when /^0b([01][01_]*)$/, /^([01][01_]*)b$/
    tok.value = $1.to_i(2)

  when /^(0[0-7][0-7_]*)$/
    tok.value = $1.to_i(8)

  when /^([0-9][a-f0-9_]*)h$/
    tok.value = $1.to_i(16)

  when /^0x([a-f0-9][a-f0-9_]*)(u?l?l?|l?l?u?|p([0-9][0-9_]*[fl]?)?)$/, '0x'
    tok.value = $1.to_i(16) if $1
    ntok = lexer.readtok

    # check for C99 hex float
    if not tr.include? 'p' and ntok and ntok.type == :punct and ntok.raw == '.'
      if not nntok = lexer.readtok or nntok.type != :string
        lexer.unreadtok nntok
        lexer.unreadtok ntok
        return
      end
      # append the dot and the fractional part (which also holds the 'p')
      tok.raw << ntok.raw
      ntok = nntok
      tok.raw << ntok.raw if ntok
      raise tok, 'invalid hex float' if not ntok or ntok.type != :string or ntok.raw !~ /^[0-9a-f_]*p([0-9][0-9_]*[fl]?)?$/i
      raise tok, 'invalid hex float' if tok.raw.delete('_').downcase[0,4] == '0x.p'	# no digits
      ntok = lexer.readtok
    end

    if not tok.raw.downcase.include? 'p'
      # standard hex
      lexer.unreadtok ntok
    else
      if tok.raw.downcase[-1] == ?p
        # read the signed exponent: a '+' or '-' punctuation, then digits
        tok.raw << ntok.raw if ntok
        raise tok, 'invalid hex float' if not ntok or ntok.type != :punct or (ntok.raw != '+' and ntok.raw != '-')
        ntok = lexer.readtok
        tok.raw << ntok.raw if ntok
        raise tok, 'invalid hex float' if not ntok or ntok.type != :string or ntok.raw !~ /^[0-9][0-9_]*[fl]?$/i
      else
        # the exponent was already part of the token: ntok is not ours
        lexer.unreadtok ntok
      end

      raise tok, 'internal error' if not tok.raw.delete('_').downcase =~ /^0x([0-9a-f]*)(?:\.([0-9a-f]*))?p([+-]?[0-9]+)[fl]?$/
      b1, b2, b3 = $1.to_i(16), $2, $3.to_i
      tok.value = b1.to_f
      # each fractional hex digit weighs 1/16 of the previous one
      tok.value += b2.to_i(16).to_f / 16**b2.length if b2 and not b2.empty?
      tok.value *= 2**b3
      puts "hex float: #{tok.raw} => #{tok.value}" if $DEBUG
    end

  when /^([0-9][0-9_]*)(u?l?l?|l?l?u?|e([0-9][0-9_]*[fl]?)?)$/, '.'
    tok.value = $1.to_i if $1
    ntok = lexer.readtok
    if tok.raw == '.' and (not ntok or ntok.type != :string)
      lexer.unreadtok ntok
      return
    end

    if not tr.include? 'e' and tr != '.' and ntok and ntok.type == :punct and ntok.raw == '.'
      if not nntok = lexer.readtok or nntok.type != :string
        lexer.unreadtok nntok
        lexer.unreadtok ntok
        return
      end
      # read upto '.'
      tok.raw << ntok.raw
      ntok = nntok
    end

    if not tok.raw.downcase.include? 'e' and tok.raw[-1] == ?.
      # read fractional part
      tok.raw << ntok.raw if ntok
      raise tok, 'bad float' if not ntok or ntok.type != :string or ntok.raw !~ /^[0-9_]*(e[0-9_]*)?[fl]?$/i
      ntok = lexer.readtok
    end

    if tok.raw.downcase[-1] == ?e
      # read signed exponent
      tok.raw << ntok.raw if ntok
      raise tok, 'bad float' if not ntok or ntok.type != :punct or (ntok.raw != '+' and ntok.raw != '-')
      ntok = lexer.readtok
      tok.raw << ntok.raw if ntok
      raise tok, 'bad float' if not ntok or ntok.type != :string or ntok.raw !~ /^[0-9][0-9_]*[fl]?$/i
      ntok = lexer.readtok
    end

    lexer.unreadtok ntok

    if tok.raw.delete('_').downcase =~ /^(?:(?:[0-9]+\.[0-9]*|\.[0-9]+)(?:e[+-]?[0-9]+)?|[0-9]+e[+-]?[0-9]+)[fl]?$/i
      tok.value = tok.raw.to_f
    else
      raise tok, 'internal error' if tok.raw =~ /[e.]/i
    end

  else raise tok, 'invalid numeric constant'
  end
end
|
|
658
|
+
|
|
659
|
+
# parses an integer/a float, sets its tok.value, consumes&aggregates the
# necessary following tokens (point, mantissa..) through parse_num_value
# handles first the special asm label names, when tok.value is not set yet:
#  $   name of the label ending the current source (created if needed)
#  $$  name of the label starting the current source (created if needed)
#  $_  forward anonymous label 'endinstr'
#  1b / 1f  backward / forward reference to the anonymous label 1
# XXX for binary, use _ delimiter or 0b prefix, or start with 0 : 1b may conflict with backward local anonymous label reference
def parse_intfloat(lexer, tok)
  unless tok.value
    case tok.raw
    when '$'
      here = lexer.program.cursource.last
      unless here.kind_of? Label
        here = Label.new(lexer.program.new_label('instr_start'))
        here.backtrace = tok.backtrace
        lexer.program.cursource << here
      end
      tok.value = here.name
    when '$$'
      start = lexer.program.cursource.first
      unless start.kind_of? Label
        start = Label.new(lexer.program.new_label('section_start'))
        start.backtrace = tok.backtrace
        lexer.program.cursource.unshift start
      end
      tok.value = start.name
    when '$_'
      tok.value = lexer.program.locallabels_fwd('endinstr')
    when /^([1-9][0-9]*)([fb])$/
      if $2 == 'b'
        # nil when no such label was seen: may fallback to binary parser
        tok.value = lexer.program.locallabels_bkw($1)
      else
        tok.value = lexer.program.locallabels_fwd($1)
      end
    end
  end

  parse_num_value(lexer, tok)
end
|
|
690
|
+
|
|
691
|
+
# returns the next value from lexer (parenthesised expression, immediate, variable, unary operators)
#
# Leading and trailing :space tokens are skipped.
# Returns nil at end of input, or when the next token cannot start a value
# (in which case that token is pushed back with unreadtok).
# Raises through the offending token on a missing ')' or on a unary operator
# that is not followed by a value.
def parse_value(lexer)
	nil while tok = lexer.readtok and tok.type == :space
	return if not tok
	case tok.type
	when :string
		# ignores the 'offset' word if followed by a string
		if not tok.value and tok.raw.downcase == 'offset'
			nil while ntok = lexer.readtok and ntok.type == :space
			# ntok is nil when 'offset' is the last token of the input:
			# keep 'offset' itself as the value instead of crashing on nil.type
			if ntok and ntok.type == :string; tok = ntok
			else lexer.unreadtok ntok
			end
		end
		parse_intfloat(lexer, tok)
		val = tok.value || tok.raw
	when :quoted
		# only single-quoted character constants are values
		if tok.raw[0] != ?'
			lexer.unreadtok tok
			return
		end
		s = tok.value || tok.raw[1..-2]	# raise tok, 'need ppcessing !'
		# reverse the characters first when the target cpu is little-endian
		s = s.reverse if lexer.respond_to? :program and lexer.program and lexer.program.cpu and lexer.program.cpu.endianness == :little
		# pack the bytes into one integer, first byte in the most significant position
		val = s.unpack('C*').inject(0) { |sum, c| (sum << 8) | c }
	when :punct
		case tok.raw
		when '('
			nil while ntok = lexer.readtok and (ntok.type == :space or ntok.type == :eol)
			lexer.unreadtok ntok
			val = parse(lexer)
			nil while ntok = lexer.readtok and (ntok.type == :space or ntok.type == :eol)
			raise tok, "syntax error, no ) found after #{val.inspect}, got #{ntok.inspect}" if not ntok or ntok.type != :punct or ntok.raw != ')'
		when '!', '+', '-', '~'
			nil while ntok = lexer.readtok and (ntok.type == :space or ntok.type == :eol)
			lexer.unreadtok ntok
			raise tok, 'need expression after unary operator' if not val = parse_value(lexer)
			val = Expression[tok.raw.to_sym, val]
		when '.'
			# may be the start of a float such as '.5'
			parse_intfloat(lexer, tok)
			if not tok.value
				lexer.unreadtok tok
				return
			end
			val = tok.value
		else
			lexer.unreadtok tok
			return
		end
	else
		lexer.unreadtok tok
		return
	end
	nil while tok = lexer.readtok and tok.type == :space
	lexer.unreadtok tok
	val
end
|
|
746
|
+
|
|
747
|
+
# Parses a whole expression from lexer: a first value, then any number of
# <binary operator> <value> pairs, combined with an operand stack and a stack
# of pending operators (OP_PRIO decides when pending operators get folded).
# :space and :eol tokens following an operator are skipped.
# Returns nil when no initial value can be read, otherwise Expression[result].
# Raises through the operator token when an operator has no right-hand side.
# for boolean operators, true is 1 (or anything != 0), false is 0
def parse(lexer)
	first = parse_value(lexer)
	return if not first

	operands = [first]
	pending = []

	# combine the two topmost operands using the topmost pending operator
	fold = lambda {
		o = pending.pop
		rhs = operands.pop
		lhs = operands.pop
		operands << new(o, rhs, lhs)
	}

	while op = readop(lexer)
		# swallow blanks and newlines after the operator, push back what follows
		loop do
			ntok = lexer.readtok
			next if ntok and (ntok.type == :space or ntok.type == :eol)
			lexer.unreadtok ntok
			break
		end

		# fold pending operators until OP_PRIO allows stacking the new one
		fold.call while not pending.empty? and not OP_PRIO[op.value][pending.last]

		pending << op.value

		nxt = parse_value(lexer)
		raise op, 'need rhs' if not nxt

		operands << nxt
	end

	fold.call until pending.empty?

	Expression[operands.first]
end
|
|
776
|
+
|
|
777
|
+
# Parses an expression from the beginning of str (an optional block is
# forwarded to parse) and returns it.
# str is modified in place: its first N characters are removed, N being the
# preprocessor position minus the raw length of every token still waiting in
# the preprocessor queue, so that str starts right after the parsed expression.
def parse_string!(str, &b)
	lexer = Preprocessor.new(str)

	expr = parse(lexer, &b)

	# characters actually used = position reached - tokens read but pushed back
	consumed = lexer.pos - lexer.queue.inject(0) { |total, t| total + t.raw.length }
	str[0, consumed] = ''

	expr
end
|
|
791
|
+
|
|
792
|
+
# Parses an expression from str through a fresh Preprocessor and returns the
# result of parse; an optional block is forwarded to parse.
# Unlike parse_string!, nothing is stripped from str here.
def parse_string(str, &b)
	lexer = Preprocessor.new(str)
	parse(lexer, &b)
end
|
|
796
|
+
end
|
|
797
|
+
end
|
|
798
|
+
|
|
799
|
+
# an Expression whose ::parser handles indirection (byte ptr [foobar])
class IndExpression < Expression
	class << self
		# Reads one operand, with support for memory indirections.
		# Recognized tokens, in a loop:
		#   'qword' / 'dword' / 'word' / 'byte'  set the access size to 8 / 4 / 2 / 1
		#   'ptr'                                is ignored
		#   '[' expr ']'                         the pointer is the parsed expression
		#   '*' value                            the pointer is the next value
		#   ':' name                             returns name as a Symbol (eg ':eax')
		# Any other token is pushed back and left to Expression's parse_value.
		# Returns an Indirection when a pointer was read, nil at end of input.
		# Raises (through the token or the lexer) on a missing ']', on a ':'
		# without a name, and on a size specifier with no pointer.
		def parse_value(lexer)
			sz = nil
			ptr = nil
			loop do
				nil while tok = lexer.readtok and tok.type == :space
				return if not tok
				case tok.raw
				when 'qword'; sz=8
				when 'dword'; sz=4
				when 'word'; sz=2
				when 'byte'; sz=1
				when 'ptr'
				when '['
					ptr = parse(lexer)
					nil while tok = lexer.readtok and tok.type == :space
					# tok is nil when the input ends before the closing bracket:
					# report it through the lexer instead of calling nil.raw
					raise tok || lexer, '] expected' if not tok or tok.raw != ']'
					break
				when '*'
					ptr = parse_value(lexer)
					break
				when ':'	# symbols, eg ':eax'
					n = lexer.readtok
					# a lone ':' at end of input has no name to convert
					raise tok, 'symbol name expected' if not n
					return n.raw.to_sym
				else
					lexer.unreadtok tok
					break
				end
			end
			raise lexer, 'invalid indirection' if sz and not ptr
			if ptr; Indirection[ptr, sz]	# if sz is nil, default cpu pointersz is set in resolve_expr
			else super(lexer)
			end
		end

		# Same as Expression's parse; when a block is given it is installed as
		# parse_cb (custom decimal converter) for the duration of the call.
		# The callback is cleared even when parsing raises, so a failed parse
		# cannot leak its converter into later block-less calls.
		def parse(*a, &b)
			@parse_cb = b if b
			super(*a)
		ensure
			@parse_cb = nil if b
		end

		# callback used to customize the parsing of /^([0-9]+)$/ tokens
		# implicitly set by parse(expr) { cb }
		# allows eg parsing '40000' as 0x40000 when relevant
		attr_accessor :parse_cb

		# Sets tok.value for integer tokens:
		#   plain decimal digits  through parse_cb when set, to_i otherwise
		#   0x<hex> or <hex>h     base 16
		#   0b<bits>              base 2
		# Any other token is left untouched.
		def parse_intfloat(lexer, tok)
			case tok.raw
			when /^([0-9]+)$/; tok.value = parse_cb ? @parse_cb[$1] : $1.to_i
			when /^0x([0-9a-f]+)$/i, /^([0-9a-f]+)h$/i; tok.value = $1.to_i(16)
			when /^0b([01]+)$/i; tok.value = $1.to_i(2)
			end
		end

		# Reads a binary operator. A '-' token immediately followed by a '>'
		# token is merged into a single '->' operator token (a copy of the
		# '-' token whose value is :'->'); otherwise both tokens are pushed
		# back and Expression's readop decides.
		def readop(lexer)
			if t0 = lexer.readtok and t0.raw == '-' and t1 = lexer.readtok and t1.raw == '>'
				op = t0.dup
				op.raw << t1.raw
				op.value = op.raw.to_sym
				op
			else
				# t1 is nil when it was never read; push back in reverse read order
				lexer.unreadtok t1
				lexer.unreadtok t0
				super(lexer)
			end
		end

		# Builds the node for 'l op r'. 'l -> r' becomes Indirection[[l, :+, r], nil];
		# every other operator is built by Expression.
		def new(op, r, l)
			return Indirection[[l, :+, r], nil] if op == :'->'
			super(op, r, l)
		end
	end
end
|
|
876
|
+
end
|