voodoo 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/ast.rb +47 -0
- data/bin/voodooc +146 -0
- data/lib/voodoo.rb +74 -0
- data/lib/voodoo/code_generator.rb +74 -0
- data/lib/voodoo/compiler.rb +45 -0
- data/lib/voodoo/config.rb +43 -0
- data/lib/voodoo/generators/amd64_elf_generator.rb +28 -0
- data/lib/voodoo/generators/amd64_nasm_generator.rb +288 -0
- data/lib/voodoo/generators/command_postprocessor.rb +30 -0
- data/lib/voodoo/generators/common_code_generator.rb +238 -0
- data/lib/voodoo/generators/gas_generator.rb +91 -0
- data/lib/voodoo/generators/generator_api1.rb +95 -0
- data/lib/voodoo/generators/i386_elf_generator.rb +62 -0
- data/lib/voodoo/generators/i386_nasm_generator.rb +177 -0
- data/lib/voodoo/generators/mips_gas_generator.rb +148 -0
- data/lib/voodoo/generators/nasm_elf_generator.rb +55 -0
- data/lib/voodoo/generators/nasm_generator.rb +679 -0
- data/lib/voodoo/parser.rb +283 -0
- metadata +82 -0
@@ -0,0 +1,288 @@
|
|
1
|
+
require 'voodoo/generators/nasm_generator'
|
2
|
+
|
3
|
+
module Voodoo
|
4
|
+
# = AMD64 NASM Code Generator
|
5
|
+
#
|
6
|
+
# Code generator that emits NASM assembly code for AMD64 processors.
|
7
|
+
#
|
8
|
+
# == Calling Convention
|
9
|
+
#
|
10
|
+
# The calling convention implemented by this code generator is
|
11
|
+
# compatible with the System V ABI for AMD64, provided that all
|
12
|
+
# arguments are integers or pointers.
|
13
|
+
#
|
14
|
+
# Arguments are passed in registers. The registers are used in the
|
15
|
+
# following order:
|
16
|
+
#
|
17
|
+
# 1. +rdi+
|
18
|
+
# 2. +rsi+
|
19
|
+
# 3. +rdx+
|
20
|
+
# 4. +rcx+
|
21
|
+
# 5. +r8+
|
22
|
+
# 6. +r9+
|
23
|
+
#
|
24
|
+
# Additional arguments are pushed on the stack, starting with the last
|
25
|
+
# argument and working backwards. These arguments are removed from the
|
26
|
+
# stack by the caller, after the called function returns.
|
27
|
+
#
|
28
|
+
# The return value is passed in +rax+.
|
29
|
+
#
|
30
|
+
# For varargs functions, +rax+ must be set to an upper bound on the
|
31
|
+
# number of vector arguments. Since the code generator does not know
|
32
|
+
# whether the called function is a varargs function, this is always
|
33
|
+
# done. Since the code generator never passes any vector arguments,
|
34
|
+
# this means +rax+ is set to +0+ before each call.
|
35
|
+
#
|
36
|
+
# == Call Frames
|
37
|
+
#
|
38
|
+
# arg_n
|
39
|
+
# :
|
40
|
+
# arg_7
|
41
|
+
# arg_6
|
42
|
+
# saved_rip
|
43
|
+
# saved_rbp <-- rbp
|
44
|
+
# arg_0
|
45
|
+
# arg_1
|
46
|
+
# :
|
47
|
+
# arg_5
|
48
|
+
# local_0
|
49
|
+
# local_1
|
50
|
+
# :
|
51
|
+
# local_n <-- rsp
|
52
|
+
#
|
53
|
+
class AMD64NasmGenerator < NasmGenerator
|
54
|
+
# Set up the AMD64-specific constants, then hand _params_ to the
# superclass initializer.
def initialize params = {}
  # Size of a machine word in bytes, and the NASM name for that size
  @WORDSIZE = 8
  @WORD_NAME = 'qword'
  # Default alignments for code, data, and functions
  @CODE_ALIGNMENT = 0
  @DATA_ALIGNMENT = @WORDSIZE
  @FUNCTION_ALIGNMENT = 16
  # Registers used for return values, as scratch space, and for
  # passing arguments (in argument order)
  @RETURN_REG = 'rax'
  @SCRATCH_REG = 'r11'
  @ARG_REGS = %w[rdi rsi rdx rcx r8 r9]
  # Accumulator, base, count, and data registers
  @AX = 'rax'
  @BX = 'rbx'
  @CX = 'rcx'
  @DX = 'rdx'
  super(params)
end
|
81
|
+
|
82
|
+
#
|
83
|
+
# == Data Definition
|
84
|
+
#
|
85
|
+
|
86
|
+
# Define a machine word with the given value.
# On this target a machine word is a qword, so this delegates to #qword.
def word value
  qword(value)
end
|
90
|
+
|
91
|
+
#
|
92
|
+
# == Functions
|
93
|
+
#
|
94
|
+
|
95
|
+
# Call a function.
#
# Emits code that passes the first arguments in the registers listed in
# @ARG_REGS and pushes the remaining arguments on the stack (last
# argument first), calls _func_, and finally removes the stack
# arguments again.
def call func, *args
  emit "; call #{func} #{args.join ' '}\n"
  # First couple of arguments go in registers. The range must exclude
  # its end: an inclusive range would also select the first stack
  # argument, for which there is no entry in @ARG_REGS.
  register_args = args[0...number_of_register_arguments] || []
  # Rest of arguments go on the stack
  stack_args = args[number_of_register_arguments..-1] || []
  emit "; register_args: #{register_args.inspect}\n"
  emit "; stack_args: #{stack_args.inspect}\n"
  # Push stack arguments
  stack_args.reverse.each { |arg| push_qword arg }
  # Load register arguments
  register_args.each_with_index do |arg, i|
    register = @ARG_REGS[i]
    value_ref = load_value arg, register
    # Elide the move if the value already lives in the target register
    if value_ref != register
      emit "mov #{register}, #{value_ref}\n"
    end
  end
  # Call function
  value_ref = load_value func, @SCRATCH_REG
  # rax is cleared before every call (no vector arguments are passed)
  emit "xor rax, rax\n"
  # If value_ref is a symbol, use PLT-relative addressing
  if global?(func)
    emit "call #{value_ref} wrt ..plt\n"
  else
    emit "call #{value_ref}\n"
  end
  # Clean up stack
  unless stack_args.empty?
    emit "add rsp, #{stack_args.length * @WORDSIZE}\n"
  end
end
|
128
|
+
|
129
|
+
# Emit function prologue.
#
# Sets up rbp for the new frame and then pushes every argument register
# that carries one of _formals_, in argument order.
def emit_function_prologue formals = []
  emit "push rbp\nmov rbp, rsp\n"
  return if formals.empty?

  formals[0...number_of_register_arguments].each_index do |index|
    emit "push #{@ARG_REGS[index]}\n"
  end
end
|
139
|
+
|
140
|
+
# Call a function, re-using the current call frame if possible.
#
# If the callee needs more stack arguments than the current function
# has, a regular call followed by a return is emitted instead and
# nothing else is generated.
def tail_call func, *args
  emit "; tail-call #{func} #{args.join ' '}\n"
  # Compute required number of stack words
  nstackargs = number_of_stack_arguments args.length
  # If we need more stack arguments than we have now,
  # perform a normal call and return
  if nstackargs > number_of_stack_arguments(@environment.args)
    emit "; Not enough space for proper tail call; using regular call\n"
    # Stop here; otherwise the tail-call sequence below would be
    # emitted in addition to the regular call.
    return ret(:call, func, *args)
  end

  # If any arguments are going to be overwritten before they are
  # used, save them to new local variables and use those instead.
  # Index -1 stands for the function itself.
  i = args.length - 1
  while i >= -1
    arg = (i >= 0) ? args[i] : func

    if symbol?(arg)
      x = @environment[arg]
      if x && x[0] == :arg && x[1] < args.length && x[1] > i &&
          (i >= 0 || func != args[x[1]])
        # Save value
        newsym = @environment.gensym
        let newsym, arg
        # Change reference
        if i >= 0
          args[i] = newsym
        else
          func = newsym
        end
      end
    end
    i = i - 1
  end

  # Set stack arguments, starting with the last one and working
  # backwards. (A Range with a start above its end is empty, so the
  # countdown has to be done with downto.)
  (args.length - 1).downto(number_of_register_arguments) do |index|
    arg = args[index]

    value_ref = load_value arg, @SCRATCH_REG
    newarg_ref = load_arg index
    # Elide code if source is same as destination
    unless value_ref == newarg_ref
      emit "mov #{@WORD_NAME} #{newarg_ref}, #{value_ref}\n"
    end
  end

  # Set register arguments
  number_of_register_arguments(args.length).times do |index|
    register = @ARG_REGS[index]
    load_value_into_register args[index], register
  end

  # Tail call
  # NOTE(review): @BX is rbx, which the System V AMD64 ABI treats as
  # callee-saved; confirm that clobbering it here is acceptable.
  func_ref = load_value func, @BX
  emit "leave\n"
  set_register @AX, 0
  # If func_ref is a symbol, use PLT-relative addressing
  if global?(func)
    emit "jmp #{func_ref} wrt ..plt\n"
  else
    emit "jmp #{func_ref}\n"
  end
end
|
209
|
+
|
210
|
+
#
|
211
|
+
# == Loading Values
|
212
|
+
#
|
213
|
+
|
214
|
+
# Load the value of the nth argument.
#
# Returns a memory reference (relative to rbp) for argument _n_.
# The _reg_ parameter is accepted but not used by this implementation.
def load_arg n, reg = @SCRATCH_REG
  if register_argument?(n)
    # Arguments that were originally passed in a register
    # are now below rbp
    "[rbp - #{(n + 1) * @WORDSIZE}]"
  else
    # Arguments that were originally passed on the stack are above rbp.
    # rbp itself points at saved_rbp and the word above that holds
    # saved_rip, so the first stack argument is two words above rbp.
    "[rbp + #{(n + 2 - number_of_register_arguments) * @WORDSIZE}]"
  end
end
|
226
|
+
|
227
|
+
# Load the value of the nth local variable.
#
# Returns a memory reference (relative to rbp) for local _n_.
# The _reg_ parameter is accepted but not used by this implementation.
def load_local n, reg = @SCRATCH_REG
  # If the current function has any arguments, its register arguments
  # sit between rbp and the locals, so local variables are offset by
  # number_of_register_arguments(number_of_arguments) words.
  saved_argument_words = number_of_register_arguments(@environment.args)
  words_below_rbp = saved_argument_words + n + 1
  "[rbp - #{words_below_rbp * @WORDSIZE}]"
end
|
236
|
+
|
237
|
+
#
|
238
|
+
# == Variables
|
239
|
+
#
|
240
|
+
|
241
|
+
# Introduce a new local variable.
#
# Registers _symbol_ as a local in the current environment, evaluates
# the expression given by _words_ into the return register, and pushes
# that register on the stack.
def let symbol, *words
  emit("; let #{symbol} #{words.join(' ')}\n")
  @environment.add_local(symbol)
  eval_expr(words, @RETURN_REG)
  emit("push #{@RETURN_REG}\n")
end
|
248
|
+
|
249
|
+
#
|
250
|
+
# == Miscellaneous
|
251
|
+
#
|
252
|
+
|
253
|
+
# Load a value and push it on the stack.
# The scratch register is offered to load_value as working space.
def push_qword value
  emit "push qword #{load_value(value, @SCRATCH_REG)}\n"
end
|
258
|
+
|
259
|
+
# Calculate the number of register arguments,
# given the total number of arguments.
# If _n_ is +nil+, returns the maximum number of
# register arguments.
def number_of_register_arguments n = nil
  maximum = @ARG_REGS.length
  return maximum if n.nil?

  [maximum, n].min
end
|
270
|
+
|
271
|
+
# Calculate the number of stack arguments,
# given the total number of arguments.
# Arguments beyond the maximum number of register arguments go on the
# stack; the result is never negative.
def number_of_stack_arguments n
  surplus = n - number_of_register_arguments
  surplus > 0 ? surplus : 0
end
|
276
|
+
|
277
|
+
# Tests if the nth argument is a register argument.
# Arguments are counted from 0.
def register_argument? n
  limit = number_of_register_arguments
  n < limit
end
|
281
|
+
|
282
|
+
end
|
283
|
+
|
284
|
+
# Register this class with Voodoo::CodeGenerator as the generator for
# the :amd64 architecture and the :nasm format
Voodoo::CodeGenerator.register_generator(AMD64NasmGenerator,
                                         :architecture => :amd64,
                                         :format => :nasm)
|
288
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'delegate'
|
2
|
+
require 'tempfile'
|
3
|
+
|
4
|
+
module Voodoo
|
5
|
+
# Utility functions for classes that use commands to postprocess
|
6
|
+
# generator output.
|
7
|
+
module CommandPostProcessor
|
8
|
+
module_function
|
9
|
+
|
10
|
+
# Encodes a string so that it is safe for use as a shell argument.
#
# The result is wrapped in double quotes, with backslash, backquote,
# double quote, and dollar sign escaped by a backslash. Newlines are
# deliberately left unescaped: inside double quotes the shell treats a
# backslash followed by a newline as a line continuation and removes
# both, which would silently drop the newline from the argument.
def shell_encode string
  '"' + string.gsub(/([\\`"$])/, "\\\\\\1") + '"'
end
|
14
|
+
|
15
|
+
# Creates a temporary file and returns its name.
#
# [extension] suffix for the file name, e.g. ".asm"
# [base]      prefix for the file name; defaults to the name of the
#             receiver's class
#
# The file is closed before its name is returned.
# NOTE(review): the Tempfile object is dropped here; Ruby's Tempfile
# may remove the file once that object is garbage collected - confirm
# that callers use the name before that can happen.
def tempfile extension, base = nil
  base = self.class.name unless base
  # Passing [prefix, suffix] makes the extension a real file suffix
  file = Tempfile.open([base, extension])
  name = file.path
  file.close
  name
end
|
23
|
+
|
24
|
+
# Writes the contents of the named file to an IO handle.
# The whole file is read into memory and passed to io.write in one go.
def write_file_to_io filename, io
  io.write(File.read(filename))
end
|
28
|
+
|
29
|
+
end
|
30
|
+
end
|
@@ -0,0 +1,238 @@
|
|
1
|
+
require 'voodoo/generators/generator_api1'
|
2
|
+
|
3
|
+
module Voodoo
|
4
|
+
# Common base class for code generators.
|
5
|
+
#
|
6
|
+
# Code generators are expected to implement the following methods:
|
7
|
+
#
|
8
|
+
# - #new
|
9
|
+
# - #add
|
10
|
+
# - #add_function
|
11
|
+
# - #gensym
|
12
|
+
# - #output_file_name
|
13
|
+
# - #wordsize
|
14
|
+
# - #write
|
15
|
+
#
|
16
|
+
# This class contains base implementations of some of these methods,
|
17
|
+
# which can be used and/or overridden by subclasses.
|
18
|
+
#
|
19
|
+
# An example of how to use the code generators provided by this module
|
20
|
+
# is provided on the main page of the documentation of the Voodoo module.
|
21
|
+
#
|
22
|
+
class CommonCodeGenerator
|
23
|
+
# Provide compatibility with old API
|
24
|
+
include GeneratorApi1
|
25
|
+
|
26
|
+
# Initializes the code generator.
# _params_ shall be a hash containing parameters to the code generator,
# and shall at least contain the keys <tt>:architecture</tt> and
# <tt>:format</tt>, specifying the target architecture and output
# format, respectively. Keys that are missing fall back to the defaults
# from Config.
def initialize params = {}
  @architecture = params[:architecture] || Config.default_architecture
  @format = params[:format] || Config.default_format
  @sections = {}
  @section_aliases = {}
  # Default section aliases. Subclasses can start from scratch by
  # doing @section_aliases = {}
  [[:code, ".text"], [:functions, :code], [:data, ".data"]].each do |alias_name, target|
    section_alias alias_name, target
  end
  self.section = :code
  # Code generation starts out in the top-level environment
  @top_level = Environment.initial_environment
  @environment = @top_level
end
|
45
|
+
|
46
|
+
# Adds code to the given section.
|
47
|
+
#
|
48
|
+
# Examples:
|
49
|
+
# add :code, [:return, 0]
|
50
|
+
# add :data, [:align], [:label, :xyzzy], [:word, 42]
|
51
|
+
#
|
52
|
+
# This method implements the required functionality in terms
|
53
|
+
# of the following methods, which must be implemented by subclasses:
|
54
|
+
#
|
55
|
+
# - #align
|
56
|
+
# - #byte
|
57
|
+
# - #call
|
58
|
+
# - #end_if
|
59
|
+
# - #export
|
60
|
+
# - #begin_function
|
61
|
+
# - #ifelse
|
62
|
+
# - #ifeq
|
63
|
+
# - #ifge
|
64
|
+
# - #ifgt
|
65
|
+
# - #ifle
|
66
|
+
# - #iflt
|
67
|
+
# - #ifne
|
68
|
+
# - #import
|
69
|
+
# - #label
|
70
|
+
# - #let
|
71
|
+
# - #ret
|
72
|
+
# - #set
|
73
|
+
# - #set_byte
|
74
|
+
# - #set_word
|
75
|
+
# - #string
|
76
|
+
# - #word
|
77
|
+
#
|
78
|
+
def add section, *code
  # Keywords whose method name differs from the keyword itself
  renamed = {
    :return => :ret,
    :'set-word' => :set_word,
    :'set-byte' => :set_byte,
    :'tail-call' => :tail_call
  }
  in_section section do
    code.each do |action|
      keyword = action[0]
      args = action[1..-1]
      case keyword
      when :function
        # args[0] holds the formals, the rest is the function body
        begin_function(*args[0])
        args[1..-1].each { |statement| add section, statement }
        end_function
      when :ifeq, :ifge, :ifgt, :ifle, :iflt, :ifne
        # action is [keyword, [x, y], truebody, falsebody]
        operands, truebody, falsebody = action[1], action[2], action[3]
        send(keyword, operands[0], operands[1])
        add(section, *truebody)
        if falsebody && !falsebody.empty?
          ifelse
          add(section, *falsebody)
        end
        end_if
      else
        method_name = renamed[keyword]
        if method_name
          send(method_name, *args)
        else
          # Every other action maps directly onto a method call
          send(*action)
        end
      end
    end
  end
end
|
111
|
+
|
112
|
+
# Add function.
#
# Wraps _formals_ and _code_ in a :function action and adds it to the
# :functions section.
#
# Parameters:
# [formals] an Array of formal parameter names
# [code] an Array of actions to be used as the function's body
#
# Example:
#   add_function [:n], [:return, :add, :n, 1]
def add_function formals, *code
  add(:functions, [:function, formals, *code])
end
|
123
|
+
|
124
|
+
# Generate a new, unused symbol.
# Delegates to Environment.gensym.
def gensym
  Environment.gensym()
end
|
128
|
+
|
129
|
+
# Add code to the current section.
# The current section name may be an alias, so it is resolved to the
# real section name before _code_ is appended.
def emit code
  target = real_section_name(@section)
  @sections[target] << code
end
|
133
|
+
|
134
|
+
# Get the real name of a section.
# Given a section name which may be an alias, this method returns the
# real name of the section.
#
# Aliases are followed until a name is reached that is not itself an
# alias. If the aliases form a cycle, the lookup stops as soon as a
# name is seen for the second time and that name is returned, so a
# cycle can never make this method loop forever.
def real_section_name name
  visited = [name]
  loop do
    target = @section_aliases[name]
    break if target.nil? # Not an alias, exit loop and return name
    name = target
    # A name we have already seen means we are going around in
    # circles. Just return what we have now.
    break if visited.include?(name)
    visited << name
  end
  name
end
|
149
|
+
|
150
|
+
# Set the current section.
# _name_ may be an alias; it is stored as given, and an empty section
# is created under the real name if none exists yet.
def section= name
  resolved = real_section_name name
  @section = name
  @sections[resolved] = '' unless @sections.key?(resolved)
end
|
158
|
+
|
159
|
+
# Get the current section. If _name_ is given, that section is made
# the current section first.
def section name = nil
  return @section unless name

  self.section = name
  @section
end
|
163
|
+
|
164
|
+
# Set up +alias_name+ to refer to the same section as +original_name+.
# An existing alias with the same name is replaced.
def section_alias alias_name, original_name
  @section_aliases.store(alias_name, original_name)
end
|
168
|
+
|
169
|
+
# Run the given block with _name_ as the current section.
# The section that was current before the call is restored afterwards,
# even if the block raises. Returns the value of the block.
def in_section name, &block
  previous = @section
  self.section = name
  begin
    yield
  ensure
    # Always switch back, whatever happened inside the block
    self.section = previous
  end
end
|
178
|
+
|
179
|
+
# Given an input file name, returns the canonical output file name
# for this code generator: a trailing ".voo" is dropped and ".o" is
# appended.
def output_file_name input_name
  stem = input_name.sub(/\.voo$/, '')
  "#{stem}.o"
end
|
184
|
+
|
185
|
+
# Keeps track of the symbols known at a point in the program, together
# with the number of arguments and local variables defined so far.
class Environment
  # Counter shared by all environments; used to make gensyms unique
  @@gensym_counter = 0

  attr_reader :args, :locals, :symbols

  # Creates a new environment. If _parent_ is given, the new
  # environment starts out with a copy of the parent's symbol table
  # and the parent's argument and local variable counts.
  def initialize parent = nil
    ## Parent environment
    @parent = parent
    if parent
      ## Symbol lookup table
      @symbols = parent.symbols.dup
      ## Number of arguments
      @args = parent.args
      ## Number of local variables
      @locals = parent.locals
    else
      @symbols = {}
      @args = 0
      @locals = 0
    end
  end

  # Registers _symbol_ as the next argument.
  def add_arg symbol
    @symbols[symbol] = [:arg, @args]
    @args += 1
  end

  # Registers each of _symbols_ as an argument, in order.
  def add_args symbols
    symbols.each { |name| add_arg name }
  end

  # Registers _symbol_ as the next local variable.
  def add_local symbol
    @symbols[symbol] = [:local, @locals]
    @locals += 1
  end

  # Registers each of _symbols_ as a local variable, in order.
  def add_locals symbols
    symbols.each { |name| add_local name }
  end

  # Generates a new symbol; see Environment.gensym.
  def gensym
    Environment.gensym
  end

  # Looks up _symbol_. Returns a [kind, index] pair, where kind is
  # :arg or :local, or nil if the symbol is not known.
  def [] symbol
    @symbols[symbol]
  end

  class << self
    # Generates a new symbol of the form _G<n>, where n is a counter
    # that is incremented on every call.
    def gensym
      @@gensym_counter += 1
      :"_G#{@@gensym_counter}"
    end

    # Returns a fresh environment without a parent.
    def initial_environment
      Environment.new
    end
  end
end
|
236
|
+
|
237
|
+
end
|
238
|
+
end
|