voodoo 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/ast.rb +47 -0
- data/bin/voodooc +146 -0
- data/lib/voodoo.rb +74 -0
- data/lib/voodoo/code_generator.rb +74 -0
- data/lib/voodoo/compiler.rb +45 -0
- data/lib/voodoo/config.rb +43 -0
- data/lib/voodoo/generators/amd64_elf_generator.rb +28 -0
- data/lib/voodoo/generators/amd64_nasm_generator.rb +288 -0
- data/lib/voodoo/generators/command_postprocessor.rb +30 -0
- data/lib/voodoo/generators/common_code_generator.rb +238 -0
- data/lib/voodoo/generators/gas_generator.rb +91 -0
- data/lib/voodoo/generators/generator_api1.rb +95 -0
- data/lib/voodoo/generators/i386_elf_generator.rb +62 -0
- data/lib/voodoo/generators/i386_nasm_generator.rb +177 -0
- data/lib/voodoo/generators/mips_gas_generator.rb +148 -0
- data/lib/voodoo/generators/nasm_elf_generator.rb +55 -0
- data/lib/voodoo/generators/nasm_generator.rb +679 -0
- data/lib/voodoo/parser.rb +283 -0
- metadata +82 -0
@@ -0,0 +1,288 @@
|
|
1
|
+
require 'voodoo/generators/nasm_generator'
|
2
|
+
|
3
|
+
module Voodoo
|
4
|
+
# = AMD64 NASM Code Generator
|
5
|
+
#
|
6
|
+
# Code generator that emits NASM assembly code for AMD64 processors.
|
7
|
+
#
|
8
|
+
# == Calling Convention
|
9
|
+
#
|
10
|
+
# The calling convention implemented by this code generator is
|
11
|
+
# compatible with the System V ABI for AMD64, provided that all
|
12
|
+
# arguments are integers or pointers.
|
13
|
+
#
|
14
|
+
# Arguments are passed in registers. The registers are used in the
|
15
|
+
# following order:
|
16
|
+
#
|
17
|
+
# 1. +rdi+
|
18
|
+
# 2. +rsi+
|
19
|
+
# 3. +rdx+
|
20
|
+
# 4. +rcx+
|
21
|
+
# 5. +r8+
|
22
|
+
# 6. +r9+
|
23
|
+
#
|
24
|
+
# Additional arguments are pushed on the stack, starting with the last
|
25
|
+
# argument and working backwards. These arguments are removed from the
|
26
|
+
# stack by the caller, after the called function returns.
|
27
|
+
#
|
28
|
+
# The return value is passed in +rax+.
|
29
|
+
#
|
30
|
+
# For varargs functions, +rax+ must be set to an upper bound on the
|
31
|
+
# number of vector arguments. Since the code generator does not know
|
32
|
+
# whether the called function is a varargs function, this is always
|
33
|
+
# done. Since the code generator never passes any vector arguments,
|
34
|
+
# this means +rax+ is set to +0+ before each call.
|
35
|
+
#
|
36
|
+
# == Call Frames
|
37
|
+
#
|
38
|
+
# arg_n
|
39
|
+
# :
|
40
|
+
# arg_7
|
41
|
+
# arg_6
|
42
|
+
# saved_rip
|
43
|
+
# saved_rbp <-- rbp
|
44
|
+
# arg_0
|
45
|
+
# arg_1
|
46
|
+
# :
|
47
|
+
# arg_5
|
48
|
+
# local_0
|
49
|
+
# local_1
|
50
|
+
# :
|
51
|
+
# local_n <-- rsp
|
52
|
+
#
|
53
|
+
class AMD64NasmGenerator < NasmGenerator
|
54
|
+
# Sets the AMD64-specific parameters and then hands +params+ on to the
# superclass initializer.
def initialize(params = {})
  # A machine word is 8 bytes, which NASM calls a qword.
  @WORDSIZE = 8
  @WORD_NAME = 'qword'

  # Default alignments for code, data and functions.
  @CODE_ALIGNMENT = 0
  @DATA_ALIGNMENT = @WORDSIZE
  @FUNCTION_ALIGNMENT = 16

  # Return values go in rax; r11 serves as the scratch register.
  @RETURN_REG = 'rax'
  @SCRATCH_REG = 'r11'

  # Registers used for argument passing, in argument order.
  @ARG_REGS = %w[rdi rsi rdx rcx r8 r9]

  # Accumulator, base, count and data registers.
  @AX, @BX, @CX, @DX = 'rax', 'rbx', 'rcx', 'rdx'

  super(params)
end
|
81
|
+
|
82
|
+
#
|
83
|
+
# == Data Definition
|
84
|
+
#
|
85
|
+
|
86
|
+
# Defines a machine word holding +value+ by delegating to #qword.
def word(value)
  qword(value)
end
|
90
|
+
|
91
|
+
#
|
92
|
+
# == Functions
|
93
|
+
#
|
94
|
+
|
95
|
+
# Call a function.
#
# +func+ is the function to call and +args+ are the values passed to it.
# The first number_of_register_arguments arguments are loaded into the
# registers listed in @ARG_REGS, in order. Any remaining arguments are
# pushed on the stack, last argument first, and removed again by adjusting
# rsp after the call.
def call func, *args
  emit "; call #{func} #{args.join ' '}\n"
  # First couple of arguments go in registers. The range is exclusive:
  # an inclusive range would also claim the first stack argument, for
  # which @ARG_REGS has no entry.
  register_args = args[0...number_of_register_arguments] || []
  # Rest of arguments go on the stack
  stack_args = args[number_of_register_arguments..-1] || []
  emit "; register_args: #{register_args.inspect}\n"
  emit "; stack_args: #{stack_args.inspect}\n"
  # Push stack arguments
  stack_args.reverse.each { |arg| push_qword arg }
  # Load register arguments
  register_args.each_with_index do |arg, i|
    register = @ARG_REGS[i]
    value_ref = load_value arg, register
    # Only move when the value is not already in the target register
    if value_ref != register
      emit "mov #{register}, #{value_ref}\n"
    end
  end
  # Call function
  value_ref = load_value func, @SCRATCH_REG
  # rax is zeroed before every call
  emit "xor rax, rax\n"
  # Global functions are called with PLT-relative addressing
  if global?(func)
    emit "call #{value_ref} wrt ..plt\n"
  else
    emit "call #{value_ref}\n"
  end
  # Clean up stack
  unless stack_args.empty?
    emit "add rsp, #{stack_args.length * @WORDSIZE}\n"
  end
end
|
128
|
+
|
129
|
+
# Emits the function prologue: sets up rbp as the frame pointer and then
# pushes the argument register of every formal that was passed in a
# register, in argument order.
def emit_function_prologue(formals = [])
  emit "push rbp\nmov rbp, rsp\n"
  return if formals.empty?

  formals[0...number_of_register_arguments].each_index do |index|
    emit "push #{@ARG_REGS[index]}\n"
  end
end
|
139
|
+
|
140
|
+
# Call a function, re-using the current call frame if possible.
#
# +func+ is the function to call and +args+ are the values passed to it.
# If the call needs more stack arguments than the current function
# received, the current frame cannot be re-used; a regular call followed
# by a return is generated instead, and nothing else is emitted.
def tail_call func, *args
  emit "; tail-call #{func} #{args.join ' '}\n"
  # Compute required number of stack words
  nstackargs = number_of_stack_arguments args.length
  # If we need more stack arguments than we have now,
  # perform a normal call and return
  if nstackargs > number_of_stack_arguments(@environment.args)
    emit "; Not enough space for proper tail call; using regular call\n"
    # Return here: without it, the tail-call sequence below would be
    # generated in addition to the regular call.
    return ret(:call, func, *args)
  end

  # If any arguments are going to be overwritten before they are
  # used, save them to new local variables and use those instead.
  # Index -1 stands for the function reference itself.
  i = args.length - 1
  while i >= -1
    arg = (i >= 0) ? args[i] : func

    if symbol?(arg)
      x = @environment[arg]
      if x && x[0] == :arg && x[1] < args.length && x[1] > i &&
          (i >= 0 || func != args[x[1]])
        # Save value
        newsym = @environment.gensym
        let newsym, arg
        # Change reference
        if i >= 0
          args[i] = newsym
        else
          func = newsym
        end
      end
    end
    i = i - 1
  end

  # Set stack arguments, from the last one down to the first one.
  # downto is required here: a Range whose start exceeds its end is
  # empty, so iterating (last .. first) would skip the stores whenever
  # there is more than one stack argument.
  if args.length > number_of_register_arguments
    (args.length - 1).downto(number_of_register_arguments) do |n|
      value_ref = load_value args[n], @SCRATCH_REG
      newarg_ref = load_arg n
      # Elide code if source is same as destination
      unless value_ref == newarg_ref
        emit "mov #{@WORD_NAME} #{newarg_ref}, #{value_ref}\n"
      end
    end
  end

  # Set register arguments
  if args.length > 0
    number_of_register_arguments(args.length).times do |n|
      register = @ARG_REGS[n]
      load_value_into_register args[n], register
    end
  end

  # Tail call
  # NOTE(review): the register passed here is @BX, whereas #call passes
  # @SCRATCH_REG for the function reference -- confirm this is intended.
  func_ref = load_value func, @BX
  emit "leave\n"
  set_register @AX, 0
  # Global functions are reached with PLT-relative addressing
  if global?(func)
    emit "jmp #{func_ref} wrt ..plt\n"
  else
    emit "jmp #{func_ref}\n"
  end
end
|
209
|
+
|
210
|
+
#
|
211
|
+
# == Loading Values
|
212
|
+
#
|
213
|
+
|
214
|
+
# Load the value of the nth argument.
#
# Returns a memory operand, relative to rbp, that refers to argument +n+
# (counting from 0). +reg+ is accepted but not used by this method.
def load_arg n, reg = @SCRATCH_REG
  if register_argument?(n)
    # Arguments that were originally passed in a register
    # are now below rbp
    "[rbp - #{(n + 1) * @WORDSIZE}]"
  else
    # Arguments that were originally passed on the stack are now above
    # rbp. The saved rbp and the return address lie between rbp and the
    # first stack argument, so that argument is two words above rbp.
    "[rbp + #{(n + 2 - number_of_register_arguments) * @WORDSIZE}]"
  end
end
|
226
|
+
|
227
|
+
# Returns the rbp-relative memory operand of the nth local variable.
#
# The words directly below rbp hold the saved register arguments of the
# current function, so locals start after
# number_of_register_arguments(number of arguments) words.
def load_local(n, reg = @SCRATCH_REG)
  saved_argument_words = number_of_register_arguments(@environment.args)
  slot = saved_argument_words + n + 1
  "[rbp - #{slot * @WORDSIZE}]"
end
|
236
|
+
|
237
|
+
#
|
238
|
+
# == Variables
|
239
|
+
#
|
240
|
+
|
241
|
+
# Introduces the local variable +symbol+: registers it with the current
# environment, evaluates +words+ into the return register and pushes that
# register on the stack.
def let(symbol, *words)
  emit "; let #{symbol} #{words.join ' '}\n"
  @environment.add_local(symbol)
  eval_expr(words, @RETURN_REG)
  emit "push #{@RETURN_REG}\n"
end
|
248
|
+
|
249
|
+
#
|
250
|
+
# == Miscellaneous
|
251
|
+
#
|
252
|
+
|
253
|
+
# Loads +value+ (using the scratch register) and pushes the resulting
# operand on the stack as a qword.
def push_qword(value)
  operand = load_value(value, @SCRATCH_REG)
  emit "push qword #{operand}\n"
end
|
258
|
+
|
259
|
+
# Returns how many of _n_ arguments are passed in registers.
# When _n_ is +nil+ (the default), returns the maximum number of
# register arguments, i.e. the number of entries in @ARG_REGS.
def number_of_register_arguments(n = nil)
  available = @ARG_REGS.length
  return available if n.nil?

  [available, n].min
end
|
270
|
+
|
271
|
+
# Returns how many of _n_ arguments are passed on the stack, i.e. the
# number of arguments beyond the register arguments (never negative).
def number_of_stack_arguments(n)
  overflow = n - number_of_register_arguments
  [0, overflow].max
end
|
276
|
+
|
277
|
+
# True when argument number +n+ (counting from 0) is passed in a register.
def register_argument?(n)
  number_of_register_arguments > n
end
|
281
|
+
|
282
|
+
end
|
283
|
+
|
284
|
+
# Make this generator available for the amd64 architecture in nasm format
Voodoo::CodeGenerator.register_generator(AMD64NasmGenerator,
                                         :architecture => :amd64,
                                         :format => :nasm)
|
288
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'delegate'
|
2
|
+
require 'tempfile'
|
3
|
+
|
4
|
+
module Voodoo
|
5
|
+
# Utility functions for classes that use commands to postprocess
|
6
|
+
# generator output.
|
7
|
+
module CommandPostProcessor
|
8
|
+
module_function
|
9
|
+
|
10
|
+
# Wraps +string+ in double quotes, prefixing every backslash, backquote,
# double quote, dollar sign and newline in it with a backslash, so the
# result can be used as a shell argument.
def shell_encode(string)
  escaped = string.gsub(/([\\`"$\n])/, "\\\\\\1")
  ['"', escaped, '"'].join
end
|
14
|
+
|
15
|
+
# Creates a temporary file and returns its name.
#
# The name of the file starts with +base+ followed by +extension+. When
# +base+ is not given, the name of the receiver's class is used.
#
# NOTE(review): only the path is returned and the Tempfile object is
# dropped; Ruby may delete the file once that object is garbage
# collected -- confirm callers use the file before that can happen.
def tempfile extension, base = nil
  base = self.class.name unless base
  # Use the local variable +base+; the previously referenced name
  # +basename+ is not defined anywhere in this method.
  file = Tempfile.open(base + extension)
  name = file.path
  file.close
  name
end
|
23
|
+
|
24
|
+
# Reads the whole file called +filename+ and writes its contents to +io+.
def write_file_to_io(filename, io)
  File.open(filename) do |source|
    contents = source.read
    io.write(contents)
  end
end
|
28
|
+
|
29
|
+
end
|
30
|
+
end
|
@@ -0,0 +1,238 @@
|
|
1
|
+
require 'voodoo/generators/generator_api1'
|
2
|
+
|
3
|
+
module Voodoo
|
4
|
+
# Common base class for code generators.
|
5
|
+
#
|
6
|
+
# Code generators are expected to implement the following methods:
|
7
|
+
#
|
8
|
+
# - #new
|
9
|
+
# - #add
|
10
|
+
# - #add_function
|
11
|
+
# - #gensym
|
12
|
+
# - #output_file_name
|
13
|
+
# - #wordsize
|
14
|
+
# - #write
|
15
|
+
#
|
16
|
+
# This class contains base implementations of some of these methods,
|
17
|
+
# which can be used and/or overridden by subclasses.
|
18
|
+
#
|
19
|
+
# An example of how to use the code generators provided by this module
|
20
|
+
# is provided on the main page of the documentation of the Voodoo module.
|
21
|
+
#
|
22
|
+
class CommonCodeGenerator
|
23
|
+
# Provide compatibility with old API
|
24
|
+
include GeneratorApi1
|
25
|
+
|
26
|
+
# Initializes the code generator.
# _params_ is a hash of parameters for the code generator. The keys
# <tt>:architecture</tt> and <tt>:format</tt> select the target
# architecture and output format; when one is missing, the corresponding
# default from Config is used.
def initialize(params = {})
  @architecture = params[:architecture] || Config.default_architecture
  @format = params[:format] || Config.default_format
  @sections = {}
  @section_aliases = {}
  # Default section aliases. Subclasses can start from scratch by
  # doing @section_aliases = {}
  [[:code, ".text"], [:functions, :code], [:data, ".data"]].each do |alias_name, target|
    section_alias alias_name, target
  end
  self.section = :code
  # Code generation starts out in the top-level environment
  @environment = @top_level = Environment.initial_environment
end
|
45
|
+
|
46
|
+
# Adds code to the given section.
|
47
|
+
#
|
48
|
+
# Examples:
|
49
|
+
# add :code, [:return, 0]
|
50
|
+
# add :data, [:align], [:label, :xyzzy], [:word, 42]
|
51
|
+
#
|
52
|
+
# This method implements the required functionality in terms
|
53
|
+
# of the following methods, which must be implemented by subclasses:
|
54
|
+
#
|
55
|
+
# - #align
|
56
|
+
# - #byte
|
57
|
+
# - #call
|
58
|
+
# - #end_if
|
59
|
+
# - #export
|
60
|
+
# - #begin_function
|
61
|
+
# - #ifelse
|
62
|
+
# - #ifeq
|
63
|
+
# - #ifge
|
64
|
+
# - #ifgt
|
65
|
+
# - #ifle
|
66
|
+
# - #iflt
|
67
|
+
# - #ifne
|
68
|
+
# - #import
|
69
|
+
# - #label
|
70
|
+
# - #let
|
71
|
+
# - #ret
|
72
|
+
# - #set
|
73
|
+
# - #set_byte
|
74
|
+
# - #set_word
|
75
|
+
# - #string
|
76
|
+
# - #word
|
77
|
+
#
|
78
|
+
def add(section, *code)
  in_section section do
    code.each do |action|
      keyword = action[0]
      operands = action[1..-1]
      case keyword
      when :function
        # operands[0] holds the formals, the rest is the function body
        begin_function(*operands[0])
        operands[1..-1].each { |statement| add section, statement }
        end_function
      when :ifeq, :ifge, :ifgt, :ifle, :iflt, :ifne
        # action is [keyword, [x, y], truebody, falsebody]
        condition, truebody, falsebody = action[1], action[2], action[3]
        send keyword, condition[0], condition[1]
        add section, *truebody
        if falsebody && !falsebody.empty?
          ifelse
          add section, *falsebody
        end
        end_if
      # Keywords whose method name differs from the keyword itself
      when :return then send :ret, *operands
      when :'set-word' then send :set_word, *operands
      when :'set-byte' then send :set_byte, *operands
      when :'tail-call' then send :tail_call, *operands
      else
        # Everything else maps directly onto a method of the same name
        send(*action)
      end
    end
  end
end
|
111
|
+
|
112
|
+
# Adds a function to the :functions section.
#
# Parameters:
# [formals] an Array of formal parameter names
# [code] an Array of actions to be used as the function's body
#
# Example:
#   add_function [:n], [:return, :add, :n, 1]
def add_function(formals, *code)
  function_action = [:function, formals].concat(code)
  add :functions, function_action
end
|
123
|
+
|
124
|
+
# Returns a new symbol obtained from Environment.gensym.
def gensym
  fresh_symbol = Environment.gensym
  fresh_symbol
end
|
128
|
+
|
129
|
+
# Appends +code+ to the contents of the current section (after resolving
# the current section name through its aliases).
def emit(code)
  buffer = @sections[real_section_name(@section)]
  buffer << code
end
|
133
|
+
|
134
|
+
# Get the real name of a section.
# Given a section name which may be an alias, this method follows the
# chain of aliases in @section_aliases and returns the name it ends at.
#
# If the chain runs into a name it has already visited, following it any
# further would loop forever; in that case the name reached at that point
# is returned.
def real_section_name name
  # Names visited so far, used to detect any cycle in the alias chain,
  # including one that does not lead back to the given name.
  visited = [name]
  loop do
    target = @section_aliases[name]
    break if target.nil? # Not an alias, exit loop and return name
    name = target
    break if visited.include?(name)
    visited << name
  end
  name
end
|
149
|
+
|
150
|
+
# Makes +name+ the current section and creates an empty buffer for the
# real section it refers to, unless one exists already.
def section=(name)
  target = real_section_name(name)
  @section = name
  @sections[target] = '' unless @sections.key?(target)
end
|
158
|
+
|
159
|
+
# Returns the current section. When +name+ is given, it first becomes
# the current section.
def section(name = nil)
  if name
    self.section = name
  end
  @section
end
|
163
|
+
|
164
|
+
# Records +alias_name+ as an alias for the section +original_name+.
def section_alias(alias_name, original_name)
  @section_aliases.store(alias_name, original_name)
end
|
168
|
+
|
169
|
+
# Runs the given block with +name+ as the current section and switches
# back to the previously current section afterwards, even when the block
# raises. Returns the value of the block.
def in_section(name, &block)
  previous_section = @section
  self.section = name
  begin
    yield
  ensure
    self.section = previous_section
  end
end
|
178
|
+
|
179
|
+
# Returns the canonical output file name for +input_name+: a trailing
# ".voo" is removed and ".o" is appended.
def output_file_name(input_name)
  stem = input_name.sub(/\.voo$/, '')
  "#{stem}.o"
end
|
184
|
+
|
185
|
+
# Keeps track of the symbols known at some point in the program, together
# with the number of arguments and local variables defined so far.
class Environment
  # Counter shared by all environments, used to number generated symbols
  @@gensym_counter = 0

  attr_reader :args, :locals, :symbols

  # Returns a new symbol of the form _G<number>, using the next number
  # of the shared counter.
  def self.gensym
    @@gensym_counter += 1
    :"_G#{@@gensym_counter}"
  end

  # Returns a new environment without a parent.
  def self.initial_environment
    Environment.new
  end

  # Creates an environment. When +parent+ is given, the new environment
  # starts with a copy of the parent's symbol table and with the parent's
  # argument and local variable counts; otherwise it starts out empty.
  def initialize(parent = nil)
    @parent = parent
    if parent
      @symbols = parent.symbols.dup
      @args = parent.args
      @locals = parent.locals
    else
      @symbols = {}
      @args = 0
      @locals = 0
    end
  end

  # Registers +symbol+ as the next argument.
  def add_arg(symbol)
    @symbols[symbol] = [:arg, @args]
    @args += 1
  end

  # Registers each of +symbols+ as an argument, in order.
  def add_args(symbols)
    symbols.each do |name|
      add_arg name
    end
  end

  # Registers +symbol+ as the next local variable.
  def add_local(symbol)
    @symbols[symbol] = [:local, @locals]
    @locals += 1
  end

  # Registers each of +symbols+ as a local variable, in order.
  def add_locals(symbols)
    symbols.each do |name|
      add_local name
    end
  end

  # Same as Environment.gensym.
  def gensym
    Environment.gensym
  end

  # Looks up +symbol+; returns [:arg, index], [:local, index], or nil
  # when the symbol is not known in this environment.
  def [](symbol)
    @symbols[symbol]
  end
end
|
236
|
+
|
237
|
+
end
|
238
|
+
end
|