rucc 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +55 -0
- data/.rspec +1 -0
- data/.travis.yml +5 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +46 -0
- data/LICENCE +21 -0
- data/README.md +82 -0
- data/Rakefile +2 -0
- data/Vagrantfile +10 -0
- data/bin/console +10 -0
- data/bin/rspec +2 -0
- data/bin/setup +8 -0
- data/exe/rucc +7 -0
- data/include/8cc.h +48 -0
- data/include/float.h +44 -0
- data/include/iso646.h +20 -0
- data/include/rucc.h +2 -0
- data/include/stdalign.h +11 -0
- data/include/stdarg.h +52 -0
- data/include/stdbool.h +11 -0
- data/include/stddef.h +15 -0
- data/include/stdnoreturn.h +8 -0
- data/lib/rucc.rb +8 -0
- data/lib/rucc/case.rb +22 -0
- data/lib/rucc/decl.rb +9 -0
- data/lib/rucc/enc.rb +9 -0
- data/lib/rucc/engine.rb +138 -0
- data/lib/rucc/file_io.rb +108 -0
- data/lib/rucc/file_io_list.rb +56 -0
- data/lib/rucc/gen.rb +1602 -0
- data/lib/rucc/int_evaluator.rb +114 -0
- data/lib/rucc/k.rb +73 -0
- data/lib/rucc/keyword.rb +17 -0
- data/lib/rucc/kind.rb +43 -0
- data/lib/rucc/label_gen.rb +13 -0
- data/lib/rucc/lexer.rb +40 -0
- data/lib/rucc/lexer/impl.rb +683 -0
- data/lib/rucc/lexer/preprocessor.rb +888 -0
- data/lib/rucc/lexer/preprocessor/cond_incl.rb +27 -0
- data/lib/rucc/lexer/preprocessor/constructor.rb +54 -0
- data/lib/rucc/lexer/preprocessor/pragma.rb +31 -0
- data/lib/rucc/lexer/preprocessor/special_macro.rb +110 -0
- data/lib/rucc/libc.rb +47 -0
- data/lib/rucc/m.rb +7 -0
- data/lib/rucc/macro.rb +24 -0
- data/lib/rucc/node.rb +530 -0
- data/lib/rucc/node/conv.rb +33 -0
- data/lib/rucc/op.rb +61 -0
- data/lib/rucc/operator.rb +13 -0
- data/lib/rucc/option.rb +30 -0
- data/lib/rucc/parser.rb +961 -0
- data/lib/rucc/parser/break.rb +18 -0
- data/lib/rucc/parser/builtin.rb +25 -0
- data/lib/rucc/parser/continue.rb +18 -0
- data/lib/rucc/parser/do.rb +33 -0
- data/lib/rucc/parser/ensure.rb +39 -0
- data/lib/rucc/parser/enum.rb +64 -0
- data/lib/rucc/parser/expr.rb +493 -0
- data/lib/rucc/parser/for.rb +71 -0
- data/lib/rucc/parser/func.rb +274 -0
- data/lib/rucc/parser/func_call.rb +54 -0
- data/lib/rucc/parser/goto.rb +29 -0
- data/lib/rucc/parser/if.rb +23 -0
- data/lib/rucc/parser/initializer.rb +237 -0
- data/lib/rucc/parser/label.rb +31 -0
- data/lib/rucc/parser/return.rb +16 -0
- data/lib/rucc/parser/struct_and_union.rb +280 -0
- data/lib/rucc/parser/switch.rb +117 -0
- data/lib/rucc/parser/while.rb +29 -0
- data/lib/rucc/pos.rb +11 -0
- data/lib/rucc/rmap.rb +22 -0
- data/lib/rucc/s.rb +9 -0
- data/lib/rucc/static_label_gen.rb +15 -0
- data/lib/rucc/t.rb +18 -0
- data/lib/rucc/tempname_gen.rb +14 -0
- data/lib/rucc/token.rb +114 -0
- data/lib/rucc/token_gen.rb +68 -0
- data/lib/rucc/type.rb +304 -0
- data/lib/rucc/type/check.rb +39 -0
- data/lib/rucc/type/conv.rb +29 -0
- data/lib/rucc/type_info.rb +21 -0
- data/lib/rucc/utf.rb +126 -0
- data/lib/rucc/util.rb +111 -0
- data/lib/rucc/version.rb +3 -0
- data/rucc.gemspec +38 -0
- metadata +201 -0
@@ -0,0 +1,114 @@
|
|
1
|
+
module Rucc
  # Compile-time evaluator for integer constant expressions.
  #
  # Every evaluation yields a pair [value, addr]: value is the Integer result
  # and addr is whatever Node.conv returned for a global-variable or
  # address-of node met during evaluation (nil when there was none).
  class IntEvaluator
    class << self
      # Evaluates node and normalizes the result to an Integer: the
      # true/false produced by comparison and logical operators become 1/0.
      #
      # @param [Node] node
      # @return [<Integer, (Node, NilClass)>]
      # @raise [RuntimeError] if node is not an integer constant expression
      def eval(node)
        value, addr = do_eval(node)

        int =
          case value
          when true    then 1
          when false   then 0
          when Integer then value
          else raise_error(node)
          end

        [int, addr]
      end

      private

      # Dispatches on the node kind. The value may still be true/false here;
      # eval takes care of converting it.
      #
      # @param [Node] node
      # @return [<Integer, (Node, NilClass)>]
      def do_eval(node)
        case node.kind
        when AST::LITERAL
          return [node.ival, nil] if Type.is_inttype(node.ty)
          raise_error(node)
        when '!'
          i, addr = self.eval(node.operand)
          [(i == 0) ? 1 : 0, addr]
        when '~'
          r, addr = self.eval(node.operand)
          [~r, addr]
        when OP::CAST then self.eval(node.operand)
        when AST::CONV then self.eval(node.operand)
        when AST::ADDR
          if node.operand.kind == AST::STRUCT_REF
            return eval_struct_ref(node.operand, 0)
          end
          [0, Node.conv(node)]
        when AST::GVAR
          [0, Node.conv(node)]
        when AST::DEREF
          if node.operand.ty.kind == Kind::PTR
            return self.eval(node.operand)
          end
          raise_error(node)
        when AST::TERNARY
          cond, addr = self.eval(node.cond)
          # cond is an Integer and 0 is truthy in Ruby, so the C-style
          # "if (cond)" has to be spelled as an explicit comparison.
          if cond != 0
            # "a ?: b" form: with no then-branch the condition is the result.
            return node.thn ? self.eval(node.thn) : [cond, addr]
          end
          self.eval(node.els)
        when '+' then eval_binary_expr(node, &:+)
        when '-' then eval_binary_expr(node, &:-)
        when '*' then eval_binary_expr(node, &:*)
        when '/' then eval_binary_expr(node, &:/)
        when '<' then eval_binary_expr(node, &:<)
        when '^' then eval_binary_expr(node, &:^)
        when '&' then eval_binary_expr(node, &:&)
        when '|' then eval_binary_expr(node, &:|)
        when '%' then eval_binary_expr(node, &:%)
        when OP::EQ then eval_binary_expr(node, &:==)
        when OP::GE then eval_binary_expr(node, &:>=)
        when OP::LE then eval_binary_expr(node, &:<=)
        when OP::NE then eval_binary_expr(node, &:!=)
        when OP::SAL then eval_binary_expr(node, &:<<)
        when OP::SAR then eval_binary_expr(node, &:>>)
        # NOTE(review): SHR is evaluated with the same >> as SAR — confirm
        # whether a distinct logical shift is needed.
        when OP::SHR then eval_binary_expr(node, &:>>)
        when OP::LOGAND then eval_binary_expr(node) { |a, b| (a != 0) && (b != 0) } # [Integer] a, [Integer] b
        when OP::LOGOR then eval_binary_expr(node) { |a, b| (a != 0) || (b != 0) } # [Integer] a, [Integer] b
        else
          raise_error(node)
        end
      end

      # @param [Node] node
      # @raise [RuntimeError] always
      def raise_error(node)
        raise "Integer expression expected, but got #{node}"
      end

      # Evaluates both operands and combines them with the given block.
      # When both sides carry an address, the right-hand one wins.
      #
      # @param [Node] node
      # @yieldparam [Integer] left
      # @yieldparam [Integer] right
      # @return [<Integer, (Node, NilClass)>]
      def eval_binary_expr(node)
        left, addr_l = self.eval(node.left)
        right, addr_r = self.eval(node.right)
        [yield(left, right), addr_r || addr_l]
      end

      # Walks a chain of struct member references, summing member offsets,
      # then adds the total to the value of the innermost expression.
      #
      # @param [Node] node
      # @param [Integer] offset
      # @return [<Integer, (Node, NilClass)>]
      def eval_struct_ref(node, offset)
        if node.kind == AST::STRUCT_REF
          return eval_struct_ref(node.struct, node.ty.offset + offset)
        end
        n, addr = self.eval(node)
        [n + offset, addr]
      end
    end
  end
end
|
data/lib/rucc/k.rb
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
require "rucc/keyword"
|
2
|
+
|
3
|
+
module Rucc
  # Registry of the keywords known to the lexer/parser.
  #
  # Each entry is exposed twice: as a constant (K::INT, K::WHILE, ...) and
  # through K.keywords, a Hash from the keyword's spelling to its Keyword.
  module K
    # @key [String] spelling
    # @value [Keyword]
    @keywords = {}

    class << self
      # @return [Hash{String => Keyword}]
      attr_reader :keywords

      private

      # Creates a Keyword, binds it to the constant +name+ and indexes it
      # by its spelling.
      #
      # @param [Symbol] name keyword name
      # @param [String] str String representation
      # @param [Boolean] is_type
      # @return [Keyword]
      def keyword(name, str, is_type)
        Keyword.new(str, is_type).tap do |kw|
          const_set(name, kw)
          @keywords[str] = kw
        end
      end
    end

    # [constant name, spelling, is_type]
    [
      [:ALIGNAS,       "_Alignas",       true],
      [:ALIGNOF,       "_Alignof",       false],
      [:AUTO,          "auto",           true],
      [:BOOL,          "_Bool",          true],
      [:BREAK,         "break",          false],
      [:CASE,          "case",           false],
      [:CHAR,          "char",           true],
      [:COMPLEX,       "_Complex",       true],
      [:CONST,         "const",          true],
      [:CONTINUE,      "continue",       false],
      [:DEFAULT,       "default",        false],
      [:DO,            "do",             false],
      [:DOUBLE,        "double",         true],
      [:ELSE,          "else",           false],
      [:ENUM,          "enum",           true],
      [:EXTERN,        "extern",         true],
      [:FLOAT,         "float",          true],
      [:FOR,           "for",            false],
      [:GENERIC,       "_Generic",       false],
      [:GOTO,          "goto",           false],
      [:IF,            "if",             false],
      [:IMAGINARY,     "_Imaginary",     true],
      [:INLINE,        "inline",         true],
      [:INT,           "int",            true],
      [:LONG,          "long",           true],
      [:NORETURN,      "_Noreturn",      true],
      [:REGISTER,      "register",       true],
      [:RESTRICT,      "restrict",       true],
      [:RETURN,        "return",         false],
      [:HASHHASH,      "##",             false],
      [:SHORT,         "short",          true],
      [:SIGNED,        "signed",         true],
      [:SIZEOF,        "sizeof",         false],
      [:STATIC,        "static",         true],
      [:STATIC_ASSERT, "_Static_assert", false],
      [:STRUCT,        "struct",         true],
      [:SWITCH,        "switch",         false],
      [:ELLIPSIS,      "...",            false],
      [:TYPEDEF,       "typedef",        true],
      [:TYPEOF,        "typeof",         true],
      [:UNION,         "union",          true],
      [:UNSIGNED,      "unsigned",       true],
      [:VOID,          "void",           true],
      [:VOLATILE,      "volatile",       true],
      [:WHILE,         "while",          false],
    ].each { |name, str, is_type| keyword(name, str, is_type) }
  end
end
|
data/lib/rucc/keyword.rb
ADDED
data/lib/rucc/kind.rb
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
module Rucc
  # Enumeration of C type kinds. Each kind is a constant on this class
  # (Kind::INT, Kind::PTR, ...) and kinds are ordered by their numeric id.
  class Kind
    include Comparable

    class << self
      # Defines a new kind and exposes it as a constant of this class.
      #
      # @param [Symbol] name constant name
      # @param [Integer] id ordering key
      # @return [Kind]
      def def_kind(name, id)
        const_set(name, new(id, name))
      end
    end

    # @return [Integer] used only for <=>
    attr_reader :id

    # @param [Integer] id
    # @param [Symbol] name
    def initialize(id, name)
      @id = id
      @name = name
    end

    # Orders kinds by id. Returns nil for anything that is not a Kind so
    # that Comparable#== answers false (and the ordering operators raise
    # ArgumentError) instead of failing with NoMethodError on other.id.
    #
    # @param [Object] other
    # @return [Integer, NilClass]
    def <=>(other)
      return nil unless other.is_a?(Kind)

      @id <=> other.id
    end

    # @return [String] the constant name this kind was defined with
    def to_s
      @name.to_s
    end

    def_kind :VOID,    0
    def_kind :BOOL,    1
    def_kind :CHAR,    2
    def_kind :SHORT,   3
    def_kind :INT,     4
    def_kind :LONG,    5
    def_kind :LLONG,   6
    def_kind :FLOAT,   7
    def_kind :DOUBLE,  8
    def_kind :LDOUBLE, 9
    def_kind :ARRAY,   10
    def_kind :ENUM,    11
    def_kind :PTR,     12
    def_kind :STRUCT,  13
    def_kind :FUNC,    14
    # used only in parser
    def_kind :STUB,    15
  end
end
|
data/lib/rucc/lexer.rb
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
require "rucc/libc"
|
2
|
+
require "rucc/token"
|
3
|
+
require "rucc/util"
|
4
|
+
|
5
|
+
require "rucc/lexer/impl"
|
6
|
+
require "rucc/lexer/preprocessor"
|
7
|
+
|
8
|
+
require "rucc/file_io"
|
9
|
+
require "rucc/file_io_list"
|
10
|
+
|
11
|
+
module Rucc
  # Front door of the lexing pipeline. It wires a file stack, the raw
  # lexer (Impl) and the Preprocessor together and forwards the public
  # token API to whichever of them implements it.
  class Lexer
    extend Forwardable

    # TODO(south37) Impl file management as input
    # @param [IO] input buffered io of C sourcecode.
    # @param [String] filename
    def initialize(input, filename)
      root = FileIO.new(input, filename)
      @files = FileIOList.new(root)
      @impl = Impl.new(@files)
      @preprocessor = Preprocessor.new(@impl)
    end

    # Token-level API, answered by the preprocessor.
    def_delegators :@preprocessor,
                   :read_token,
                   :unget_token,
                   :peek_token,
                   :append_include_path,
                   :expr_reader=

    # Input stream stashing, answered by the file stack.
    def_delegators :@files,
                   :stream_stash,
                   :stream_unstash

    # NOTE: Used only for debug
    def_delegator :@impl, :lex
  end
end
|
@@ -0,0 +1,683 @@
|
|
1
|
+
require "forwardable"
|
2
|
+
|
3
|
+
require "rucc/file_io"
|
4
|
+
require "rucc/token_gen"
|
5
|
+
require "rucc/pos"
|
6
|
+
|
7
|
+
module Rucc
|
8
|
+
class Lexer
|
9
|
+
class Impl
|
10
|
+
extend Forwardable
|
11
|
+
|
12
|
+
# Builds the lexer state on top of the given file list.
#
# @param [FileIOList] files
def initialize(files)
  @files     = files
  @infile    = files.first
  @token_gen = TokenGen.new(files)
  # Stack of token buffers used to implement unget/peek; further buffers
  # are pushed by token_buffer_stash.
  @buffers   = [[]]
end
|
20
|
+
attr_reader :infile
|
21
|
+
delegate [:stream_depth] => :@files
|
22
|
+
|
23
|
+
# Pushes a token back onto the active buffer so that the next lex call
# returns it. EOF tokens are never pushed back.
#
# @param [Token] tok
def unget_token(tok)
  @buffers.last.push(tok) unless tok.kind == T::EOF
end
|
29
|
+
|
30
|
+
# Pushes back a whole sequence of tokens. They are pushed last-to-first,
# so the first element of +tokens+ ends up on top of the buffer.
#
# @param [<Token>] tokens
def unget_all(tokens)
  reversed = tokens.reverse
  reversed.each { |tok| unget_token(tok) }
end
|
34
|
+
|
35
|
+
# Returns the next token.
#
# Tokens that were pushed back are served first. While a stashed buffer
# is active (more than one buffer on the stack), running out of tokens
# yields EOF instead of reading from the file. Otherwise a token is read
# from the input; space tokens are folded into the +space+ flag of the
# token that follows them, and +bol+ records whether the read started at
# column 1.
#
# @return [Token]
def lex
  pending = @buffers.last
  return pending.pop unless pending.size == 0
  return Token::EOF_TOKEN if @buffers.size > 1

  at_bol = (current_file.column == 1)
  tok = do_read_token
  while tok.kind == T::SPACE
    tok = do_read_token
    tok.space = true
  end
  tok.bol = at_bol
  tok
end
|
65
|
+
|
66
|
+
# Reads a single token out of a string.
# The main input stream is temporarily replaced by the string; it is
# restored once exactly one token (optionally followed by a newline) has
# been consumed.
#
# @param [String] s
# @return [Token]
# @raise [RuntimeError] if the string holds more than one token
def lex_string(s)
  @files.stream_stash([FileIO.new(StringIO.new(s), "-")])
  tok = do_read_token
  next?("\n")
  pos = get_pos(0)
  # Anything left before EOF means s was not a single token.
  raise "#{pos}: unconsumed input: #{s}" unless peek == nil
  @files.stream_unstash
  tok
end
|
84
|
+
|
85
|
+
# Reads a header file name for #include.
#
# Filenames after #include need a special tokenization treatment.
# A filename string may be quoted by < and > instead of "".
# Even if it's quoted by "", it's still different from a regular string token.
# For example, \ in this context is not interpreted as a quote.
# Thus, we cannot use lex() to read a filename.
#
# That the C preprocessor requires a special lexer behavior only for
# #include is a violation of layering. Ideally, the lexer should be
# agnostic about higher layers status. But we need this for the C grammar.
#
# Returns [name, std] where std is true for <...> and false for "...".
# Returns [nil, nil] when tokens are pending in the buffer or when the
# next character opens neither form.
#
# @return [<String, Boolean>, <NilClass, NilClass>]
# @raise [RuntimeError] if the name is empty or is cut short by a newline / EOF
def read_header_file_name
  return nil, nil unless buffer_empty?

  skip_space!
  pos = get_pos(0)
  if next?('"')
    std = false
    close = '"'
  elsif next?('<')
    std = true
    close = '>'
  else
    return nil, nil
  end

  name = ""
  until next?(close)
    c = readc
    # Double quotes matter here: '\n' would be a backslash followed by "n".
    raise "#{pos}: premature end of header name" if c.nil? || c == "\n"
    name << c
  end
  raise "#{pos}: header name should not be empty" if name.size == 0

  [name, std]
end
|
131
|
+
|
132
|
+
# Hands +file+ to @files via its push method.
#
# @param [FileIO] file
def push_file(file)
  @files.push(file)
end
|
136
|
+
|
137
|
+
# Temporarily switches the input token stream to given list of tokens,
# so that you can get the tokens as return values of lex() again.
# After the tokens are exhausted, EOF is returned from lex() until
# "unstash" is called to restore the original state.
#
# lex pops from the end of the active buffer, so the last element of
# +buf+ is the first token handed out.
#
# @param [<Token>] buf
def token_buffer_stash(buf)
  @buffers.push(buf)
end
|
146
|
+
|
147
|
+
# Drops the most recently stashed token buffer, making the previous one
# active again.
#
# @return [<Token>] the buffer that was removed
def token_buffer_unstash
  @buffers.pop
end
|
150
|
+
|
151
|
+
# Skips a block of code excluded from input by #if, #ifdef and the like.
# C11 6.10 says that code within #if and #endif needs to be a sequence of
# valid tokens even if skipped. However, in reality, most compilers don't
# tokenize nor validate contents. We don't do that, too.
# This function is to skip code until matching #endif as fast as we can.
#
# On return the matching #else / #elif / #endif has been pushed back
# (as a '#' keyword token followed by the identifier) so the caller can
# read it as a normal directive. Returns silently at EOF.
def skip_cond_incl!
  depth = 0
  while true
    at_bol = current_file.column == 1
    skip_space!
    c = readc
    return if c.nil? # EOF

    # Character and string literals are skipped wholesale so that a '#'
    # inside them is not mistaken for a directive.
    case c
    when "'"
      skip_char!
      next
    when '"'
      skip_string!
      next
    end
    next unless c == '#' && at_bol

    column = current_file.column - 1
    tok = lex
    next unless tok.kind == T::IDENT

    if depth == 0 && %w[else elif endif].any? { |name| Token.is_ident?(tok, name) }
      unget_token(tok)
      hash = @token_gen.make_keyword('#')
      hash.bol = true
      hash.column = column
      unget_token(hash)
      return
    end

    # Track nested conditionals so that their #endif is not taken as ours.
    if %w[if ifdef ifndef].any? { |name| Token.is_ident?(tok, name) }
      depth += 1
    elsif depth > 0 && Token.is_ident?(tok, "endif")
      depth -= 1
    end
    skip_line!
  end
end
|
197
|
+
|
198
|
+
# The file currently being read, as reported by @files.current.
#
# @return [FileIO]
def current_file
  @files.current
end
|
202
|
+
|
203
|
+
private
|
204
|
+
|
205
|
+
# Reads one character through @files. Callers in this class treat a nil
# result as end of input.
#
# @return [Char, NilClass]
def readc
  @files.readc
end
|
209
|
+
|
210
|
+
# Pushes a character back through @files.
# NOTE(review): peek and next? pass nil here at end of input — presumably
# @files.unreadc accepts that; confirm against FileIOList.
#
# @param [Char, NilClass] c
def unreadc(c)
  @files.unreadc(c)
end
|
214
|
+
|
215
|
+
# Stores the current position (line and column of the current file) in
# the token generator.
def mark!
  @token_gen.pos = get_pos(0)
end
|
219
|
+
|
220
|
+
# Builds a Pos from the current file's line and its column shifted by
# +delta+.
#
# @param [Integer] delta offset added to the current column
# @return [Pos]
def get_pos(delta)
  Pos.new(current_file.line, current_file.column + delta)
end
|
225
|
+
|
226
|
+
# True when no buffer is stashed and the base buffer holds no pushed-back
# tokens.
#
# @return [Boolean]
def buffer_empty?
  return false unless @buffers.size == 1

  @buffers.first.size == 0
end
|
230
|
+
|
231
|
+
# Tells whether +c+ is a space, tab, form feed or vertical tab.
# Newline is deliberately not part of the set.
#
# @param [Char] c
# @return [Boolean]
def iswhitespace(c)
  [' ', "\t", "\f", "\v"].include?(c)
end
|
236
|
+
|
237
|
+
# Consumes input up to and including the closing "*/" of a block comment.
# The opening "/*" must already have been read.
#
# @raise [RuntimeError] if the input ends before the comment is closed
def skip_block_comment!
  # TODO(south37) Report the position of the comment start when necessary
  after_star = false
  while true
    c = readc
    raise "premature end of block comment" if c.nil?
    return if after_star && c == '/'

    after_star = (c == '*')
  end
end
|
253
|
+
|
254
|
+
# Discards the rest of the current line. The terminating newline itself
# is pushed back so that the caller still sees it; stops quietly at EOF.
def skip_line!
  while (c = readc)
    next unless c == "\n"

    unreadc(c)
    return
  end
end
|
265
|
+
|
266
|
+
# Skips a run of whitespace and comments.
#
# @return [Boolean] true if at least one space or comment was skipped
def skip_space!
  return false unless do_skip_space!

  # Keep going until something that is neither space nor comment shows up.
  nil while do_skip_space!
  true
end
|
277
|
+
|
278
|
+
# Skips exactly one unit of blank input: a single whitespace character,
# one block comment, or one line comment. If the next character starts
# none of those it is pushed back.
#
# @return [Boolean] true if something was skipped
def do_skip_space!
  c = readc
  return false if c.nil? # EOF
  return true if iswhitespace(c)

  if c == '/'
    if next?('*')
      skip_block_comment!
      return true
    elsif next?('/')
      skip_line!
      return true
    end
  end

  unreadc(c)
  false
end
|
300
|
+
|
301
|
+
# Skips the remainder of a character literal whose opening quote has
# already been read: an optional backslash escape, then everything up to
# the closing quote (or EOF).
def skip_char!
  readc if readc == '\\'
  c = readc
  c = readc until c.nil? || c == "'"
end
|
310
|
+
|
311
|
+
# Skips the remainder of a string literal whose opening quote has already
# been read. A backslash hides the character after it, so an escaped
# quote does not end the literal. Stops at EOF.
def skip_string!
  c = readc
  until c.nil? || c == '"'
    readc if c == '\\'
    c = readc
  end
end
|
320
|
+
|
321
|
+
# Consumes the next character only if it equals +expect+; otherwise the
# character that was read is pushed back.
#
# @param [Char] expect
# @return [Boolean] true if the character matched and was consumed
def next?(expect)
  c = readc
  if c == expect
    true
  else
    unreadc(c)
    false
  end
end
|
329
|
+
|
330
|
+
# Builds a keyword token for a one- or two-character operator: +t+ when
# the next character is +expect+ (which is then consumed), +els+ otherwise.
#
# @param [Char] expect
# @param [OP] t
# @param [Char] els
# @return [Token]
def read_rep(expect, t, els)
  kind = next?(expect) ? t : els
  @token_gen.make_keyword(kind)
end
|
337
|
+
|
338
|
+
# Like read_rep, but with two candidate follow-up characters that are
# tried in order.
#
# @param [Char] expect1
# @param [OP] t1
# @param [Char] expect2
# @param [OP] t2
# @param [Char] els
# @return [Token]
def read_rep2(expect1, t1, expect2, t2, els)
  kind =
    if next?(expect1) then t1
    elsif next?(expect2) then t2
    else els
    end
  @token_gen.make_keyword(kind)
end
|
349
|
+
|
350
|
+
# Reads a digraph starting with '%'. Digraphs are alternative spellings
# for some punctuation characters. They are useless in ASCII.
# We implement this just for the standard compliance.
# See C11 6.4.6p3 for the spec.
#
# The leading '%' has already been consumed. Recognized forms:
# "%>" is '}', "%:" is '#', "%:%:" is "##".
#
# @return [Token, NilClass] nil if what follows '%' is not a digraph
def read_hash_digraph
  return @token_gen.make_keyword('}') if next?('>')
  return nil unless next?(':')

  if next?('%')
    return @token_gen.make_keyword(K::HASHHASH) if next?(':')

    # Just "%:" followed by an unrelated '%': give the '%' back.
    unreadc('%')
  end
  @token_gen.make_keyword('#')
end
|
371
|
+
|
372
|
+
# Reads the rest of an identifier whose first character is +c+.
#
# Alphanumerics (per Libc.isalnum), '_', '$' and characters with the
# high bit set continue the identifier.
#
# @param [Char] c first character of the identifier
# @return [Token]
def read_ident(c)
  name = c.dup
  while true
    c = readc
    if c && (Libc.isalnum(c) || ((c.ord & 0x80) > 0) || (c == '_') || (c == '$'))
      name << c
    elsif c && (c == '\\' && (peek == 'u' || peek == 'U'))
      # C11 6.4.2.1: \u or \U characters (universal-character-name)
      # are allowed to be part of identifiers.
      UTF.write_utf8(name, read_escaped_char)
    else
      unreadc(c)
      return @token_gen.make_ident(name)
    end
  end
end
|
392
|
+
|
393
|
+
# Reads the body of a character literal; the opening quote has already
# been consumed.
#
# @param [ENC] enc
# @return [Token]
# @raise [RuntimeError] if the literal is not closed by a single quote,
#   including when the input ends right after the opening quote
def read_char(enc)
  c = readc
  # Without this guard c.ord below would fail with NoMethodError at EOF.
  raise "unterminated char" if c.nil?

  r = (c == '\\') ? read_escaped_char : c.ord
  raise "unterminated char" unless readc == "'"

  # NOTE: Only lower 8 bit has meaning
  r &= 0xFF if enc == ENC::NONE
  @token_gen.make_char(r, enc)
end
|
409
|
+
|
410
|
+
# Reads the body of a string literal; the opening double quote has
# already been consumed.
#
# @param [ENC] enc
# @return [Token]
# @raise [RuntimeError] if the input ends before the closing quote
def read_string(enc)
  buf = ""
  while true
    c = readc
    # TODO(south37) Impl errorp if necessary
    raise "unterminated string" if c.nil?
    break if c == '"'

    if c != '\\'
      buf << c
      next
    end

    # Just after backslash escape: look ahead before consuming it, since
    # \u / \U results are written out as UTF-8 rather than appended as-is.
    isucs = (peek == 'u' || peek == 'U')
    code = read_escaped_char
    if isucs
      UTF.write_utf8(buf, code)
    else
      buf << code
    end
  end
  @token_gen.make_strtok(buf, enc)
end
|
439
|
+
|
440
|
+
# Reads the part of an escape sequence that follows the backslash and
# returns the character code it denotes.
#
# Quote, double quote, '?', backslash and any unrecognized escape
# character stand for themselves.
#
# @return [Integer]
# @raise [RuntimeError] if the input ends right after the backslash
def read_escaped_char
  c = readc
  # Without this guard c.ord below would fail with NoMethodError at EOF.
  raise "premature end of input" if c.nil?

  case c
  when 'a' then "\a".ord
  when 'b' then "\b".ord
  when 'f' then "\f".ord
  when 'n' then "\n".ord
  when 'r' then "\r".ord
  when 't' then "\t".ord
  when 'v' then "\v".ord
  when 'e' then "\e".ord # '\e' is GNU extension
  when 'x' then read_hex_char
  when 'u' then read_universal_char(4)
  when 'U' then read_universal_char(8)
  when *'0'..'7' then read_octal_char(c)
  else
    # TODO(south37) Warn about unknown escape characters when necessary
    c.ord
  end
end
|
477
|
+
|
478
|
+
# Reads a number literal. Lexer's grammar on numbers is not strict.
# Integers and floating point numbers and different base numbers are not distinguished.
#
# Digits, letters and '.' continue the literal, and so does a sign that
# directly follows an exponent marker (e, E, p, P). End of input ends the
# literal like any other non-number character.
#
# @param [Char] c first character of the literal
# @return [Token]
def read_number(c)
  b = c.dup
  last = c
  while true
    c = readc
    # Check for EOF (nil) first: "+-".include?(nil) raises TypeError, which
    # used to blow up on input ending in e.g. "0xE".
    continues = !c.nil? && (
      Libc.isdigit(c) || Libc.isalpha(c) || c == '.' ||
      ("eEpP".include?(last) && "+-".include?(c))
    )
    unless continues
      unreadc(c)
      return @token_gen.make_number(b)
    end
    b << c
    last = c
  end
end
|
496
|
+
|
497
|
+
# Reads a \x escape sequence. The "\x" itself has already been consumed;
# every hexadecimal digit that follows is folded into the result, and the
# first character that is not one is pushed back.
#
# @return [Integer]
# @raise [RuntimeError] if no hexadecimal digit follows "\x"
def read_hex_char
  pos = get_pos(-2)
  c = readc
  unless Libc.isxdigit(c)
    raise "#{pos}: \\x is not followed by a hexadecimal character: #{c}"
  end

  value = 0
  while true
    digit =
      case c
      when '0' .. '9' then c.ord - '0'.ord
      when 'a' .. 'f' then c.ord - 'a'.ord + 10
      when 'A' .. 'F' then c.ord - 'A'.ord + 10
      end
    if digit.nil?
      unreadc(c)
      return value
    end
    value = (value << 4) | digit
    c = readc
  end
end
|
520
|
+
|
521
|
+
# Reads \u or \U escape sequences. len is 4 or 8, respectively.
# The "\u" / "\U" itself has already been consumed; exactly +len+
# hexadecimal digits are read.
#
# @param [Integer] len
# @return [Integer] the code point
# @raise [RuntimeError] if a non-hexadecimal character (or EOF) is met, or
#   if the code point is rejected by is_valid_ucn
def read_universal_char(len)
  pos = get_pos(-2)
  r = 0
  len.times do
    c = readc
    digit =
      case c
      when *'0'..'9' then c.ord - '0'.ord
      when *'a'..'f' then c.ord - 'a'.ord + 10
      when *'A'..'F' then c.ord - 'A'.ord + 10
      else raise "#{pos}: invalid universal character: #{c}"
      end
    r = (r << 4) | digit
  end
  unless is_valid_ucn(r)
    # Print the code point in hexadecimal, zero-padded to len digits, i.e.
    # the way it was spelled in the source.
    raise "#{pos}: invalid universal character: \\#{(len == 4) ? 'u' : 'U'}#{format("%0#{len}x", r)}"
  end
  r
end
|
545
|
+
|
546
|
+
# Checks whether a code point may be written as a universal character
# name (\u / \U).
#
# @param [Integer] c code point
# @return [Boolean]
def is_valid_ucn(c)
  # C11 6.4.3p2: U+D800 to U+DFFF are reserved for surrogate pairs.
  # A codepoint within the range cannot be a valid character.
  return false if c >= 0xD800 && c <= 0xDFFF

  # It's not allowed to encode ASCII characters using \U or \u.
  # Some characters not in the basic character set (C11 5.2.1p3)
  # are allowed as exceptions.
  c >= 0xA0 || ['$'.ord, '@'.ord, '`'.ord].include?(c)
end
|
559
|
+
|
560
|
+
# Reads an octal escape sequence of one to three digits. +c+ is the first
# digit, already consumed; up to two more octal digits are taken from the
# input.
#
# @param [Char] c
# @return [Integer]
def read_octal_char(c)
  r = c.ord - '0'.ord
  2.times do
    break unless nextoct?

    r = (r << 3) | (readc.ord - '0'.ord)
  end
  r
end
|
577
|
+
|
578
|
+
# Tells whether the next character (looked at via peek, not consumed) is
# an octal digit.
#
# @return [Boolean]
def nextoct?
  ('0'..'7').include?(peek)
end
|
582
|
+
|
583
|
+
# Reads one raw token from the input.
#
# A run of whitespace/comments is reported as a single SPACE token.
# Otherwise the position is recorded with mark! and the token is chosen
# from its first character, looking ahead where needed for
# multi-character operators, digraphs and prefixed literals.
#
# @return [Token]
def do_read_token
  return Token::SPACE_TOKEN if skip_space!

  mark!
  c = readc
  case c
  when "\n" then Token::NEWLINE_TOKEN
  when ':'  then @token_gen.make_keyword(next?('>') ? ']' : ':')
  when '#'  then @token_gen.make_keyword(next?('#') ? K::HASHHASH : '#')
  when '+'  then read_rep2('+', OP::INC, '=', OP::A_ADD, '+')
  when '*'  then read_rep('=', OP::A_MUL, '*')
  when '='  then read_rep('=', OP::EQ, '=')
  when '!'  then read_rep('=', OP::NE, '!')
  when '&'  then read_rep2('&', OP::LOGAND, '=', OP::A_AND, '&')
  when '|'  then read_rep2('|', OP::LOGOR, '=', OP::A_OR, '|')
  when '^'  then read_rep('=', OP::A_XOR, '^')
  when '"'  then read_string(ENC::NONE)
  when '\'' then read_char(ENC::NONE)
  when '/'  then @token_gen.make_keyword(next?('=') ? OP::A_DIV : '/')
  # 'L', 'U' and 'u' are left out here: they may start a prefixed literal.
  when *'a'..'t', *'v'..'z', *'A'..'K', *'M'..'T', *'V'..'Z', '_', '$', *(0x80.chr..0xFD.chr)
    read_ident(c)
  when *'0'..'9'
    read_number(c)
  when 'L', 'U'
    # NOTE: Wide/char32_t character/string literal
    enc = (c == 'L') ? ENC::WCHAR : ENC::CHAR32
    if next?('"')
      read_string(enc)
    elsif next?('\'')
      read_char(enc)
    else
      read_ident(c)
    end
  when 'u'
    return read_string(ENC::CHAR16) if next?('"')
    return read_char(ENC::CHAR16) if next?('\'')

    # C11 6.4.5: UTF-8 string literal
    if next?('8')
      return read_string(ENC::UTF8) if next?('"')

      unreadc('8')
    end
    read_ident(c)
  when '.'
    return read_number(c) if Libc.isdigit(peek)
    return @token_gen.make_keyword('.') unless next?('.')

    # Two dots so far: a third makes "...", otherwise ".." is handed out
    # as an identifier token.
    if next?('.')
      @token_gen.make_keyword(K::ELLIPSIS)
    else
      @token_gen.make_ident('..')
    end
  when '(', ')', ',', ';', '[', ']', '{', '}', '?', '~'
    @token_gen.make_keyword(c)
  when '-'
    kind =
      if next?('-') then OP::DEC
      elsif next?('>') then OP::ARROW
      elsif next?('=') then OP::A_SUB
      else '-'
      end
    @token_gen.make_keyword(kind)
  when '<'
    return read_rep('=', OP::A_SAL, OP::SAL) if next?('<')
    return @token_gen.make_keyword(OP::LE) if next?('=')
    # Digraphs "<:" and "<%"
    return @token_gen.make_keyword('[') if next?(':')
    return @token_gen.make_keyword('{') if next?('%')

    @token_gen.make_keyword('<')
  when '>'
    if next?('=')
      @token_gen.make_keyword(OP::GE)
    elsif next?('>')
      read_rep('=', OP::A_SAR, OP::SAR)
    else
      @token_gen.make_keyword('>')
    end
  when '%'
    read_hash_digraph || read_rep('=', OP::A_MOD, '%')
  when nil
    Token::EOF_TOKEN
  else
    @token_gen.make_invalid(c.ord)
  end
end
|
674
|
+
|
675
|
+
# Looks at the next character without consuming it: the character is
# read and immediately pushed back.
#
# @return [Char, NilClass] nil when readc returned nil
def peek
  readc.tap { |c| unreadc(c) }
end
|
681
|
+
end
|
682
|
+
end
|
683
|
+
end
|