eden 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +4 -0
- data/LICENSE +20 -0
- data/README.md +48 -0
- data/Rakefile +10 -0
- data/bin/eden +132 -0
- data/lib/eden.rb +10 -0
- data/lib/eden/defaults.rb +26 -0
- data/lib/eden/formatter.rb +25 -0
- data/lib/eden/formatters/block_formatter.rb +45 -0
- data/lib/eden/formatters/indenter.rb +91 -0
- data/lib/eden/formatters/white_space_cleaner.rb +14 -0
- data/lib/eden/line.rb +65 -0
- data/lib/eden/source_file.rb +32 -0
- data/lib/eden/token.rb +62 -0
- data/lib/eden/tokenizer.rb +259 -0
- data/lib/eden/tokenizers/basic_tokenizer.rb +167 -0
- data/lib/eden/tokenizers/delimited_literal_tokenizer.rb +38 -0
- data/lib/eden/tokenizers/number_tokenizer.rb +68 -0
- data/lib/eden/tokenizers/operator_tokenizer.rb +211 -0
- data/lib/eden/tokenizers/regex_tokenizer.rb +37 -0
- data/lib/eden/tokenizers/string_tokenizer.rb +149 -0
- data/test/array_literal_tokenization_test.rb +43 -0
- data/test/basic_tokenization_test.rb +29 -0
- data/test/block_formatter_test.rb +47 -0
- data/test/class_var_token_test.rb +21 -0
- data/test/identifier_token_test.rb +140 -0
- data/test/indenter_test.rb +314 -0
- data/test/instance_var_token_test.rb +48 -0
- data/test/number_tokenization_test.rb +83 -0
- data/test/operator_tokenization_test.rb +180 -0
- data/test/regex_tokenization_test.rb +68 -0
- data/test/single_character_tokenization_test.rb +87 -0
- data/test/string_tokenization_test.rb +291 -0
- data/test/symbol_tokenization_test.rb +64 -0
- data/test/test_helper.rb +13 -0
- data/test/white_space_cleaner_test.rb +35 -0
- data/test/whitespace_token_test.rb +63 -0
- metadata +108 -0
@@ -0,0 +1,38 @@
|
|
1
|
+
module Eden
  # Mixin that tokenizes percent-delimited literals such as %w( ), %q{ },
  # %r[ ] and the bare %( ) form. It relies on the including tokenizer for
  # #advance, #cchar, #tokenize_expanded_string and
  # #tokenize_non_expanded_string.
  module DelimitedLiteralTokenizer
    # Consumes the leading % (and the type letter, when one is present), sets
    # @state to the token type being read and hands the opening delimiter to
    # the matching string tokenizer. Returns whatever that tokenizer returns.
    #
    # Raises a RuntimeError when % is followed by an alphanumeric character
    # that is not one of q, Q, s, w, W, r or x.
    def tokenize_delimited_literal
      advance # Step over the %

      if /[^A-Za-z0-9]/.match( cchar )
        # No type letter: a bare %( ) is handled like %Q( )
        literal_kind = 'Q'
        @state = :double_q_string
      elsif /[qQswWrx]/.match( cchar )
        literal_kind = cchar
        @state = infer_delimited_literal_type
        advance # Step over the type letter; cchar is now the opening delimiter
      else
        raise "Invalid delimiter character"
      end

      # %s, %q and %w are read without looking for interpolation; every other
      # kind goes through the expanded-string tokenizer.
      return tokenize_non_expanded_string( cchar ) if ['s', 'q', 'w'].include?( literal_kind )

      tokenize_expanded_string( cchar )
    end

    # Maps the type letter under the cursor to the token type it introduces.
    # Returns nil when the current character is not a known type letter.
    def infer_delimited_literal_type
      { 's' => :symbol,
        'w' => :array_literal,
        'W' => :array_literal,
        'q' => :single_q_string,
        'Q' => :double_q_string,
        'r' => :regex,
        'x' => :backquote_string }[cchar]
    end
  end
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
module Eden
  # Mixin that tokenizes numeric literals. It relies on the including
  # tokenizer for #advance, #cchar, #peek_ahead_for (tests the character after
  # the cursor) and #capture_token, and on @state holding the literal type
  # when #tokenize_integer_literal is entered.
  module NumberTokenizer

    # Tokenize a non-decimal integer literal - e.g. 0x4F2E, 0b1101, 0o17, 017.
    # Underscore digit separators (0xFF_FF) are part of the literal.
    # The token type is taken from @state (:bin_literal, :oct_literal or
    # :hex_literal).
    def tokenize_integer_literal
      @expr_state = :end
      if peek_ahead_for(/[_oObBxX]/)
        advance(2) # Pass 0x / 0b / 0O / 0_
      else
        advance # Pass 0 for Octal digits
      end
      pattern = {:bin_literal => /[01_]/,
                 :oct_literal => /[0-7_]/,
                 :hex_literal => /[0-9a-fA-F_]/}[@state]
      advance until( pattern.match( cchar ).nil? )
      capture_token( @state )
    end

    # Tokenize a literal that starts out as a decimal integer. Depending on
    # what follows the integer part the result is a :dec_literal, a
    # :float_literal (via #tokenize_float_literal) or an :exp_literal (via
    # #tokenize_exponent_literal).
    #
    # A '.' only starts a fraction when a digit follows it, so that method
    # calls and ranges on integers (1.upto(5), 1..10) leave the '.' for the
    # next token instead of producing a bogus "1." float.
    def tokenize_decimal_literal
      @expr_state = :end
      # Capture the sign
      advance if cchar == '+' || cchar == '-'

      # Handle a lone zero (possibly followed by a fraction, as in 0.5)
      if cchar == '0' && !peek_ahead_for(/[dD]/)
        advance
        return tokenize_float_literal if decimal_point_before_digit?
        return capture_token( :dec_literal )
      end

      # Handle 0d1234 digits
      advance(2) if cchar == '0' && peek_ahead_for(/[dD]/)

      until( /[0-9_.eE]/.match( cchar ).nil? )
        case cchar
        when '.'
          # Not a fraction: stop here and emit the integer part only
          break unless decimal_point_before_digit?
          return tokenize_float_literal
        when 'e', 'E'
          return tokenize_exponent_literal
        else
          advance # A digit or an underscore separator
        end
      end
      capture_token( :dec_literal )
    end

    # Tokenize a literal with an exponent - e.g. 3.4E+22
    # Entered with the cursor on the e/E; everything consumed so far is kept
    # in the resulting :exp_literal token.
    def tokenize_exponent_literal
      advance # Pass the e/E
      advance if cchar == '+' || cchar == '-'
      advance until( /[0-9_]/.match( cchar ).nil? )
      capture_token( :exp_literal )
    end

    # Tokenize a float literal - e.g. 2.0, 2.1101
    # Entered with the cursor on the decimal point. Hands over to
    # #tokenize_exponent_literal when an e/E is met in the fraction.
    def tokenize_float_literal
      advance # Pass the .

      until( /[0-9_eE]/.match( cchar ).nil? )
        return tokenize_exponent_literal if cchar == 'e' || cchar == 'E'
        advance
      end
      capture_token( :float_literal )
    end

    private

    # True when the cursor is on a '.' that is immediately followed by a
    # digit, i.e. the '.' is a decimal point rather than a method call or the
    # start of a range operator.
    def decimal_point_before_digit?
      cchar == '.' && peek_ahead_for(/[0-9]/)
    end
  end
end
|
@@ -0,0 +1,211 @@
|
|
1
|
+
module Eden
  # Mixin that tokenizes operators. Each method first steps over the
  # operator's leading character, then inspects the following character(s)
  # to decide which operator token to capture, updating @expr_state on the
  # way. It relies on the including tokenizer for #advance, #cchar,
  # #capture_token, #default_expr_state_transition! and
  # #tokenize_heredoc_delimiter.
  module OperatorTokenizer
    # =  ==  ===  =~  =>
    def tokenize_equals_operators
      advance
      default_expr_state_transition!
      follower = cchar
      if follower == '>'
        advance && capture_token(:hash_rocket)
      elsif follower == '~'
        advance && capture_token(:matches)
      elsif follower == '='
        advance
        if cchar == '='
          advance && capture_token(:identity_equality)
        else
          capture_token(:equality)
        end
      else
        capture_token(:equals)
      end
    end

    # !  !=  !~
    def tokenize_bang_operators
      @expr_state = :beg
      advance
      two_char_type = { '=' => :not_equals, '~' => :not_matches }[cchar]
      return capture_token(:logical_not) unless two_char_type

      advance && capture_token(two_char_type)
    end

    # +  +=  +@
    def tokenize_plus_operators
      advance
      follower = cchar
      if follower == '@'
        @expr_state = :arg if [:fname, :dot].include?(@expr_state)
        return advance && capture_token(:plus_at)
      end

      @expr_state = :beg
      if follower == '='
        advance && capture_token(:plus_equals)
      else
        capture_token(:plus)
      end
    end

    # -  -=  -@
    def tokenize_minus_operators
      advance
      follower = cchar
      if follower == '@'
        @expr_state = :arg if [:fname, :dot].include?(@expr_state)
        return advance && capture_token(:minus_at)
      end

      @expr_state = :beg
      if follower == '='
        advance && capture_token(:minus_equals)
      else
        capture_token(:minus)
      end
    end

    # *  *=  **  **=
    def tokenize_multiply_operators
      advance
      follower = cchar
      if follower == '*'
        @expr_state = :beg
        advance
        return capture_token(:exponent) unless cchar == '='

        advance && capture_token(:exponent_equals)
      elsif follower == '='
        @expr_state = :beg
        advance && capture_token(:multiply_equals)
      else
        # TODO: Add logic to disambiguate between splat and multiply
        default_expr_state_transition!
        capture_token(:multiply)
      end
    end

    # Tokenizes operators using the / character, after tokenize_potential_regex
    # has decided that it's not the start of a regex.
    def tokenize_divide_operators
      advance
      unless cchar == '='
        default_expr_state_transition!
        return capture_token(:divide)
      end

      @expr_state = :beg
      advance && capture_token(:divide_equals)
    end

    # <  <=  <=>  <<  <<=  and heredoc openers (<<NAME)
    def tokenize_lt_operators
      advance
      default_expr_state_transition!
      follower = cchar
      if follower == '<'
        advance

        # NOTE(review): @expr_state read here is the value left behind by
        # default_expr_state_transition! above, not the state that was in
        # force before the operator - confirm that this is intended.
        heredoc_ruled_out = [:end, :dot, :end_arg, :class].include?(@expr_state) || cchar == ' '
        unless heredoc_ruled_out
          heredoc_token = tokenize_heredoc_delimiter
          return heredoc_token if heredoc_token
        end

        return capture_token(:left_shift) unless cchar == '='

        @expr_state = :beg
        advance && capture_token(:left_shift_equals)
      elsif follower == '='
        advance
        return capture_token(:lte) unless cchar == '>'

        advance && capture_token(:sort_operator)
      else
        capture_token(:lt)
      end
    end

    # >  >=  >>  >>=
    def tokenize_gt_operators
      advance
      default_expr_state_transition!
      follower = cchar
      if follower == '>'
        advance
        return capture_token(:right_shift) unless cchar == '='

        advance && capture_token(:right_shift_equals)
      elsif follower == '='
        advance && capture_token(:gte)
      else
        capture_token(:gt)
      end
    end

    # |  |=  ||  ||=
    def tokenize_pipe_operators
      advance
      follower = cchar
      if follower == '|'
        advance
        @expr_state = :beg
        return capture_token(:logical_or) unless cchar == '='

        advance && capture_token(:logical_or_equals)
      elsif follower == '='
        @expr_state = :beg
        advance && capture_token(:bitwise_or_equals)
      else
        default_expr_state_transition!
        capture_token(:bitwise_or)
      end
    end

    # &  &=  &&  &&=
    def tokenize_ampersand_operators
      advance
      follower = cchar
      if follower == '&'
        advance
        @expr_state = :beg
        return capture_token(:logical_and) unless cchar == '='

        advance && capture_token(:logical_and_equals)
      elsif follower == '='
        @expr_state = :beg
        advance && capture_token(:bitwise_and_equals)
      else
        default_expr_state_transition!
        capture_token(:bitwise_and)
      end
    end

    # ^  ^=
    def tokenize_caret_operators
      advance
      unless cchar == "="
        default_expr_state_transition!
        return capture_token(:caret)
      end

      @expr_state = :beg
      advance && capture_token(:caret_equals)
    end

    # %  %=
    def tokenize_modulo_operators
      advance
      unless cchar == "="
        default_expr_state_transition!
        return capture_token(:modulo)
      end

      @expr_state = :beg
      advance && capture_token(:modulo_equals)
    end

  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module Eden
  # Mixin that decides whether a / starts a regex literal or is a division
  # operator, and tokenizes /.../ regex literals. It relies on the including
  # tokenizer for #advance, #cchar, #peek_ahead_for, #capture_token,
  # #tokenize_divide_operators, @expr_state and @line.
  module RegexTokenizer
    # Called with the cursor on a /. Returns the token produced by either
    # #tokenize_regex or #tokenize_divide_operators:
    #
    # * where an expression may begin (:beg, :mid, :class) it is a regex;
    # * otherwise /= is the divide-assign operator;
    # * in argument position (:arg, :cmd_arg) preceded by whitespace it is a
    #   regex (as in `puts /foo/`);
    # * anything else is a division.
    def tokenize_potential_regex
      return tokenize_regex if [:beg, :mid, :class].include?(@expr_state)

      return tokenize_divide_operators if peek_ahead_for(/=/)

      # The :cmd_arg test previously read @expr, an instance variable that is
      # never assigned, so that half of the condition could never be true.
      if [:arg, :cmd_arg].include?(@expr_state) && @line.last_token_is_space?
        return tokenize_regex
      end

      tokenize_divide_operators
    end

    # Consumes a /.../ literal including any trailing option letters
    # (e.g. /foo/im) and returns it as a :regex token.
    #
    # Raises a RuntimeError when the end of input is reached before the
    # closing /.
    def tokenize_regex
      advance # Consume the leading /
      loop do
        raise "Unclosed Regex found" if cchar.nil? # end of file

        if cchar == '/'
          advance
          # Capture every regex option letter, not just the first one
          advance while /[iomxneus]/ =~ cchar
          return capture_token(:regex)
        end

        advance if cchar == '\\' # Skip the backslash; the escaped char is passed below
        advance
      end
    end
  end
end
|
@@ -0,0 +1,149 @@
|
|
1
|
+
module Eden
  # Mixin that tokenizes quoted strings, backquote strings, the bodies of
  # %-delimited literals and heredocs. It relies on the including tokenizer
  # for #advance, #cchar, #peek_ahead_for, #thunk, #reset_thunk!,
  # #capture_token, the variable/single-character tokenizers, and for the
  # @i, @length, @sf, @state, @interpolating and @delimiters instance
  # variables.
  module StringTokenizer
    # Tokenizes a '...' string. Entered with the cursor on the opening quote.
    def tokenize_single_quote_string
      tokenize_non_expanded_string("'")
    end

    # Tokenizes a literal in which no interpolation takes place. Entered with
    # the cursor on the opening delimiter. For the paired delimiters
    # ( ) [ ] { } < > nested pairs are balanced; a backslash always skips the
    # character after it. When @state is :regex, trailing option letters are
    # included in the token. Returns a single token of type @state.
    def tokenize_non_expanded_string( start_delimiter )
      delimiter_depth = 0
      matched_delimiter = is_matched_delimiter?( start_delimiter )
      end_delimiter = find_matching_delimiter( start_delimiter )

      advance # Pass the opening delimiter

      until((cchar == end_delimiter && delimiter_depth == 0) || @i >= @length)

        if matched_delimiter
          delimiter_depth += 1 if cchar == start_delimiter
          delimiter_depth -= 1 if cchar == end_delimiter
        end

        if cchar == '\\'
          advance(2) # Pass the escaped character
        else
          advance
        end
      end
      advance # Pass the closing quote

      advance_through_regex_options if @state == :regex

      @expr_state = :end
      capture_token( @state )
    end

    # Tokenizes a `...` string. Entered with the cursor on the opening backquote.
    def tokenize_backquote_string
      tokenize_expanded_string( '`' )
    end

    # Tokenizes a "..." string. Pass true for in_string_already when the
    # cursor is already inside the string (the opening quote has been read).
    def tokenize_double_quote_string( in_string_already = false )
      tokenize_expanded_string('"', in_string_already)
    end

    # Tokenizes a literal in which #{...}, #@var, #@@var and #$var are
    # recognised. Returns an Array of tokens.
    #
    # On #{ the text read so far and the { are returned immediately, after
    # the current @state and start_delimiter have been pushed onto
    # @interpolating and @delimiters. With in_string_already the opening
    # delimiter is not skipped.
    #
    # For the paired delimiters ( ) [ ] { } < > nested pairs are balanced, so
    # %r{\d{3}} ends at the second }. The nesting depth is local to one call:
    # NOTE(review) it is not carried across a #{...} interruption, so a
    # nested pair that spans an interpolation still ends the literal early.
    def tokenize_expanded_string( start_delimiter, in_string_already = false )
      saved_state = @state
      tokens = []
      delimiter_depth = 0
      matched_delimiter = is_matched_delimiter?( start_delimiter )
      end_delimiter = find_matching_delimiter( start_delimiter )
      advance unless in_string_already # Pass the opening delimiter
      until( (cchar == end_delimiter && delimiter_depth == 0) || @i >= @length )
        if cchar == '\\'
          advance(2) # Pass the escaped character
        elsif cchar == '#'
          advance # include the # character in the string
          case cchar
          when '{'
            @interpolating.push( @state )
            @delimiters.push( start_delimiter )
            tokens << Token.new( @state, thunk )
            reset_thunk!
            @state = :lcurly
            tokens << tokenize_single_character
            return tokens
          when '@'
            tokens << capture_token( @state )
            if peek_ahead_for('@')
              tokens << tokenize_classvar
            else
              tokens << tokenize_instancevar
            end
            @state = saved_state # Back to reading the string itself
          when '$'
            tokens << capture_token( @state )
            tokens << tokenize_globalvar
            @state = saved_state # Back to reading the string itself
          end
        else
          if matched_delimiter
            delimiter_depth += 1 if cchar == start_delimiter
            delimiter_depth -= 1 if cchar == end_delimiter
          end
          advance
        end
      end
      advance # Pass the closing delimiter
      advance_through_regex_options if @state == :regex
      @expr_state = :end
      tokens << capture_token( @state )
      return tokens
    end

    # Called from tokenize_lt_operators when it identifies that
    # << is a heredoc delimiter. Expects that '<<' will already
    # be included in the current thunk.
    #
    # Returns a :heredoc_delimiter token and remembers the terminator (without
    # surrounding quotes) in @heredoc_delimiter, or returns nil - without
    # consuming anything - when what follows << is not a heredoc name.
    def tokenize_heredoc_delimiter
      offset = 2
      if cchar == '-'
        # Only treat the - as part of the heredoc opener when a name follows,
        # so that a plain shift such as `1 <<-1` is left untouched.
        return nil unless peek_ahead_for(/[A-Za-z_'"`]/)
        advance
        offset = 3
      end

      quote = nil
      if /[A-Za-z_]/.match( cchar )
        advance
        advance until /[A-Za-z0-9_]/.match( cchar ).nil?
      elsif /['"`]/.match( cchar )
        quote = cchar
        advance_through_quoted_delimiter( quote )
      else
        return nil
      end

      name = thunk[offset..-1]
      # The terminator line of <<'EOF' is EOF, not 'EOF'
      name = name[1..-1].chomp( quote ) if quote
      @heredoc_delimiter = name
      capture_token( :heredoc_delimiter )
    end

    # Consumes everything up to and including the heredoc terminator recorded
    # by #tokenize_heredoc_delimiter and returns it as a :heredoc_body token.
    # When the terminator is never found the body runs to the end of input;
    # when no heredoc is pending nothing is consumed.
    def tokenize_heredoc_body
      if @heredoc_delimiter
        advance until heredoc_terminator_at_cursor? || @i >= @length
        # Pass the terminator itself, but only when it was actually found
        @heredoc_delimiter.length.times { advance } if @i < @length
      end
      @heredoc_delimiter = nil
      capture_token( :heredoc_body )
    end

    private
    # Returns the matching delimiter for the 4 "paired" delimiters; any other
    # delimiter closes itself.
    def find_matching_delimiter( start_delimiter )
      case start_delimiter
      when '{' then '}'
      when '(' then ')'
      when '[' then ']'
      when '<' then '>'
      else
        start_delimiter
      end
    end

    # True when the given character is the opening half of a paired delimiter.
    def is_matched_delimiter?( cchar )
      !! /[{\(\[<]/.match(cchar)
    end

    # Passes a quoted heredoc name. Entered with the cursor on the opening
    # quote; stops at the end of input when the quote is never closed.
    def advance_through_quoted_delimiter( delimiter )
      advance
      advance until cchar == delimiter || @i >= @length
      advance
    end

    # Passes every regex option letter that follows a closing delimiter
    # (e.g. the xi in %r{...}xi).
    def advance_through_regex_options
      advance while /[iomxneus]/ =~ cchar
    end

    # True when the heredoc terminator starts at the cursor and is followed
    # by a line break ("\n" or "\r\n") or by the end of the source.
    def heredoc_terminator_at_cursor?
      length = @heredoc_delimiter.length
      return false unless @sf.source[@i, length] == @heredoc_delimiter

      tail = @sf.source[@i + length, 2]
      tail.nil? || tail.empty? || tail[0, 1] == "\n" || tail == "\r\n"
    end
  end
end
|