eden 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +4 -0
- data/LICENSE +20 -0
- data/README.md +48 -0
- data/Rakefile +10 -0
- data/bin/eden +132 -0
- data/lib/eden.rb +10 -0
- data/lib/eden/defaults.rb +26 -0
- data/lib/eden/formatter.rb +25 -0
- data/lib/eden/formatters/block_formatter.rb +45 -0
- data/lib/eden/formatters/indenter.rb +91 -0
- data/lib/eden/formatters/white_space_cleaner.rb +14 -0
- data/lib/eden/line.rb +65 -0
- data/lib/eden/source_file.rb +32 -0
- data/lib/eden/token.rb +62 -0
- data/lib/eden/tokenizer.rb +259 -0
- data/lib/eden/tokenizers/basic_tokenizer.rb +167 -0
- data/lib/eden/tokenizers/delimited_literal_tokenizer.rb +38 -0
- data/lib/eden/tokenizers/number_tokenizer.rb +68 -0
- data/lib/eden/tokenizers/operator_tokenizer.rb +211 -0
- data/lib/eden/tokenizers/regex_tokenizer.rb +37 -0
- data/lib/eden/tokenizers/string_tokenizer.rb +149 -0
- data/test/array_literal_tokenization_test.rb +43 -0
- data/test/basic_tokenization_test.rb +29 -0
- data/test/block_formatter_test.rb +47 -0
- data/test/class_var_token_test.rb +21 -0
- data/test/identifier_token_test.rb +140 -0
- data/test/indenter_test.rb +314 -0
- data/test/instance_var_token_test.rb +48 -0
- data/test/number_tokenization_test.rb +83 -0
- data/test/operator_tokenization_test.rb +180 -0
- data/test/regex_tokenization_test.rb +68 -0
- data/test/single_character_tokenization_test.rb +87 -0
- data/test/string_tokenization_test.rb +291 -0
- data/test/symbol_tokenization_test.rb +64 -0
- data/test/test_helper.rb +13 -0
- data/test/white_space_cleaner_test.rb +35 -0
- data/test/whitespace_token_test.rb +63 -0
- metadata +108 -0
data/lib/eden/tokenizers/delimited_literal_tokenizer.rb
@@ -0,0 +1,38 @@
+module Eden
+  module DelimitedLiteralTokenizer
+    def tokenize_delimited_literal
+      advance # Pass the %
+
+      if( /[^A-Za-z0-9]/.match( cchar ) )
+        def_char = 'Q'
+        @state = :double_q_string
+      elsif( /[qQswWrx]/.match( cchar) )
+        def_char = cchar
+        @state = infer_delimited_literal_type
+        advance
+      else
+        raise "Invalid delimiter character"
+      end
+
+      case def_char
+      when 'r', 'Q', 'W', 'x'
+        token = tokenize_expanded_string( cchar )
+      when 's', 'q', 'w'
+        token = tokenize_non_expanded_string( cchar )
+      end
+
+      return token
+    end
+
+    def infer_delimited_literal_type
+      case cchar
+      when 's' then :symbol
+      when 'w', 'W' then :array_literal
+      when 'q' then :single_q_string
+      when 'Q' then :double_q_string
+      when 'r' then :regex
+      when 'x' then :backquote_string
+      end
+    end
+  end
+end
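For orientation (not part of the published diff), here is a plain-Ruby sketch of the %-literal forms that `infer_delimited_literal_type` above classifies. The annotations give the states assigned by the code; the variable `b` is only a placeholder.

```ruby
b = 2                 # placeholder for interpolation
%w[a b c]             # :array_literal   (non-expanded word list)
%W[a #{b} c]          # :array_literal   (expanded word list)
%q(it's)              # :single_q_string
%Q(value: #{b})       # :double_q_string
%s(name)              # :symbol
%r{\d+}i              # :regex
%x(echo hi)           # :backquote_string
%(bare percent)       # :double_q_string (def_char defaults to 'Q')
```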
data/lib/eden/tokenizers/number_tokenizer.rb
@@ -0,0 +1,68 @@
+module Eden
+  module NumberTokenizer
+
+    # Tokenize a non-decimal integer literal - e.g. 0x4F2E, 0b1101
+    def tokenize_integer_literal
+      @expr_state = :end
+      if peek_ahead_for(/[_oObBxX]/)
+        advance(2) # Pass 0x / 0b / 0O
+      else
+        advance # Pass 0 for octal digits
+      end
+      pattern = {:bin_literal => /[01]/,
+                 :oct_literal => /[0-7]/,
+                 :hex_literal => /[0-9a-fA-F]/}[@state]
+      advance until( pattern.match( cchar ).nil? )
+      capture_token( @state )
+    end
+
+    def tokenize_decimal_literal
+      @expr_state = :end
+      # Capture the sign
+      advance if cchar == '+' || cchar == '-'
+
+      # Handle a lone zero
+      if cchar == '0' && !peek_ahead_for(/[dD]/)
+        advance
+        return capture_token( :dec_literal )
+      end
+
+      # Handle 0d1234 digits
+      advance(2) if cchar == '0' && peek_ahead_for(/[dD]/)
+
+      until( /[0-9.eE]/.match( cchar ).nil? )
+        case cchar
+        when '.'
+          return tokenize_float_literal
+        when 'e', 'E'
+          return tokenize_exponent_literal
+        when '0'..'9'
+          advance
+        else
+        end
+      end
+      capture_token( :dec_literal )
+    end
+
+    # Tokenize a literal with an exponent - e.g. 3.4E+22
+    def tokenize_exponent_literal
+      advance # Pass the e/E
+      advance if cchar == '+' or cchar == '-'
+      advance until( /[0-9]/.match( cchar ).nil? )
+      capture_token( :exp_literal )
+    end
+
+    # Tokenize a float literal - e.g. 2.0, 2.1101
+    def tokenize_float_literal
+      advance # Pass the .
+
+      until( /[0-9eE]/.match( cchar ).nil? )
+        if cchar == 'e' || cchar == 'E'
+          return tokenize_exponent_literal
+        end
+        advance
+      end
+      capture_token( :float_literal )
+    end
+  end
+end
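As another illustrative, non-authoritative aside, these are the numeric literal shapes the NumberTokenizer methods above are written to recognize, annotated with the token types the code assigns.

```ruby
0x4F2E    # :hex_literal   (tokenize_integer_literal)
0b1101    # :bin_literal
0o17      # :oct_literal   (a bare leading 0, e.g. 017, takes the same path)
0d1234    # :dec_literal   via the 0d prefix branch
42        # :dec_literal
2.1101    # :float_literal (tokenize_float_literal takes over at the '.')
3.4E+22   # :exp_literal   (tokenize_exponent_literal takes over at the 'E')
```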
data/lib/eden/tokenizers/operator_tokenizer.rb
@@ -0,0 +1,211 @@
+module Eden
+  module OperatorTokenizer
+    def tokenize_equals_operators
+      advance
+      default_expr_state_transition!
+      case cchar
+      when '>'
+        advance and capture_token(:hash_rocket)
+      when "~"
+        advance and capture_token(:matches)
+      when '='
+        advance
+        if cchar == '='
+          advance and capture_token(:identity_equality)
+        else
+          capture_token(:equality)
+        end
+      else
+        capture_token(:equals)
+      end
+    end
+
+    def tokenize_bang_operators
+      @expr_state = :beg
+      advance
+      case cchar
+      when '='
+        advance and capture_token(:not_equals)
+      when '~'
+        advance and capture_token(:not_matches)
+      else
+        capture_token(:logical_not)
+      end
+    end
+
+    def tokenize_plus_operators
+      advance
+      case cchar
+      when '='
+        @expr_state = :beg
+        advance and capture_token(:plus_equals)
+      when '@'
+        @expr_state = :arg if [:fname, :dot].include?(@expr_state)
+        advance and capture_token(:plus_at)
+      else
+        @expr_state = :beg
+        capture_token(:plus)
+      end
+    end
+
+    def tokenize_minus_operators
+      advance
+      case cchar
+      when '='
+        @expr_state = :beg
+        advance and capture_token(:minus_equals)
+      when '@'
+        @expr_state = :arg if [:fname, :dot].include?(@expr_state)
+        advance and capture_token(:minus_at)
+      else
+        @expr_state = :beg
+        capture_token(:minus)
+      end
+    end
+
+    def tokenize_multiply_operators
+      advance
+      case cchar
+      when '*'
+        @expr_state = :beg
+        advance
+        if cchar == '='
+          advance and capture_token(:exponent_equals)
+        else
+          capture_token(:exponent)
+        end
+      when '='
+        @expr_state = :beg
+        advance and capture_token(:multiply_equals)
+      else
+        # TODO: Add logic to disambiguate between splat and multiply
+        default_expr_state_transition!
+        capture_token(:multiply)
+      end
+    end
+
+    # Tokenizes operators using the / character, after tokenize_potential_regex
+    # has decided that it's not the start of a regex.
+    def tokenize_divide_operators
+      advance
+      if cchar == '='
+        @expr_state = :beg
+        advance and capture_token(:divide_equals)
+      else
+        default_expr_state_transition!
+        capture_token(:divide)
+      end
+    end
+
+    def tokenize_lt_operators
+      advance
+      default_expr_state_transition!
+      case cchar
+      when '<'
+        advance
+
+        if ![:end, :dot, :end_arg, :class].include?(@expr_state) && cchar != ' '
+          token = tokenize_heredoc_delimiter
+          return token if token
+        end
+
+        if cchar == '='
+          @expr_state = :beg
+          advance and capture_token(:left_shift_equals)
+        else
+          capture_token(:left_shift)
+        end
+      when '='
+        advance
+        if cchar == '>'
+          advance and capture_token(:sort_operator)
+        else
+          capture_token(:lte)
+        end
+      else
+        capture_token(:lt)
+      end
+    end
+
+    def tokenize_gt_operators
+      advance
+      default_expr_state_transition!
+      case cchar
+      when '>'
+        advance
+        if cchar == '='
+          advance and capture_token(:right_shift_equals)
+        else
+          capture_token(:right_shift)
+        end
+      when '='
+        advance and capture_token(:gte)
+      else
+        capture_token(:gt)
+      end
+    end
+
+    def tokenize_pipe_operators
+      advance
+      case cchar
+      when '|'
+        advance
+        @expr_state = :beg
+        if cchar == '='
+          advance and capture_token(:logical_or_equals)
+        else
+          capture_token(:logical_or)
+        end
+      when '='
+        @expr_state = :beg
+        advance and capture_token(:bitwise_or_equals)
+      else
+        default_expr_state_transition!
+        capture_token(:bitwise_or)
+      end
+    end
+
+    def tokenize_ampersand_operators
+      advance
+      case cchar
+      when '&'
+        advance
+        @expr_state = :beg
+        if cchar == '='
+          advance and capture_token(:logical_and_equals)
+        else
+          capture_token(:logical_and)
+        end
+      when '='
+        @expr_state = :beg
+        advance and capture_token(:bitwise_and_equals)
+      else
+        default_expr_state_transition!
+        capture_token(:bitwise_and)
+      end
+    end
+
+    def tokenize_caret_operators
+      advance
+      if cchar == "="
+        @expr_state = :beg
+        advance and capture_token(:caret_equals)
+      else
+        default_expr_state_transition!
+        capture_token(:caret)
+      end
+    end
+
+    def tokenize_modulo_operators
+      advance
+      if cchar == "="
+        @expr_state = :beg
+        advance and capture_token(:modulo_equals)
+      else
+        default_expr_state_transition!
+        capture_token(:modulo)
+      end
+    end
+
+  end
+end
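An illustrative sampling (plain Ruby, not from the gem) of operators and the token names `OperatorTokenizer` assigns above; the variables `a` and `b` are placeholders.

```ruby
a, b = 1, 2
a == b      # :equality            ('===' would be :identity_equality)
a != b      # :not_equals          ('!~'  would be :not_matches)
a <=> b     # :sort_operator
a <= b      # :lte
a << b      # :left_shift          (heredocs divert to tokenize_heredoc_delimiter)
a ** b      # :exponent            ('**=' would be :exponent_equals)
a | b       # :bitwise_or          ('||' is :logical_or, '||=' is :logical_or_equals)
a &= b      # :bitwise_and_equals
a ^ b       # :caret
a % b       # :modulo
-a          # :minus               ('-@' as in "def -@" is :minus_at)
```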
data/lib/eden/tokenizers/regex_tokenizer.rb
@@ -0,0 +1,37 @@
+module Eden
+  module RegexTokenizer
+    def tokenize_potential_regex
+
+      if @expr_state == :beg || @expr_state == :mid || @expr_state == :class
+        return tokenize_regex
+      end
+
+      if peek_ahead_for(/=/)
+        return tokenize_divide_operators
+      end
+
+      if (@expr_state == :arg || @expr == :cmd_arg) && @line.last_token_is_space?
+        return tokenize_regex
+      end
+
+      return tokenize_divide_operators
+    end
+
+    def tokenize_regex
+      advance # Consume the leading /
+      while true
+        if cchar == '/'
+          advance
+          # Capture the regex option
+          advance if cchar =~ /i|o|m|x|n|e|u|s/
+          return capture_token(:regex)
+        end
+        if cchar == nil # end of file
+          raise "Unclosed Regex found"
+        end
+        advance if cchar == '\\'
+        advance
+      end
+    end
+  end
+end
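`tokenize_potential_regex` deals with Ruby's `/` ambiguity: after a complete value it is division, at the start of an expression it opens a regex literal. A minimal illustrative sketch, with made-up variable names:

```ruby
x = 10
y = x / 2              # '/' after a value: routed to tokenize_divide_operators
z = "a1b2" =~ /\d+/i   # '/' opening an expression: tokenize_regex, trailing 'i' kept as the option
```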
data/lib/eden/tokenizers/string_tokenizer.rb
@@ -0,0 +1,149 @@
+module Eden
+  module StringTokenizer
+    def tokenize_single_quote_string
+      tokenize_non_expanded_string("'")
+    end
+
+    # If a block is given, it gets run after the final delimiter is detected. The
+    # primary purpose for this is to allow the capture of regex modifiers
+    def tokenize_non_expanded_string( start_delimiter )
+      delimiter_depth = 0
+      matched_delimiter = is_matched_delimiter?( start_delimiter )
+      end_delimiter = find_matching_delimiter( start_delimiter )
+
+      advance # Pass the opening delimiter
+
+      until((cchar == end_delimiter && delimiter_depth == 0) || @i >= @length)
+
+        if matched_delimiter
+          delimiter_depth += 1 if cchar == start_delimiter
+          delimiter_depth -= 1 if cchar == end_delimiter
+        end
+
+        if cchar == '\\'
+          advance(2) # Pass the escaped character
+        else
+          advance
+        end
+      end
+      advance # Pass the closing quote
+
+      if @state == :regex
+        advance if ['i', 'm'].include?( cchar )
+      end
+
+      @expr_state = :end
+      capture_token( @state )
+    end
+
+    def tokenize_backquote_string
+      tokenize_expanded_string( '`' )
+    end
+
+    def tokenize_double_quote_string( in_string_already = false )
+      tokenize_expanded_string('"', in_string_already)
+    end
+
+    def tokenize_expanded_string( start_delimiter, in_string_already = false )
+      saved_state = @state
+      tokens = []
+      end_delimiter = find_matching_delimiter( start_delimiter )
+      advance unless in_string_already # Pass the opening backquote
+      until( cchar == end_delimiter || @i >= @length )
+        if cchar == '\\'
+          advance(2) # Pass the escaped character
+        elsif cchar == '#'
+          advance # include the # character in the string
+          case cchar
+          when '{'
+            @interpolating.push( @state )
+            @delimiters.push( start_delimiter )
+            tokens << Token.new( @state, thunk )
+            reset_thunk!
+            @state = :lcurly
+            tokens << tokenize_single_character
+            return tokens
+          when '@'
+            tokens << capture_token( @state )
+            if peek_ahead_for('@')
+              tokens << tokenize_classvar
+            else
+              tokens << tokenize_instancevar
+            end
+            @state = saved_state
+          when '$'
+            tokens << capture_token( @state )
+            tokens << tokenize_globalvar
+            @state = saved_state
+          end
+        else
+          advance
+        end
+      end
+      advance # Pass the closing delimiter
+      if @state == :regex
+        advance if ['i', 'm'].include?( cchar )
+      end
+      @expr_state = :end
+      tokens << capture_token( @state )
+      return tokens
+    end
+
+    # Called from tokenize_lt_operators when it identifies that
+    # << is a heredoc delimiter. Expects that '<<' will already
+    # be included in the current thunk.
+    def tokenize_heredoc_delimiter
+      offset = 2
+      if cchar == '-'
+        advance
+        offset = 3
+      end
+
+      if cchar =~ /[A-Za-z_]/
+        advance
+        advance until /[A-Za-z0-9_]/.match( cchar ).nil?
+      elsif /['"`]/.match(cchar)
+        advance_through_quoted_delimiter(cchar)
+      else
+        return nil
+      end
+      @heredoc_delimiter = thunk[offset..-1]
+      capture_token( :heredoc_delimiter )
+    end
+
+    def tokenize_heredoc_body
+      if @heredoc_delimiter
+        advance until (@sf.source[@i, @heredoc_delimiter.length ] == @heredoc_delimiter &&
+                       @sf.source[@i, @heredoc_delimiter.length+1 ] == @heredoc_delimiter + "\n" ||
+                       @sf.source[@i, @heredoc_delimiter.length+2 ] == @heredoc_delimiter + "\r\n") ||
+                       @i >= @length
+      end
+      @heredoc_delimiter.length.times { advance }
+      @heredoc_delimiter = nil
+      capture_token( :heredoc_body )
+    end
+
+    private
+    # Returns the matching delimiter for the 4 "paired" delimiters
+    def find_matching_delimiter( start_delimiter )
+      case start_delimiter
+      when '{' then '}'
+      when '(' then ')'
+      when '[' then ']'
+      when '<' then '>'
+      else
+        start_delimiter
+      end
+    end
+
+    def is_matched_delimiter?( cchar )
+      !! /[{\(\[<]/.match(cchar)
+    end
+
+    def advance_through_quoted_delimiter( delimiter )
+      advance
+      advance until cchar == delimiter
+      advance
+    end
+  end
+end
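To round out the picture, a short plain-Ruby sketch (illustrative only; `name`, `@greeting`, and the rest are made up) of the constructs `tokenize_expanded_string` and the heredoc helpers above handle:

```ruby
name = "world"
@greeting = "hi"
s1 = "Hello #{name}!"   # '#{' saves the string state and emits an :lcurly token
s2 = "Say #@greeting"   # '#@' splits off an instance-variable token mid-string
s3 = `echo hi`          # backquote string (:backquote_string)
s4 = <<-EOS             # '<<-' heredoc: delimiter captured, body read up to EOS
  indented heredoc body
  EOS
```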