eden 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38) hide show
  1. data/CHANGELOG +4 -0
  2. data/LICENSE +20 -0
  3. data/README.md +48 -0
  4. data/Rakefile +10 -0
  5. data/bin/eden +132 -0
  6. data/lib/eden.rb +10 -0
  7. data/lib/eden/defaults.rb +26 -0
  8. data/lib/eden/formatter.rb +25 -0
  9. data/lib/eden/formatters/block_formatter.rb +45 -0
  10. data/lib/eden/formatters/indenter.rb +91 -0
  11. data/lib/eden/formatters/white_space_cleaner.rb +14 -0
  12. data/lib/eden/line.rb +65 -0
  13. data/lib/eden/source_file.rb +32 -0
  14. data/lib/eden/token.rb +62 -0
  15. data/lib/eden/tokenizer.rb +259 -0
  16. data/lib/eden/tokenizers/basic_tokenizer.rb +167 -0
  17. data/lib/eden/tokenizers/delimited_literal_tokenizer.rb +38 -0
  18. data/lib/eden/tokenizers/number_tokenizer.rb +68 -0
  19. data/lib/eden/tokenizers/operator_tokenizer.rb +211 -0
  20. data/lib/eden/tokenizers/regex_tokenizer.rb +37 -0
  21. data/lib/eden/tokenizers/string_tokenizer.rb +149 -0
  22. data/test/array_literal_tokenization_test.rb +43 -0
  23. data/test/basic_tokenization_test.rb +29 -0
  24. data/test/block_formatter_test.rb +47 -0
  25. data/test/class_var_token_test.rb +21 -0
  26. data/test/identifier_token_test.rb +140 -0
  27. data/test/indenter_test.rb +314 -0
  28. data/test/instance_var_token_test.rb +48 -0
  29. data/test/number_tokenization_test.rb +83 -0
  30. data/test/operator_tokenization_test.rb +180 -0
  31. data/test/regex_tokenization_test.rb +68 -0
  32. data/test/single_character_tokenization_test.rb +87 -0
  33. data/test/string_tokenization_test.rb +291 -0
  34. data/test/symbol_tokenization_test.rb +64 -0
  35. data/test/test_helper.rb +13 -0
  36. data/test/white_space_cleaner_test.rb +35 -0
  37. data/test/whitespace_token_test.rb +63 -0
  38. metadata +108 -0
@@ -0,0 +1,38 @@
1
module Eden
  # Tokenizes percent-delimited literals: %(...), %q, %Q, %w, %W, %i, %I,
  # %s, %r and %x.
  #
  # The module defines no scanning primitives itself. It expects the object
  # that includes it to provide +advance+, +cchar+,
  # +tokenize_expanded_string+ and +tokenize_non_expanded_string+.
  module DelimitedLiteralTokenizer
    # Tokenizes a literal starting at the '%' character.
    #
    # Sets @state to the literal's token type, then hands the opening
    # delimiter to the interpolating or non-interpolating string tokenizer
    # and returns whatever that tokenizer returns.
    #
    # Raises a RuntimeError when '%' is followed by a letter or digit that
    # is not a known literal type.
    def tokenize_delimited_literal
      advance # Pass the %

      if /[^A-Za-z0-9]/.match( cchar )
        # Bare form, e.g. %(text): treated exactly like %Q(text)
        def_char = 'Q'
        @state = :double_q_string
      elsif /[qQswWiIrx]/.match( cchar )
        def_char = cchar
        @state = infer_delimited_literal_type
        advance # Pass the type letter; cchar is now the opening delimiter
      else
        raise "Invalid delimiter character"
      end

      case def_char
      when 'r', 'Q', 'W', 'I', 'x'
        # These forms allow #{} interpolation
        tokenize_expanded_string( cchar )
      when 's', 'q', 'w', 'i'
        tokenize_non_expanded_string( cchar )
      end
    end

    # Maps the literal's type letter (the current character) to the token
    # type stored in @state. Returns nil for an unknown letter.
    def infer_delimited_literal_type
      case cchar
      when 's' then :symbol
      when 'w', 'W', 'i', 'I' then :array_literal
      when 'q' then :single_q_string
      when 'Q' then :double_q_string
      when 'r' then :regex
      when 'x' then :backquote_string
      end
    end
  end
end
@@ -0,0 +1,68 @@
1
module Eden
  # Tokenizes numeric literals.
  #
  # Expects the including object to provide +advance+, +cchar+,
  # +peek_ahead_for+ (tests the character after the current one) and
  # +capture_token+.
  module NumberTokenizer
    # Characters allowed in the body of each non-decimal literal type.
    # '_' is included because Ruby permits it as a digit separator.
    INTEGER_DIGIT_PATTERNS = {
      :bin_literal => /[01_]/,
      :oct_literal => /[0-7_]/,
      :hex_literal => /[0-9a-fA-F_]/
    }.freeze

    # Tokenize a non-decimal integer literal - e.g. 0x4F2E, 0b1101, 0755
    #
    # The literal type is read from @state, which must be :bin_literal,
    # :oct_literal or :hex_literal; anything else raises a RuntimeError.
    def tokenize_integer_literal
      @expr_state = :end
      if peek_ahead_for( /[_oObBxX]/ )
        advance( 2 ) # Pass 0x / 0b / 0o / 0_
      else
        advance # Pass the leading 0 of a plain octal literal
      end

      pattern = INTEGER_DIGIT_PATTERNS.fetch( @state ) do
        raise "Unknown integer literal type: #{@state.inspect}"
      end
      advance while pattern =~ cchar
      capture_token( @state )
    end

    # Tokenize a decimal literal, handing over to the float / exponent
    # tokenizers when a fractional part or an exponent follows.
    def tokenize_decimal_literal
      @expr_state = :end
      # Capture the sign
      advance if cchar == '+' || cchar == '-'

      if cchar == '0'
        if peek_ahead_for( /[dD]/ )
          advance( 2 ) # Handle the 0d prefix of 0d1234
        elsif !peek_ahead_for( /[.eE]/ )
          # A lone zero. Zeros followed by '.', 'e' or 'E' fall through to
          # the loop below so that 0.5 stays one token.
          advance
          return capture_token( :dec_literal )
        end
      end

      until /[0-9_.eE]/.match( cchar ).nil?
        case cchar
        when '.'
          # Only a '.' followed by a digit starts a fraction; otherwise it
          # is a method call (3.times) or a range (1..10).
          return tokenize_float_literal if peek_ahead_for( /[0-9]/ )
          break
        when 'e', 'E'
          return tokenize_exponent_literal
        else
          advance
        end
      end
      capture_token( :dec_literal )
    end

    # Tokenize a literal with an exponent - e.g. 3.4E+22
    def tokenize_exponent_literal
      advance # Pass the e/E
      advance if cchar == '+' || cchar == '-'
      advance while /[0-9_]/ =~ cchar
      capture_token( :exp_literal )
    end

    # Tokenize a float literal - e.g. 2.0, 2.1101
    def tokenize_float_literal
      advance # Pass the .

      until /[0-9_eE]/.match( cchar ).nil?
        return tokenize_exponent_literal if cchar == 'e' || cchar == 'E'
        advance
      end
      capture_token( :float_literal )
    end
  end
end
@@ -0,0 +1,211 @@
1
module Eden
  # Tokenizes operators, grouped by their first character.
  #
  # Every method is entered with the current character on the operator's
  # first character. The including object is expected to provide +advance+,
  # +cchar+, +capture_token+, +default_expr_state_transition!+ and
  # +tokenize_heredoc_delimiter+.
  #
  # Tokens are always captured after advancing; capture does not depend on
  # the value +advance+ happens to return.
  module OperatorTokenizer
    # =  ==  ===  =~  =>
    def tokenize_equals_operators
      advance
      default_expr_state_transition!
      case cchar
      when '>'
        advance_and_capture( :hash_rocket )
      when '~'
        advance_and_capture( :matches )
      when '='
        advance
        if cchar == '='
          advance_and_capture( :identity_equality )
        else
          capture_token( :equality )
        end
      else
        capture_token( :equals )
      end
    end

    # !  !=  !~
    def tokenize_bang_operators
      @expr_state = :beg
      advance
      case cchar
      when '='
        advance_and_capture( :not_equals )
      when '~'
        advance_and_capture( :not_matches )
      else
        capture_token( :logical_not )
      end
    end

    # +  +=  +@
    def tokenize_plus_operators
      tokenize_sign_operators( :plus, :plus_equals, :plus_at )
    end

    # -  -=  -@
    def tokenize_minus_operators
      tokenize_sign_operators( :minus, :minus_equals, :minus_at )
    end

    # *  *=  **  **=
    def tokenize_multiply_operators
      advance
      case cchar
      when '*'
        @expr_state = :beg
        advance
        if cchar == '='
          advance_and_capture( :exponent_equals )
        else
          capture_token( :exponent )
        end
      when '='
        @expr_state = :beg
        advance_and_capture( :multiply_equals )
      else
        # TODO: Add logic to disambiguate between splat and multiply
        default_expr_state_transition!
        capture_token( :multiply )
      end
    end

    # Tokenizes operators using the / character, after tokenize_potential_regex
    # has decided that it's not the start of a regex.
    def tokenize_divide_operators
      tokenize_operator_or_assignment( :divide, :divide_equals )
    end

    # <  <=  <=>  <<  <<=  and heredoc openers (<<ID)
    def tokenize_lt_operators
      advance
      default_expr_state_transition!
      case cchar
      when '<'
        advance

        # NOTE(review): @expr_state is read here after
        # default_expr_state_transition! has already run above - confirm the
        # heredoc check is meant to see the post-transition state.
        if ![:end, :dot, :end_arg, :class].include?( @expr_state ) && cchar != ' '
          token = tokenize_heredoc_delimiter
          return token if token
        end

        if cchar == '='
          @expr_state = :beg
          advance_and_capture( :left_shift_equals )
        else
          capture_token( :left_shift )
        end
      when '='
        advance
        if cchar == '>'
          advance_and_capture( :sort_operator )
        else
          capture_token( :lte )
        end
      else
        capture_token( :lt )
      end
    end

    # >  >=  >>  >>=
    def tokenize_gt_operators
      advance
      default_expr_state_transition!
      case cchar
      when '>'
        advance
        if cchar == '='
          advance_and_capture( :right_shift_equals )
        else
          capture_token( :right_shift )
        end
      when '='
        advance_and_capture( :gte )
      else
        capture_token( :gt )
      end
    end

    # |  |=  ||  ||=
    def tokenize_pipe_operators
      advance
      case cchar
      when '|'
        advance
        @expr_state = :beg
        if cchar == '='
          advance_and_capture( :logical_or_equals )
        else
          capture_token( :logical_or )
        end
      when '='
        @expr_state = :beg
        advance_and_capture( :bitwise_or_equals )
      else
        default_expr_state_transition!
        capture_token( :bitwise_or )
      end
    end

    # &  &=  &&  &&=
    def tokenize_ampersand_operators
      advance
      case cchar
      when '&'
        advance
        @expr_state = :beg
        if cchar == '='
          advance_and_capture( :logical_and_equals )
        else
          capture_token( :logical_and )
        end
      when '='
        @expr_state = :beg
        advance_and_capture( :bitwise_and_equals )
      else
        default_expr_state_transition!
        capture_token( :bitwise_and )
      end
    end

    # ^  ^=
    def tokenize_caret_operators
      tokenize_operator_or_assignment( :caret, :caret_equals )
    end

    # %  %=
    def tokenize_modulo_operators
      tokenize_operator_or_assignment( :modulo, :modulo_equals )
    end

    private

    # Consumes the current character, then captures everything consumed so
    # far as a token of the given type.
    def advance_and_capture( type )
      advance
      capture_token( type )
    end

    # Shared body for + and -: the bare operator, its assignment form
    # (+=, -=) and its unary method name (+@, -@).
    def tokenize_sign_operators( plain, assignment, unary_method )
      advance
      case cchar
      when '='
        @expr_state = :beg
        advance_and_capture( assignment )
      when '@'
        @expr_state = :arg if [:fname, :dot].include?( @expr_state )
        advance_and_capture( unary_method )
      else
        @expr_state = :beg
        capture_token( plain )
      end
    end

    # Shared body for operators with exactly two forms: the bare operator
    # and its assignment form (/ /=, ^ ^=, % %=).
    def tokenize_operator_or_assignment( plain, assignment )
      advance
      if cchar == '='
        @expr_state = :beg
        advance_and_capture( assignment )
      else
        default_expr_state_transition!
        capture_token( plain )
      end
    end
  end
end
@@ -0,0 +1,37 @@
1
module Eden
  # Decides whether a '/' starts a regex literal or is a division operator,
  # and tokenizes slash-delimited regex literals.
  #
  # Expects the including object to provide +advance+, +cchar+,
  # +peek_ahead_for+, +capture_token+ and +tokenize_divide_operators+, and
  # to keep the current line in @line.
  module RegexTokenizer
    # Called with the current character on a '/'. Uses @expr_state to pick
    # between a regex literal and the divide operators.
    def tokenize_potential_regex
      return tokenize_regex if [:beg, :mid, :class].include?( @expr_state )

      # "/=" outside an expression start is the divide-assign operator
      return tokenize_divide_operators if peek_ahead_for( /=/ )

      # In argument position a '/' preceded by whitespace opens a regex
      if [:arg, :cmd_arg].include?( @expr_state ) && @line.last_token_is_space?
        return tokenize_regex
      end

      tokenize_divide_operators
    end

    # Tokenizes a /.../ literal including any trailing option letters.
    #
    # Raises a RuntimeError ("Unclosed Regex found") when the input ends
    # before the closing '/'.
    def tokenize_regex
      advance # Consume the leading /
      loop do
        # cchar may be nil or empty once the input is exhausted
        raise "Unclosed Regex found" if cchar.nil? || cchar.empty?

        if cchar == '/'
          advance
          # Capture every regex option, e.g. the "mi" in /x/mi
          advance while cchar =~ /[iomxneus]/
          return capture_token( :regex )
        end

        advance if cchar == '\\' # Skip the backslash; the escaped character is passed below
        advance
      end
    end
  end
end
@@ -0,0 +1,149 @@
1
module Eden
  # Tokenizes quoted strings, the bodies of percent literals and heredocs.
  #
  # Expects the including object to provide +advance+, +cchar+,
  # +peek_ahead_for+, +thunk+, +reset_thunk!+, +capture_token+,
  # +tokenize_single_character+, +tokenize_classvar+, +tokenize_instancevar+
  # and +tokenize_globalvar+, and to maintain @i, @length, @sf, @state,
  # @interpolating and @delimiters.
  module StringTokenizer
    def tokenize_single_quote_string
      tokenize_non_expanded_string("'")
    end

    # Tokenizes a string with no interpolation, starting on its opening
    # delimiter. Paired delimiters ( (), [], {}, <> ) may nest. Returns a
    # single token of type @state.
    def tokenize_non_expanded_string( start_delimiter )
      delimiter_depth = 0
      matched_delimiter = is_matched_delimiter?( start_delimiter )
      end_delimiter = find_matching_delimiter( start_delimiter )

      advance # Pass the opening delimiter

      until (cchar == end_delimiter && delimiter_depth == 0) || @i >= @length
        if cchar == '\\'
          advance(2) # Pass the escaped character
          next
        end

        if matched_delimiter
          delimiter_depth += 1 if cchar == start_delimiter
          delimiter_depth -= 1 if cchar == end_delimiter
        end
        advance
      end
      advance # Pass the closing delimiter

      consume_regex_modifiers if @state == :regex

      @expr_state = :end
      capture_token( @state )
    end

    def tokenize_backquote_string
      tokenize_expanded_string( '`' )
    end

    def tokenize_double_quote_string( in_string_already = false )
      tokenize_expanded_string('"', in_string_already)
    end

    # Tokenizes a string that may contain interpolation. Returns an array
    # of tokens.
    #
    # When "#{" is found, the string so far and the opening brace are
    # returned and the string's state is pushed onto @interpolating /
    # @delimiters. Passing in_string_already = true continues scanning a
    # string whose opening delimiter has already been consumed.
    #
    # Paired delimiters may nest (e.g. %r{\d{3}}). The nesting depth at the
    # point of an interpolation is kept on @delimiter_depths so that it is
    # restored when the string is continued.
    def tokenize_expanded_string( start_delimiter, in_string_already = false )
      saved_state = @state
      tokens = []
      end_delimiter = find_matching_delimiter( start_delimiter )
      paired = is_matched_delimiter?( start_delimiter )
      @delimiter_depths ||= []
      depth = 0
      # Only paired-delimiter strings push a depth, so only they pop one
      depth = @delimiter_depths.pop || 0 if paired && in_string_already

      advance unless in_string_already # Pass the opening delimiter
      until (cchar == end_delimiter && depth == 0) || @i >= @length
        if cchar == '\\'
          advance(2) # Pass the escaped character
        elsif cchar == '#'
          advance # include the # character in the string
          case cchar
          when '{'
            @interpolating.push( @state )
            @delimiters.push( start_delimiter )
            @delimiter_depths.push( depth ) if paired
            tokens << Token.new( @state, thunk )
            reset_thunk!
            @state = :lcurly
            tokens << tokenize_single_character
            return tokens
          when '@'
            tokens << capture_token( @state )
            if peek_ahead_for('@')
              tokens << tokenize_classvar
            else
              tokens << tokenize_instancevar
            end
            @state = saved_state
          when '$'
            tokens << capture_token( @state )
            tokens << tokenize_globalvar
            @state = saved_state
          end
        else
          if paired
            depth += 1 if cchar == start_delimiter
            depth -= 1 if cchar == end_delimiter
          end
          advance
        end
      end
      advance # Pass the closing delimiter

      consume_regex_modifiers if @state == :regex

      @expr_state = :end
      tokens << capture_token( @state )
      tokens
    end

    # Called from tokenize_lt_operators when it identifies that
    # << is a heredoc delimiter. Expects that '<<' will already
    # be included in the current thunk.
    #
    # Accepts <<ID, <<-ID, <<~ID and the quoted forms. Stores the bare
    # terminator text (without quotes) in @heredoc_delimiter. Returns nil,
    # consuming nothing, when no heredoc identifier follows.
    def tokenize_heredoc_delimiter
      offset = 2
      if cchar == '-' || cchar == '~'
        # Leave the character alone unless a delimiter really follows, so
        # the caller can still emit a plain << token.
        return nil unless peek_ahead_for( /[A-Za-z_'"`]/ )
        advance
        offset = 3
      end

      if cchar =~ /[A-Za-z_]/
        advance
        advance until /[A-Za-z0-9_]/.match( cchar ).nil?
      elsif /['"`]/.match(cchar)
        advance_through_quoted_delimiter(cchar)
      else
        return nil
      end

      # The terminator line carries no quotes, so strip a surrounding pair
      @heredoc_delimiter = thunk[offset..-1].sub( /\A(['"`])(.*)\1\z/m, '\2' )
      capture_token( :heredoc_delimiter )
    end

    # Consumes the heredoc body up to and including the terminator line's
    # delimiter text, then clears @heredoc_delimiter.
    #
    # The terminator must sit on a line of its own (leading whitespace is
    # allowed) and be followed by a newline or the end of the input. If no
    # terminator is found the rest of the input becomes the body.
    def tokenize_heredoc_body
      delimiter = @heredoc_delimiter
      if delimiter
        advance until @i >= @length || heredoc_terminator_at?( @i, delimiter )
        delimiter.length.times { advance } if @i < @length
      end
      @heredoc_delimiter = nil
      capture_token( :heredoc_body )
    end

    private

    # Returns the matching delimiter for the 4 "paired" delimiters
    def find_matching_delimiter( start_delimiter )
      case start_delimiter
      when '{' then '}'
      when '(' then ')'
      when '[' then ']'
      when '<' then '>'
      else
        start_delimiter
      end
    end

    # True when the delimiter is the opening half of a pair
    def is_matched_delimiter?( delimiter )
      !! /[{\(\[<]/.match( delimiter )
    end

    # Passes a quoted heredoc delimiter, stopping at the end of the input
    # if the closing quote is missing.
    def advance_through_quoted_delimiter( delimiter )
      advance
      advance until cchar == delimiter || @i >= @length
      advance
    end

    # Passes every option letter following a regex's closing delimiter
    def consume_regex_modifiers
      advance while cchar =~ /[iomxneus]/
    end

    # True when the delimiter text starts at index, is preceded on its line
    # only by whitespace, and is followed by a line break or end of input.
    def heredoc_terminator_at?( index, delimiter )
      source = @sf.source
      return false unless source[index, delimiter.length] == delimiter

      trailing = source[index + delimiter.length, 2].to_s
      unless trailing.empty? || trailing[0, 1] == "\n" || trailing == "\r\n"
        return false
      end

      line_start = index.zero? ? 0 : ( source.rindex( "\n", index - 1 ) || -1 ) + 1
      source[line_start...index].strip.empty?
    end
  end
end