eden 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. data/CHANGELOG +4 -0
  2. data/LICENSE +20 -0
  3. data/README.md +48 -0
  4. data/Rakefile +10 -0
  5. data/bin/eden +132 -0
  6. data/lib/eden.rb +10 -0
  7. data/lib/eden/defaults.rb +26 -0
  8. data/lib/eden/formatter.rb +25 -0
  9. data/lib/eden/formatters/block_formatter.rb +45 -0
  10. data/lib/eden/formatters/indenter.rb +91 -0
  11. data/lib/eden/formatters/white_space_cleaner.rb +14 -0
  12. data/lib/eden/line.rb +65 -0
  13. data/lib/eden/source_file.rb +32 -0
  14. data/lib/eden/token.rb +62 -0
  15. data/lib/eden/tokenizer.rb +259 -0
  16. data/lib/eden/tokenizers/basic_tokenizer.rb +167 -0
  17. data/lib/eden/tokenizers/delimited_literal_tokenizer.rb +38 -0
  18. data/lib/eden/tokenizers/number_tokenizer.rb +68 -0
  19. data/lib/eden/tokenizers/operator_tokenizer.rb +211 -0
  20. data/lib/eden/tokenizers/regex_tokenizer.rb +37 -0
  21. data/lib/eden/tokenizers/string_tokenizer.rb +149 -0
  22. data/test/array_literal_tokenization_test.rb +43 -0
  23. data/test/basic_tokenization_test.rb +29 -0
  24. data/test/block_formatter_test.rb +47 -0
  25. data/test/class_var_token_test.rb +21 -0
  26. data/test/identifier_token_test.rb +140 -0
  27. data/test/indenter_test.rb +314 -0
  28. data/test/instance_var_token_test.rb +48 -0
  29. data/test/number_tokenization_test.rb +83 -0
  30. data/test/operator_tokenization_test.rb +180 -0
  31. data/test/regex_tokenization_test.rb +68 -0
  32. data/test/single_character_tokenization_test.rb +87 -0
  33. data/test/string_tokenization_test.rb +291 -0
  34. data/test/symbol_tokenization_test.rb +64 -0
  35. data/test/test_helper.rb +13 -0
  36. data/test/white_space_cleaner_test.rb +35 -0
  37. data/test/whitespace_token_test.rb +63 -0
  38. metadata +108 -0
data/lib/eden/tokenizers/delimited_literal_tokenizer.rb
@@ -0,0 +1,38 @@
+ module Eden
+   module DelimitedLiteralTokenizer
+     def tokenize_delimited_literal
+       advance # Pass the %
+
+       if( /[^A-Za-z0-9]/.match( cchar ) )
+         def_char = 'Q'
+         @state = :double_q_string
+       elsif( /[qQswWrx]/.match( cchar ) )
+         def_char = cchar
+         @state = infer_delimited_literal_type
+         advance
+       else
+         raise "Invalid delimiter character"
+       end
+
+       case def_char
+       when 'r', 'Q', 'W', 'x'
+         token = tokenize_expanded_string( cchar )
+       when 's', 'q', 'w'
+         token = tokenize_non_expanded_string( cchar )
+       end
+
+       return token
+     end
+
+     def infer_delimited_literal_type
+       case cchar
+       when 's' then :symbol
+       when 'w', 'W' then :array_literal
+       when 'q' then :single_q_string
+       when 'Q' then :double_q_string
+       when 'r' then :regex
+       when 'x' then :backquote_string
+       end
+     end
+   end
+ end
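Note: for readers unfamiliar with Ruby's %-literals, the forms that infer_delimited_literal_type classifies map onto the following plain-Ruby literals. This is a standalone illustration of the input syntax, not code from the gem; the values in the comments are what stock Ruby evaluates each literal to.

    # Each %-literal form and the token type the tokenizer assigns to it.
    sym      = %s{word}            # :symbol           -> :word
    words    = %w{a b c}           # :array_literal    -> ["a", "b", "c"]
    expanded = %W{a #{1 + 1}}      # :array_literal    -> ["a", "2"]
    single   = %q{no #{interp}}    # :single_q_string  -> 'no #{interp}' (no interpolation)
    double   = %Q{yes #{1 + 1}}    # :double_q_string  -> "yes 2"
    bare     = %{also expanded}    # :double_q_string  (non-alphanumeric delimiter defaults to Q)
    pattern  = %r{ab+c}i           # :regex            -> /ab+c/i
    command  = %x{echo hi}         # :backquote_string (runs a shell command)
    p [sym, words, expanded, single, double, bare, pattern, command]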
data/lib/eden/tokenizers/number_tokenizer.rb
@@ -0,0 +1,68 @@
+ module Eden
+   module NumberTokenizer
+
+     # Tokenize a non-decimal integer literal - e.g. 0x4F2E, 0b1101
+     def tokenize_integer_literal
+       @expr_state = :end
+       if peek_ahead_for(/[_oObBxX]/)
+         advance(2) # Pass 0x / 0b / 0O
+       else
+         advance # Pass the leading 0 of an octal literal
+       end
+       pattern = {:bin_literal => /[01]/,
+                  :oct_literal => /[0-7]/,
+                  :hex_literal => /[0-9a-fA-F]/}[@state]
+       advance until( pattern.match( cchar ).nil? )
+       capture_token( @state )
+     end
+
+     def tokenize_decimal_literal
+       @expr_state = :end
+       # Capture the sign
+       advance if cchar == '+' || cchar == '-'
+
+       # Handle a lone zero
+       if cchar == '0' && !peek_ahead_for(/[dD]/)
+         advance
+         return capture_token( :dec_literal )
+       end
+
+       # Handle 0d1234-style digits
+       advance(2) if cchar == '0' && peek_ahead_for(/[dD]/)
+
+       until( /[0-9.eE]/.match( cchar ).nil? )
+         case cchar
+         when '.'
+           return tokenize_float_literal
+         when 'e', 'E'
+           return tokenize_exponent_literal
+         when '0'..'9'
+           advance
+         else
+         end
+       end
+       capture_token( :dec_literal )
+     end
+
+     # Tokenize a literal with an exponent - e.g. 3.4E+22
+     def tokenize_exponent_literal
+       advance # Pass the e/E
+       advance if cchar == '+' or cchar == '-'
+       advance until( /[0-9]/.match( cchar ).nil? )
+       capture_token( :exp_literal )
+     end
+
+     # Tokenize a float literal - e.g. 2.0, 2.1101
+     def tokenize_float_literal
+       advance # Pass the .
+
+       until( /[0-9eE]/.match( cchar ).nil? )
+         if cchar == 'e' || cchar == 'E'
+           return tokenize_exponent_literal
+         end
+         advance
+       end
+       capture_token( :float_literal )
+     end
+   end
+ end
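Note: the numeric forms the four methods above distinguish correspond to ordinary Ruby integer and float literals. A minimal standalone illustration (the values in the comments are printed by stock Ruby, not by eden):

    p 0b1101    # => 13       binary                  (:bin_literal)
    p 0o777     # => 511      octal, also 0777        (:oct_literal)
    p 0x4F2E    # => 20270    hexadecimal             (:hex_literal)
    p 0d1234    # => 1234     explicit decimal prefix (:dec_literal)
    p 1234      # => 1234     plain decimal           (:dec_literal)
    p 2.1101    # => 2.1101   float                   (:float_literal)
    p 3.4E+22   # => 3.4e+22  float with exponent     (:exp_literal)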
data/lib/eden/tokenizers/operator_tokenizer.rb
@@ -0,0 +1,211 @@
+ module Eden
+   module OperatorTokenizer
+     def tokenize_equals_operators
+       advance
+       default_expr_state_transition!
+       case cchar
+       when '>'
+         advance and capture_token(:hash_rocket)
+       when "~"
+         advance and capture_token(:matches)
+       when '='
+         advance
+         if cchar == '='
+           advance and capture_token(:identity_equality)
+         else
+           capture_token(:equality)
+         end
+       else
+         capture_token(:equals)
+       end
+     end
+
+     def tokenize_bang_operators
+       @expr_state = :beg
+       advance
+       case cchar
+       when '='
+         advance and capture_token(:not_equals)
+       when '~'
+         advance and capture_token(:not_matches)
+       else
+         capture_token(:logical_not)
+       end
+     end
+
+     def tokenize_plus_operators
+       advance
+       case cchar
+       when '='
+         @expr_state = :beg
+         advance and capture_token(:plus_equals)
+       when '@'
+         @expr_state = :arg if [:fname, :dot].include?(@expr_state)
+         advance and capture_token(:plus_at)
+       else
+         @expr_state = :beg
+         capture_token(:plus)
+       end
+     end
+
+     def tokenize_minus_operators
+       advance
+       case cchar
+       when '='
+         @expr_state = :beg
+         advance and capture_token(:minus_equals)
+       when '@'
+         @expr_state = :arg if [:fname, :dot].include?(@expr_state)
+         advance and capture_token(:minus_at)
+       else
+         @expr_state = :beg
+         capture_token(:minus)
+       end
+     end
+
+     def tokenize_multiply_operators
+       advance
+       case cchar
+       when '*'
+         @expr_state = :beg
+         advance
+         if cchar == '='
+           advance and capture_token(:exponent_equals)
+         else
+           capture_token(:exponent)
+         end
+       when '='
+         @expr_state = :beg
+         advance and capture_token(:multiply_equals)
+       else
+         # TODO: Add logic to disambiguate between splat and multiply
+         default_expr_state_transition!
+         capture_token(:multiply)
+       end
+     end
+
+     # Tokenizes operators using the / character, after tokenize_potential_regex
+     # has decided that it's not the start of a regex.
+     def tokenize_divide_operators
+       advance
+       if cchar == '='
+         @expr_state = :beg
+         advance and capture_token(:divide_equals)
+       else
+         default_expr_state_transition!
+         capture_token(:divide)
+       end
+     end
+
+     def tokenize_lt_operators
+       advance
+       default_expr_state_transition!
+       case cchar
+       when '<'
+         advance
+
+         if ![:end, :dot, :end_arg, :class].include?(@expr_state) && cchar != ' '
+           token = tokenize_heredoc_delimiter
+           return token if token
+         end
+
+         if cchar == '='
+           @expr_state = :beg
+           advance and capture_token(:left_shift_equals)
+         else
+           capture_token(:left_shift)
+         end
+       when '='
+         advance
+         if cchar == '>'
+           advance and capture_token(:sort_operator)
+         else
+           capture_token(:lte)
+         end
+       else
+         capture_token(:lt)
+       end
+     end
+
+     def tokenize_gt_operators
+       advance
+       default_expr_state_transition!
+       case cchar
+       when '>'
+         advance
+         if cchar == '='
+           advance and capture_token(:right_shift_equals)
+         else
+           capture_token(:right_shift)
+         end
+       when '='
+         advance and capture_token(:gte)
+       else
+         capture_token(:gt)
+       end
+     end
+
+     def tokenize_pipe_operators
+       advance
+       case cchar
+       when '|'
+         advance
+         @expr_state = :beg
+         if cchar == '='
+           advance and capture_token(:logical_or_equals)
+         else
+           capture_token(:logical_or)
+         end
+       when '='
+         @expr_state = :beg
+         advance and capture_token(:bitwise_or_equals)
+       else
+         default_expr_state_transition!
+         capture_token(:bitwise_or)
+       end
+     end
+
+     def tokenize_ampersand_operators
+       advance
+       case cchar
+       when '&'
+         advance
+         @expr_state = :beg
+         if cchar == '='
+           advance and capture_token(:logical_and_equals)
+         else
+           capture_token(:logical_and)
+         end
+       when '='
+         @expr_state = :beg
+         advance and capture_token(:bitwise_and_equals)
+       else
+         default_expr_state_transition!
+         capture_token(:bitwise_and)
+       end
+     end
+
+     def tokenize_caret_operators
+       advance
+       if cchar == "="
+         @expr_state = :beg
+         advance and capture_token(:caret_equals)
+       else
+         default_expr_state_transition!
+         capture_token(:caret)
+       end
+     end
+
+     def tokenize_modulo_operators
+       advance
+       if cchar == "="
+         @expr_state = :beg
+         advance and capture_token(:modulo_equals)
+       else
+         default_expr_state_transition!
+         capture_token(:modulo)
+       end
+     end
+
+   end
+ end
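Note: the case branches above reduce to the operator-to-token mapping below. The constant is only a condensed reference derived from this file for readability; the gem itself does not define it.

    # Operator spellings handled by OperatorTokenizer and the tokens they capture.
    OPERATOR_TOKENS = {
      '='   => :equals,            '=>'  => :hash_rocket,
      '=~'  => :matches,           '=='  => :equality,
      '===' => :identity_equality, '!'   => :logical_not,
      '!='  => :not_equals,        '!~'  => :not_matches,
      '+'   => :plus,     '+=' => :plus_equals,  '+@' => :plus_at,
      '-'   => :minus,    '-=' => :minus_equals, '-@' => :minus_at,
      '*'   => :multiply, '*=' => :multiply_equals,
      '**'  => :exponent, '**=' => :exponent_equals,
      '/'   => :divide,   '/='  => :divide_equals,
      '<'   => :lt,  '<=' => :lte, '<=>' => :sort_operator,
      '<<'  => :left_shift,  '<<=' => :left_shift_equals,
      '>'   => :gt,  '>=' => :gte,
      '>>'  => :right_shift, '>>=' => :right_shift_equals,
      '|'   => :bitwise_or,  '|=' => :bitwise_or_equals,
      '||'  => :logical_or,  '||=' => :logical_or_equals,
      '&'   => :bitwise_and, '&=' => :bitwise_and_equals,
      '&&'  => :logical_and, '&&=' => :logical_and_equals,
      '^'   => :caret,  '^=' => :caret_equals,
      '%'   => :modulo, '%=' => :modulo_equals
    }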
data/lib/eden/tokenizers/regex_tokenizer.rb
@@ -0,0 +1,37 @@
+ module Eden
+   module RegexTokenizer
+     def tokenize_potential_regex
+
+       if @expr_state == :beg || @expr_state == :mid || @expr_state == :class
+         return tokenize_regex
+       end
+
+       if peek_ahead_for(/=/)
+         return tokenize_divide_operators
+       end
+
+       if (@expr_state == :arg || @expr_state == :cmd_arg) && @line.last_token_is_space?
+         return tokenize_regex
+       end
+
+       return tokenize_divide_operators
+     end
+
+     def tokenize_regex
+       advance # Consume the leading /
+       while true
+         if cchar == '/'
+           advance
+           # Capture the regex option
+           advance if cchar =~ /i|o|m|x|n|e|u|s/
+           return capture_token(:regex)
+         end
+         if cchar == nil # end of file
+           raise "Unclosed Regex found"
+         end
+         advance if cchar == '\\'
+         advance
+       end
+     end
+   end
+ end
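Note: tokenize_potential_regex exists because a leading '/' is ambiguous in Ruby. A small standalone example of the cases it separates (plain Ruby, not gem code):

    a = 10
    b = a / 2           # '/' after a complete expression -> :divide
    b = a /= 5          # '/=' spelling                   -> :divide_equals
    c = "abc" =~ /b+/   # '/' in expression position      -> starts a :regex
    p [b, c]            # => [2, 1]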
data/lib/eden/tokenizers/string_tokenizer.rb
@@ -0,0 +1,149 @@
+ module Eden
+   module StringTokenizer
+     def tokenize_single_quote_string
+       tokenize_non_expanded_string("'")
+     end
+
+     # If a block is given, it gets run after the final delimiter is detected. The
+     # primary purpose for this is to allow the capture of regex modifiers.
+     def tokenize_non_expanded_string( start_delimiter )
+       delimiter_depth = 0
+       matched_delimiter = is_matched_delimiter?( start_delimiter )
+       end_delimiter = find_matching_delimiter( start_delimiter )
+
+       advance # Pass the opening delimiter
+
+       until((cchar == end_delimiter && delimiter_depth == 0) || @i >= @length)
+
+         if matched_delimiter
+           delimiter_depth += 1 if cchar == start_delimiter
+           delimiter_depth -= 1 if cchar == end_delimiter
+         end
+
+         if cchar == '\\'
+           advance(2) # Pass the escaped character
+         else
+           advance
+         end
+       end
+       advance # Pass the closing delimiter
+
+       if @state == :regex
+         advance if ['i', 'm'].include?( cchar )
+       end
+
+       @expr_state = :end
+       capture_token( @state )
+     end
+
+     def tokenize_backquote_string
+       tokenize_expanded_string( '`' )
+     end
+
+     def tokenize_double_quote_string( in_string_already = false )
+       tokenize_expanded_string('"', in_string_already)
+     end
+
+     def tokenize_expanded_string( start_delimiter, in_string_already = false )
+       saved_state = @state
+       tokens = []
+       end_delimiter = find_matching_delimiter( start_delimiter )
+       advance unless in_string_already # Pass the opening delimiter
+       until( cchar == end_delimiter || @i >= @length )
+         if cchar == '\\'
+           advance(2) # Pass the escaped character
+         elsif cchar == '#'
+           advance # include the # character in the string
+           case cchar
+           when '{'
+             @interpolating.push( @state )
+             @delimiters.push( start_delimiter )
+             tokens << Token.new( @state, thunk )
+             reset_thunk!
+             @state = :lcurly
+             tokens << tokenize_single_character
+             return tokens
+           when '@'
+             tokens << capture_token( @state )
+             if peek_ahead_for('@')
+               tokens << tokenize_classvar
+             else
+               tokens << tokenize_instancevar
+             end
+             @state = saved_state
+           when '$'
+             tokens << capture_token( @state )
+             tokens << tokenize_globalvar
+             @state = saved_state
+           end
+         else
+           advance
+         end
+       end
+       advance # Pass the closing delimiter
+       if @state == :regex
+         advance if ['i', 'm'].include?( cchar )
+       end
+       @expr_state = :end
+       tokens << capture_token( @state )
+       return tokens
+     end
+
+     # Called from tokenize_lt_operators when it identifies that
+     # << is a heredoc delimiter. Expects that '<<' will already
+     # be included in the current thunk.
+     def tokenize_heredoc_delimiter
+       offset = 2
+       if cchar == '-'
+         advance
+         offset = 3
+       end
+
+       if cchar =~ /[A-Za-z_]/
+         advance
+         advance until /[A-Za-z0-9_]/.match( cchar ).nil?
+       elsif /['"`]/.match(cchar)
+         advance_through_quoted_delimiter(cchar)
+       else
+         return nil
+       end
+       @heredoc_delimiter = thunk[offset..-1]
+       capture_token( :heredoc_delimiter )
+     end
+
+     def tokenize_heredoc_body
+       if @heredoc_delimiter
+         advance until (@sf.source[@i, @heredoc_delimiter.length ] == @heredoc_delimiter &&
+                        @sf.source[@i, @heredoc_delimiter.length+1 ] == @heredoc_delimiter + "\n" ||
+                        @sf.source[@i, @heredoc_delimiter.length+2 ] == @heredoc_delimiter + "\r\n") ||
+                        @i >= @length
+       end
+       @heredoc_delimiter.length.times { advance }
+       @heredoc_delimiter = nil
+       capture_token( :heredoc_body )
+     end
+
+     private
+     # Returns the matching delimiter for the 4 "paired" delimiters
+     def find_matching_delimiter( start_delimiter )
+       case start_delimiter
+       when '{' then '}'
+       when '(' then ')'
+       when '[' then ']'
+       when '<' then '>'
+       else
+         start_delimiter
+       end
+     end
+
+     def is_matched_delimiter?( cchar )
+       !! /[{\(\[<]/.match(cchar)
+     end
+
+     def advance_through_quoted_delimiter( delimiter )
+       advance
+       advance until cchar == delimiter
+       advance
+     end
+   end
+ end
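Note: the expanded-string handling above splits a string into multiple tokens whenever it meets #{...}, #@ivar or #$gvar, and the heredoc methods cover plain, dashed and quoted delimiters. A standalone sample of the input forms involved (plain Ruby, not gem code):

    @name  = "world"
    $greet = "hi"

    puts "1 + 1 = #{1 + 1}"   # #{ } interpolation: the string so far is emitted, then :lcurly
    puts "hello #@name"       # #@ivar shorthand -> instance-variable token inside the string
    puts "#$greet there"      # #$gvar shorthand -> global-variable token inside the string

    body = <<-DOC
      a dashed heredoc: the closing delimiter may be indented
    DOC
    puts body

    raw = <<-'DOC'
      no #{interpolation} here; quoting the delimiter disables expansion
    DOC
    puts raw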