eden 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. data/CHANGELOG +4 -0
  2. data/LICENSE +20 -0
  3. data/README.md +48 -0
  4. data/Rakefile +10 -0
  5. data/bin/eden +132 -0
  6. data/lib/eden.rb +10 -0
  7. data/lib/eden/defaults.rb +26 -0
  8. data/lib/eden/formatter.rb +25 -0
  9. data/lib/eden/formatters/block_formatter.rb +45 -0
  10. data/lib/eden/formatters/indenter.rb +91 -0
  11. data/lib/eden/formatters/white_space_cleaner.rb +14 -0
  12. data/lib/eden/line.rb +65 -0
  13. data/lib/eden/source_file.rb +32 -0
  14. data/lib/eden/token.rb +62 -0
  15. data/lib/eden/tokenizer.rb +259 -0
  16. data/lib/eden/tokenizers/basic_tokenizer.rb +167 -0
  17. data/lib/eden/tokenizers/delimited_literal_tokenizer.rb +38 -0
  18. data/lib/eden/tokenizers/number_tokenizer.rb +68 -0
  19. data/lib/eden/tokenizers/operator_tokenizer.rb +211 -0
  20. data/lib/eden/tokenizers/regex_tokenizer.rb +37 -0
  21. data/lib/eden/tokenizers/string_tokenizer.rb +149 -0
  22. data/test/array_literal_tokenization_test.rb +43 -0
  23. data/test/basic_tokenization_test.rb +29 -0
  24. data/test/block_formatter_test.rb +47 -0
  25. data/test/class_var_token_test.rb +21 -0
  26. data/test/identifier_token_test.rb +140 -0
  27. data/test/indenter_test.rb +314 -0
  28. data/test/instance_var_token_test.rb +48 -0
  29. data/test/number_tokenization_test.rb +83 -0
  30. data/test/operator_tokenization_test.rb +180 -0
  31. data/test/regex_tokenization_test.rb +68 -0
  32. data/test/single_character_tokenization_test.rb +87 -0
  33. data/test/string_tokenization_test.rb +291 -0
  34. data/test/symbol_tokenization_test.rb +64 -0
  35. data/test/test_helper.rb +13 -0
  36. data/test/white_space_cleaner_test.rb +35 -0
  37. data/test/whitespace_token_test.rb +63 -0
  38. metadata +108 -0
@@ -0,0 +1,32 @@
module Eden
  # A Ruby source file on disk together with its tokenized representation.
  #
  # Typical life-cycle: +load!+ reads the text, +tokenize!+ fills +lines+ via
  # Eden::Tokenizer, and +rewrite!+ writes the (possibly reformatted) lines
  # back over the original file.
  class SourceFile
    # source - raw text of the file (set by load!)
    # lines  - Array of line objects appended by the tokenizer
    attr_accessor :source, :lines

    # file_name - path of the file to read and, later, to overwrite.
    def initialize( file_name )
      @file_name = file_name
      @lines = []
    end

    # Reads the entire file into +source+ and returns the text.
    #
    # The block form of File.open is used so the handle is closed as soon as
    # the read finishes (the handle used to stay open until garbage collected).
    def load!
      @source = File.open( @file_name, "r" ) { |file| file.read }
    end

    # Runs the tokenizer over +source+; the tokenizer appends to +lines+.
    def tokenize!
      tokenizer = Tokenizer.new( self )
      tokenizer.tokenize!
    end

    # Yields every line in order. Without a block an Enumerator is returned.
    def each_line
      return to_enum( :each_line ) unless block_given?
      @lines.each { |l| yield l }
    end

    # Overwrites the file with the concatenation of each line's
    # +joined_tokens+.
    def rewrite!
      File.open( @file_name, 'w' ) do |f|
        each_line do |l|
          f.write l.joined_tokens
        end
      end
    end
  end
end
data/lib/eden/token.rb ADDED
@@ -0,0 +1,62 @@
module Eden
  # A single lexical token: a +type+ symbol plus the exact source text
  # (+content+) it was cut from.
  class Token
    attr_accessor :type, :content

    # Token types treated as binary operators. Frozen so the shared list
    # cannot be mutated by accident.
    BINARY_OPERATORS = [:matches, :identity_equality, :equality,
                        :not_equals, :not_matches, :plus_equals, :plus,
                        :plus_at, :minus_equals, :minus_at, :minus,
                        :exponent_equals, :exponent, :multiply_equals, :multiply,
                        :divide, :divide_equals,
                        :left_shift_equals, :left_shift, :lte, :lt,
                        :right_shift_equals, :right_shift, :gte, :gt,
                        :sort_operator,
                        :logical_or_equals, :logical_or,
                        :bitwise_or_equals, :bitwise_or,
                        :logical_and_equals, :logical_and,
                        :bitwise_and_equals, :bitwise_and].freeze

    # Token types treated as unary operators.
    UNARY_OPERATORS = [:plus, :minus, :multiply, :logical_not, :tilde].freeze

    # Token types treated as keywords.
    #
    # NOTE(review): BasicTokenizer#translate_keyword_tokens assigns the
    # lower-cased name as the type (:__line__, :__file__, :__encoding__) and
    # uses :begin_global / :end_global for BEGIN / END, so the upper-case
    # entries below never match a token produced by it - confirm whether
    # keyword? is meant to cover those.
    KEYWORDS = [:__LINE__, :__ENCODING__, :__FILE__, :BEGIN,
                :END, :alias, :and, :begin, :break, :case,
                :class, :def, :defined?, :do, :else, :elsif,
                :end, :ensure, :false, :for, :if, :in,
                :module, :next, :nil, :not, :or, :redo,
                :rescue, :retry, :return, :self, :super,
                :then, :true, :undef, :unless, :until,
                :when, :while, :yield].freeze

    # type    - Symbol naming the kind of token
    # content - source text of the token (may be nil)
    def initialize( type, content )
      @type = type
      @content = content
    end

    # Short debugging form: just the type for content-less and newline
    # tokens, otherwise `type- "content"`. Interpolation is used so content
    # that is not a String does not raise.
    def inspect
      if @content.nil? || @content == "\n"
        @type.to_s
      else
        "#{@type}- \"#{@content}\""
      end
    end

    # True when the type is listed as a binary or a unary operator.
    def operator?
      binary_operator? || unary_operator?
    end

    def unary_operator?
      UNARY_OPERATORS.include?( type )
    end

    def binary_operator?
      BINARY_OPERATORS.include?( type )
    end

    def keyword?
      KEYWORDS.include?( type )
    end

    # True when this token's type equals +token_type+.
    def is?( token_type )
      @type == token_type
    end
  end
end
@@ -0,0 +1,259 @@
1
+ require 'eden/tokenizers/basic_tokenizer'
2
+ require 'eden/tokenizers/delimited_literal_tokenizer'
3
+ require 'eden/tokenizers/number_tokenizer'
4
+ require 'eden/tokenizers/operator_tokenizer'
5
+ require 'eden/tokenizers/regex_tokenizer'
6
+ require 'eden/tokenizers/string_tokenizer'
7
+
8
+
module Eden
  # Character-driven lexer that cuts a SourceFile's text into Token objects
  # and appends them, grouped per Line, to the source file's +lines+.
  #
  # Two pieces of state steer it:
  #   @state      - which kind of token starts at the current character
  #                 (chosen by default_state_transitions!)
  #   @expr_state - expression context, mirroring lex_state in Ruby's parse.c
  #
  # The individual tokenize_* methods come from the included modules.
  class Tokenizer
    include BasicTokenizer
    include DelimitedLiteralTokenizer
    include NumberTokenizer
    include OperatorTokenizer
    include RegexTokenizer
    include StringTokenizer

    # source_file - object exposing +source+ (the text) and +lines+ (an
    #               Array that receives the tokenized lines).
    def initialize( source_file )
      @sf = source_file
      @interpolating = [] # Stack for state when interpolating into strings
      @delimiters = []    # Stack for delimiters which we need to keep when interpolating
    end

    # Tokenizes the whole source buffer, appending one Line per source line
    # to @sf.lines. Returns @sf.lines.
    #
    # NOTE(review): the dispatch below has no fallback branch. When the
    # current character is one default_state_transitions! does not recognise
    # (for example "\r"), @state keeps its previous value and the loop may
    # stop making progress - verify with CRLF input.
    def tokenize!
      @i = 0 # Current position in the source buffer
      @ln = 1 # Line Number
      @cp = 0 # Current Character in the line
      @thunk_st = 0
      @thunk_end = -1 # Start/end of the current token
      @current_line = Line.new( @ln )
      @length = @sf.source.length
      @expr_state = :beg # Same as lex_state variable in parse.c in Ruby source
      default_state_transitions!

      until( @i >= @length )
        case( @state )
        when :newline
          advance
          @expr_state = :beg
          @current_line.tokens << capture_token( :newline )
          # Some tokenizers return arrays of tokens; flatten before storing.
          @current_line.tokens.flatten!
          @sf.lines << @current_line
          @ln += 1
          @current_line = Line.new( @ln )

          # @heredoc_delimiter is set outside this class; when present the
          # heredoc body starts on the line after the newline just consumed.
          if @heredoc_delimiter
            @current_line.tokens << tokenize_heredoc_body
          end
        when :whitespace
          @current_line.tokens << tokenize_whitespace
        when :identifier # keyword / name / etc
          @current_line.tokens << tokenize_identifier
        when :instancevar
          @current_line.tokens << tokenize_instancevar
        when :classvar
          @current_line.tokens << tokenize_classvar
        when :globalvar
          @current_line.tokens << tokenize_globalvar
        when :delimited_literal
          @current_line.tokens << tokenize_delimited_literal
        when :lparen, :lsquare, :lcurly
          @expr_state = :beg
          @current_line.tokens << tokenize_single_character
        when :comma
          @expr_state = :beg
          @current_line.tokens << tokenize_single_character
        when :rsquare, :rparen
          # (:lcurly used to be listed here as well, but it is always taken
          # by the opening-bracket branch above, so it was unreachable.)
          @expr_state = :end
          @current_line.tokens << tokenize_single_character
        when :rcurly
          @current_line.tokens << tokenize_rcurly
        when :tilde
          default_expr_state_transition!
          @current_line.tokens << tokenize_single_character
        when :at, :semicolon, :backslash
          @current_line.tokens << tokenize_single_character
        when :question_mark
          @current_line.tokens << tokenize_question_mark
        when :colon
          @current_line.tokens << tokenize_colon
        when :period
          @current_line.tokens << tokenize_period
        when :plus
          @current_line.tokens << tokenize_plus_operators
        when :minus
          @current_line.tokens << tokenize_minus_operators
        when :equals
          @current_line.tokens << tokenize_equals_operators
        when :multiply
          @current_line.tokens << tokenize_multiply_operators
        when :divide
          @current_line.tokens << tokenize_potential_regex
        when :lt
          @current_line.tokens << tokenize_lt_operators
        when :gt
          @current_line.tokens << tokenize_gt_operators
        when :pipe
          @current_line.tokens << tokenize_pipe_operators
        when :ampersand
          @current_line.tokens << tokenize_ampersand_operators
        when :modulo
          @current_line.tokens << tokenize_modulo_operators
        when :caret
          @current_line.tokens << tokenize_caret_operators
        when :bang
          @current_line.tokens << tokenize_bang_operators
        when :comment
          @current_line.tokens << tokenize_comment
        when :single_q_string
          @current_line.tokens << tokenize_single_quote_string
        when :double_q_string
          @current_line.tokens << tokenize_double_quote_string
        when :backquote_string
          @current_line.tokens << tokenize_backquote_string
        when :symbol
          @current_line.tokens << tokenize_symbol
        when :dec_literal
          @current_line.tokens << tokenize_decimal_literal
        when :bin_literal, :oct_literal, :hex_literal
          @current_line.tokens << tokenize_integer_literal
        end
      end
      # NOTE(review): Line#flatten! is defined outside this file; this relies
      # on it returning the line itself - confirm.
      @sf.lines << @current_line.flatten!
    end

    private

    # Text of the token currently being accumulated. Both bounds are clamped
    # to the last valid index of the source buffer.
    def thunk
      @sf.source[[@thunk_st, @length-1].min..[@thunk_end, @length-1].min]
    end

    # Chooses @state from the current character (looking one character ahead
    # where a single character is ambiguous). Characters without a branch
    # leave @state unchanged.
    def default_state_transitions!
      case( cchar )
      when nil then @state = :eof
      when ' ' then @state = :whitespace
      when "\t" then @state = :whitespace
      when "\n" then @state = :newline
      when '"' then @state = :double_q_string
      when '\'' then @state = :single_q_string
      when '`' then @state = :backquote_string
      when '$' then @state = :globalvar
      when '@'
        if peek_ahead_for( /@/ )
          @state = :classvar
        elsif peek_ahead_for( /[A-Za-z_]/ )
          @state = :instancevar
        else
          @state = :at
        end
      when '/' then @state = :divide
      when '#' then @state = :comment
      when ',' then @state = :comma
      when '.' then @state = :period
      when '&' then @state = :ampersand
      when '!' then @state = :bang
      when '~' then @state = :tilde
      when '^' then @state = :caret
      when '|' then @state = :pipe
      when '>' then @state = :gt
      when '<' then @state = :lt
      when '?' then @state = :question_mark
      when ';' then @state = :semicolon
      when '=' then @state = :equals
      when '\\' then @state = :backslash
      when '%'
        # "%" opens a delimited literal only where an expression may begin
        # and it is not directly followed by a space.
        if @expr_state == :beg && !peek_ahead_for(/ /)
          @state = :delimited_literal
        else
          @state = :modulo
        end
      when '*' then @state = :multiply
      when '(' then @state = :lparen
      when ')' then @state = :rparen
      when '{'
        # A plain "{" pushes a nil marker so the matching "}" is not
        # mistaken for the end of a string interpolation.
        # NOTE(review): tokenize_rcurly pops @delimiters as well, but nothing
        # is pushed onto @delimiters here - confirm the two stacks stay in
        # step for braces nested inside an interpolation.
        @interpolating << nil
        @state = :lcurly
      when '}' then @state = :rcurly
      when '[' then @state = :lsquare
      when ']' then @state = :rsquare
      when ':'
        if peek_ahead_for(/[: ]/)
          @state = :colon
        else
          @state = :symbol
        end
      when 'a'..'z', 'A'..'Z', '_'
        @state = :identifier
      when '0'
        @expr_state = :end
        if peek_ahead_for(/[xX]/)
          @state = :hex_literal
        elsif peek_ahead_for(/[bB]/)
          @state = :bin_literal
        elsif peek_ahead_for(/[_oO0-7]/)
          @state = :oct_literal
        elsif peek_ahead_for(/[89]/)
          puts "Illegal Octal Digit"
          # Still pick a state: leaving the previous one in place made the
          # main loop re-dispatch to the wrong tokenizer, which could repeat
          # forever without consuming input.
          @state = :dec_literal
        else
          # Covers the explicit decimal prefix (0d / 0D) and a bare zero.
          @state = :dec_literal
        end
      when '1'..'9'
        @state = :dec_literal
      when '+', '-'
        if peek_ahead_for( /[0-9]/ )
          @state = :dec_literal
        else
          @state = ( cchar == '+' ? :plus : :minus )
        end
      end
    end

    # Manages the expression state to match the state machine in parse.c
    def default_expr_state_transition!
      if @expr_state == :fname || @expr_state == :dot
        @expr_state = :arg
      else
        @expr_state = :beg
      end
    end

    # Helper functions for expression state, from parse.c:9334
    def is_arg
      [:arg, :cmd_arg].include?( @expr_state )
    end

    def is_beg
      [:beg, :mid, :class].include?( @expr_state )
    end

    # Returns the current character as a one-character String ("" exactly at
    # the end of the buffer, nil beyond it).
    def cchar
      @sf.source[@i..@i]
    end

    # Advance the current position in the source file, extending the current
    # thunk by the same amount.
    def advance( num=1 )
      @thunk_end += num; @i += num
    end

    # Resets the thunk to start at the current character
    def reset_thunk!
      @thunk_st = @i
      @thunk_end = @i - 1
    end

    # True when the character after the current one matches +regex+; falsy
    # (nil or false) otherwise.
    def peek_ahead_for( regex )
      @sf.source[@i+1..@i+1] && !!regex.match( @sf.source[@i+1..@i+1] )
    end

    # Wraps the current thunk in a Token of the given type, starts a fresh
    # thunk and selects the state for the next character.
    def capture_token( type )
      token = Token.new( type, thunk )
      reset_thunk!
      default_state_transitions!
      return token
    end
  end
end
@@ -0,0 +1,167 @@
module Eden
  # Tokenizing routines for the simple token kinds: single characters,
  # periods/ranges, colons, question marks, identifiers, whitespace,
  # comments, variables and symbols.
  #
  # Mixed into Eden::Tokenizer; every method relies on the host's scanning
  # state (@i, @state, @expr_state, @current_line, @sf ...) and helpers
  # (cchar, advance, thunk, reset_thunk!, capture_token, is_beg, is_arg,
  # default_state_transitions!).
  module BasicTokenizer
    # Reserved words as they appear in source text.
    # Reserved words are defined in S.8.5.1 of the Ruby spec.
    KEYWORD_NAMES = ["__LINE__", "__ENCODING__", "__FILE__", "BEGIN",
                     "END", "alias", "and", "begin", "break", "case",
                     "class", "def", "defined?", "do", "else", "elsif",
                     "end", "ensure", "false", "for", "if", "in",
                     "module", "next", "nil", "not", "or", "redo",
                     "rescue", "retry", "return", "self", "super",
                     "then", "true", "undef", "unless", "until",
                     "when", "while", "yield"].freeze

    # Operator method names that may follow ":" to form a symbol.
    # Anchored with \A so only the text at the current position is examined,
    # and ordered so longer operators win over their prefixes
    # ("[]=" before "[]", "===" before "==", "+@" before "+").
    SYMBOL_OPERATOR = /\A(<=>|===|\[\]=|==|=~|!~|>>|>=|<<|<=|\*\*|\+@|-@|\[\]|\^|&|\||>|<|\+|-|\/|%|\*|~)/

    # Captures the current character as a token whose type is the current
    # @state.
    def tokenize_single_character
      @thunk_end += 1
      token = Token.new(@state, thunk)
      @i += 1
      reset_thunk!
      default_state_transitions!
      return token
    end

    # Tokenizes ".", ".." and "...".
    #
    # NOTE(review): ".." yields :range_exc and "..." yields :range_inc, which
    # is the reverse of Ruby's semantics (".." includes the end value). The
    # names are kept because other code may match on them.
    def tokenize_period
      advance
      if cchar == '.'
        advance
        @expr_state = :beg
        if cchar == '.'
          advance
          return capture_token( :range_inc )
        end
        capture_token( :range_exc )
      else
        @expr_state = :dot
        capture_token( :period )
      end
    end

    # Tokenizes "}". Returns an Array: the brace token and, when the brace
    # closes a string interpolation, the remainder of the interrupted string.
    def tokenize_rcurly
      @thunk_end += 1
      old_state = @interpolating.pop
      old_start_delimiter = @delimiters.pop
      tokens = [Token.new(@state, thunk)]
      @i += 1
      reset_thunk!
      if old_state
        # Resume the string that was being scanned before the "#{".
        @state = old_state
        tokens << tokenize_expanded_string( old_start_delimiter, true)
      end
      default_state_transitions!
      return tokens
    end

    # tokenizes operators beginning with a colon
    def tokenize_colon
      advance
      if cchar == ':'
        advance
        # This condition used to read @line, which is never assigned, so the
        # is_arg case raised NoMethodError on nil. The preceding token is now
        # looked up on the line being built.
        if is_beg || (is_arg && previous_token_is_whitespace?)
          @expr_state = :beg
        else
          @expr_state = :dot
        end
        return capture_token( :scope_res )
      else
        @expr_state = :beg
        return capture_token(:colon)
      end
    end

    # tokenizes question mark / character literals
    def tokenize_question_mark
      advance
      # After a complete expression "?" is the ternary operator.
      if @expr_state == :end || @expr_state == :endarg
        @expr_state = :beg
        return capture_token(:question_mark)
      end

      # Otherwise everything up to the next whitespace is taken as a
      # character literal.
      if (cchar != ' ' && cchar != "\t") && @i < @length
        advance until cchar == ' ' || cchar == "\t" ||
                      cchar == "\r" || cchar == "\n" || @i > @length
        return capture_token(:character_literal)
      end

      capture_token(:question_mark)
    end

    # Tokenizes a run of [A-Za-z0-9_]; keywords get their own token type via
    # translate_keyword_tokens.
    def tokenize_identifier
      @expr_state = :end
      advance until( /[A-Za-z0-9_]/.match( cchar ).nil? )
      translate_keyword_tokens(capture_token( @state ))
    end

    # Tokenizes a run of spaces and tabs.
    def tokenize_whitespace
      advance until( cchar != ' ' && cchar != "\t" )
      capture_token( :whitespace )
    end

    # Tokenizes from "#" up to, but not including, the end of the line.
    def tokenize_comment
      advance until( cchar == "\n" || cchar.nil?)
      capture_token( :comment )
    end

    # Tokenizes "@name". Upper-case letters are accepted so names such as
    # @fooBar are no longer split after the last lower-case character.
    def tokenize_instancevar
      @expr_state = :end
      advance # Pass the @ symbol
      advance until( /[A-Za-z0-9_]/.match( cchar ).nil? )
      capture_token( :instancevar )
    end

    # Tokenizes "@@name"; accepts the same name characters as
    # tokenize_instancevar.
    def tokenize_classvar
      @expr_state = :end
      advance(2) # Pass the @@ symbol
      advance until( /[A-Za-z0-9_]/.match( cchar ).nil? )
      capture_token( :classvar )
    end

    # Tokenizes a symbol literal: quoted (:"..." / :'...'), operator
    # (:+, :[]=, :<=> ...) or plain (:name, :name?, :name=).
    def tokenize_symbol
      @expr_state = :end
      advance # Pass the :
      case cchar
      when '"'  then return tokenize_double_quote_string
      when '\'' then return tokenize_single_quote_string
      end
      operator = SYMBOL_OPERATOR.match( @sf.source[@i..-1] )
      if operator
        advance( operator[1].length )
        return capture_token(:symbol)
      end
      advance while( /[A-Za-z0-9_!=\?]/.match(cchar) )
      capture_token( :symbol )
    end

    # Tokenizes "$x" special variables and "$name" globals.
    # Raises RuntimeError when "$" is not followed by a valid name.
    def tokenize_globalvar
      @expr_state = :end
      advance # Pass the $
      if /[!@_\.&~0-9=\/\\\*$\?:'`]/.match( cchar )
        advance
        capture_token( :globalvar )
      elsif /[A-Za-z0-9_]/.match( cchar )
        advance while /[A-Za-z0-9_]/.match( cchar )
        capture_token( :globalvar )
      else
        raise "Invalid Global Variable Name"
      end
    end

    # Takes an identifier token, and transforms its type to
    # match Ruby keywords where the identifier is actually a keyword.
    # The new type is the lower-cased keyword as a Symbol, except BEGIN and
    # END, which become :begin_global and :end_global.
    #
    # NOTE(review): "defined?" can never match here because
    # tokenize_identifier stops before the "?".
    def translate_keyword_tokens( token )
      if KEYWORD_NAMES.include?( token.content )
        token.type = token.content.downcase.to_sym
        # Change the state if we match a keyword
        @expr_state = :beg
      end

      # A couple of exceptions
      if token.content == "BEGIN"
        token.type = :begin_global
        @expr_state = :beg
      elsif token.content == "END"
        token.type = :end_global
        @expr_state = :beg
      end

      token
    end

    private

    # True when the most recent token on the line being built is whitespace.
    # The token list is flattened first because some tokenizers contribute
    # arrays of tokens.
    def previous_token_is_whitespace?
      previous = @current_line.tokens.flatten.last
      previous.is_a?( Token ) && previous.is?( :whitespace )
    end
  end
end