rux 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/rux/lexer.rb ADDED
@@ -0,0 +1,64 @@
1
module Rux
  # Drives a stack of sub-lexers and exposes their combined output as a single
  # token stream.
  #
  # Tokens have the shape [type, [text, range]]; the range is expected to
  # respond to #begin_pos and #end_pos. Sub-lexers must respond to #advance
  # (raising StopIteration when exhausted), #reset_to(pos) and
  # #next_lexer(pos).
  class Lexer
    class EOFError < StandardError; end
    class TransitionError < StandardError; end

    attr_reader :source_buffer

    # source_buffer - the buffer handed to the initial RubyLexer, which starts
    #                 lexing at position 0.
    def initialize(source_buffer)
      @source_buffer = source_buffer
      @stack = [RubyLexer.new(source_buffer, 0)]
      @generator = to_enum(:each_token)
    end

    # Returns the next token. Once the underlying enumerator is exhausted,
    # every call returns the position-less end-of-input token.
    def advance
      @generator.next
    rescue StopIteration
      [nil, ['$eof']]
    end

    private

    # Yields every real token produced by the lexer stack, tracking in @p the
    # position at which an outer lexer should resume once an inner one is done.
    def each_token
      @p = 0

      # NOTE: deliberately not Kernel#loop, which would silently rescue
      # StopIteration.
      while true
        begin
          token = current.advance
        rescue StopIteration
          # The active lexer has run its course. It should already have
          # yielded a :tRESET token recording where the lexer beneath it in
          # the stack has to pick up again.
          @stack.pop
          return if @stack.empty? # nothing left to lex with, so we're done
          current.reset_to(@p)
          next
        end

        kind, (_text, range) = token
        return unless range

        case kind
        when nil, false
          # A typeless token is the signal to hand over to a nested lexer.
          @stack.push(current.next_lexer(range.begin_pos))
        when :tRESET
          @p = range.begin_pos
        when :tSKIP
          # placeholder token, nothing to emit
        else
          yield token

          @p = range.end_pos
        end
      end
    end

    # The lexer currently producing tokens (top of the stack), or nil.
    def current
      @stack.last
    end
  end
end
data/lib/rux/parser.rb ADDED
@@ -0,0 +1,244 @@
1
+ require 'parser'
2
+
3
module Rux
  # Recursive-descent parser that turns the token stream of a Rux::Lexer into
  # AST nodes (ListNode, RubyNode, TagNode, StringNode, TextNode).
  #
  # Tokens have the shape [type, [text, position]]. The end-of-input token has
  # a nil type and carries no position.
  class Parser
    class UnexpectedTokenError < StandardError; end
    class TagMismatchError < StandardError; end

    class << self
      # Reads the file at +path+ into a source buffer and parses it.
      def parse_file(path)
        buffer = ::Parser::Source::Buffer.new(path).read
        lexer = ::Rux::Lexer.new(buffer)
        new(lexer).parse
      end

      # Parses the rux source code in +str+.
      def parse(str)
        buffer = ::Parser::Source::Buffer.new('(source)', source: str)
        lexer = ::Rux::Lexer.new(buffer)
        new(lexer).parse
      end
    end

    # lexer - must respond to #advance (next token) and #source_buffer (an
    #         object whose #source is the full source string).
    def initialize(lexer)
      @lexer = lexer
      @stack = []
      @current = get_next
    end

    # Parses a run of ruby code and rux tags until end of input, or until the
    # curly count (which starts at 1) drops to zero.
    #
    # Returns an AST::ListNode. Raises UnexpectedTokenError when the current
    # token is neither ruby code nor the start of a tag.
    def parse
      curlies = 1
      children = []

      loop do
        type = type_of(current)
        break unless type

        case type
        when :tLCURLY, :tLBRACE, :tRUX_LITERAL_RUBY_CODE_START
          curlies += 1
        when :tRCURLY, :tRBRACE, :tRUX_LITERAL_RUBY_CODE_END
          curlies -= 1
        end

        break if curlies == 0

        if rb = ruby
          children << rb
        elsif type_of(current) == :tRUX_TAG_OPEN_START
          children << tag
        else
          raise UnexpectedTokenError,
            'expected ruby code or the start of a rux tag but found '\
            "#{type_of(current)} instead"
        end
      end

      AST::ListNode.new(children)
    end

    private

    # Consumes tokens up to end of input or the next token type listed in
    # RuxLexer.state_table, and wraps the covered source text in a RubyNode.
    #
    # Returns nil when nothing was consumed, including when already at end of
    # input.
    def ruby
      # The end-of-input token has no position, so asking it for one would
      # blow up with NoMethodError. Returning nil lets the caller report the
      # problem as a regular UnexpectedTokenError instead.
      return unless type_of(current)

      ruby_start = pos_of(current).begin_pos

      loop do
        type = type_of(current)
        break if type.nil? || RuxLexer.state_table.include?(type)

        consume(type)
      end

      # Ran off the end of the input: everything from ruby_start on is ruby.
      unless type_of(current)
        return AST::RubyNode.new(
          @lexer.source_buffer.source[ruby_start..-1]
        )
      end

      if pos_of(current).begin_pos != ruby_start
        AST::RubyNode.new(
          @lexer.source_buffer.source[ruby_start...(pos_of(current).end_pos - 1)]
        )
      end
    end

    # Parses one tag, either self-closing or with children and a closing tag.
    #
    # Returns an AST::TagNode. Raises TagMismatchError when the closing tag
    # name differs from the opening one.
    def tag
      consume(:tRUX_TAG_OPEN_START)
      tag_name = text_of(current)
      tag_pos = pos_of(current)
      consume(:tRUX_TAG_OPEN, :tRUX_TAG_SELF_CLOSING)
      maybe_consume(:tRUX_ATTRIBUTE_SPACES)
      attrs = attributes
      maybe_consume(:tRUX_ATTRIBUTE_SPACES)
      maybe_consume(:tRUX_TAG_OPEN_END)
      tag_node = AST::TagNode.new(tag_name, attrs)

      if is?(:tRUX_TAG_SELF_CLOSING_END)
        consume(:tRUX_TAG_SELF_CLOSING_END)
        return tag_node
      end

      @stack.push(tag_name)

      until is?(:tRUX_TAG_CLOSE_START)
        if is?(:tRUX_LITERAL, :tRUX_LITERAL_RUBY_CODE_START)
          lit = literal
          tag_node.children << lit if lit
        else
          # Anything else has to be a nested tag; #tag raises otherwise.
          tag_node.children << tag
        end
      end

      consume(:tRUX_TAG_CLOSE_START)

      closing_tag_name = text_of(current)

      if @stack.last != closing_tag_name
        closing_tag_pos = pos_of(current)

        raise TagMismatchError, "closing tag '#{closing_tag_name}' on line "\
          "#{closing_tag_pos.line} did not match opening tag '#{tag_name}' "\
          "on line #{tag_pos.line}"
      end

      @stack.pop

      consume(:tRUX_TAG_CLOSE)
      consume(:tRUX_TAG_CLOSE_END)

      tag_node
    end

    # Returns a Hash of attribute name => value node for the current tag.
    def attributes
      {}.tap do |attrs|
        while is?(:tRUX_ATTRIBUTE_NAME)
          key, value = attribute
          attrs[key] = value

          maybe_consume(:tRUX_ATTRIBUTE_SPACES)
        end
      end
    end

    # Parses a single attribute and returns a [name, value_node] pair.
    def attribute
      maybe_consume(:tRUX_ATTRIBUTE_SPACES)
      attr_name = text_of(current)
      consume(:tRUX_ATTRIBUTE_NAME)
      maybe_consume(:tRUX_ATTRIBUTE_EQUALS_SPACES)

      attr_value = if maybe_consume(:tRUX_ATTRIBUTE_EQUALS)
        maybe_consume(:tRUX_ATTRIBUTE_VALUE_SPACES)
        attribute_value
      else
        # if no equals sign, assume boolean attribute
        AST::StringNode.new("\"true\"")
      end

      [attr_name, attr_value]
    end

    # Parses an attribute value: either embedded ruby code or a plain value
    # token wrapped in a StringNode.
    def attribute_value
      if is?(:tRUX_ATTRIBUTE_VALUE_RUBY_CODE_START)
        attr_ruby_code
      else
        AST::StringNode.new(text_of(current)).tap do
          consume(:tRUX_ATTRIBUTE_VALUE)
        end
      end
    end

    # Parses ruby code used as an attribute value, delimiters included.
    def attr_ruby_code
      consume(:tRUX_ATTRIBUTE_VALUE_RUBY_CODE_START)

      ruby.tap do
        consume(:tRUX_ATTRIBUTE_VALUE_RUBY_CODE_END)
      end
    end

    # Parses tag body content: embedded ruby code or literal text. Returns nil
    # for literal text that is empty.
    def literal
      if is?(:tRUX_LITERAL_RUBY_CODE_START)
        literal_ruby_code
      else
        lit = squeeze_lit(text_of(current))
        consume(:tRUX_LITERAL)
        AST::TextNode.new(lit) unless lit.empty?
      end
    end

    # Turns every whitespace character into a space and collapses runs of
    # spaces into one.
    def squeeze_lit(lit)
      lit.gsub(/\s/, ' ').squeeze(' ')
    end

    # Parses ruby code embedded in a tag body, which may itself contain tags.
    def literal_ruby_code
      consume(:tRUX_LITERAL_RUBY_CODE_START)

      parse.tap do
        consume(:tRUX_LITERAL_RUBY_CODE_END)
      end
    end

    def current
      @current
    end

    # True when the current token's type is one of +types+.
    def is?(*types)
      types.include?(type_of(current))
    end

    # Advances past the current token if it has the given type. Returns
    # whether it did.
    def maybe_consume(type)
      if type_of(current) == type
        @current = get_next
        true
      else
        false
      end
    end

    # Advances past the current token, raising UnexpectedTokenError unless its
    # type is one of +types+.
    def consume(*types)
      unless types.include?(type_of(current))
        raise UnexpectedTokenError,
          "expected [#{types.join(', ')}], got '#{type_of(current)}'"
      end

      @current = get_next
    end

    def type_of(token)
      token[0]
    end

    def text_of(token)
      token[1][0]
    end

    def pos_of(token)
      token[1][1]
    end

    def get_next
      @lexer.advance
    end
  end
end
@@ -0,0 +1,143 @@
1
module Rux
  # Ruby lexer built on ::Parser::Lexer that watches its own output for the
  # start of a rux tag. When it sees one, it emits a typeless token so the
  # owner of the lexer stack can switch to the lexer returned by #next_lexer.
  #
  # NOTE(review): @p, @ts, @te and @eof appear to be state owned by the
  # inherited scanner; confirm against ::Parser::Lexer before changing how
  # they are assigned here.
  class RubyLexer < ::Parser::Lexer
    # ::Parser::Lexer fills in these class-level instance variables when it is
    # loaded. Subclasses don't inherit them, so mirror each one by hand.
    ::Parser::Lexer.instance_variables.each do |name|
      value = ::Parser::Lexer.instance_variable_get(name)
      instance_variable_set(name, value)
    end

    # Number of tokens kept buffered for lookahead.
    LOOKAHEAD = 3

    # source_buffer - the buffer to lex.
    # init_pos      - position in the buffer at which to start lexing.
    def initialize(source_buffer, init_pos)
      super(ruby_version)

      self.source_buffer = source_buffer
      @generator = to_enum(:each_token)
      @rux_token_queue = []
      @p = init_pos
    end

    # Keep the inherited implementation reachable; #advance below replaces it
    # with the rux-aware token stream.
    alias_method :advance_orig, :advance

    # Returns the next token, raising StopIteration once lexing is finished.
    def advance
      @generator.next
    end

    # Repositions the lexer at +pos+, discards buffered lookahead and refills
    # the queue from the new position.
    def reset_to(pos)
      @p = pos
      @te = pos
      @ts = pos
      @eof = false
      @rux_token_queue.clear
      populate_queue
    end

    # Returns the lexer that should take over at +pos+.
    def next_lexer(pos)
      RuxLexer.new(@source_buffer, pos)
    end

    private

    # The running ruby's version with the last segment dropped and the rest
    # concatenated into an integer (e.g. "2.7.1" becomes 27).
    def ruby_version
      @ruby_version ||= begin
        segments = RUBY_VERSION.split('.')
        segments.pop
        segments.join.to_i
      end
    end

    def each_token
      # Detecting the beginning of a rux tag relies on looking ahead by one
      # token, which is why the head of @rux_token_queue is yielded right
      # away. That doesn't work when the lexer _starts_ on a rux tag, so a
      # dummy skip token is queued first. With it in place, at_rux? inspects
      # the right queue slots and recognizes the tag.
      @rux_token_queue << [:tSKIP, ['$skip', make_range(@p, @p)]]

      @eof = false
      depth = 1
      populate_queue

      until @rux_token_queue.empty?
        if at_rux?
          yield @rux_token_queue.shift

          @eof = true
          _, (_, pos) = @rux_token_queue[0]

          # reset_to (defined above) flips @eof back to false. It is called
          # once the lexer that took over has been popped off the lexer stack
          # (see lexer.rb).
          yield [nil, ['$eof', pos]] while @eof
        end

        token = @rux_token_queue.shift
        type, (_, pos) = token

        case type
        when :tLCURLY, :tLBRACE
          depth += 1
        when :tRCURLY, :tRBRACE
          depth -= 1
        end

        # Balanced curlies mean the ruby code is over. Emit a reset token so
        # the system knows where we stopped, then break, which ends the
        # enumerator (callers see a StopIteration).
        if depth == 0
          yield [:tRESET, ['$eof', pos]]
          break
        end

        yield token

        populate_queue
      end
    end

    # Tops the lookahead queue up to LOOKAHEAD tokens, stopping early at a
    # typeless token or when the underlying lexer raises NoMethodError.
    def populate_queue
      while @rux_token_queue.size < LOOKAHEAD
        begin
          cur_token = advance_orig
        rescue NoMethodError
          # The ruby lexer is being asked to start at an arbitrary position
          # in the source buffer, so it can trip over foreign rux tokens and
          # fail internally. Give up on further lookahead when that happens.
          break
        end

        break unless cur_token[0]
        @rux_token_queue << cur_token
      end
    end

    # Is the upcoming "<" the start of a rux tag rather than a comparison or a
    # class inheritance operator?
    def at_rux?
      at_lt? && !at_inheritance?
    end

    # Second queued token is "<" and the third is a constant or identifier.
    def at_lt?
      _, lt, name = @rux_token_queue

      is?(lt, :tLT) && (is?(name, :tCONSTANT) || is?(name, :tIDENTIFIER))
    end

    # Queue holds constant, "<", constant, as in `class Foo < Bar`.
    def at_inheritance?
      subject, lt, parent = @rux_token_queue

      is?(subject, :tCONSTANT) && is?(lt, :tLT) && is?(parent, :tCONSTANT)
    end

    def is?(tok, sym)
      tok && tok[0] == sym
    end

    def is_not?(tok, sym)
      tok && tok[0] != sym
    end

    def make_range(start, stop)
      ::Parser::Source::Range.new(@source_buffer, start, stop)
    end
  end
end