rux 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/rux/lexer.rb ADDED
@@ -0,0 +1,64 @@
1
module Rux
  # Top-level lexer. It owns a stack of sub-lexers (starting with a single
  # RubyLexer positioned at offset 0) and merges their output into one token
  # stream that is handed out one token at a time via #advance.
  class Lexer
    class EOFError < StandardError; end
    class TransitionError < StandardError; end

    attr_reader :source_buffer

    # @param source_buffer the buffer handed to every sub-lexer
    def initialize(source_buffer)
      @source_buffer = source_buffer
      @stack = [RubyLexer.new(source_buffer, 0)]
      @generator = to_enum(:each_token)
    end

    # Returns the next token. Once the underlying stream is exhausted, every
    # further call returns an end-of-input token of the form
    # [nil, ['$eof']] (no type, no position).
    def advance
      @generator.next
    rescue StopIteration
      [nil, ['$eof']]
    end

    private

    # Pulls tokens from the sub-lexer on top of the stack and yields the ones
    # meant for the consumer. @p tracks the offset at which the lexer below
    # the current one should resume.
    def each_token
      @p = 0

      # NOTE: deliberately not Kernel#loop, which would silently swallow any
      # StopIteration escaping the body.
      while true
        begin
          token = current.advance
        rescue StopIteration
          # The sub-lexer on top of the stack has run its course. It is
          # expected to have emitted a :tRESET token beforehand so that @p
          # points at the place where the lexer beneath it should pick up.
          @stack.pop
          break unless current # an empty stack means lexing is complete
          current.reset_to(@p)
          next
        end

        kind, (_text, range) = token

        # A token without a position ends the stream.
        break unless range

        if !kind
          # A positioned token without a type asks for a hand-off to the
          # lexer that should take over at that offset.
          @stack.push(current.next_lexer(range.begin_pos))
        elsif kind == :tRESET
          @p = range.begin_pos
        elsif kind != :tSKIP
          yield token

          @p = range.end_pos
        end
      end
    end

    # The sub-lexer currently producing tokens (nil when the stack is empty).
    def current
      @stack.last
    end
  end
end
data/lib/rux/parser.rb ADDED
@@ -0,0 +1,244 @@
1
+ require 'parser'
2
+
3
module Rux
  # Recursive-descent parser that turns the token stream produced by a lexer
  # into an AST (AST::ListNode / RubyNode / TagNode / TextNode / StringNode).
  #
  # Tokens are arrays of the form [type, [text, pos]]. A token whose type is
  # nil marks the end of the input; the end-of-input token produced by
  # Rux::Lexer#advance carries no position.
  class Parser
    class UnexpectedTokenError < StandardError; end
    class TagMismatchError < StandardError; end

    class << self
      # Reads the file at +path+ and parses its contents.
      def parse_file(path)
        buffer = ::Parser::Source::Buffer.new(path).read
        lexer = ::Rux::Lexer.new(buffer)
        new(lexer).parse
      end

      # Parses the rux source code in +str+.
      def parse(str)
        buffer = ::Parser::Source::Buffer.new('(source)', source: str)
        lexer = ::Rux::Lexer.new(buffer)
        new(lexer).parse
      end
    end

    # @param lexer an object responding to #advance (next token) and
    #   #source_buffer (whose #source is the full source string)
    def initialize(lexer)
      @lexer = lexer
      @stack = []
      @current = get_next
    end

    # Parses a sequence of ruby fragments and rux tags until the end of the
    # input or until the curly brace that closes the enclosing block.
    #
    # @return [AST::ListNode]
    # @raise [UnexpectedTokenError] if neither ruby code nor a tag is found
    # @raise [TagMismatchError] if a closing tag does not match its opener
    def parse
      # Starts at 1 so that an unmatched closing curly (the one ending an
      # embedded ruby block) brings the count to zero and stops the loop.
      curlies = 1
      children = []

      loop do
        type = type_of(current)
        break unless type

        case type
        when :tLCURLY, :tLBRACE, :tRUX_LITERAL_RUBY_CODE_START
          curlies += 1
        when :tRCURLY, :tRBRACE, :tRUX_LITERAL_RUBY_CODE_END
          curlies -= 1
        end

        break if curlies == 0

        if (rb = ruby)
          children << rb
        elsif type_of(current) == :tRUX_TAG_OPEN_START
          children << tag
        else
          raise UnexpectedTokenError,
            'expected ruby code or the start of a rux tag but found '\
            "#{type_of(current)} instead"
        end
      end

      AST::ListNode.new(children)
    end

    private

    # Consumes tokens up to the next rux token (any type listed in
    # RuxLexer.state_table) or the end of the input and returns the covered
    # source text as an AST::RubyNode. Returns nil when no ruby code is
    # present at the current position.
    def ruby
      # The end-of-input token has no position to read a start offset from.
      # Previously this crashed with NoMethodError on nil; returning nil lets
      # the caller report the missing token through #consume instead.
      return unless type_of(current)

      ruby_start = pos_of(current).begin_pos

      loop do
        type = type_of(current)
        break if type.nil? || RuxLexer.state_table.include?(type)

        consume(type)
      end

      # Ran into the end of the input: everything from ruby_start on is ruby.
      unless type_of(current)
        return AST::RubyNode.new(
          @lexer.source_buffer.source[ruby_start..-1]
        )
      end

      # Only produce a node if at least one token was consumed.
      if pos_of(current).begin_pos != ruby_start
        AST::RubyNode.new(
          @lexer.source_buffer.source[ruby_start...(pos_of(current).end_pos - 1)]
        )
      end
    end

    # Parses one tag, including its attributes and (unless it is
    # self-closing) its children and closing tag.
    #
    # @return [AST::TagNode]
    # @raise [UnexpectedTokenError] on a malformed or truncated tag
    # @raise [TagMismatchError] if the closing tag name differs
    def tag
      consume(:tRUX_TAG_OPEN_START)
      tag_name = text_of(current)
      tag_pos = pos_of(current)
      consume(:tRUX_TAG_OPEN, :tRUX_TAG_SELF_CLOSING)
      maybe_consume(:tRUX_ATTRIBUTE_SPACES)
      attrs = attributes
      maybe_consume(:tRUX_ATTRIBUTE_SPACES)
      maybe_consume(:tRUX_TAG_OPEN_END)
      tag_node = AST::TagNode.new(tag_name, attrs)

      if is?(:tRUX_TAG_SELF_CLOSING_END)
        consume(:tRUX_TAG_SELF_CLOSING_END)
        return tag_node
      end

      @stack.push(tag_name)

      until is?(:tRUX_TAG_CLOSE_START)
        if is?(:tRUX_LITERAL, :tRUX_LITERAL_RUBY_CODE_START)
          lit = literal
          tag_node.children << lit if lit
        else
          tag_node.children << tag
        end
      end

      consume(:tRUX_TAG_CLOSE_START)

      # If the input ends right after the closing tag's start there is no
      # name to compare and no position to report. Raise the regular
      # unexpected-token error rather than crashing while building the
      # mismatch message below.
      consume(:tRUX_TAG_CLOSE) unless type_of(current)

      closing_tag_name = text_of(current)

      if @stack.last != closing_tag_name
        closing_tag_pos = pos_of(current)

        raise TagMismatchError, "closing tag '#{closing_tag_name}' on line "\
          "#{closing_tag_pos.line} did not match opening tag '#{tag_name}' "\
          "on line #{tag_pos.line}"
      end

      @stack.pop

      consume(:tRUX_TAG_CLOSE)
      consume(:tRUX_TAG_CLOSE_END)

      tag_node
    end

    # Parses zero or more attributes into a hash of name => value node.
    def attributes
      {}.tap do |attrs|
        while is?(:tRUX_ATTRIBUTE_NAME)
          key, value = attribute
          attrs[key] = value

          maybe_consume(:tRUX_ATTRIBUTE_SPACES)
        end
      end
    end

    # Parses a single attribute.
    #
    # @return [Array] a [name, value_node] pair
    def attribute
      maybe_consume(:tRUX_ATTRIBUTE_SPACES)
      attr_name = text_of(current)
      consume(:tRUX_ATTRIBUTE_NAME)
      maybe_consume(:tRUX_ATTRIBUTE_EQUALS_SPACES)

      attr_value = if maybe_consume(:tRUX_ATTRIBUTE_EQUALS)
        maybe_consume(:tRUX_ATTRIBUTE_VALUE_SPACES)
        attribute_value
      else
        # if no equals sign, assume boolean attribute
        AST::StringNode.new("\"true\"")
      end

      [attr_name, attr_value]
    end

    # Parses an attribute value: either embedded ruby code or a plain value.
    def attribute_value
      if is?(:tRUX_ATTRIBUTE_VALUE_RUBY_CODE_START)
        attr_ruby_code
      else
        AST::StringNode.new(text_of(current)).tap do
          consume(:tRUX_ATTRIBUTE_VALUE)
        end
      end
    end

    # Parses ruby code used as an attribute value. The result is nil when no
    # code sits between the delimiters.
    def attr_ruby_code
      consume(:tRUX_ATTRIBUTE_VALUE_RUBY_CODE_START)

      ruby.tap do
        consume(:tRUX_ATTRIBUTE_VALUE_RUBY_CODE_END)
      end
    end

    # Parses tag content: embedded ruby code or literal text. Returns nil for
    # literal text that is empty.
    def literal
      if is?(:tRUX_LITERAL_RUBY_CODE_START)
        literal_ruby_code
      else
        lit = squeeze_lit(text_of(current))
        consume(:tRUX_LITERAL)
        AST::TextNode.new(lit) unless lit.empty?
      end
    end

    # Replaces every whitespace character with a space and collapses runs of
    # spaces into one.
    def squeeze_lit(lit)
      lit.gsub(/\s/, ' ').squeeze(' ')
    end

    # Parses ruby code embedded in tag content by recursing into #parse.
    def literal_ruby_code
      consume(:tRUX_LITERAL_RUBY_CODE_START)

      parse.tap do
        consume(:tRUX_LITERAL_RUBY_CODE_END)
      end
    end

    # The token currently being looked at.
    def current
      @current
    end

    # True if the current token's type is one of +types+.
    def is?(*types)
      types.include?(type_of(current))
    end

    # Advances past the current token if it has the given type.
    #
    # @return [Boolean] whether a token was consumed
    def maybe_consume(type)
      if type_of(current) == type
        @current = get_next
        true
      else
        false
      end
    end

    # Advances past the current token, which must have one of +types+.
    #
    # @raise [UnexpectedTokenError] if the current token has another type
    def consume(*types)
      unless types.include?(type_of(current))
        raise UnexpectedTokenError,
          "expected [#{types.map(&:to_s).join(', ')}], got '#{type_of(current)}'"
      end

      @current = get_next
    end

    # Token type (nil at the end of the input).
    def type_of(token)
      token[0]
    end

    # Source text of the token.
    def text_of(token)
      token[1][0]
    end

    # Position of the token; nil when the token carries none.
    def pos_of(token)
      token[1][1]
    end

    def get_next
      @lexer.advance
    end
  end
end
@@ -0,0 +1,143 @@
1
module Rux
  # A ::Parser::Lexer that lexes plain ruby code but watches for the start of
  # a rux tag a few tokens ahead. Tokens are handed out through #advance; the
  # stock implementation remains reachable as #advance_orig.
  class RubyLexer < ::Parser::Lexer
    # ::Parser::Lexer fills in these class-level instance variables when it
    # is loaded, so subclasses do not inherit them. Mirror them here by hand.
    ::Parser::Lexer.instance_variables.each do |name|
      instance_variable_set(name, ::Parser::Lexer.instance_variable_get(name))
    end

    # Number of tokens kept buffered so upcoming tokens can be inspected.
    LOOKAHEAD = 3

    # @param source_buffer the buffer to lex
    # @param init_pos [Integer] offset in the buffer at which to start
    def initialize(source_buffer, init_pos)
      super(ruby_version)

      self.source_buffer = source_buffer
      @generator = to_enum(:each_token)
      @rux_token_queue = []
      @p = init_pos
    end

    alias_method :advance_orig, :advance

    # Returns the next token of the rux-aware stream. Raises StopIteration
    # once the stream is finished.
    def advance
      @generator.next
    end

    # Repositions the lexer at +pos+, drops any buffered lookahead and
    # refills the buffer from the new position.
    def reset_to(pos)
      @p = pos
      @te = pos
      @ts = pos
      @eof = false
      @rux_token_queue.clear
      populate_queue
    end

    # Builds the lexer that takes over at +pos+.
    def next_lexer(pos)
      RuxLexer.new(@source_buffer, pos)
    end

    private

    # The running ruby's major and minor version digits joined into one
    # integer (RUBY_VERSION minus its last segment).
    def ruby_version
      @ruby_version ||= begin
        segments = RUBY_VERSION.split('.')
        segments[0..-2].join('').to_i
      end
    end

    def each_token(&block)
      # The start of a rux tag is detected by peeking one token past the
      # head of @rux_token_queue, which is why the head itself is emitted
      # straight away. That scheme breaks down when the lexer _begins_ on a
      # rux tag, so a dummy skip token is queued first. With it in place
      # at_rux? inspects the right queue slots and recognises the tag.
      @rux_token_queue << [:tSKIP, ['$skip', make_range(@p, @p)]]

      @eof = false
      depth = 1
      populate_queue

      until @rux_token_queue.empty?
        if at_rux?
          yield @rux_token_queue.shift

          @eof = true
          _, (_, tag_pos) = @rux_token_queue[0]

          # Keep signalling until reset_to (invoked once the lexer stacked on
          # top of this one has been popped, see lexer.rb) clears @eof.
          yield [nil, ['$eof', tag_pos]] while @eof
        end

        token = @rux_token_queue.shift
        type, (_, pos) = token

        if type == :tLCURLY || type == :tLBRACE
          depth += 1
        elsif type == :tRCURLY || type == :tRBRACE
          depth -= 1
        end

        # Balanced curlies mean the ruby code is over: emit a reset token
        # recording where lexing stopped, then leave the loop so that the
        # enumerator finishes (which surfaces as StopIteration).
        if depth == 0
          yield [:tRESET, ['$eof', pos]]
          break
        end

        yield token

        populate_queue
      end
    end

    # Tops the lookahead buffer up to LOOKAHEAD tokens, stopping early at the
    # end of the input or when the underlying lexer fails.
    def populate_queue
      while @rux_token_queue.size < LOOKAHEAD
        begin
          upcoming = advance_orig
        rescue NoMethodError
          # The ruby lexer is being started at an arbitrary offset in the
          # buffer and may trip over rux syntax it does not understand,
          # which can surface as an internal error. Give up on looking
          # further ahead when that happens.
          break
        end

        break unless upcoming[0]
        @rux_token_queue << upcoming
      end
    end

    # True when a rux tag starts right after the head of the queue.
    def at_rux?
      at_lt? && !at_inheritance?
    end

    # A "<" followed by a constant or an identifier.
    def at_lt?
      operator, operand = @rux_token_queue[1], @rux_token_queue[2]

      is?(operator, :tLT) && (
        is?(operand, :tCONSTANT) || is?(operand, :tIDENTIFIER)
      )
    end

    # CONSTANT < CONSTANT, i.e. a superclass declaration rather than a tag.
    def at_inheritance?
      subject, operator, parent = @rux_token_queue.first(3)

      is?(subject, :tCONSTANT) &&
        is?(operator, :tLT) &&
        is?(parent, :tCONSTANT)
    end

    def is?(tok, sym)
      tok && tok[0] == sym
    end

    def is_not?(tok, sym)
      tok && tok[0] != sym
    end

    def make_range(start, stop)
      ::Parser::Source::Range.new(@source_buffer, start, stop)
    end
  end
end