rux 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +9 -0
- data/LICENSE +21 -0
- data/README.md +310 -0
- data/Rakefile +14 -0
- data/bin/ruxc +96 -0
- data/lib/rux.rb +73 -0
- data/lib/rux/ast.rb +9 -0
- data/lib/rux/ast/list_node.rb +15 -0
- data/lib/rux/ast/ruby_node.rb +15 -0
- data/lib/rux/ast/string_node.rb +15 -0
- data/lib/rux/ast/tag_node.rb +17 -0
- data/lib/rux/ast/text_node.rb +17 -0
- data/lib/rux/buffer.rb +15 -0
- data/lib/rux/default_tag_builder.rb +20 -0
- data/lib/rux/default_visitor.rb +67 -0
- data/lib/rux/file.rb +27 -0
- data/lib/rux/lex.rb +9 -0
- data/lib/rux/lex/patterns.rb +41 -0
- data/lib/rux/lex/state.rb +33 -0
- data/lib/rux/lex/states.csv +39 -0
- data/lib/rux/lex/transition.rb +22 -0
- data/lib/rux/lexer.rb +64 -0
- data/lib/rux/parser.rb +244 -0
- data/lib/rux/ruby_lexer.rb +143 -0
- data/lib/rux/rux_lexer.rb +157 -0
- data/lib/rux/utils.rb +15 -0
- data/lib/rux/version.rb +3 -0
- data/lib/rux/visitor.rb +33 -0
- data/rux.gemspec +20 -0
- data/spec/parser_spec.rb +229 -0
- data/spec/spec_helper.rb +6 -0
- metadata +102 -0
data/lib/rux/lexer.rb
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
module Rux
  # Drives a stack of lexers over a single source buffer and exposes their
  # combined output one token at a time via #advance.
  #
  # The stack starts out holding a RubyLexer positioned at offset 0. When the
  # active lexer emits a token that has a position but no type, a new lexer
  # (obtained from the active lexer's #next_lexer) is pushed and takes over.
  # When the active lexer raises StopIteration it is popped, and the lexer
  # underneath resumes from the most recently recorded position.
  class Lexer
    class EOFError < StandardError; end
    class TransitionError < StandardError; end

    attr_reader :source_buffer

    # source_buffer - the buffer handed to the initial RubyLexer.
    def initialize(source_buffer)
      @source_buffer = source_buffer
      @stack = [RubyLexer.new(source_buffer, 0)]
      @generator = to_enum(:each_token)
    end

    # Returns the next token. Once the token stream is exhausted, every call
    # returns the position-less end-of-input marker [nil, ['$eof']].
    def advance
      @generator.next
    rescue StopIteration
      [nil, ['$eof']]
    end

    private

    # Yields every token that should be visible to the caller. :tSKIP tokens
    # are dropped, and :tRESET tokens are only used to record where the lexer
    # below the active one should pick up again.
    def each_token
      @p = 0

      until @stack.empty?
        begin
          token = current.advance
        rescue StopIteration
          # The active lexer has run its course and is finished. It should
          # already have emitted a :tRESET token, so @p holds the place where
          # the lexer logically before it in the stack left off. An empty
          # stack after popping means there is nothing left to lex, which
          # ends the loop.
          @stack.pop
          current.reset_to(@p) if current
          next
        end

        kind, (_text, range) = token

        # a token without a position ends the stream
        break unless range

        case kind
        when nil, false
          # untyped token: hand control to a new lexer starting at this spot
          @stack.push(current.next_lexer(range.begin_pos))
        when :tRESET
          @p = range.begin_pos
        when :tSKIP
          # placeholder token, never surfaced to the caller
        else
          yield token

          @p = range.end_pos
        end
      end
    end

    # The lexer currently producing tokens (top of the stack), or nil when
    # the stack is empty.
    def current
      @stack.last
    end
  end
end
|
data/lib/rux/parser.rb
ADDED
@@ -0,0 +1,244 @@
|
|
1
|
+
require 'parser'
|
2
|
+
|
3
|
+
module Rux
  # Recursive-descent parser that consumes tokens from a lexer (anything that
  # responds to #advance and #source_buffer) and builds a tree of AST nodes.
  #
  # Tokens are arrays of the form [type, [text, position]]. The end of input
  # is signalled by a token whose type is nil; the marker produced by
  # Rux::Lexer#advance also carries no position.
  class Parser
    class UnexpectedTokenError < StandardError; end
    class TagMismatchError < StandardError; end

    class << self
      # Reads the file at +path+ and parses its contents.
      def parse_file(path)
        buffer = ::Parser::Source::Buffer.new(path).read
        lexer = ::Rux::Lexer.new(buffer)
        new(lexer).parse
      end

      # Parses the source code given in +str+.
      def parse(str)
        buffer = ::Parser::Source::Buffer.new('(source)', source: str)
        lexer = ::Rux::Lexer.new(buffer)
        new(lexer).parse
      end
    end

    # lexer - token source; the first token is fetched immediately.
    def initialize(lexer)
      @lexer = lexer
      @stack = []
      @current = get_next
    end

    # Parses a sequence of ruby code chunks and rux tags until the end of
    # input or until the curly brace that closes the enclosing scope.
    #
    # Returns an AST::ListNode wrapping the parsed children.
    # Raises UnexpectedTokenError when a token is neither ruby code nor the
    # start of a tag.
    def parse
      curlies = 1
      children = []

      loop do
        type = type_of(current)
        break unless type

        case type
        when :tLCURLY, :tLBRACE, :tRUX_LITERAL_RUBY_CODE_START
          curlies += 1
        when :tRCURLY, :tRBRACE, :tRUX_LITERAL_RUBY_CODE_END
          curlies -= 1
        end

        break if curlies == 0

        if (rb = ruby)
          children << rb
        elsif type_of(current) == :tRUX_TAG_OPEN_START
          children << tag
        else
          raise UnexpectedTokenError,
            'expected ruby code or the start of a rux tag but found '\
            "#{type_of(current)} instead"
        end
      end

      AST::ListNode.new(children)
    end

    private

    # Consumes tokens up to the next token whose type is listed in
    # RuxLexer.state_table (or up to the end of input) and returns the
    # covered source text as an AST::RubyNode.
    #
    # Returns nil when no tokens were consumed, or when the current token has
    # no position (the end-of-input marker) and there is therefore nothing to
    # collect.
    def ruby
      start_range = pos_of(current)

      # The end-of-input marker carries no position. Bail out instead of
      # crashing with NoMethodError; the caller's next #consume then reports
      # the premature end of input as an UnexpectedTokenError.
      return nil unless start_range

      ruby_start = start_range.begin_pos

      loop do
        type = type_of(current)
        break if type.nil? || RuxLexer.state_table.include?(type)

        consume(type)
      end

      # ran off the end of the input: everything from ruby_start on is ruby
      unless type_of(current)
        return AST::RubyNode.new(
          @lexer.source_buffer.source[ruby_start..-1]
        )
      end

      if pos_of(current).begin_pos != ruby_start
        AST::RubyNode.new(
          @lexer.source_buffer.source[ruby_start...(pos_of(current).end_pos - 1)]
        )
      end
    end

    # Parses one tag: its name, attributes and, unless it is self-closing,
    # its children and matching closing tag.
    #
    # Returns an AST::TagNode.
    # Raises TagMismatchError when the closing tag's name differs from the
    # opening tag's name, and UnexpectedTokenError on any other malformed
    # input (including input that ends before the tag is closed).
    def tag
      consume(:tRUX_TAG_OPEN_START)
      tag_name = text_of(current)
      tag_pos = pos_of(current)
      consume(:tRUX_TAG_OPEN, :tRUX_TAG_SELF_CLOSING)
      maybe_consume(:tRUX_ATTRIBUTE_SPACES)
      attrs = attributes
      maybe_consume(:tRUX_ATTRIBUTE_SPACES)
      maybe_consume(:tRUX_TAG_OPEN_END)
      tag_node = AST::TagNode.new(tag_name, attrs)

      if is?(:tRUX_TAG_SELF_CLOSING_END)
        consume(:tRUX_TAG_SELF_CLOSING_END)
        return tag_node
      end

      @stack.push(tag_name)

      until is?(:tRUX_TAG_CLOSE_START)
        if is?(:tRUX_LITERAL, :tRUX_LITERAL_RUBY_CODE_START)
          lit = literal
          tag_node.children << lit if lit
        else
          tag_node.children << tag
        end
      end

      consume(:tRUX_TAG_CLOSE_START)

      closing_tag_name = text_of(current)

      if @stack.last != closing_tag_name
        closing_tag_pos = pos_of(current)

        # Input ended right after the closing tag's start: there is no
        # position to report, so this is an unexpected token rather than a
        # mismatched tag name.
        raise_unexpected_token(:tRUX_TAG_CLOSE) unless closing_tag_pos

        raise TagMismatchError, "closing tag '#{closing_tag_name}' on line "\
          "#{closing_tag_pos.line} did not match opening tag '#{tag_name}' "\
          "on line #{tag_pos.line}"
      end

      @stack.pop

      consume(:tRUX_TAG_CLOSE)
      consume(:tRUX_TAG_CLOSE_END)

      tag_node
    end

    # Parses zero or more attributes into a hash of name => value node.
    def attributes
      {}.tap do |attrs|
        while is?(:tRUX_ATTRIBUTE_NAME)
          key, value = attribute
          attrs[key] = value

          maybe_consume(:tRUX_ATTRIBUTE_SPACES)
        end
      end
    end

    # Parses a single attribute and returns a [name, value_node] pair.
    def attribute
      maybe_consume(:tRUX_ATTRIBUTE_SPACES)
      attr_name = text_of(current)
      consume(:tRUX_ATTRIBUTE_NAME)
      maybe_consume(:tRUX_ATTRIBUTE_EQUALS_SPACES)

      attr_value = if maybe_consume(:tRUX_ATTRIBUTE_EQUALS)
        maybe_consume(:tRUX_ATTRIBUTE_VALUE_SPACES)
        attribute_value
      else
        # if no equals sign, assume boolean attribute
        AST::StringNode.new("\"true\"")
      end

      [attr_name, attr_value]
    end

    # Parses an attribute value: either embedded ruby code or a plain value
    # wrapped in an AST::StringNode.
    def attribute_value
      if is?(:tRUX_ATTRIBUTE_VALUE_RUBY_CODE_START)
        attr_ruby_code
      else
        AST::StringNode.new(text_of(current)).tap do
          consume(:tRUX_ATTRIBUTE_VALUE)
        end
      end
    end

    # Parses ruby code used as an attribute value, including its delimiters.
    def attr_ruby_code
      consume(:tRUX_ATTRIBUTE_VALUE_RUBY_CODE_START)

      ruby.tap do
        consume(:tRUX_ATTRIBUTE_VALUE_RUBY_CODE_END)
      end
    end

    # Parses tag content: either embedded ruby code or literal text. Returns
    # nil for literal text that is empty.
    def literal
      if is?(:tRUX_LITERAL_RUBY_CODE_START)
        literal_ruby_code
      else
        lit = squeeze_lit(text_of(current))
        consume(:tRUX_LITERAL)
        AST::TextNode.new(lit) unless lit.empty?
      end
    end

    # Turns every whitespace character into a space and collapses runs of
    # spaces into a single space.
    def squeeze_lit(lit)
      lit.gsub(/\s/, ' ').squeeze(' ')
    end

    # Parses ruby code embedded in tag content, including its delimiters.
    # The code is parsed recursively since it may itself contain tags.
    def literal_ruby_code
      consume(:tRUX_LITERAL_RUBY_CODE_START)

      parse.tap do
        consume(:tRUX_LITERAL_RUBY_CODE_END)
      end
    end

    # The token currently being examined.
    def current
      @current
    end

    # True when the current token's type is one of +types+.
    def is?(*types)
      types.include?(type_of(current))
    end

    # Advances past the current token only if it has the given type.
    # Returns true when a token was consumed, false otherwise.
    def maybe_consume(type)
      if type_of(current) == type
        @current = get_next
        true
      else
        false
      end
    end

    # Advances past the current token, which must have one of +types+.
    # Raises UnexpectedTokenError otherwise.
    def consume(*types)
      raise_unexpected_token(*types) unless types.include?(type_of(current))

      @current = get_next
    end

    # Raises UnexpectedTokenError naming the expected types and the type of
    # the current token.
    def raise_unexpected_token(*types)
      raise UnexpectedTokenError,
        "expected [#{types.map(&:to_s).join(', ')}], got '#{type_of(current)}'"
    end

    def type_of(token)
      token[0]
    end

    def text_of(token)
      token[1][0]
    end

    def pos_of(token)
      token[1][1]
    end

    def get_next
      @lexer.advance
    end
  end
end
|
data/lib/rux/ruby_lexer.rb
ADDED
@@ -0,0 +1,143 @@
|
|
1
|
+
module Rux
  # A ::Parser::Lexer subclass that lexes ruby code starting at an arbitrary
  # offset in the source buffer, keeps a small lookahead queue of tokens, and
  # stops producing ruby tokens when it detects the start of a rux tag or
  # when the curly braces it has seen become balanced.
  class RubyLexer < ::Parser::Lexer
    # ::Parser::Lexer populates these class-level instance variables when it
    # loads, so subclasses do not inherit them. Copy each one over by hand.
    ::Parser::Lexer.instance_variables.each do |name|
      instance_variable_set(name, ::Parser::Lexer.instance_variable_get(name))
    end

    # Number of tokens #populate_queue tries to keep in the queue.
    LOOKAHEAD = 3

    # source_buffer - the buffer to lex.
    # init_pos      - offset in the buffer at which lexing starts.
    def initialize(source_buffer, init_pos)
      super(ruby_version)

      self.source_buffer = source_buffer
      @rux_token_queue = []
      @generator = to_enum(:each_token)
      @p = init_pos
    end

    # Keep the superclass implementation reachable; #advance below is
    # replaced with a version backed by the token generator.
    alias_method :advance_orig, :advance

    # Returns the next token from #each_token. Raises StopIteration once
    # #each_token has finished.
    def advance
      @generator.next
    end

    # Repositions this lexer at +pos+, discards any queued tokens, clears
    # the end-of-ruby flag and refills the lookahead queue from there.
    def reset_to(pos)
      @p = pos
      @te = pos
      @ts = pos
      @eof = false
      @rux_token_queue.clear
      populate_queue
    end

    # Builds the lexer that takes over at +pos+ (a RuxLexer).
    def next_lexer(pos)
      RuxLexer.new(@source_buffer, pos)
    end

    private

    # Major and minor components of RUBY_VERSION squashed into one integer
    # (every segment but the last, concatenated).
    def ruby_version
      @ruby_version ||= begin
        segments = RUBY_VERSION.split('.')
        segments[0..-2].join('').to_i
      end
    end

    def each_token
      # Whether we're at the beginning of a rux tag is detected by looking
      # ahead by one token, which is why the first element of the queue is
      # yielded immediately. When the lexer _starts_ at a rux tag, lookahead
      # is a lot more difficult. To mitigate, a dummy skip token is queued
      # first so that at_rux? checks the right tokens in the queue and
      # correctly identifies the start of a rux tag.
      @rux_token_queue << [:tSKIP, ['$skip', make_range(@p, @p)]]

      @eof = false
      depth = 1
      populate_queue

      until @rux_token_queue.empty?
        if at_rux?
          yield @rux_token_queue.shift

          @eof = true
          _, (_, tag_range) = @rux_token_queue[0]

          # Keep signalling the end of ruby code until @eof is cleared by
          # reset_to, which is called after the lexer that took over has
          # been popped off the lexer stack (see lexer.rb).
          yield [nil, ['$eof', tag_range]] while @eof
        end

        token = @rux_token_queue.shift
        kind, (_, range) = token

        case kind
        when :tLCURLY, :tLBRACE
          depth += 1
        when :tRCURLY, :tRBRACE
          depth -= 1
        end

        # Balanced curlies mean we're done lexing ruby code: yield a reset
        # token to tell the system where we stopped, then break out so the
        # enumerator finishes (which raises StopIteration on the next call).
        if depth.zero?
          yield [:tRESET, ['$eof', range]]
          break
        end

        yield token

        populate_queue
      end
    end

    # Tops the lookahead queue up to LOOKAHEAD tokens, stopping early at a
    # token without a type or when the underlying lexer fails.
    def populate_queue
      while @rux_token_queue.size < LOOKAHEAD
        begin
          upcoming = advance_orig
        rescue NoMethodError
          # Internal lexer errors can happen since the ruby lexer is asked to
          # start at an arbitrary position inside the source buffer. It may
          # encounter foreign rux tokens it's not expecting, etc. Best to
          # stop trying to look ahead and call it quits.
          break
        end

        break unless upcoming[0]

        @rux_token_queue << upcoming
      end
    end

    # Truthy when the queued tokens look like the start of a rux tag rather
    # than a class inheritance expression.
    def at_rux?
      at_lt? && !at_inheritance?
    end

    # Truthy when the second queued token is a :tLT followed by a constant
    # or an identifier.
    def at_lt?
      _, second, third = @rux_token_queue

      is?(second, :tLT) && (
        is?(third, :tCONSTANT) ||
        is?(third, :tIDENTIFIER)
      )
    end

    # Truthy when the queue holds constant, :tLT, constant in that order.
    def at_inheritance?
      first, second, third = @rux_token_queue

      is?(first, :tCONSTANT) &&
        is?(second, :tLT) &&
        is?(third, :tCONSTANT)
    end

    # Truthy when +tok+ is present and its type equals +sym+.
    def is?(tok, sym)
      tok && tok[0] == sym
    end

    # Truthy when +tok+ is present and its type differs from +sym+.
    def is_not?(tok, sym)
      tok && tok[0] != sym
    end

    # Builds a source range over this lexer's buffer from +start+ to +stop+.
    def make_range(start, stop)
      ::Parser::Source::Range.new(@source_buffer, start, stop)
    end
  end
end
|