rux 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +9 -0
- data/LICENSE +21 -0
- data/README.md +310 -0
- data/Rakefile +14 -0
- data/bin/ruxc +96 -0
- data/lib/rux.rb +73 -0
- data/lib/rux/ast.rb +9 -0
- data/lib/rux/ast/list_node.rb +15 -0
- data/lib/rux/ast/ruby_node.rb +15 -0
- data/lib/rux/ast/string_node.rb +15 -0
- data/lib/rux/ast/tag_node.rb +17 -0
- data/lib/rux/ast/text_node.rb +17 -0
- data/lib/rux/buffer.rb +15 -0
- data/lib/rux/default_tag_builder.rb +20 -0
- data/lib/rux/default_visitor.rb +67 -0
- data/lib/rux/file.rb +27 -0
- data/lib/rux/lex.rb +9 -0
- data/lib/rux/lex/patterns.rb +41 -0
- data/lib/rux/lex/state.rb +33 -0
- data/lib/rux/lex/states.csv +39 -0
- data/lib/rux/lex/transition.rb +22 -0
- data/lib/rux/lexer.rb +64 -0
- data/lib/rux/parser.rb +244 -0
- data/lib/rux/ruby_lexer.rb +143 -0
- data/lib/rux/rux_lexer.rb +157 -0
- data/lib/rux/utils.rb +15 -0
- data/lib/rux/version.rb +3 -0
- data/lib/rux/visitor.rb +33 -0
- data/rux.gemspec +20 -0
- data/spec/parser_spec.rb +229 -0
- data/spec/spec_helper.rb +6 -0
- metadata +102 -0
data/lib/rux/lexer.rb
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
module Rux
  # Presents a stack of cooperating sub-lexers as a single token stream.
  #
  # The stack starts out holding one RubyLexer positioned at offset 0 of the
  # source buffer. Sub-lexers steer this class through the tokens they return:
  #
  # * a token whose type is nil (but which has a position) asks for
  #   current.next_lexer(pos.begin_pos) to be pushed onto the stack;
  # * :tRESET records the offset the lexer underneath should resume from;
  # * :tSKIP is silently dropped;
  # * every other token is passed on to the caller of #advance.
  class Lexer
    class EOFError < StandardError; end
    class TransitionError < StandardError; end

    attr_reader :source_buffer

    # source_buffer is handed unchanged to the initial RubyLexer.
    def initialize(source_buffer)
      @source_buffer = source_buffer
      @stack = [RubyLexer.new(source_buffer, 0)]
      @generator = to_enum(:each_token)
    end

    # Returns the next token. Once the underlying enumerator is exhausted,
    # every call returns the end-of-input token [nil, ['$eof']].
    def advance
      begin
        @generator.next
      rescue StopIteration
        [nil, ['$eof']]
      end
    end

    private

    # Pulls tokens from the lexer on top of the stack and yields the ones meant
    # for the consumer. @p tracks the offset just past the last yielded token,
    # or the offset named by the most recent :tRESET token.
    def each_token
      @p = 0

      while (lexer = current)
        begin
          token = lexer.advance
        rescue StopIteration
          # The lexer on top of the stack has run its course and is finished.
          # It should already have yielded a :tRESET token, so @p holds the
          # place where the lexer logically before it in the stack left off.
          @stack.pop
          # An empty stack ends the loop via its condition: we're done.
          current.reset_to(@p) if current
          next
        end

        type, (_text, pos) = token
        break unless pos

        if !type
          # Untyped token: control moves to whatever lexer the current one
          # nominates for this position.
          @stack.push(current.next_lexer(pos.begin_pos))
          next
        end

        if type == :tRESET
          @p = pos.begin_pos
        elsif type != :tSKIP
          yield token

          @p = pos.end_pos
        end
      end
    end

    # The lexer on top of the stack, or nil when the stack is empty.
    def current
      @stack.last
    end
  end
end
|
data/lib/rux/parser.rb
ADDED
@@ -0,0 +1,244 @@
|
|
1
|
+
require 'parser'
|
2
|
+
|
3
|
+
module Rux
  # Recursive-descent parser that turns the token stream of a lexer into a
  # tree of AST nodes.
  #
  # Tokens are arrays shaped like [type, [text, position]]. The end of input is
  # a token whose type is nil; the one produced by Rux::Lexer#advance carries
  # no position at all, so positions must be checked before they are used.
  class Parser
    class UnexpectedTokenError < StandardError; end
    class TagMismatchError < StandardError; end

    class << self
      # Reads the file at +path+ into a source buffer and parses it.
      #
      # @param path [String] path of the file to parse
      # @return [AST::ListNode] the top-level nodes
      def parse_file(path)
        buffer = ::Parser::Source::Buffer.new(path).read
        lexer = ::Rux::Lexer.new(buffer)
        new(lexer).parse
      end

      # Parses the source code given in +str+.
      #
      # @param str [String] source code to parse
      # @return [AST::ListNode] the top-level nodes
      def parse(str)
        buffer = ::Parser::Source::Buffer.new('(source)', source: str)
        lexer = ::Rux::Lexer.new(buffer)
        new(lexer).parse
      end
    end

    # @param lexer [#advance, #source_buffer] token source; the first token is
    #   fetched immediately
    def initialize(lexer)
      @lexer = lexer
      @stack = []
      @current = get_next
    end

    # Parses a sequence of ruby snippets and rux tags until the input ends or
    # the curly brace that encloses the sequence is closed. Also used
    # recursively for ruby code embedded in tag bodies.
    #
    # @return [AST::ListNode]
    # @raise [UnexpectedTokenError] if a token is neither ruby nor a tag start
    # @raise [TagMismatchError] if a closing tag does not match its opening tag
    def parse
      # Starts at 1 so that the closing brace of an enclosing ruby literal
      # (see #literal_ruby_code) brings the count to zero and ends the loop.
      curlies = 1
      children = []

      loop do
        type = type_of(current)
        break unless type

        case type
        when :tLCURLY, :tLBRACE, :tRUX_LITERAL_RUBY_CODE_START
          curlies += 1
        when :tRCURLY, :tRBRACE, :tRUX_LITERAL_RUBY_CODE_END
          curlies -= 1
        end

        break if curlies == 0

        if (rb = ruby)
          children << rb
        elsif is?(:tRUX_TAG_OPEN_START)
          children << tag
        else
          raise UnexpectedTokenError,
            'expected ruby code or the start of a rux tag but found '\
            "#{type_of(current)} instead"
        end
      end

      AST::ListNode.new(children)
    end

    private

    # Consumes tokens until one is found whose type appears in
    # RuxLexer.state_table, or the input ends, and wraps the corresponding
    # slice of the source in a RubyNode.
    #
    # @return [AST::RubyNode, nil] nil when no ruby code precedes the current
    #   token or the input has already ended
    def ruby
      start_pos = pos_of(current)

      # The end-of-input token has no position. Bail out instead of calling
      # begin_pos on nil, so the caller's next #consume reports the problem
      # as an UnexpectedTokenError.
      return unless start_pos

      ruby_start = start_pos.begin_pos

      loop do
        type = type_of(current)
        break if type.nil? || RuxLexer.state_table.include?(type)

        consume(type)
      end

      # Ran out of tokens: the rest of the buffer is ruby code.
      unless type_of(current)
        return AST::RubyNode.new(
          @lexer.source_buffer.source[ruby_start..-1]
        )
      end

      stop_pos = pos_of(current)
      return if stop_pos.begin_pos == ruby_start

      # NOTE(review): end_pos - 1 equals begin_pos only for one-character
      # tokens; presumably every token that can end ruby code is one character
      # wide -- confirm against RuxLexer.
      AST::RubyNode.new(
        @lexer.source_buffer.source[ruby_start...(stop_pos.end_pos - 1)]
      )
    end

    # Parses one tag: its name, attributes and either a self-closing end or a
    # body followed by a matching closing tag.
    #
    # @return [AST::TagNode]
    # @raise [TagMismatchError] if the closing tag names a different tag
    # @raise [UnexpectedTokenError] if the token stream is malformed
    def tag
      consume(:tRUX_TAG_OPEN_START)
      tag_name = text_of(current)
      tag_pos = pos_of(current)
      consume(:tRUX_TAG_OPEN, :tRUX_TAG_SELF_CLOSING)
      maybe_consume(:tRUX_ATTRIBUTE_SPACES)
      attrs = attributes
      maybe_consume(:tRUX_ATTRIBUTE_SPACES)
      maybe_consume(:tRUX_TAG_OPEN_END)
      tag_node = AST::TagNode.new(tag_name, attrs)

      if is?(:tRUX_TAG_SELF_CLOSING_END)
        consume(:tRUX_TAG_SELF_CLOSING_END)
        return tag_node
      end

      @stack.push(tag_name)

      until is?(:tRUX_TAG_CLOSE_START)
        if is?(:tRUX_LITERAL, :tRUX_LITERAL_RUBY_CODE_START)
          lit = literal
          tag_node.children << lit if lit
        else
          # Anything else must be a nested tag; #tag raises if it is not.
          tag_node.children << tag
        end
      end

      consume(:tRUX_TAG_CLOSE_START)

      closing_tag_name = text_of(current)
      closing_tag_pos = pos_of(current)

      # A token without a position is the end of input. Skip the mismatch
      # report (it needs a line number) and let #consume below raise.
      if closing_tag_pos && @stack.last != closing_tag_name
        raise TagMismatchError, "closing tag '#{closing_tag_name}' on line "\
          "#{closing_tag_pos.line} did not match opening tag '#{tag_name}' "\
          "on line #{tag_pos.line}"
      end

      @stack.pop

      consume(:tRUX_TAG_CLOSE)
      consume(:tRUX_TAG_CLOSE_END)

      tag_node
    end

    # Parses zero or more attributes.
    #
    # @return [Hash] attribute name => value node, in source order
    def attributes
      {}.tap do |attrs|
        while is?(:tRUX_ATTRIBUTE_NAME)
          key, value = attribute
          attrs[key] = value

          maybe_consume(:tRUX_ATTRIBUTE_SPACES)
        end
      end
    end

    # Parses a single attribute.
    #
    # @return [Array] a [name, value node] pair
    def attribute
      maybe_consume(:tRUX_ATTRIBUTE_SPACES)
      attr_name = text_of(current)
      consume(:tRUX_ATTRIBUTE_NAME)
      maybe_consume(:tRUX_ATTRIBUTE_EQUALS_SPACES)

      attr_value = if maybe_consume(:tRUX_ATTRIBUTE_EQUALS)
        maybe_consume(:tRUX_ATTRIBUTE_VALUE_SPACES)
        attribute_value
      else
        # if no equals sign, assume boolean attribute
        AST::StringNode.new("\"true\"")
      end

      [attr_name, attr_value]
    end

    # Parses an attribute value, which is either embedded ruby code or a plain
    # value token.
    def attribute_value
      if is?(:tRUX_ATTRIBUTE_VALUE_RUBY_CODE_START)
        attr_ruby_code
      else
        AST::StringNode.new(text_of(current)).tap do
          consume(:tRUX_ATTRIBUTE_VALUE)
        end
      end
    end

    # Parses ruby code used as an attribute value, including its delimiters.
    def attr_ruby_code
      consume(:tRUX_ATTRIBUTE_VALUE_RUBY_CODE_START)

      ruby.tap do
        consume(:tRUX_ATTRIBUTE_VALUE_RUBY_CODE_END)
      end
    end

    # Parses one piece of tag body content.
    #
    # @return [AST::ListNode, AST::TextNode, nil] nil when the literal text is
    #   empty
    def literal
      if is?(:tRUX_LITERAL_RUBY_CODE_START)
        literal_ruby_code
      else
        lit = squeeze_lit(text_of(current))
        consume(:tRUX_LITERAL)
        AST::TextNode.new(lit) unless lit.empty?
      end
    end

    # Turns every whitespace character into a space and collapses runs of
    # spaces into one.
    def squeeze_lit(lit)
      lit.gsub(/\s/, ' ').squeeze(' ')
    end

    # Parses ruby code embedded in a tag body, including its delimiters.
    def literal_ruby_code
      consume(:tRUX_LITERAL_RUBY_CODE_START)

      parse.tap do
        consume(:tRUX_LITERAL_RUBY_CODE_END)
      end
    end

    # The token currently being looked at.
    def current
      @current
    end

    # True when the current token has one of the given types.
    def is?(*types)
      types.include?(type_of(current))
    end

    # Advances past the current token if it has the given type.
    #
    # @return [Boolean] whether a token was consumed
    def maybe_consume(type)
      if type_of(current) == type
        @current = get_next
        true
      else
        false
      end
    end

    # Advances past the current token, which must have one of the given types.
    #
    # @raise [UnexpectedTokenError] if the current token has another type
    def consume(*types)
      unless types.include?(type_of(current))
        raise UnexpectedTokenError,
          "expected [#{types.map(&:to_s).join(', ')}], got '#{type_of(current)}'"
      end

      @current = get_next
    end

    def type_of(token)
      token[0]
    end

    def text_of(token)
      token[1][0]
    end

    # May be nil: the end-of-input token has no position.
    def pos_of(token)
      token[1][1]
    end

    def get_next
      @lexer.advance
    end
  end
end
|
data/lib/rux/ruby_lexer.rb
ADDED
@@ -0,0 +1,143 @@
|
|
1
|
+
module Rux
  # A ::Parser::Lexer that lexes ruby code starting at an arbitrary offset of
  # the source buffer and pauses whenever it looks like a rux tag begins.
  #
  # Tokens are handed out through an enumerator over #each_token, which keeps
  # a small lookahead queue (LOOKAHEAD tokens) to spot upcoming tags.
  class RubyLexer < ::Parser::Lexer
    # These are populated when ::Parser::Lexer loads and are therefore
    # not inherited. We have to copy them over manually.
    ::Parser::Lexer.instance_variables.each do |name|
      value = ::Parser::Lexer.instance_variable_get(name)
      instance_variable_set(name, value)
    end

    LOOKAHEAD = 3

    # source_buffer is the buffer to lex; init_pos is the offset to start at.
    def initialize(source_buffer, init_pos)
      super(ruby_version)

      self.source_buffer = source_buffer
      @rux_token_queue = []
      @generator = to_enum(:each_token)
      # Assigned after source_buffer= (original order kept on purpose).
      @p = init_pos
    end

    # Keep the superclass implementation reachable; #populate_queue uses it to
    # obtain raw ruby tokens.
    alias_method :advance_orig, :advance

    # Returns the next token from #each_token. Raises StopIteration once the
    # enumerator is exhausted.
    def advance
      @generator.next
    end

    # Repositions the lexer at pos, clears the end-of-input flag that
    # #each_token waits on, and rebuilds the lookahead queue from there.
    def reset_to(pos)
      @p = pos
      @te = pos
      @ts = pos
      @eof = false
      @rux_token_queue.clear
      populate_queue
    end

    # The lexer that should take over at pos.
    def next_lexer(pos)
      RuxLexer.new(@source_buffer, pos)
    end

    private

    # The running ruby's major and minor version as one integer, e.g. 26 for
    # "2.6.3".
    def ruby_version
      @ruby_version ||= begin
        parts = RUBY_VERSION.split('.')
        parts.pop
        parts.join('').to_i
      end
    end

    def each_token(&block)
      # We detect whether or not we're at the beginning of a rux tag by looking
      # ahead by 1 token; that's why the first element in @rux_token_queue is
      # yielded immediately. If the lexer _starts_ at a rux tag however,
      # lookahead is a lot more difficult. To mitigate, we insert a dummy skip
      # token here. That way, at_rux? checks the right tokens in the queue and
      # correctly identifies the start of a rux tag.
      @rux_token_queue << [:tSKIP, ['$skip', make_range(@p, @p)]]

      @eof = false
      depth = 1
      populate_queue

      until @rux_token_queue.empty?
        if at_rux?
          yield @rux_token_queue.shift

          @eof = true
          _type, (_text, rux_pos) = @rux_token_queue.first

          # @eof is set to false by reset_to above, which is called after
          # popping the previous lexer off the lexer stack (see lexer.rb)
          yield [nil, ['$eof', rux_pos]] while @eof
        end

        token = @rux_token_queue.shift
        type, (_text, pos) = token

        if [:tLCURLY, :tLBRACE].include?(type)
          depth += 1
        elsif [:tRCURLY, :tRBRACE].include?(type)
          depth -= 1
        end

        # if curlies are balanced, we're done lexing ruby code, so yield a
        # reset token to tell the system where we stopped, then break to stop
        # our enumerator (will raise a StopIteration)
        if depth.zero?
          yield [:tRESET, ['$eof', pos]]
          break
        end

        yield token

        populate_queue
      end
    end

    # Tops the lookahead queue up to LOOKAHEAD tokens, stopping early at the
    # end of input or when the underlying lexer fails.
    def populate_queue
      while @rux_token_queue.size < LOOKAHEAD
        begin
          fetched = advance_orig
        rescue NoMethodError
          # Internal lexer errors can happen since we're asking the ruby lexer
          # to start at an arbitrary position inside the source buffer. It may
          # encounter foreign rux tokens it's not expecting, etc. Best to stop
          # trying to look ahead and call it quits.
          break
        end

        break unless fetched[0]

        @rux_token_queue << fetched
      end
    end

    # Truthy when the queue shows a "<" that opens a rux tag rather than a
    # class inheritance clause.
    def at_rux?
      at_lt? && !at_inheritance?
    end

    # Truthy when the second queued token is "<" and the third is a constant
    # or an identifier.
    def at_lt?
      _first, second, third = @rux_token_queue

      is?(second, :tLT) && (is?(third, :tCONSTANT) || is?(third, :tIDENTIFIER))
    end

    # Truthy when the queue holds constant, "<", constant.
    def at_inheritance?
      first, second, third = @rux_token_queue

      is?(first, :tCONSTANT) && is?(second, :tLT) && is?(third, :tCONSTANT)
    end

    # nil when tok is missing, otherwise whether its type equals sym.
    def is?(tok, sym)
      tok && tok[0] == sym
    end

    # nil when tok is missing, otherwise whether its type differs from sym.
    def is_not?(tok, sym)
      tok && tok[0] != sym
    end

    def make_range(start, stop)
      ::Parser::Source::Range.new(@source_buffer, start, stop)
    end
  end
end
|