ruby-ll 1.0.0-java
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.yardopts +13 -0
- data/LICENSE +19 -0
- data/README.md +380 -0
- data/bin/ruby-ll +5 -0
- data/doc/DCO.md +25 -0
- data/doc/changelog.md +8 -0
- data/doc/css/common.css +77 -0
- data/ext/c/driver.c +258 -0
- data/ext/c/driver.h +28 -0
- data/ext/c/driver_config.c +209 -0
- data/ext/c/driver_config.h +53 -0
- data/ext/c/extconf.rb +13 -0
- data/ext/c/khash.h +619 -0
- data/ext/c/kvec.h +90 -0
- data/ext/c/libll.c +7 -0
- data/ext/c/libll.h +9 -0
- data/ext/c/macros.h +6 -0
- data/ext/java/Libll.java +12 -0
- data/ext/java/org/libll/Driver.java +247 -0
- data/ext/java/org/libll/DriverConfig.java +193 -0
- data/lib/libll.jar +0 -0
- data/lib/ll/ast/node.rb +13 -0
- data/lib/ll/branch.rb +57 -0
- data/lib/ll/cli.rb +118 -0
- data/lib/ll/code_generator.rb +32 -0
- data/lib/ll/compiled_configuration.rb +35 -0
- data/lib/ll/compiled_grammar.rb +167 -0
- data/lib/ll/configuration_compiler.rb +204 -0
- data/lib/ll/driver.rb +46 -0
- data/lib/ll/driver_config.rb +36 -0
- data/lib/ll/driver_template.erb +51 -0
- data/lib/ll/epsilon.rb +23 -0
- data/lib/ll/erb_context.rb +23 -0
- data/lib/ll/grammar_compiler.rb +359 -0
- data/lib/ll/lexer.rb +582 -0
- data/lib/ll/message.rb +102 -0
- data/lib/ll/parser.rb +280 -0
- data/lib/ll/parser_error.rb +8 -0
- data/lib/ll/rule.rb +53 -0
- data/lib/ll/setup.rb +11 -0
- data/lib/ll/source_line.rb +46 -0
- data/lib/ll/terminal.rb +29 -0
- data/lib/ll/token.rb +30 -0
- data/lib/ll/version.rb +3 -0
- data/lib/ll.rb +26 -0
- data/ruby-ll.gemspec +47 -0
- metadata +217 -0
data/lib/ll/driver.rb
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
module LL
  ##
  # Parser driver for generated parsers.
  #
  # Every method in this class reports a parse failure by raising a
  # ParserError with a human readable message.
  #
  class Driver
    ##
    # Error method that is called when no rule was found for a table index.
    #
    # @param [Integer] stack_value
    # @param [Array] token Token whose first element is the token type.
    # @raise [ParserError]
    #
    def stack_input_error(stack_value, token)
      type = token[0].inspect

      raise ParserError, "Unexpected rule #{stack_value} for #{type}"
    end

    ##
    # Error method that is called when the stack has been consumed but there's
    # still input being sent to the parser.
    #
    # @param [Array] token Token whose first element is the token type.
    # @raise [ParserError]
    #
    def unexpected_input_error(token)
      raise(
        ParserError,
        "Received token #{token[0].inspect} but there's nothing left to parse"
      )
    end

    ##
    # Error method that is called when an invalid terminal was specified as the
    # input.
    #
    # The terminal names are looked up by ID in the `terminals` list of the
    # `CONFIG` constant of the receiver's class.
    #
    # @param [Integer] got_id The ID of the received terminal.
    # @param [Integer] expected_id The ID of the expected terminal.
    # @raise [ParserError]
    #
    def invalid_terminal_error(got_id, expected_id)
      terminals = self.class::CONFIG.terminals
      expected  = terminals[expected_id].inspect
      got       = terminals[got_id].inspect

      raise ParserError, "Invalid terminal #{got}, expected #{expected}"
    end
  end # Driver
end # LL
|
data/lib/ll/driver_config.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
module LL
  ##
  # Class containing C/Java data for a Driver class.
  #
  # Each setter keeps the given Array in an instance variable (readable through
  # the matching attribute reader) and also hands it to a `*_native=` setter.
  # The `*_native=` setters are not defined here; presumably they are supplied
  # by the C/Java extension.
  #
  class DriverConfig
    attr_reader :terminals, :rules, :table, :actions

    ##
    # @param [Array] array
    #
    def terminals=(array)
      self.terminals_native = @terminals = array
    end

    ##
    # @param [Array] array
    #
    def rules=(array)
      self.rules_native = @rules = array
    end

    ##
    # @param [Array] array
    #
    def table=(array)
      self.table_native = @table = array
    end

    ##
    # @param [Array] array
    #
    def actions=(array)
      self.actions_native = @actions = array
    end
  end # DriverConfig
end # LL
|
data/lib/ll/driver_template.erb
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
# This file is automatically generated by ruby-ll. Manually changing this file
# is not recommended as any changes will be lost the next time this parser is
# re-generated.
<%- if @add_requires -%>
require 'll/setup'
<%- end -%>

<%- @config.namespace.each do |part| -%>
module <%= part %>
<%- end -%>
<%- if @config.header -%>
<%= @config.header.strip -%>

<%- end -%>
class <%= @config.name -%> < LL::Driver
  CONFIG = LL::DriverConfig.new

  CONFIG.terminals = [
    <%- @config.terminals.each_with_index do |terminal, index| -%>
    <%= terminal.inspect -%>, # <%= index %>
    <%- end -%>
  ].freeze

  CONFIG.rules = [
    <%- @config.rules.each_with_index do |row, index| -%>
    <%= row.inspect -%>, # <%= index %>
    <%- end -%>
  ].freeze

  CONFIG.table = [
    <%- @config.table.each_with_index do |row, index| -%>
    <%= row.inspect -%>, # <%= index %>
    <%- end -%>
  ].freeze

  CONFIG.actions = [
    <%- @config.actions.each_with_index do |row, index| -%>
    <%= row.inspect -%>, # <%= index %>
    <%- end -%>
  ].freeze
  <%= @config.inner.rstrip if @config.inner %>
  <%- @config.action_bodies.each do |name, body| -%>

  def <%= name %>(val)
    <%= body %>
  end
  <%- end -%>
end
<%- @config.namespace.each do |part| -%>
end
<%- end -%>
|
data/lib/ll/epsilon.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
module LL
  ##
  # Class used for indicating an epsilon in a grammar. Epsilon objects are
  # primarily used to break out of recursion.
  #
  class Epsilon
    attr_reader :source_line

    ##
    # @param [LL::SourceLine] source_line The location this epsilon was
    #  defined at.
    #
    def initialize(source_line)
      @source_line = source_line
    end

    ##
    # Returns a fixed label; the source line is deliberately not included.
    #
    # @return [String]
    #
    def inspect
      return 'Epsilon()'
    end
  end # Epsilon
end # LL
|
data/lib/ll/erb_context.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
module LL
  ##
  # A context for a single ERB template, used for storing variables and
  # retrieving the binding for a template.
  #
  class ERBContext
    ##
    # Stores every key/value pair as an instance variable named after the key,
    # e.g. `:config => 10` becomes `@config`.
    #
    # @param [Hash] variables Variable names (without the leading "@") mapped
    #  to their values.
    #
    def initialize(variables = {})
      variables.each do |name, value|
        instance_variable_set("@#{name}", value)
      end
    end

    ##
    # Returns a binding in which `self` is this context, giving a template
    # access to the instance variables set in {#initialize}.
    #
    # @return [Binding]
    #
    def get_binding
      return binding
    end
  end # ERBContext
end # LL
|
data/lib/ll/grammar_compiler.rb
ADDED
@@ -0,0 +1,359 @@
|
|
1
|
+
module LL
  ##
  # The GrammarCompiler class processes an AST (as parsed from an LL(1) grammar)
  # and returns an {LL::CompiledGrammar} instance.
  #
  class GrammarCompiler
    ##
    # Compiles the AST into a new CompiledGrammar, then adds warnings for unused
    # terminals/rules and errors for first/first and first/follow conflicts.
    #
    # @param [LL::AST::Node] ast
    # @return [LL::CompiledGrammar]
    #
    def compile(ast)
      compiled = CompiledGrammar.new

      process(ast, compiled)

      warn_for_unused_terminals(compiled)
      warn_for_unused_rules(compiled)

      verify_first_first(compiled)
      verify_first_follow(compiled)

      return compiled
    end

    ##
    # Dispatches a node to the `on_<type>` handler matching the node's type.
    #
    # @param [LL::AST::Node] node
    # @param [LL::CompiledGrammar] compiled_grammar
    # @return [Object] The return value of the handler.
    #
    def process(node, compiled_grammar)
      handler = "on_#{node.type}"

      return send(handler, node, compiled_grammar)
    end

    ##
    # Adds warnings for any unused rules. The first defined rule is skipped
    # since it's the root rule.
    #
    # @param [LL::CompiledGrammar] compiled_grammar
    #
    def warn_for_unused_rules(compiled_grammar)
      compiled_grammar.rules.each_with_index do |rule, index|
        next if index == 0 || rule.references > 0

        compiled_grammar.add_warning(
          "Unused rule #{rule.name.inspect}",
          rule.source_line
        )
      end
    end

    ##
    # Adds warnings for any unused terminals.
    #
    # @param [LL::CompiledGrammar] compiled_grammar
    #
    def warn_for_unused_terminals(compiled_grammar)
      compiled_grammar.terminals.each do |terminal|
        next if terminal.references > 0

        compiled_grammar.add_warning(
          "Unused terminal #{terminal.name.inspect}",
          terminal.source_line
        )
      end
    end

    ##
    # Verifies all rules to see if they don't have any first/first conflicts.
    # Errors are added for every rule where this _is_ the case: one error for
    # the rule itself followed by one error per conflicting branch.
    #
    # @param [LL::CompiledGrammar] compiled_grammar
    #
    def verify_first_first(compiled_grammar)
      compiled_grammar.rules.each do |rule|
        conflicting = Set.new

        # Every pair of branches is compared exactly once. Branches that are
        # already known to conflict are still compared against the remaining
        # ones, otherwise a branch that only overlaps with an already reported
        # branch would never be listed.
        rule.branches.combination(2) do |branch, other_branch|
          next if branch == other_branch

          overlapping = branch.first_set & other_branch.first_set

          conflicting << branch << other_branch unless overlapping.empty?
        end

        next if conflicting.empty?

        compiled_grammar.add_error(
          'first/first conflict, multiple branches start with the same terminals',
          rule.source_line
        )

        conflicting.each do |branch|
          labels = branch.first_set.map do |token|
            token.is_a?(Epsilon) ? 'epsilon' : token.name
          end

          compiled_grammar.add_error(
            "branch starts with: #{labels.join(', ')}",
            branch.source_line
          )
        end
      end
    end

    ##
    # Adds errors for any rules containing first/follow conflicts: branches
    # of which the first set contains an epsilon while the follow set is not
    # empty.
    #
    # @param [LL::CompiledGrammar] compiled_grammar
    #
    def verify_first_follow(compiled_grammar)
      compiled_grammar.rules.each do |rule|
        rule.branches.each do |branch|
          epsilon = branch.first_set.find { |step| step.is_a?(Epsilon) }

          next if epsilon.nil? || branch.follow_set.empty?

          compiled_grammar.add_error(
            'first/follow conflict, branch can start with epsilon and is ' \
              'followed by (non) terminals',
            branch.source_line
          )

          compiled_grammar.add_error(
            'epsilon originates from here',
            epsilon.source_line
          )
        end
      end
    end

    ##
    # Processes the root node of a grammar.
    #
    # @param [LL::AST::Node] node
    # @param [LL::CompiledGrammar] compiled_grammar
    #
    def on_grammar(node, compiled_grammar)
      # Create the prototypes for all rules since rules can be referenced before
      # they are defined.
      node.children.each do |child|
        on_rule_prototype(child, compiled_grammar) if child.type == :rule
      end

      node.children.each do |child|
        process(child, compiled_grammar)
      end
    end

    ##
    # Sets the name of the parser. The name segments are joined using "::". A
    # warning is added when a name was already set.
    #
    # @param [LL::AST::Node] node
    # @param [LL::CompiledGrammar] compiled_grammar
    #
    def on_name(node, compiled_grammar)
      if compiled_grammar.name
        compiled_grammar.add_warning(
          "Overwriting existing parser name #{compiled_grammar.name.inspect}",
          node.source_line
        )
      end

      parts = node.children.map { |child| process(child, compiled_grammar) }

      compiled_grammar.name = parts.join('::')
    end

    ##
    # Processes the assignment of terminals. An error is added for every
    # terminal that has already been defined.
    #
    # @see #process
    #
    def on_terminals(node, compiled_grammar)
      node.children.each do |child|
        name = process(child, compiled_grammar)

        if compiled_grammar.has_terminal?(name)
          compiled_grammar.add_error(
            "The terminal #{name.inspect} has already been defined",
            child.source_line
          )
        else
          compiled_grammar.add_terminal(name, child.source_line)
        end
      end
    end

    ##
    # Processes an %inner directive.
    #
    # @see #process
    #
    def on_inner(node, compiled_grammar)
      compiled_grammar.inner = process(node.children[0], compiled_grammar)
    end

    ##
    # Processes a %header directive.
    #
    # @see #process
    #
    def on_header(node, compiled_grammar)
      compiled_grammar.header = process(node.children[0], compiled_grammar)
    end

    ##
    # Processes a node containing Ruby source code.
    #
    # @see #process
    # @return [String]
    #
    def on_ruby(node, compiled_grammar)
      return node.children[0]
    end

    ##
    # Extracts the name from an identifier.
    #
    # @see #process
    # @return [String]
    #
    def on_ident(node, compiled_grammar)
      return node.children[0]
    end

    ##
    # Processes an epsilon.
    #
    # @see #process
    # @return [LL::Epsilon]
    #
    def on_epsilon(node, compiled_grammar)
      return Epsilon.new(node.source_line)
    end

    ##
    # Processes the assignment of a rule. The branches are appended to the rule
    # prototype created by {#on_rule_prototype}.
    #
    # @see #process
    #
    def on_rule(node, compiled_grammar)
      name = process(node.children[0], compiled_grammar)

      # A clashing terminal name is reported, but the rule is still processed.
      if compiled_grammar.has_terminal?(name)
        compiled_grammar.add_error(
          "the rule name #{name.inspect} is already used as a terminal name",
          node.source_line
        )
      end

      # A rule that already has branches was defined before, the new
      # definition is reported and ignored.
      if compiled_grammar.has_rule_with_branches?(name)
        compiled_grammar.add_error(
          "the rule #{name.inspect} has already been defined",
          node.source_line
        )

        return
      end

      branches = node.children[1..-1].map do |child|
        process(child, compiled_grammar)
      end

      rule = compiled_grammar.lookup_rule(name)

      rule.branches.concat(branches)
    end

    ##
    # Creates a basic prototype for a rule, unless a rule using the same name
    # already exists.
    #
    # @see #process
    #
    def on_rule_prototype(node, compiled_grammar)
      name = process(node.children[0], compiled_grammar)

      return if compiled_grammar.has_rule?(name)

      rule = Rule.new(name, node.source_line)

      compiled_grammar.add_rule(rule)
    end

    ##
    # Processes a single rule branch. The first child contains the steps, the
    # optional second child the Ruby code of the branch.
    #
    # @see #process
    # @return [LL::Branch]
    #
    def on_branch(node, compiled_grammar)
      steps     = process(node.children[0], compiled_grammar)
      code_node = node.children[1]
      code      = code_node ? process(code_node, compiled_grammar) : nil

      return Branch.new(steps, node.source_line, code)
    end

    ##
    # Processes the steps of a branch. Undefined identifiers are reported as
    # errors and left out of the returned Array.
    #
    # @see #process
    # @return [Array]
    #
    def on_steps(node, compiled_grammar)
      steps = []

      node.children.each do |step_node|
        retval = process(step_node, compiled_grammar)

        # Literal rule/terminal names.
        if retval.is_a?(String)
          step = compiled_grammar.lookup_identifier(retval)

          undefined_identifier!(retval, step_node, compiled_grammar) unless step
        # Epsilon
        else
          step = retval
        end

        if step
          step.increment_references if step.respond_to?(:increment_references)

          steps << step
        end
      end

      return steps
    end

    private

    ##
    # Adds an error for an identifier that is neither a terminal nor a rule.
    #
    # @param [String] name
    # @param [LL::AST::Node] node
    # @param [LL::CompiledGrammar] compiled_grammar
    #
    def undefined_identifier!(name, node, compiled_grammar)
      compiled_grammar.add_error(
        "Undefined terminal or rule #{name.inspect}",
        node.source_line
      )
    end
  end # GrammarCompiler
end # LL
|