ruby-ll 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,204 @@
1
+ module LL
2
+ ##
3
+ # Compiles an instance of {LL::CompiledConfiguration} which is used by
4
+ # {LL::CodeGenerator} to actually generate Ruby source code.
5
+ #
6
+ class ConfigurationCompiler
7
+ ##
8
+ # @return [Hash]
9
+ #
10
+ TYPES = {
11
+ :rule => 0,
12
+ :terminal => 1,
13
+ :epsilon => 2,
14
+ :action => 3
15
+ }.freeze
16
+
17
+ ##
18
+ # @return [String]
19
+ #
20
+ DEFAULT_RUBY_CODE = 'val'.freeze
21
+
22
+ ##
23
+ # @param [LL::CompiledGrammar] grammar
24
+ # @return [LL::CompiledConfiguration]
25
+ #
26
+ def generate(grammar)
27
+ return CompiledConfiguration.new(
28
+ :name => generate_name(grammar),
29
+ :namespace => generate_namespace(grammar),
30
+ :inner => grammar.inner,
31
+ :header => grammar.header,
32
+ :terminals => generate_terminals(grammar),
33
+ :actions => generate_actions(grammar),
34
+ :action_bodies => generate_action_bodies(grammar),
35
+ :rules => generate_rules(grammar),
36
+ :table => generate_table(grammar)
37
+ )
38
+ end
39
+
40
+ ##
41
+ # @param [LL::CompiledGrammar] grammar
42
+ # @return [String]
43
+ #
44
+ def generate_name(grammar)
45
+ return grammar.name.split('::').last
46
+ end
47
+
48
+ ##
49
+ # @param [LL::CompiledGrammar] grammar
50
+ # @return [Array]
51
+ #
52
+ def generate_namespace(grammar)
53
+ parts = grammar.name.split('::')
54
+
55
+ return parts.length > 1 ? parts[0..-2] : []
56
+ end
57
+
58
+ ##
59
+ # Returns an Array containing all the terminal names as symbols. The first
60
+ # terminal is always `:$EOF` to ensure the array has the same amount of rows
61
+ # as there are columns in the `table` array.
62
+ #
63
+ # @param [LL::CompiledGrammar] grammar
64
+ # @return [Array]
65
+ #
66
+ def generate_terminals(grammar)
67
+ terminals = [:$EOF]
68
+
69
+ grammar.terminals.each do |term|
70
+ terminals << term.name.to_sym
71
+ end
72
+
73
+ return terminals
74
+ end
75
+
76
+ ##
77
+ # @param [LL::CompiledGrammar] grammar
78
+ # @return [Array]
79
+ #
80
+ def generate_actions(grammar)
81
+ actions = []
82
+ index = 0
83
+
84
+ grammar.rules.each do |rule|
85
+ rule.branches.each do |branch|
86
+ args = branch.steps.reject { |step| step.is_a?(Epsilon) }.length
87
+
88
+ actions << [:"_rule_#{index}", args]
89
+
90
+ index += 1
91
+ end
92
+ end
93
+
94
+ return actions
95
+ end
96
+
97
+ ##
98
+ # @param [LL::CompiledGrammar] grammar
99
+ # @return [Hash]
100
+ #
101
+ def generate_action_bodies(grammar)
102
+ bodies = {}
103
+ index = 0
104
+
105
+ grammar.rules.each do |rule|
106
+ rule.branches.each do |branch|
107
+ bodies[:"_rule_#{index}"] = branch.ruby_code || DEFAULT_RUBY_CODE
108
+
109
+ index += 1
110
+ end
111
+ end
112
+
113
+ return bodies
114
+ end
115
+
116
+ ##
117
+ # Builds the rules table of the parser. Each row is built in reverse order.
118
+ #
119
+ # @param [LL::CompiledGrammar] grammar
120
+ # @return [Array]
121
+ #
122
+ def generate_rules(grammar)
123
+ rules = []
124
+ action_index = 0
125
+ rule_indices = grammar.rule_indices
126
+ term_indices = grammar.terminal_indices
127
+
128
+ grammar.rules.each_with_index do |rule, rule_index|
129
+ rule.branches.each do |branch|
130
+ row = [TYPES[:action], action_index]
131
+
132
+ action_index += 1
133
+
134
+ branch.steps.reverse_each do |step|
135
+ if step.is_a?(LL::Terminal)
136
+ row << TYPES[:terminal]
137
+ row << term_indices[step] + 1
138
+
139
+ elsif step.is_a?(LL::Rule)
140
+ row << TYPES[:rule]
141
+ row << rule_indices[step]
142
+
143
+ elsif step.is_a?(LL::Epsilon)
144
+ row << TYPES[:epsilon]
145
+ row << 0
146
+ end
147
+ end
148
+
149
+ rules << row
150
+ end
151
+ end
152
+
153
+ return rules
154
+ end
155
+
156
+ ##
157
+ # Generates the table array for the parser. This array has the following
158
+ # structure:
159
+ #
160
+ # [
161
+ # [EOF, TERMINAL 1, TERMINAL 2, TERMINAL 3, ...]
162
+ # ]
163
+ #
164
+ # EOF is always the first column and is used when running out of input while
165
+ # processing a rule.
166
+ #
167
+ # @param [LL::CompiledGrammar] grammar
168
+ # @return [Array]
169
+ #
170
+ def generate_table(grammar)
171
+ branch_index = 0
172
+ term_indices = grammar.terminal_indices
173
+ columns = grammar.terminals.length + 1
174
+
175
+ table = Array.new(grammar.rules.length) do
176
+ Array.new(columns, -1)
177
+ end
178
+
179
+ grammar.rules.each_with_index do |rule, rule_index|
180
+ rule.branches.each do |branch|
181
+ branch.first_set.each do |step|
182
+ # For terminals we'll base the column index on the terminal index.
183
+ if step.is_a?(Terminal)
184
+ terminal_index = term_indices[step]
185
+
186
+ table[rule_index][terminal_index + 1] = branch_index
187
+
188
+ # For the rest (= epsilon) we'll update all columns that haven't
189
+ # been updated yet.
190
+ else
191
+ table[rule_index].each_with_index do |col, col_index|
192
+ table[rule_index][col_index] = branch_index if col == -1
193
+ end
194
+ end
195
+ end
196
+
197
+ branch_index += 1
198
+ end
199
+ end
200
+
201
+ return table
202
+ end
203
+ end # ConfigurationCompiler
204
+ end # LL
@@ -0,0 +1,46 @@
module LL
  ##
  # Parser driver for generated parsers. The methods defined here are the
  # error hooks; each of them raises a ParserError with a descriptive message.
  #
  class Driver
    ##
    # Called when no rule was found for a table index.
    #
    # @param [Fixnum] stack_value
    # @param [Array] token The first element is used to describe the token.
    # @raise [ParserError]
    #
    def stack_input_error(stack_value, token)
      token_type = token[0].inspect

      raise(ParserError, "Unexpected rule #{stack_value} for #{token_type}")
    end

    ##
    # Called when the stack has been consumed but there's still input being
    # sent to the parser.
    #
    # @param [Array] token The first element is used to describe the token.
    # @raise [ParserError]
    #
    def unexpected_input_error(token)
      message =
        "Received token #{token[0].inspect} but there's nothing left to parse"

      raise ParserError, message
    end

    ##
    # Called when an invalid terminal was specified as the input. Both IDs are
    # looked up in the `terminals` list of the class' CONFIG constant.
    #
    # @param [Fixnum] got_id The ID of the received terminal.
    # @param [Fixnum] expected_id The ID of the expected terminal.
    # @raise [ParserError]
    #
    def invalid_terminal_error(got_id, expected_id)
      names = self.class::CONFIG.terminals

      wanted   = names[expected_id].inspect
      received = names[got_id].inspect

      raise(ParserError, "Invalid terminal #{received}, expected #{wanted}")
    end
  end # Driver
end # LL
@@ -0,0 +1,36 @@
module LL
  ##
  # Class containing C/Java data for a Driver class.
  #
  # Every writer stores the given Array in an instance variable (exposed via
  # the readers below) and then hands the same Array to the matching
  # `*_native=` writer, which is not defined in this file.
  #
  class DriverConfig
    attr_reader :terminals, :rules, :table, :actions

    ##
    # @param [Array] array
    #
    def terminals=(array)
      @terminals = array
      self.terminals_native = array
    end

    ##
    # @param [Array] array
    #
    def rules=(array)
      @rules = array
      self.rules_native = array
    end

    ##
    # @param [Array] array
    #
    def table=(array)
      @table = array
      self.table_native = array
    end

    ##
    # @param [Array] array
    #
    def actions=(array)
      @actions = array
      self.actions_native = array
    end
  end # DriverConfig
end # LL
@@ -0,0 +1,51 @@
<%# ERB template for the Ruby source file of a generated parser class. -%>
<%# Reads @add_requires and @config (namespace, header, name, terminals, rules, table, actions, inner, action_bodies). -%>
<%# NOTE(review): the leading/trailing dash tags rely on ERB's "-" trim mode; confirm against the code that renders this template. -%>
# This file is automatically generated by ruby-ll. Manually changing this file
# is not recommended as any changes will be lost the next time this parser is
# re-generated.
<%- if @add_requires -%>
require 'll/setup'
<%- end -%>

<%# Open one module per namespace segment; the matching "end" lines are emitted at the bottom of the template. -%>
<%- @config.namespace.each do |part| -%>
module <%= part %>
<%- end -%>
<%- if @config.header -%>
<%= @config.header.strip -%>

<%- end -%>
class <%= @config.name -%> < LL::Driver
CONFIG = LL::DriverConfig.new

<%# Each of the four arrays below is written one element per line, followed by a comment holding the element's index. -%>
CONFIG.terminals = [
<%- @config.terminals.each_with_index do |terminal, index| -%>
<%= terminal.inspect -%>, # <%= index %>
<%- end -%>
].freeze

CONFIG.rules = [
<%- @config.rules.each_with_index do |row, index| -%>
<%= row.inspect -%>, # <%= index %>
<%- end -%>
].freeze

CONFIG.table = [
<%- @config.table.each_with_index do |row, index| -%>
<%= row.inspect -%>, # <%= index %>
<%- end -%>
].freeze

CONFIG.actions = [
<%- @config.actions.each_with_index do |row, index| -%>
<%= row.inspect -%>, # <%= index %>
<%- end -%>
].freeze
<%= @config.inner.rstrip if @config.inner %>
<%# Every action body becomes an instance method that takes a single "val" argument. -%>
<%- @config.action_bodies.each do |name, body| -%>

def <%= name %>(val)
<%= body %>
end
<%- end -%>
end
<%- @config.namespace.each do |part| -%>
end
<%- end -%>
@@ -0,0 +1,23 @@
module LL
  ##
  # Represents an epsilon in a grammar. Epsilon objects are primarily used to
  # break out of recursion.
  #
  class Epsilon
    # @return [LL::SourceLine] the source line passed to the constructor.
    attr_reader :source_line

    ##
    # @param [LL::SourceLine] source_line
    #
    def initialize(source_line)
      @source_line = source_line
    end

    ##
    # Fixed textual representation; it does not include the source line.
    #
    # @return [String]
    #
    def inspect
      'Epsilon()'
    end
  end # Epsilon
end # LL
@@ -0,0 +1,23 @@
module LL
  ##
  # Evaluation context for a single ERB template. The variables given to the
  # constructor are stored as instance variables so that a template evaluated
  # with the binding of this object can read them.
  #
  class ERBContext
    ##
    # @param [Hash] variables Pairs of variable name (without the leading "@")
    #  and value.
    #
    def initialize(variables = {})
      variables.each { |key, val| instance_variable_set("@#{key}", val) }
    end

    ##
    # Returns a binding created inside this object.
    #
    # @return [Binding]
    #
    def get_binding
      binding
    end
  end # ERBContext
end # LL
@@ -0,0 +1,359 @@
module LL
  ##
  # The GrammarCompiler class processes an AST (as parsed from an LL(1) grammar)
  # and returns an {LL::CompiledGrammar} instance.
  #
  class GrammarCompiler
    ##
    # Compiles the AST, then adds warnings for unused terminals/rules and
    # errors for first/first and first/follow conflicts.
    #
    # @param [LL::AST::Node] ast
    # @return [LL::CompiledGrammar]
    #
    def compile(ast)
      compiled = CompiledGrammar.new

      process(ast, compiled)

      warn_for_unused_terminals(compiled)
      warn_for_unused_rules(compiled)

      verify_first_first(compiled)
      verify_first_follow(compiled)

      return compiled
    end

    ##
    # Dispatches a node to the `on_TYPE` method matching the node's type and
    # returns whatever that method returns.
    #
    # @param [LL::AST::Node] node
    # @param [LL::CompiledGrammar] compiled_grammar
    # @return [LL::CompiledGrammar]
    #
    def process(node, compiled_grammar)
      handler = "on_#{node.type}"

      return send(handler, node, compiled_grammar)
    end

    ##
    # Adds warnings for any unused rules. The first defined rule is skipped
    # since it's the root rule.
    #
    # @param [LL::CompiledGrammar] compiled_grammar
    #
    def warn_for_unused_rules(compiled_grammar)
      compiled_grammar.rules.each_with_index do |rule, index|
        next if index == 0 || rule.references > 0

        compiled_grammar.add_warning(
          "Unused rule #{rule.name.inspect}",
          rule.source_line
        )
      end
    end

    ##
    # Adds warnings for any unused terminals.
    #
    # @param [LL::CompiledGrammar] compiled_grammar
    #
    def warn_for_unused_terminals(compiled_grammar)
      compiled_grammar.terminals.each do |terminal|
        next if terminal.references > 0

        compiled_grammar.add_warning(
          "Unused terminal #{terminal.name.inspect}",
          terminal.source_line
        )
      end
    end

    ##
    # Verifies all rules to see if they don't have any first/first conflicts.
    # Errors are added for every rule where this _is_ the case.
    #
    # For a conflicting rule one error is added for the rule itself, followed
    # by one error for every branch that takes part in a conflict.
    #
    # @param [LL::CompiledGrammar] compiled_grammar
    #
    def verify_first_first(compiled_grammar)
      compiled_grammar.rules.each do |rule|
        conflicting = Set.new

        # Every pair of branches is checked exactly once. Branches that are
        # already known to conflict are deliberately _not_ skipped: a branch
        # might only overlap with a branch that was flagged earlier, and it
        # has to be reported as well.
        rule.branches.combination(2) do |branch, other_branch|
          next if branch == other_branch

          overlapping = branch.first_set & other_branch.first_set

          unless overlapping.empty?
            conflicting << branch
            conflicting << other_branch
          end
        end

        unless conflicting.empty?
          compiled_grammar.add_error(
            'first/first conflict, multiple branches start with the same terminals',
            rule.source_line
          )

          conflicting.each do |branch|
            labels = branch.first_set.map do |token|
              token.is_a?(Epsilon) ? 'epsilon' : token.name
            end

            compiled_grammar.add_error(
              "branch starts with: #{labels.join(', ')}",
              branch.source_line
            )
          end
        end
      end
    end

    ##
    # Adds errors for any rules containing first/follow conflicts.
    #
    # A branch conflicts when its first set contains an epsilon while its
    # follow set is not empty. Two errors are added per conflicting branch: one
    # for the branch and one pointing at the epsilon.
    #
    # @param [LL::CompiledGrammar] compiled_grammar
    #
    def verify_first_follow(compiled_grammar)
      compiled_grammar.rules.each do |rule|
        rule.branches.each do |branch|
          epsilon = branch.first_set.find { |step| step.is_a?(Epsilon) }

          if epsilon && !branch.follow_set.empty?
            compiled_grammar.add_error(
              'first/follow conflict, branch can start with epsilon and is ' \
                'followed by (non) terminals',
              branch.source_line
            )

            compiled_grammar.add_error(
              'epsilon originates from here',
              epsilon.source_line
            )
          end
        end
      end
    end

    ##
    # Processes the root node of a grammar.
    #
    # @param [LL::AST::Node] node
    # @param [LL::CompiledGrammar] compiled_grammar
    #
    def on_grammar(node, compiled_grammar)
      # Create the prototypes for all rules since rules can be referenced before
      # they are defined.
      node.children.each do |child|
        if child.type == :rule
          on_rule_prototype(child, compiled_grammar)
        end
      end

      node.children.each do |child|
        process(child, compiled_grammar)
      end
    end

    ##
    # Sets the name of the parser. The processed child nodes are joined using
    # `::`. A warning is added when a name was already set.
    #
    # @param [LL::AST::Node] node
    # @param [LL::CompiledGrammar] compiled_grammar
    #
    def on_name(node, compiled_grammar)
      if compiled_grammar.name
        compiled_grammar.add_warning(
          "Overwriting existing parser name #{compiled_grammar.name.inspect}",
          node.source_line
        )
      end

      parts = node.children.map { |child| process(child, compiled_grammar) }

      compiled_grammar.name = parts.join('::')
    end

    ##
    # Processes the assignment of terminals. An error is added for every
    # terminal that has already been defined.
    #
    # @see #process
    #
    def on_terminals(node, compiled_grammar)
      node.children.each do |child|
        name = process(child, compiled_grammar)

        if compiled_grammar.has_terminal?(name)
          compiled_grammar.add_error(
            "The terminal #{name.inspect} has already been defined",
            child.source_line
          )
        else
          compiled_grammar.add_terminal(name, child.source_line)
        end
      end
    end

    ##
    # Processes an %inner directive.
    #
    # @see #process
    #
    def on_inner(node, compiled_grammar)
      compiled_grammar.inner = process(node.children[0], compiled_grammar)
    end

    ##
    # Processes a %header directive.
    #
    # @see #process
    #
    def on_header(node, compiled_grammar)
      compiled_grammar.header = process(node.children[0], compiled_grammar)
    end

    ##
    # Processes a node containing Ruby source code.
    #
    # @see #process
    # @return [String]
    #
    def on_ruby(node, compiled_grammar)
      return node.children[0]
    end

    ##
    # Extracts the name from an identifier.
    #
    # @see #process
    # @return [String]
    #
    def on_ident(node, compiled_grammar)
      return node.children[0]
    end

    ##
    # Processes an epsilon.
    #
    # @see #process
    # @return [LL::Epsilon]
    #
    def on_epsilon(node, compiled_grammar)
      return Epsilon.new(node.source_line)
    end

    ##
    # Processes the assignment of a rule.
    #
    # An error is added when the name is already used by a terminal. When the
    # rule already has branches an error is added and the rule is left as is.
    #
    # @see #process
    #
    def on_rule(node, compiled_grammar)
      name = process(node.children[0], compiled_grammar)

      if compiled_grammar.has_terminal?(name)
        compiled_grammar.add_error(
          "the rule name #{name.inspect} is already used as a terminal name",
          node.source_line
        )
      end

      if compiled_grammar.has_rule_with_branches?(name)
        compiled_grammar.add_error(
          "the rule #{name.inspect} has already been defined",
          node.source_line
        )

        return
      end

      branches = node.children[1..-1].map do |child|
        process(child, compiled_grammar)
      end

      rule = compiled_grammar.lookup_rule(name)

      rule.branches.concat(branches)
    end

    ##
    # Creates a basic prototype for a rule. Nothing is done when a rule with
    # the same name already exists.
    #
    # @see #process
    #
    def on_rule_prototype(node, compiled_grammar)
      name = process(node.children[0], compiled_grammar)

      return if compiled_grammar.has_rule?(name)

      rule = Rule.new(name, node.source_line)

      compiled_grammar.add_rule(rule)
    end

    ##
    # Processes a single rule branch. The first child holds the steps, the
    # optional second child the Ruby code of the branch.
    #
    # @see #process
    # @return [LL::Branch]
    #
    def on_branch(node, compiled_grammar)
      steps     = process(node.children[0], compiled_grammar)
      code_node = node.children[1]
      code      = code_node ? process(code_node, compiled_grammar) : nil

      return Branch.new(steps, node.source_line, code)
    end

    ##
    # Processes the steps of a branch.
    #
    # Identifiers are resolved to the rule/terminal they refer to; an error is
    # added (and the step is left out) when an identifier is unknown.
    #
    # @see #process
    # @return [Array]
    #
    def on_steps(node, compiled_grammar)
      steps = []

      node.children.each do |step_node|
        retval = process(step_node, compiled_grammar)

        # Literal rule/terminal names.
        if retval.is_a?(String)
          step = compiled_grammar.lookup_identifier(retval)

          undefined_identifier!(retval, step_node, compiled_grammar) unless step
        # Epsilon
        else
          step = retval
        end

        if step
          step.increment_references if step.respond_to?(:increment_references)

          steps << step
        end
      end

      return steps
    end

    private

    ##
    # Adds an error for an identifier that is neither a terminal nor a rule.
    #
    # @param [String] name
    # @param [LL::AST::Node] node
    # @param [LL::CompiledGrammar] compiled_grammar
    #
    def undefined_identifier!(name, node, compiled_grammar)
      compiled_grammar.add_error(
        "Undefined terminal or rule #{name.inspect}",
        node.source_line
      )
    end
  end # GrammarCompiler
end # LL