ruby-ll 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,204 @@
1
module LL
  ##
  # Builds the {LL::CompiledConfiguration} for a compiled grammar. The result
  # is what {LL::CodeGenerator} uses to produce the Ruby source of a parser.
  #
  class ConfigurationCompiler
    ##
    # Numeric codes used to tag the entries of a rules row.
    #
    # @return [Hash]
    #
    TYPES = {
      :rule     => 0,
      :terminal => 1,
      :epsilon  => 2,
      :action   => 3
    }.freeze

    ##
    # Ruby code used as the action body of branches without code of their own.
    #
    # @return [String]
    #
    DEFAULT_RUBY_CODE = 'val'.freeze

    ##
    # Assembles the full configuration for the given grammar.
    #
    # @param [LL::CompiledGrammar] grammar
    # @return [LL::CompiledConfiguration]
    #
    def generate(grammar)
      CompiledConfiguration.new(
        :name          => generate_name(grammar),
        :namespace     => generate_namespace(grammar),
        :inner         => grammar.inner,
        :header        => grammar.header,
        :terminals     => generate_terminals(grammar),
        :actions       => generate_actions(grammar),
        :action_bodies => generate_action_bodies(grammar),
        :rules         => generate_rules(grammar),
        :table         => generate_table(grammar)
      )
    end

    ##
    # Returns the last `::` separated segment of the grammar name.
    #
    # @param [LL::CompiledGrammar] grammar
    # @return [String]
    #
    def generate_name(grammar)
      grammar.name.split('::')[-1]
    end

    ##
    # Returns every `::` separated segment of the grammar name except the
    # last one. The Array is empty when the name has no namespace.
    #
    # @param [LL::CompiledGrammar] grammar
    # @return [Array]
    #
    def generate_namespace(grammar)
      grammar.name.split('::')[0...-1]
    end

    ##
    # Returns the terminal names as Symbols. `:$EOF` always comes first so
    # that this Array has as many entries as a row of the `table` Array has
    # columns.
    #
    # @param [LL::CompiledGrammar] grammar
    # @return [Array]
    #
    def generate_terminals(grammar)
      [:$EOF] + grammar.terminals.map { |terminal| terminal.name.to_sym }
    end

    ##
    # Returns one `[method name, argument count]` pair per branch. Branches
    # are numbered in definition order across all rules and epsilon steps are
    # not counted as arguments.
    #
    # @param [LL::CompiledGrammar] grammar
    # @return [Array]
    #
    def generate_actions(grammar)
      branches = grammar.rules.flat_map { |rule| rule.branches }

      branches.each_with_index.map do |branch, position|
        arity = branch.steps.count { |step| !step.is_a?(Epsilon) }

        [:"_rule_#{position}", arity]
      end
    end

    ##
    # Maps every action method name to the Ruby code of its branch, falling
    # back to {DEFAULT_RUBY_CODE} for branches without code.
    #
    # @param [LL::CompiledGrammar] grammar
    # @return [Hash]
    #
    def generate_action_bodies(grammar)
      branches = grammar.rules.flat_map { |rule| rule.branches }
      bodies   = {}

      branches.each_with_index do |branch, position|
        bodies[:"_rule_#{position}"] = branch.ruby_code || DEFAULT_RUBY_CODE
      end

      bodies
    end

    ##
    # Builds the rules table of the parser. Every row starts with the action
    # entry of the branch, followed by the steps of the branch in reverse
    # order. Each entry is a type code from {TYPES} followed by a value.
    #
    # @param [LL::CompiledGrammar] grammar
    # @return [Array]
    #
    def generate_rules(grammar)
      rule_indices = grammar.rule_indices
      term_indices = grammar.terminal_indices
      branches     = grammar.rules.flat_map { |rule| rule.branches }

      branches.each_with_index.map do |branch, action_index|
        encoded = branch.steps.reverse_each.flat_map do |step|
          case step
          when LL::Terminal
            # Terminal IDs are shifted by one because index 0 is $EOF.
            [TYPES[:terminal], term_indices[step] + 1]
          when LL::Rule
            [TYPES[:rule], rule_indices[step]]
          when LL::Epsilon
            [TYPES[:epsilon], 0]
          else
            []
          end
        end

        [TYPES[:action], action_index] + encoded
      end
    end

    ##
    # Generates the table array for the parser. This array has the following
    # structure:
    #
    #     [
    #       [EOF, TERMINAL 1, TERMINAL 2, TERMINAL 3, ...]
    #     ]
    #
    # There is one row per rule. EOF is always the first column and is used
    # when running out of input while processing a rule. A cell holds the
    # index of the branch to use, or -1 when there is none.
    #
    # @param [LL::CompiledGrammar] grammar
    # @return [Array]
    #
    def generate_table(grammar)
      term_indices = grammar.terminal_indices
      width        = grammar.terminals.length + 1
      branch_index = 0

      grammar.rules.map do |rule|
        row = Array.new(width, -1)

        rule.branches.each do |branch|
          branch.first_set.each do |step|
            if step.is_a?(Terminal)
              # The column of a terminal is its index, shifted past EOF.
              row[term_indices[step] + 1] = branch_index
            else
              # Anything else (= epsilon) claims every column that has not
              # been claimed yet.
              row.map! { |cell| cell == -1 ? branch_index : cell }
            end
          end

          branch_index += 1
        end

        row
      end
    end
  end # ConfigurationCompiler
end # LL
@@ -0,0 +1,46 @@
1
module LL
  ##
  # Base class of generated parsers. It provides the methods that raise
  # {LL::ParserError} for the different kinds of parse failures.
  #
  class Driver
    ##
    # Raises when no rule was found for a table index.
    #
    # @param [Fixnum] stack_value
    # @param [Array] token
    # @raise [LL::ParserError]
    #
    def stack_input_error(stack_value, token)
      token_type = token[0].inspect
      message    = "Unexpected rule #{stack_value} for #{token_type}"

      raise ParserError, message
    end

    ##
    # Raises when the stack has been consumed while input is still being sent
    # to the parser.
    #
    # @param [Array] token
    # @raise [LL::ParserError]
    #
    def unexpected_input_error(token)
      token_type = token[0].inspect

      raise(
        ParserError,
        "Received token #{token_type} but there's nothing left to parse"
      )
    end

    ##
    # Raises when an invalid terminal was specified as the input. The IDs are
    # looked up in the terminals list of the parser's `CONFIG` constant.
    #
    # @param [Fixnum] got_id The ID of the received terminal.
    # @param [Fixnum] expected_id The ID of the expected terminal.
    # @raise [LL::ParserError]
    #
    def invalid_terminal_error(got_id, expected_id)
      names    = self.class::CONFIG.terminals
      expected = names[expected_id].inspect
      received = names[got_id].inspect

      raise ParserError, "Invalid terminal #{received}, expected #{expected}"
    end
  end # Driver
end # LL
@@ -0,0 +1,36 @@
1
module LL
  ##
  # Class containing C/Java data for a Driver class. Every setter keeps the
  # Ruby value in an instance variable and also passes it to the matching
  # `*_native` writer, which is not defined in this file.
  #
  class DriverConfig
    attr_reader :terminals, :rules, :table, :actions

    ##
    # Stores the terminals and forwards them to `terminals_native=`.
    #
    # @param [Array] array
    #
    def terminals=(array)
      @terminals = array

      self.terminals_native = array
    end

    ##
    # Stores the rules and forwards them to `rules_native=`.
    #
    # @param [Array] array
    #
    def rules=(array)
      @rules = array

      self.rules_native = array
    end

    ##
    # Stores the table and forwards it to `table_native=`.
    #
    # @param [Array] array
    #
    def table=(array)
      @table = array

      self.table_native = array
    end

    ##
    # Stores the actions and forwards them to `actions_native=`.
    #
    # @param [Array] array
    #
    def actions=(array)
      @actions = array

      self.actions_native = array
    end
  end # DriverConfig
end # LL
@@ -0,0 +1,51 @@
1
<%# Template for the Ruby source of a generated parser: a class inheriting from LL::Driver, wrapped in one module per namespace segment. -%>
<%# Reads @add_requires and @config (namespace, header, name, terminals, rules, table, actions, inner, action_bodies). -%>
<%# NOTE(review): the dash-trimmed tags used here presumably need ERB's '-' trim mode; confirm against the code that renders this template. -%>
# This file is automatically generated by ruby-ll. Manually changing this file
# is not recommended as any changes will be lost the next time this parser is
# re-generated.
<%- if @add_requires -%>
require 'll/setup'
<%- end -%>

<%- @config.namespace.each do |part| -%>
module <%= part %>
<%- end -%>
<%- if @config.header -%>
<%= @config.header.strip -%>

<%- end -%>
class <%= @config.name -%> < LL::Driver
CONFIG = LL::DriverConfig.new

<%# Each table below is written one entry per line, followed by the index of the entry as a trailing comment. -%>
CONFIG.terminals = [
<%- @config.terminals.each_with_index do |terminal, index| -%>
<%= terminal.inspect -%>, # <%= index %>
<%- end -%>
].freeze

CONFIG.rules = [
<%- @config.rules.each_with_index do |row, index| -%>
<%= row.inspect -%>, # <%= index %>
<%- end -%>
].freeze

CONFIG.table = [
<%- @config.table.each_with_index do |row, index| -%>
<%= row.inspect -%>, # <%= index %>
<%- end -%>
].freeze

CONFIG.actions = [
<%- @config.actions.each_with_index do |row, index| -%>
<%= row.inspect -%>, # <%= index %>
<%- end -%>
].freeze
<%= @config.inner.rstrip if @config.inner %>
<%# One method per action body; each method takes a single `val` argument. -%>
<%- @config.action_bodies.each do |name, body| -%>

def <%= name %>(val)
<%= body %>
end
<%- end -%>
end
<%# Closes the modules opened for the namespace segments. -%>
<%- @config.namespace.each do |part| -%>
end
<%- end -%>
@@ -0,0 +1,23 @@
1
module LL
  ##
  # Marks an epsilon in a grammar. Epsilon objects are primarily used to
  # break out of recursion.
  #
  class Epsilon
    ##
    # The source line this epsilon was created with.
    #
    # @return [LL::SourceLine]
    #
    attr_reader :source_line

    ##
    # @param [LL::SourceLine] source_line
    #
    def initialize(source_line)
      @source_line = source_line
    end

    ##
    # Returns a fixed label; the source line is not part of it.
    #
    # @return [String]
    #
    def inspect
      'Epsilon()'
    end
  end # Epsilon
end # LL
@@ -0,0 +1,23 @@
1
module LL
  ##
  # Holds the variables of a single ERB template as instance variables and
  # hands out the binding in which that template can be evaluated.
  #
  class ERBContext
    ##
    # Sets one instance variable per entry: the key, prefixed with `@`, is
    # the variable name and the value is assigned to it.
    #
    # @param [Hash] variables
    #
    def initialize(variables = {})
      variables.each do |key, assigned|
        instance_variable_set(:"@#{key}", assigned)
      end
    end

    ##
    # Returns a binding of this object, giving access to the instance
    # variables set in {#initialize}.
    #
    # @return [Binding]
    #
    def get_binding
      binding
    end
  end # ERBContext
end # LL
@@ -0,0 +1,359 @@
1
module LL
  ##
  # The GrammarCompiler class processes an AST (as parsed from an LL(1) grammar)
  # and returns an {LL::CompiledGrammar} instance.
  #
  class GrammarCompiler
    ##
    # Compiles the AST, then adds warnings for unused terminals and rules and
    # errors for first/first and first/follow conflicts.
    #
    # @param [LL::AST::Node] ast
    # @return [LL::CompiledGrammar]
    #
    def compile(ast)
      compiled = CompiledGrammar.new

      process(ast, compiled)

      warn_for_unused_terminals(compiled)
      warn_for_unused_rules(compiled)

      verify_first_first(compiled)
      verify_first_follow(compiled)

      compiled
    end

    ##
    # Dispatches a node to the `on_<type>` handler matching its type and
    # returns whatever that handler returns.
    #
    # @param [LL::AST::Node] node
    # @param [LL::CompiledGrammar] compiled_grammar
    # @return [Object]
    #
    def process(node, compiled_grammar)
      send("on_#{node.type}", node, compiled_grammar)
    end

    ##
    # Adds warnings for any unused rules. The first defined rule is skipped
    # since it's the root rule.
    #
    # @param [LL::CompiledGrammar] compiled_grammar
    #
    def warn_for_unused_rules(compiled_grammar)
      compiled_grammar.rules.each_with_index do |rule, index|
        next if index == 0 || rule.references > 0

        compiled_grammar.add_warning(
          "Unused rule #{rule.name.inspect}",
          rule.source_line
        )
      end
    end

    ##
    # Adds warnings for any unused terminals.
    #
    # @param [LL::CompiledGrammar] compiled_grammar
    #
    def warn_for_unused_terminals(compiled_grammar)
      compiled_grammar.terminals.each do |terminal|
        next if terminal.references > 0

        compiled_grammar.add_warning(
          "Unused terminal #{terminal.name.inspect}",
          terminal.source_line
        )
      end
    end

    ##
    # Verifies that the branches of every rule are free of first/first
    # conflicts. For every rule with a conflict one error is added for the
    # rule itself, followed by one error per conflicting branch.
    #
    # Every pair of branches is compared. Skipping branches that were already
    # flagged would hide a branch that only overlaps with such a branch, e.g.
    # the third branch of `A | A B | B` when looking at first sets only.
    #
    # @param [LL::CompiledGrammar] compiled_grammar
    #
    def verify_first_first(compiled_grammar)
      compiled_grammar.rules.each do |rule|
        conflicting = Set.new

        # Compute every first set once instead of once per comparison.
        first_sets = rule.branches.map { |branch| [branch, branch.first_set] }

        first_sets.combination(2) do |(branch, first), (other_branch, other_first)|
          next if (first & other_first).empty?

          conflicting << branch << other_branch
        end

        next if conflicting.empty?

        compiled_grammar.add_error(
          'first/first conflict, multiple branches start with the same terminals',
          rule.source_line
        )

        conflicting.each do |branch|
          labels = branch.first_set.map do |token|
            token.is_a?(Epsilon) ? 'epsilon' : token.name
          end

          compiled_grammar.add_error(
            "branch starts with: #{labels.join(', ')}",
            branch.source_line
          )
        end
      end
    end

    ##
    # Adds errors for any rules containing first/follow conflicts: branches
    # that have an epsilon in their first set and a non-empty follow set.
    #
    # @param [LL::CompiledGrammar] compiled_grammar
    #
    def verify_first_follow(compiled_grammar)
      compiled_grammar.rules.each do |rule|
        rule.branches.each do |branch|
          epsilon = branch.first_set.find { |step| step.is_a?(Epsilon) }

          # The follow set is only requested for branches with an epsilon.
          next unless epsilon
          next if branch.follow_set.empty?

          compiled_grammar.add_error(
            'first/follow conflict, branch can start with epsilon and is ' \
              'followed by (non) terminals',
            branch.source_line
          )

          compiled_grammar.add_error(
            'epsilon originates from here',
            epsilon.source_line
          )
        end
      end
    end

    ##
    # Processes the root node of a grammar.
    #
    # @param [LL::AST::Node] node
    # @param [LL::CompiledGrammar] compiled_grammar
    #
    def on_grammar(node, compiled_grammar)
      # Create the prototypes for all rules since rules can be referenced before
      # they are defined.
      node.children.each do |child|
        on_rule_prototype(child, compiled_grammar) if child.type == :rule
      end

      node.children.each { |child| process(child, compiled_grammar) }
    end

    ##
    # Sets the name of the parser, joining the processed child nodes with
    # `::`. A warning is added when a name was already set.
    #
    # @param [LL::AST::Node] node
    # @param [LL::CompiledGrammar] compiled_grammar
    #
    def on_name(node, compiled_grammar)
      if compiled_grammar.name
        compiled_grammar.add_warning(
          "Overwriting existing parser name #{compiled_grammar.name.inspect}",
          node.source_line
        )
      end

      parts = node.children.map { |child| process(child, compiled_grammar) }

      compiled_grammar.name = parts.join('::')
    end

    ##
    # Processes the assignment of terminals. Terminals that were already
    # defined result in an error instead of being added again.
    #
    # @see #process
    #
    def on_terminals(node, compiled_grammar)
      node.children.each do |child|
        name = process(child, compiled_grammar)

        if compiled_grammar.has_terminal?(name)
          compiled_grammar.add_error(
            "The terminal #{name.inspect} has already been defined",
            child.source_line
          )
        else
          compiled_grammar.add_terminal(name, child.source_line)
        end
      end
    end

    ##
    # Processes an %inner directive.
    #
    # @see #process
    #
    def on_inner(node, compiled_grammar)
      compiled_grammar.inner = process(node.children[0], compiled_grammar)
    end

    ##
    # Processes a %header directive.
    #
    # @see #process
    #
    def on_header(node, compiled_grammar)
      compiled_grammar.header = process(node.children[0], compiled_grammar)
    end

    ##
    # Processes a node containing Ruby source code.
    #
    # @see #process
    # @return [String]
    #
    def on_ruby(node, compiled_grammar)
      node.children[0]
    end

    ##
    # Extracts the name from an identifier.
    #
    # @see #process
    # @return [String]
    #
    def on_ident(node, compiled_grammar)
      node.children[0]
    end

    ##
    # Processes an epsilon.
    #
    # @see #process
    # @return [LL::Epsilon]
    #
    def on_epsilon(node, compiled_grammar)
      Epsilon.new(node.source_line)
    end

    ##
    # Processes the assignment of a rule. The branches are appended to the
    # rule looked up by name. An error is added when the name is already used
    # by a terminal. When the rule already has branches an error is added and
    # the rule is left untouched.
    #
    # @see #process
    #
    def on_rule(node, compiled_grammar)
      name = process(node.children[0], compiled_grammar)

      if compiled_grammar.has_terminal?(name)
        compiled_grammar.add_error(
          "the rule name #{name.inspect} is already used as a terminal name",
          node.source_line
        )
      end

      if compiled_grammar.has_rule_with_branches?(name)
        compiled_grammar.add_error(
          "the rule #{name.inspect} has already been defined",
          node.source_line
        )

        return
      end

      branches = node.children[1..-1].map do |child|
        process(child, compiled_grammar)
      end

      compiled_grammar.lookup_rule(name).branches.concat(branches)
    end

    ##
    # Creates a basic prototype for a rule, unless a rule with the same name
    # already exists.
    #
    # @see #process
    #
    def on_rule_prototype(node, compiled_grammar)
      name = process(node.children[0], compiled_grammar)

      return if compiled_grammar.has_rule?(name)

      compiled_grammar.add_rule(Rule.new(name, node.source_line))
    end

    ##
    # Processes a single rule branch. The second child node, when present,
    # holds the code of the branch.
    #
    # @see #process
    # @return [LL::Branch]
    #
    def on_branch(node, compiled_grammar)
      steps     = process(node.children[0], compiled_grammar)
      code_node = node.children[1]
      code      = code_node ? process(code_node, compiled_grammar) : nil

      Branch.new(steps, node.source_line, code)
    end

    ##
    # Processes the steps of a branch. Identifiers are resolved to the rule or
    # terminal they name; unknown identifiers add an error and are left out of
    # the result.
    #
    # @see #process
    # @return [Array]
    #
    def on_steps(node, compiled_grammar)
      steps = []

      node.children.each do |step_node|
        retval = process(step_node, compiled_grammar)

        # Literal rule/terminal names.
        if retval.is_a?(String)
          step = compiled_grammar.lookup_identifier(retval)

          undefined_identifier!(retval, step_node, compiled_grammar) unless step
        # Epsilon
        else
          step = retval
        end

        next unless step

        step.increment_references if step.respond_to?(:increment_references)

        steps << step
      end

      steps
    end

    private

    ##
    # Adds an error for an identifier that is neither a terminal nor a rule.
    #
    # @param [String] name
    # @param [LL::AST::Node] node
    # @param [LL::CompiledGrammar] compiled_grammar
    #
    def undefined_identifier!(name, node, compiled_grammar)
      compiled_grammar.add_error(
        "Undefined terminal or rule #{name.inspect}",
        node.source_line
      )
    end
  end # GrammarCompiler
end # LL