ruby-ll 1.0.0-java

Sign up to get free protection for your applications and to get access to all the features.
data/lib/libll.jar ADDED
Binary file
@@ -0,0 +1,13 @@
1
module LL
  module AST
    ##
    # A single node of an LL grammar AST. Behaves like a regular `::AST::Node`
    # and additionally exposes the source line the node is associated with.
    #
    class Node < ::AST::Node
      ##
      # The source line of this node.
      #
      # NOTE(review): the instance variable is never assigned in this class;
      # presumably the parent class sets it from the properties it is given.
      # Confirm against `::AST::Node`.
      #
      # @return [LL::SourceLine]
      #
      attr_reader(:source_line)
    end # Node
  end # AST
end # LL
data/lib/ll/branch.rb ADDED
@@ -0,0 +1,57 @@
1
module LL
  ##
  # A single branch of a grammar rule: the steps that make up the branch, the
  # source line it was defined on and the (optional) Ruby callback code.
  #
  class Branch
    attr_reader :steps, :source_line, :ruby_code

    ##
    # @param [Array] steps
    # @param [LL::SourceLine] source_line
    # @param [String] ruby_code
    #
    def initialize(steps, source_line, ruby_code = nil)
      @steps, @source_line, @ruby_code = steps, source_line, ruby_code
    end

    ##
    # Returns the FIRST() set of this branch. When the first step is a rule
    # the FIRST() set of that rule is returned, otherwise the first step
    # itself is wrapped in an Array.
    #
    # @return [Array<LL::Terminal>]
    #
    def first_set
      head = steps[0]

      return head.first_set if head.is_a?(Rule)

      [head]
    end

    ##
    # Returns the FOLLOW() set of this branch, based on the second step. An
    # empty Array is returned when the branch has no second step.
    #
    # @return [Array<LL::Terminal>]
    #
    def follow_set
      successor = steps[1]

      return [] unless successor

      successor.is_a?(Rule) ? successor.first_set : [successor]
    end

    ##
    # @return [String]
    #
    def inspect
      "Branch(steps: #{steps.inspect}, ruby_code: #{ruby_code.inspect})"
    end
  end # Branch
end # LL
data/lib/ll/cli.rb ADDED
@@ -0,0 +1,118 @@
1
module LL
  ##
  # CLI that can be used to generate ruby-ll parsers from a grammar file.
  #
  class CLI
    ##
    # Parses the given arguments and generates a parser for the first
    # non-option argument, which is used as the grammar input file. The
    # process is aborted with an error and the usage text when no input file
    # is given.
    #
    # @param [Array] argv
    #
    def run(argv = ARGV)
      options, leftovers = parse(argv)

      if leftovers.empty?
        # The usage text comes from the option parser built by #parse; a local
        # variable of #parse is not visible from this method.
        abort <<-EOF.strip
Error: you must specify a grammar input file

#{@option_parser}
        EOF
      end

      input = File.expand_path(leftovers[0])

      options[:output] ||= output_from_input(input)

      generate(input, options)
    end

    ##
    # Returns the default output path for an input path: the input path with
    # its extension (if any) replaced by ".rb".
    #
    # The extension is removed by length instead of by a regular expression so
    # that extensions containing regular expression meta characters are
    # handled correctly.
    #
    # @param [String] input
    # @return [String]
    #
    def output_from_input(input)
      input_ext = File.extname(input)

      return input[0, input.length - input_ext.length] + '.rb'
    end

    ##
    # Reads and compiles the grammar stored in `input`. Warnings and errors are
    # always displayed. When the grammar is valid the generated code is written
    # to `options[:output]`, otherwise the process exits with status 1.
    #
    # @param [String] input
    # @param [Hash] options
    #
    def generate(input, options)
      raw_grammar    = File.read(input)
      parser         = Parser.new(raw_grammar, input)
      gcompiler      = GrammarCompiler.new
      codegen        = CodeGenerator.new
      configcompiler = ConfigurationCompiler.new

      ast      = parser.parse
      cgrammar = gcompiler.compile(ast)

      cgrammar.display_messages

      if cgrammar.valid?
        config = configcompiler.generate(cgrammar)
        output = codegen.generate(config, options[:requires])

        File.open(options[:output], 'w') do |file|
          file.write(output)
        end
      else
        exit 1
      end
    end

    ##
    # Parses the command line arguments. Returns the options Hash followed by
    # the arguments that were not consumed as options.
    #
    # @param [Array] argv
    # @return [Array]
    #
    def parse(argv)
      options = {
        :requires => true,
        :output   => nil
      }

      # Kept around so #run can display the usage text.
      @option_parser = option_parser(options)

      leftovers = @option_parser.parse(argv)

      return options, leftovers
    end

    ##
    # Builds the OptionParser used by {#parse}. The option callbacks store
    # their results in the given options Hash.
    #
    # @param [Hash] options
    # @return [OptionParser]
    #
    def option_parser(options)
      return OptionParser.new do |opt|
        opt.summary_indent = ' '

        opt.banner = <<-EOF.strip
Usage: ruby-ll [INPUT-GRAMMAR] [OPTIONS]

About:

Generates a Ruby LL(1) parser from a ruby-ll compatible grammar file.

Examples:

ruby-ll lib/ll/parser.rll # output goes to lib/ll/parser.rb
ruby-ll lib/ll/parser.rll -o /tmp/parser.rb # output goes to /tmp/parser.rb
        EOF

        opt.separator "\nOptions:\n\n"

        opt.on '-h', '--help', 'Shows this help message' do
          abort opt.to_s
        end

        opt.on '--no-requires', 'Disables adding of require calls' do
          options[:requires] = false
        end

        opt.on '-o [PATH]', '--output [PATH]', 'Writes output to PATH' do |val|
          options[:output] = val
        end

        opt.on '-v', '--version', 'Shows the current version' do
          puts "ruby-ll #{VERSION} on #{RUBY_DESCRIPTION}"
          exit
        end
      end
    end
  end # CLI
end # LL
@@ -0,0 +1,32 @@
1
module LL
  ##
  # The CodeGenerator class takes a {LL::CompiledConfiguration} instance and
  # turns it into a block of Ruby source code that can be used as an actual
  # LL(1) parser.
  #
  class CodeGenerator
    ##
    # Path to the ERB template to use for code generation.
    #
    # @return [String]
    #
    TEMPLATE = File.expand_path('../driver_template.erb', __FILE__)

    ##
    # Renders the template using a context that exposes the configuration and
    # the `add_requires` flag, returning the resulting source code.
    #
    # @param [LL::CompiledConfiguration] config
    # @param [TrueClass|FalseClass] add_requires
    # @return [String]
    #
    def generate(config, add_requires = true)
      context = ERBContext.new(
        :config       => config,
        :add_requires => add_requires
      )

      template = File.read(TEMPLATE)

      # Passing the trim mode as a positional argument is deprecated since
      # Ruby 2.6, so the keyword argument is used whenever ERB supports it. The
      # positional form is only kept for older Ruby versions.
      if ERB.instance_method(:initialize).parameters.assoc(:key)
        erb = ERB.new(template, :trim_mode => '-')
      else
        erb = ERB.new(template, nil, '-')
      end

      return erb.result(context.get_binding)
    end
  end # CodeGenerator
end # LL
@@ -0,0 +1,35 @@
1
module LL
  ##
  # Value object holding the compiled state/lookup/action tables and related
  # data of a parser.
  #
  class CompiledConfiguration
    attr_reader :name, :namespace, :inner, :header, :terminals, :rules, :table,
      :actions, :action_bodies

    ##
    # Stores every option for which this object has a method of the same name.
    # Other options are ignored. Collection attributes that end up without a
    # value fall back to an empty Array or Hash.
    #
    # @param [Hash] options
    #
    # @option options [String] :name
    # @option options [Array] :namespace
    # @option options [String] :inner
    # @option options [String] :header
    # @option options [Array] :terminals
    # @option options [Array] :rules
    # @option options [Array] :table
    # @option options [Array] :actions
    # @option options [Hash] :action_bodies
    #
    def initialize(options = {})
      options.each_pair do |attribute, value|
        next unless respond_to?(attribute)

        instance_variable_set("@#{attribute}", value)
      end

      @namespace     ||= []
      @terminals     ||= []
      @rules         ||= []
      @table         ||= []
      @actions       ||= []
      @action_bodies ||= {}
    end
  end # CompiledConfiguration
end # LL
@@ -0,0 +1,167 @@
1
module LL
  ##
  # Holds the results of compiling a grammar: the parser name, the inner and
  # header code, the rules and terminals, and any warnings/errors produced
  # along the way.
  #
  class CompiledGrammar
    attr_accessor :name, :inner, :header

    attr_reader :warnings, :errors

    def initialize
      @warnings, @errors = [], []
      @terminals, @rules = {}, {}
      @inner = @header = nil
    end

    ##
    # Registers an error message.
    #
    # @param [String] message
    # @param [LL::SourceLine] source_line
    #
    def add_error(message, source_line)
      @errors.push(Message.new(:error, message, source_line))
    end

    ##
    # Registers a warning message.
    #
    # @param [String] message
    # @param [LL::SourceLine] source_line
    #
    def add_warning(message, source_line)
      @warnings.push(Message.new(:warning, message, source_line))
    end

    ##
    # Returns true if a terminal with the given name has been added.
    #
    # @param [String] name
    # @return [TrueClass|FalseClass]
    #
    def has_terminal?(name)
      @terminals.key?(name)
    end

    ##
    # Creates a terminal and stores it under the given name.
    #
    # @param [String] name
    # @param [LL::SourceLine] source_line
    # @return [LL::Terminal]
    #
    def add_terminal(name, source_line)
      terminal = Terminal.new(name, source_line)

      @terminals[name] = terminal

      terminal
    end

    ##
    # Returns true if a rule for the given name has already been assigned.
    #
    # @param [String] name
    # @return [TrueClass|FalseClass]
    #
    def has_rule?(name)
      @rules.key?(name)
    end

    ##
    # Returns true if a rule already exists for a given name _and_ has at least
    # 1 branch.
    #
    # @see #has_rule?
    #
    def has_rule_with_branches?(name)
      return false unless has_rule?(name)

      !@rules[name].branches.empty?
    end

    ##
    # Stores the rule under its own name.
    #
    # @param [LL::Rule] rule
    # @return [LL::Rule]
    #
    def add_rule(rule)
      @rules.store(rule.name, rule)
    end

    ##
    # @param [String] name
    # @return [LL::Rule]
    #
    def lookup_rule(name)
      @rules[name]
    end

    ##
    # Looks up an identifier from the list of terminals and/or rules. Rules take
    # precedence over terminals.
    #
    # If no rule/terminal could be found nil is returned instead.
    #
    # @param [String] name
    # @return [LL::Rule|LL::Terminal|NilClass]
    #
    def lookup_identifier(name)
      return lookup_rule(name) if has_rule?(name)
      return @terminals[name] if has_terminal?(name)

      nil
    end

    ##
    # @return [Array]
    #
    def rules
      @rules.values
    end

    ##
    # Maps every rule to its position in {#rules}.
    #
    # @return [Hash]
    #
    def rule_indices
      positions = {}

      rules.each_with_index { |rule, position| positions[rule] = position }

      positions
    end

    ##
    # @return [Array]
    #
    def terminals
      @terminals.values
    end

    ##
    # Maps every terminal to its position in {#terminals}.
    #
    # @return [Hash]
    #
    def terminal_indices
      positions = {}

      terminals.each_with_index { |term, position| positions[term] = position }

      positions
    end

    ##
    # Returns true when no errors have been added.
    #
    # @return [TrueClass|FalseClass]
    #
    def valid?
      @errors.empty?
    end

    ##
    # Displays all errors followed by all warnings.
    #
    def display_messages
      (errors + warnings).each do |msg|
        output.puts(msg.to_s)
      end
    end

    ##
    # The IO object messages are written to.
    #
    # @return [IO]
    #
    def output
      STDERR
    end
  end # CompiledGrammar
end # LL
@@ -0,0 +1,204 @@
1
module LL
  ##
  # Turns a {LL::CompiledGrammar} into a {LL::CompiledConfiguration}, the
  # structure {LL::CodeGenerator} uses to generate Ruby source code.
  #
  class ConfigurationCompiler
    ##
    # Numeric codes for the kinds of entries stored in the rows built by
    # {#generate_rules}.
    #
    # @return [Hash]
    #
    TYPES = {
      :rule     => 0,
      :terminal => 1,
      :epsilon  => 2,
      :action   => 3
    }.freeze

    ##
    # Action body used for branches that have no Ruby code of their own.
    #
    # @return [String]
    #
    DEFAULT_RUBY_CODE = 'val'.freeze

    ##
    # @param [LL::CompiledGrammar] grammar
    # @return [LL::CompiledConfiguration]
    #
    def generate(grammar)
      CompiledConfiguration.new(
        :name          => generate_name(grammar),
        :namespace     => generate_namespace(grammar),
        :inner         => grammar.inner,
        :header        => grammar.header,
        :terminals     => generate_terminals(grammar),
        :actions       => generate_actions(grammar),
        :action_bodies => generate_action_bodies(grammar),
        :rules         => generate_rules(grammar),
        :table         => generate_table(grammar)
      )
    end

    ##
    # Returns the last segment of the "::" separated grammar name.
    #
    # @param [LL::CompiledGrammar] grammar
    # @return [String]
    #
    def generate_name(grammar)
      grammar.name.split('::')[-1]
    end

    ##
    # Returns every segment of the "::" separated grammar name except for the
    # last one.
    #
    # @param [LL::CompiledGrammar] grammar
    # @return [Array]
    #
    def generate_namespace(grammar)
      segments = grammar.name.split('::')

      # Dropping the last segment leaves just the namespace, which is empty
      # when the name has at most one segment.
      segments.pop

      segments
    end

    ##
    # Returns an Array containing all the terminal names as symbols. The first
    # terminal is always `:$EOF` to ensure the array has the same amount of rows
    # as there are columns in the `table` array.
    #
    # @param [LL::CompiledGrammar] grammar
    # @return [Array]
    #
    def generate_terminals(grammar)
      names = [:$EOF]

      grammar.terminals.each { |term| names.push(term.name.to_sym) }

      names
    end

    ##
    # Returns one `[action name, argument count]` pair per branch. Epsilon
    # steps are not counted as arguments.
    #
    # @param [LL::CompiledGrammar] grammar
    # @return [Array]
    #
    def generate_actions(grammar)
      actions = []

      grammar.rules.each do |rule|
        rule.branches.each do |branch|
          arity = branch.steps.count { |step| !step.is_a?(Epsilon) }

          # One action is added per branch, so the current length doubles as
          # the index of the branch.
          actions << [:"_rule_#{actions.length}", arity]
        end
      end

      actions
    end

    ##
    # Returns a Hash mapping every action name to the Ruby code of its branch,
    # using the default code for branches without any.
    #
    # @param [LL::CompiledGrammar] grammar
    # @return [Hash]
    #
    def generate_action_bodies(grammar)
      bodies = {}

      grammar.rules.each do |rule|
        rule.branches.each do |branch|
          # Keys are unique, thus the Hash size equals the branch index.
          bodies[:"_rule_#{bodies.length}"] = branch.ruby_code || DEFAULT_RUBY_CODE
        end
      end

      bodies
    end

    ##
    # Builds the rules table of the parser. Each row starts with the action of
    # a branch followed by the steps of the branch in reverse order.
    #
    # @param [LL::CompiledGrammar] grammar
    # @return [Array]
    #
    def generate_rules(grammar)
      rows               = []
      rule_positions     = grammar.rule_indices
      terminal_positions = grammar.terminal_indices

      grammar.rules.each do |rule|
        rule.branches.each do |branch|
          # One row is added per branch, so the amount of rows built so far is
          # the index of the branch's action.
          row = [TYPES[:action], rows.length]

          branch.steps.reverse_each do |step|
            case step
            when LL::Terminal
              # Terminal indexes are shifted by one; the terminals list
              # produced by #generate_terminals starts with :$EOF.
              row.push(TYPES[:terminal], terminal_positions[step] + 1)
            when LL::Rule
              row.push(TYPES[:rule], rule_positions[step])
            when LL::Epsilon
              row.push(TYPES[:epsilon], 0)
            end
          end

          rows << row
        end
      end

      rows
    end

    ##
    # Generates the table array for the parser. This array has the following
    # structure:
    #
    #     [
    #       [EOF, TERMINAL 1, TERMINAL 2, TERMINAL 3, ...]
    #     ]
    #
    # EOF is always the first column and is used when running out of input while
    # processing a rule.
    #
    # @param [LL::CompiledGrammar] grammar
    # @return [Array]
    #
    def generate_table(grammar)
      terminal_positions = grammar.terminal_indices
      width              = grammar.terminals.length + 1
      table              = grammar.rules.map { Array.new(width, -1) }
      branch_index       = 0

      grammar.rules.zip(table).each do |rule, row|
        rule.branches.each do |branch|
          branch.first_set.each do |step|
            if step.is_a?(Terminal)
              # Terminals get the column matching their index.
              row[terminal_positions[step] + 1] = branch_index
            else
              # Anything else (= epsilon) claims every column that has not
              # been assigned yet.
              row.map! { |cell| cell == -1 ? branch_index : cell }
            end
          end

          branch_index += 1
        end
      end

      table
    end
  end # ConfigurationCompiler
end # LL