foreverman-dhaka 2.2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +64 -0
- data/lib/dhaka.rb +62 -0
- data/lib/dhaka/dot/dot.rb +29 -0
- data/lib/dhaka/evaluator/evaluator.rb +133 -0
- data/lib/dhaka/grammar/closure_hash.rb +15 -0
- data/lib/dhaka/grammar/grammar.rb +236 -0
- data/lib/dhaka/grammar/grammar_symbol.rb +27 -0
- data/lib/dhaka/grammar/precedence.rb +19 -0
- data/lib/dhaka/grammar/production.rb +36 -0
- data/lib/dhaka/lexer/accept_actions.rb +36 -0
- data/lib/dhaka/lexer/alphabet.rb +21 -0
- data/lib/dhaka/lexer/compiled_lexer.rb +46 -0
- data/lib/dhaka/lexer/dfa.rb +121 -0
- data/lib/dhaka/lexer/lexeme.rb +32 -0
- data/lib/dhaka/lexer/lexer.rb +70 -0
- data/lib/dhaka/lexer/lexer_run.rb +78 -0
- data/lib/dhaka/lexer/regex_grammar.rb +393 -0
- data/lib/dhaka/lexer/regex_parser.rb +2010 -0
- data/lib/dhaka/lexer/regex_tokenizer.rb +14 -0
- data/lib/dhaka/lexer/specification.rb +96 -0
- data/lib/dhaka/lexer/state.rb +68 -0
- data/lib/dhaka/lexer/state_machine.rb +37 -0
- data/lib/dhaka/parser/action.rb +55 -0
- data/lib/dhaka/parser/channel.rb +58 -0
- data/lib/dhaka/parser/compiled_parser.rb +51 -0
- data/lib/dhaka/parser/conflict.rb +54 -0
- data/lib/dhaka/parser/item.rb +43 -0
- data/lib/dhaka/parser/parse_result.rb +50 -0
- data/lib/dhaka/parser/parse_tree.rb +66 -0
- data/lib/dhaka/parser/parser.rb +165 -0
- data/lib/dhaka/parser/parser_methods.rb +11 -0
- data/lib/dhaka/parser/parser_run.rb +39 -0
- data/lib/dhaka/parser/parser_state.rb +74 -0
- data/lib/dhaka/parser/token.rb +22 -0
- data/lib/dhaka/runtime.rb +51 -0
- data/lib/dhaka/tokenizer/tokenizer.rb +190 -0
- data/test/all_tests.rb +5 -0
- data/test/arithmetic/arithmetic_evaluator.rb +64 -0
- data/test/arithmetic/arithmetic_evaluator_test.rb +43 -0
- data/test/arithmetic/arithmetic_grammar.rb +41 -0
- data/test/arithmetic/arithmetic_grammar_test.rb +9 -0
- data/test/arithmetic/arithmetic_test_methods.rb +9 -0
- data/test/arithmetic/arithmetic_tokenizer.rb +39 -0
- data/test/arithmetic/arithmetic_tokenizer_test.rb +38 -0
- data/test/arithmetic_precedence/arithmetic_precedence_evaluator.rb +43 -0
- data/test/arithmetic_precedence/arithmetic_precedence_grammar.rb +24 -0
- data/test/arithmetic_precedence/arithmetic_precedence_grammar_test.rb +30 -0
- data/test/arithmetic_precedence/arithmetic_precedence_lexer_specification.rb +23 -0
- data/test/arithmetic_precedence/arithmetic_precedence_parser_test.rb +33 -0
- data/test/brackets/bracket_grammar.rb +23 -0
- data/test/brackets/bracket_tokenizer.rb +22 -0
- data/test/brackets/brackets_test.rb +28 -0
- data/test/chittagong/chittagong_driver.rb +46 -0
- data/test/chittagong/chittagong_driver_test.rb +276 -0
- data/test/chittagong/chittagong_evaluator.rb +284 -0
- data/test/chittagong/chittagong_evaluator_test.rb +38 -0
- data/test/chittagong/chittagong_grammar.rb +104 -0
- data/test/chittagong/chittagong_lexer.rb +109 -0
- data/test/chittagong/chittagong_lexer_specification.rb +37 -0
- data/test/chittagong/chittagong_lexer_test.rb +58 -0
- data/test/chittagong/chittagong_parser.rb +879 -0
- data/test/chittagong/chittagong_parser_test.rb +55 -0
- data/test/chittagong/chittagong_test.rb +170 -0
- data/test/core/another_lalr_but_not_slr_grammar.rb +20 -0
- data/test/core/compiled_parser_test.rb +44 -0
- data/test/core/dfa_test.rb +170 -0
- data/test/core/evaluator_test.rb +22 -0
- data/test/core/grammar_test.rb +83 -0
- data/test/core/lalr_but_not_slr_grammar.rb +19 -0
- data/test/core/lexer_test.rb +139 -0
- data/test/core/malformed_grammar.rb +7 -0
- data/test/core/malformed_grammar_test.rb +8 -0
- data/test/core/nullable_grammar.rb +21 -0
- data/test/core/parse_result_test.rb +44 -0
- data/test/core/parser_state_test.rb +24 -0
- data/test/core/parser_test.rb +131 -0
- data/test/core/precedence_grammar.rb +17 -0
- data/test/core/precedence_grammar_test.rb +9 -0
- data/test/core/rr_conflict_grammar.rb +21 -0
- data/test/core/simple_grammar.rb +22 -0
- data/test/core/sr_conflict_grammar.rb +16 -0
- data/test/dhaka_test_helper.rb +18 -0
- data/test/fake_logger.rb +17 -0
- metadata +137 -0
data/Rakefile
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
require 'rake/rdoctask'
require 'rake/gempackagetask'
require 'rake/testtask'
require 'rubygems'

# Location of the regex parser source that the :generate_regex_parser task
# writes. It is machine-generated, so it is also kept out of the RDoc output.
REGEX_PARSER_SOURCE = 'lib/dhaka/lexer/regex_parser.rb'

# Extensions of the files under lib/ and test/ that are shipped in the gem.
PACKAGED_EXTENSIONS = %w[.rb .txt]

# API documentation for everything under lib/, with README as the main page.
Rake::RDocTask.new do |rdoc|
  rdoc.rdoc_files.include('README', 'lib/**/*.rb')
  # The exclusion must be spelled relative to the same directory as the
  # include glob above, otherwise it never matches anything.
  rdoc.rdoc_files.exclude(REGEX_PARSER_SOURCE)
  rdoc.main = "README"
  rdoc.rdoc_dir = '../doc'
end

# Gem specification. The version comes from the VERSION environment variable
# and falls back to "0.0.0" when it is not set.
spec = Gem::Specification.new do |s|
  s.name = "dhaka"
  s.author = "Mushfeq Khan"
  s.email = "mushfeq dot khan at gmail dot com"
  s.version = ENV['VERSION'] || "0.0.0"
  s.platform = Gem::Platform::RUBY
  s.summary = "An LALR1 parser generator written in Ruby"
  # Select on the real file extension rather than on a substring of the path,
  # so that names which merely contain ".rb" or ".txt" are not packaged.
  s.files = Dir.glob("{lib,test}/**/*").select { |file| PACKAGED_EXTENSIONS.include?(File.extname(file)) } + ['Rakefile']
  s.require_path = 'lib'
  s.autorequire = 'dhaka'
  s.has_rdoc = true
end

Rake::GemPackageTask.new(spec) do |pkg|
  pkg.package_dir = "../gems"
end

Rake::TestTask.new do |t|
  t.libs << "test"
  t.test_files = FileList['test/**/*test.rb']
  t.verbose = true
end

# Regenerates the compiled parser for the regex grammar used by the lexer.
task :generate_regex_parser do
  require 'lib/dhaka'
  File.open(REGEX_PARSER_SOURCE, 'w') do |file|
    file << Dhaka::Parser.new(Dhaka::LexerSupport::RegexGrammar).compile_to_ruby_source_as('Dhaka::LexerSupport::RegexParser')
  end
end

task :gem => [:test, :generate_regex_parser]

task :default => :test

# The test suite loads the compiled Chittagong parser and lexer, so both are
# regenerated before the tests run.
task :test => [:generate_chittagong_parser, :generate_chittagong_lexer]

task :generate_chittagong_parser do
  require 'lib/dhaka'
  require 'test/chittagong/chittagong_grammar'
  require 'test/fake_logger'
  File.open('test/chittagong/chittagong_parser.rb', 'w') do |file|
    file << Dhaka::Parser.new(ChittagongGrammar, FakeLogger.new).compile_to_ruby_source_as(:ChittagongParser)
  end
end

task :generate_chittagong_lexer do
  require 'lib/dhaka'
  require 'test/chittagong/chittagong_lexer_specification'
  File.open('test/chittagong/chittagong_lexer.rb', 'w') do |file|
    file << Dhaka::Lexer.new(ChittagongLexerSpecification).compile_to_ruby_source_as(:ChittagongLexer)
  end
end
|
data/lib/dhaka.rb
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2006, 2007 Mushfeq Khan
|
3
|
+
#
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
5
|
+
# a copy of this software and associated documentation files (the
|
6
|
+
# "Software"), to deal in the Software without restriction, including
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
10
|
+
# the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be
|
13
|
+
# included in all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
19
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
20
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
21
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
|
+
#++
|
23
|
+
|
24
|
+
require 'set'
|
25
|
+
require 'logger'
|
26
|
+
require 'delegate'
|
27
|
+
|
28
|
+
%w[
|
29
|
+
dot/dot
|
30
|
+
grammar/grammar_symbol
|
31
|
+
grammar/production
|
32
|
+
grammar/closure_hash
|
33
|
+
grammar/grammar
|
34
|
+
grammar/precedence
|
35
|
+
parser/parse_tree
|
36
|
+
parser/parse_result
|
37
|
+
parser/item
|
38
|
+
parser/channel
|
39
|
+
parser/parser_methods
|
40
|
+
parser/parser_state
|
41
|
+
parser/conflict
|
42
|
+
parser/token
|
43
|
+
parser/action
|
44
|
+
parser/parser_run
|
45
|
+
parser/parser
|
46
|
+
parser/compiled_parser
|
47
|
+
tokenizer/tokenizer
|
48
|
+
evaluator/evaluator
|
49
|
+
lexer/accept_actions
|
50
|
+
lexer/alphabet
|
51
|
+
lexer/regex_grammar
|
52
|
+
lexer/regex_tokenizer
|
53
|
+
lexer/regex_parser
|
54
|
+
lexer/state_machine
|
55
|
+
lexer/dfa
|
56
|
+
lexer/state
|
57
|
+
lexer/specification
|
58
|
+
lexer/lexeme
|
59
|
+
lexer/lexer_run
|
60
|
+
lexer/lexer
|
61
|
+
lexer/compiled_lexer
|
62
|
+
].each {|path| require File.join(File.dirname(__FILE__), 'dhaka/' + path)}
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module Dhaka
  module Dot #:nodoc:
    # Accumulates the lines of a Graphviz DOT digraph description.
    #
    # The constructor writes the graph header and the default node attributes,
    # yields the new instance so the caller can add nodes and edges, and then
    # writes the closing brace. Nodes are identified in the output by the
    # +object_id+ of the object passed to +node+ or +edge+.
    class Digraph #:nodoc:
      # +node_attributes+ is a hash of default attributes for all nodes.
      # The block is optional; without one the graph contains no nodes or edges.
      def initialize(node_attributes = {})
        @result = ["digraph x {"]
        @result << %(node #{dotify_hash(node_attributes)})
        yield(self) if block_given?
        @result << '}'
      end

      # Adds a node statement for +obj+ with the given attributes.
      def node(obj, attributes = {})
        @result << "#{obj.object_id} #{dotify_hash(attributes)}"
      end

      # Adds a directed edge from +src+ to +dest+ with the given attributes.
      def edge(src, dest, attributes = {})
        @result << "#{src.object_id} -> #{dest.object_id} #{dotify_hash(attributes)}"
      end

      # Renders +hash+ as a bracketed attribute list such as
      # <tt>[label="x" shape="box"]</tt>, sorted by key so the output is
      # deterministic. Returns an empty string for an empty hash.
      def dotify_hash(hash)
        return "" if hash.empty?
        pairs = hash.map { |key, value| [key.to_s, value.to_s] }.sort
        '[' + pairs.map { |key, value| "#{key}=#{value.inspect}" }.join(' ') + ']'
      end

      # Returns the accumulated lines joined with newlines.
      def to_dot
        @result.join("\n")
      end
    end
  end
end
|
@@ -0,0 +1,133 @@
|
|
1
|
+
module Dhaka
|
2
|
+
# Abstract base class for evaluators.
|
3
|
+
#
|
4
|
+
# Defining an evaluator is an easy way to perform syntax-directed evaluation without having to generate an abstract
|
5
|
+
# syntax tree representation of the input.
|
6
|
+
#
|
7
|
+
# An evaluation rule for a given production named +bar+ is defined by calling +for_bar+ with
|
8
|
+
# a block that performs the evaluation. For detailed examples, see the evaluators in the
|
9
|
+
# test suite.
|
10
|
+
#
|
11
|
+
# The following is an evaluator for arithmetic expressions. When a parse tree node is encountered that
|
12
|
+
# corresponds to the production named +addition+, the block passed to +for_addition+ is invoked. The +evaluate+
|
13
|
+
# method is then recursively called on the child nodes, in this case the operands to the addition operation. The
|
14
|
+
# result is obtained by adding the evaluation results of the child nodes.
|
15
|
+
#
|
16
|
+
# class ArithmeticPrecedenceEvaluator < Dhaka::Evaluator
|
17
|
+
#
|
18
|
+
# self.grammar = ArithmeticPrecedenceGrammar
|
19
|
+
#
|
20
|
+
# define_evaluation_rules do
|
21
|
+
#
|
22
|
+
# for_subtraction do
|
23
|
+
# evaluate(child_nodes[0]) - evaluate(child_nodes[2])
|
24
|
+
# end
|
25
|
+
#
|
26
|
+
# for_addition do
|
27
|
+
# evaluate(child_nodes[0]) + evaluate(child_nodes[2])
|
28
|
+
# end
|
29
|
+
#
|
30
|
+
# for_division do
|
31
|
+
# evaluate(child_nodes[0]).to_f/evaluate(child_nodes[2])
|
32
|
+
# end
|
33
|
+
#
|
34
|
+
# for_multiplication do
|
35
|
+
# evaluate(child_nodes[0]) * evaluate(child_nodes[2])
|
36
|
+
# end
|
37
|
+
#
|
38
|
+
# for_literal do
|
39
|
+
# child_nodes[0].token.value.to_i
|
40
|
+
# end
|
41
|
+
#
|
42
|
+
# for_parenthetized_expression do
|
43
|
+
# evaluate(child_nodes[1])
|
44
|
+
# end
|
45
|
+
#
|
46
|
+
# for_negated_expression do
|
47
|
+
# -evaluate(child_nodes[1])
|
48
|
+
# end
|
49
|
+
#
|
50
|
+
# for_power do
|
51
|
+
# evaluate(child_nodes[0])**evaluate(child_nodes[2])
|
52
|
+
# end
|
53
|
+
#
|
54
|
+
# end
|
55
|
+
#
|
56
|
+
# end
|
57
|
+
|
58
|
+
class Evaluator < SimpleDelegator
  class << self
    # Define evaluation rules within a block passed to this method. The evaluator will define
    # default evaluation rules for pass-through productions (i.e. productions with expansions
    # consisting of exactly one grammar symbol). The default evaluation rule for such productions
    # is to simply return the result of calling +evaluate+ on the unique child node. Setting the
    # <tt>:raise_error</tt> option to true tells the evaluator to raise an exception if you neglect
    # to define a rule for a non-pass-through production (one where the expansion consists of
    # multiple symbols), listing all the productions that absolutely need to be defined before you
    # can continue.
    def define_evaluation_rules(options = {})
      yield
      check_definitions(options)
    end

    private

    # Installs the default rule for every single-symbol production that has no
    # explicit rule, then raises EvaluatorDefinitionError if
    # <tt>options[:raise_error]</tt> is set and any other production still
    # lacks a rule.
    def check_definitions(options)
      pass_through, non_trivial = grammar.productions.partition { |production| production.expansion.size == 1 }

      (pass_through.map { |production| production.name } - actions).each do |rule_name|
        define_method(rule_name) { evaluate(child_nodes.first) }
      end

      undefined_rules = non_trivial.map { |production| production.name } - actions
      raise EvaluatorDefinitionError.new(undefined_rules) if options[:raise_error] && !undefined_rules.empty?
    end

    # Gives every subclass its own +grammar+ and +actions+ accessors, with
    # +actions+ (the names of the rules defined so far) starting out empty.
    def inherited(evaluator)
      super
      class << evaluator
        attr_accessor :grammar, :actions
      end
      evaluator.actions = []
    end

    # Implements the <tt>for_<production name></tt> DSL: the block becomes an
    # instance method named after the production. Raises if the grammar has no
    # production with that name. Any other missing method is handled normally.
    def method_missing(method_name, *args, &blk)
      rule_name = method_name.to_s[/\Afor_(.+)\z/, 1]
      return super unless rule_name

      raise "Attempted to define evaluation rule for non-existent production '#{rule_name}'" unless grammar.production_named(rule_name)
      actions << rule_name
      define_method(rule_name, &blk)
    end

    # Keeps +respond_to?+ consistent with +method_missing+: a +for_*+ name is
    # reported only when the grammar actually has a production of that name.
    def respond_to_missing?(method_name, include_private = false)
      rule_name = method_name.to_s[/\Afor_(.+)\z/, 1]
      return super unless rule_name && respond_to?(:grammar) && grammar
      grammar.production_named(rule_name) ? true : super
    end
  end

  # Evaluate a parse tree node.
  #
  # While the rule for +node+ runs, the evaluator delegates to +node+, so the
  # rule can call the node's methods (such as +child_nodes+) directly. Rules may
  # call +evaluate+ recursively; the previous node is restored afterwards, even
  # when a rule raises, so the evaluator can be reused after a failure.
  def evaluate(node)
    @node_stack ||= []
    @node_stack << node
    __setobj__(node)
    begin
      send(node.production.name)
    ensure
      @node_stack.pop
      __setobj__(@node_stack.last)
    end
  end

  # Deliberately does not call SimpleDelegator's initializer: there is nothing
  # to delegate to until +evaluate+ is given a node.
  def initialize
  end
end
|
122
|
+
|
123
|
+
# Error carrying the names of the non-trivial productions (those whose
# expansion is not exactly one symbol) that have no evaluation rule.
class EvaluatorDefinitionError < StandardError #:nodoc:
  # +non_trivial_productions_with_rules_undefined+ is the list of production
  # names to report.
  def initialize(non_trivial_productions_with_rules_undefined)
    super()
    @non_trivial_productions_with_rules_undefined = non_trivial_productions_with_rules_undefined
  end

  # Returns the message: a fixed heading followed by one production name per
  # line. A new string is built on every call instead of appending to a
  # literal, so this also works when string literals are frozen.
  def to_s
    "The following non-trivial productions do not have any evaluation rules defined:\n" +
      @non_trivial_productions_with_rules_undefined.join("\n")
  end
end
|
133
|
+
end
|
@@ -0,0 +1,236 @@
|
|
1
|
+
module Dhaka
|
2
|
+
|
3
|
+
# Reserved name for the start symbol for all grammars.
|
4
|
+
START_SYMBOL_NAME = "_Start_"
|
5
|
+
END_SYMBOL_NAME = "_End_" #:nodoc:
|
6
|
+
|
7
|
+
# Productions for specific grammar symbols are defined in the context of this class.
|
8
|
+
class ProductionBuilder
  # +symbol+ is the grammar symbol that productions are being defined for.
  def initialize(grammar, symbol)
    @grammar = grammar
    @symbol = symbol
  end

  # Creates a new production for +symbol+ with an expansion of +expansion+. The options hash can include
  # a directive <tt>:prec</tt>, the value of which is a grammar symbol name. The precedence of the production is then
  # set to the precedence of the grammar symbol corresponding to that name.
  #
  # The name of the method called becomes the name of the production. +expansion+ is a list of symbol
  # names; an object that responds to +lines+ (such as a String) is split into its lines first. Raises
  # if a production with the same name already exists, in which case the new production is not
  # registered with the grammar.
  #
  # See the arithmetic precedence grammar in the test suites for an example.
  def method_missing(production_name, expansion, options = {}, &blk)
    symbol_names = expansion.respond_to?(:lines) ? expansion.lines : expansion
    expansion_symbols = symbol_names.map { |name| @grammar.symbols[name] }

    production_args = [@symbol, expansion_symbols, production_name.to_s, blk, @grammar.production_index]
    if precedence_symbol_name = options[:prec]
      production_args << @grammar.symbol_for_name(precedence_symbol_name).precedence
    end
    production = Production.new(*production_args)

    # Reject duplicates before the grammar is touched, so that a failed
    # definition does not leave a half-registered production behind.
    raise "Duplicate production named #{production.name}" if @grammar.productions_by_name[production.name]

    @grammar.production_index += 1
    @symbol.nullable = true if expansion_symbols.empty?
    @grammar.productions_by_symbol[production.symbol] << production
    @grammar.productions_by_name[production.name] = production
  end
end
|
36
|
+
|
37
|
+
# The precedence builder defines three methods, +left+, +right+ and +nonassoc+. These accept arrays of grammar
|
38
|
+
# symbols all of which have the same precedence level and associativity.
|
39
|
+
#
|
40
|
+
# See the arithmetic precedence grammar in the test suites for an example of how this works.
|
41
|
+
class PrecedenceBuilder
  def initialize(grammar) #:nodoc:
    @grammar = grammar
    @precedence_level = 0
  end

  # Defines +left+, +right+ and +nonassoc+. Each call declares one precedence
  # level: all the symbols named in the call get the current level and the
  # associativity of the method, and the level is then incremented for the
  # next call. The symbol names may be given as a single array
  # (<tt>left ['+', '-']</tt>) or as separate arguments (<tt>left '+', '-'</tt>).
  [:left, :right, :nonassoc].each do |associativity|
    define_method(associativity) do |*symbols|
      assign_precedences associativity, symbols.flatten
    end
  end

  private

  # Gives every symbol in +symbol_names+ its own Precedence at the current
  # level, then moves on to the next level.
  def assign_precedences(associativity, symbol_names)
    symbol_names.each do |symbol_name|
      @grammar.symbols[symbol_name].precedence = Precedence.new(@precedence_level, associativity)
    end
    @precedence_level += 1
  end
end
|
62
|
+
|
63
|
+
# Abstract base class for grammar specifications.
|
64
|
+
#
|
65
|
+
# The following is a grammar specification for simple arithmetic. Precedences are specified as in Yacc -
|
66
|
+
# in ascending order of binding strength, with equal-strength symbols on the same level.
|
67
|
+
# Production rules are specified for each symbol by specifying the name of the production (used when
|
68
|
+
# encoding the Evaluator) and the expansion for that particular production. For example, the production named
|
69
|
+
# +addition+ expands the symbol <tt>'E'</tt> to the list of symbols <tt>['E', '+', 'E']</tt>.
|
70
|
+
#
|
71
|
+
# class ArithmeticPrecedenceGrammar < Dhaka::Grammar
|
72
|
+
# precedences do
|
73
|
+
# left ['+', '-']
|
74
|
+
# left ['*', '/']
|
75
|
+
# nonassoc ['^']
|
76
|
+
# end
|
77
|
+
#
|
78
|
+
# for_symbol(Dhaka::START_SYMBOL_NAME) do
|
79
|
+
# expression ['E']
|
80
|
+
# end
|
81
|
+
#
|
82
|
+
# for_symbol('E') do
|
83
|
+
# addition ['E', '+', 'E']
|
84
|
+
# subtraction ['E', '-', 'E']
|
85
|
+
# multiplication ['E', '*', 'E']
|
86
|
+
# division ['E', '/', 'E']
|
87
|
+
# power ['E', '^', 'E']
|
88
|
+
# literal ['n']
|
89
|
+
# parenthetized_expression ['(', 'E', ')']
|
90
|
+
# negated_expression ['-', 'E'], :prec => '*'
|
91
|
+
# end
|
92
|
+
# end
|
93
|
+
#
|
94
|
+
# In the above grammar, the symbols <tt>+</tt> and <tt>-</tt> are declared as being +left+-associative, meaning that
|
95
|
+
# 1 + 2 + 3 is parsed as (1 + 2) + 3 as opposed to 1 + (2 + 3) (+right+-associativity). The symbol <tt>^</tt> is declared
|
96
|
+
# +nonassoc+ which means that expressions such as 2 ^ 3 ^ 4 are not allowed (non-associative). <tt>+</tt> and <tt>-</tt> are listed
|
97
|
+
# before <tt>^</tt> which means that they bind lower, and an expression such as 2 + 3 ^ 5 will always be parsed as
|
98
|
+
# 2 + (3 ^ 5) and not (2 + 3) ^ 5.
|
99
|
+
class Grammar
  class << self
    # Marks the symbol named +symbol+ as a non-terminal and evaluates +blk+ in
    # the context of a ProductionBuilder for it, which is where the
    # Production-s for that symbol are declared.
    def for_symbol(symbol, &blk)
      non_terminal = symbols[symbol]
      non_terminal.non_terminal = true
      ProductionBuilder.new(self, non_terminal).instance_eval(&blk)
    end

    # Declares the precedences and associativities of symbols. +blk+ is
    # evaluated in the context of a PrecedenceBuilder.
    def precedences(&blk)
      builder = PrecedenceBuilder.new(self)
      builder.instance_eval(&blk)
    end

    # Returns the grammar symbol identified by +name+. Raises if there is no
    # such symbol (and does not create one).
    def symbol_for_name(name)
      raise "No symbol with name #{name} found" unless symbols.has_key?(name)
      symbols[name]
    end

    # Returns a list of all the Production-s in this grammar.
    def productions
      productions_by_name.values
    end

    def productions_for_symbol(symbol) #:nodoc:
      productions_by_symbol[symbol]
    end

    # Computes the closure of the items in +kernel+. Returns a pair: the
    # closure itself and a hash mapping each item to the set of spontaneous
    # channels that start at it.
    def closure(kernel) #:nodoc:
      channels = Hash.new { |table, start_item| table[start_item] = Set.new }
      items = compute_closure(kernel) do |hash, item|
        expected = item.next_symbol
        next unless expected && expected.non_terminal

        productions_by_symbol[expected].each do |production|
          channels[item] << spontaneous_channel(item, hash[Item.new(production, 0)])
        end
      end
      [items, channels]
    end

    def passive_channel(start_item, end_item) #:nodoc:
      PassiveChannel.new(self, start_item, end_item)
    end

    # Returns the set of terminal symbols reachable from +given_symbol+ by
    # repeatedly following the leading symbols of productions, looking past a
    # symbol only when it is nullable. Results are cached per symbol.
    def first(given_symbol) #:nodoc:
      cached = __first_cache[given_symbol]
      return cached if cached

      reachable = compute_closure([given_symbol]) do |hash, symbol|
        next unless symbol.non_terminal

        productions_by_symbol[symbol].each do |production|
          production.expansion.each do |next_symbol|
            hash[next_symbol] # looking the symbol up adds it to the closure
            break unless next_symbol.nullable
          end
        end
      end

      __first_cache[given_symbol] = reachable.values.select { |symbol| symbol.terminal }.to_set
    end

    # Returns the Production identified by +name+.
    def production_named(name)
      productions_by_name[name]
    end

    # Returns an Array of the terminal symbols in the grammar.
    def terminal_symbols
      symbols.values.select(&:terminal)
    end

    # Returns an Array of the non-terminal symbols in the grammar.
    def non_terminal_symbols
      symbols.values.select(&:non_terminal)
    end

    # Export the grammar to a BNF-like format: for each run of sorted
    # productions sharing a symbol, a blank line and the quoted symbol name,
    # followed by one line per expansion.
    def to_bnf
      lines = []
      previous_symbol = nil
      productions.sort.each do |production|
        head = production.symbol
        if head != previous_symbol
          lines.push("", "#{head.name.inspect} :")
          previous_symbol = head
        end
        expansion = production.expansion.map { |symbol| symbol.name.inspect }.join(' ')
        lines << " | #{expansion}"
      end
      lines.join("\n")
    end

    private

    # Gives every subclass its own state: the symbol table (which creates
    # symbols on first lookup), the production indexes, the end and start
    # symbols, the cache used by +first+ and the running production index.
    def inherited(grammar)
      grammar.singleton_class.class_eval do
        attr_accessor :symbols, :productions_by_symbol, :productions_by_name, :start_symbol, :end_symbol, :__first_cache, :production_index
      end

      grammar.symbols = Hash.new { |table, name| table[name] = GrammarSymbol.new(name) }
      grammar.productions_by_symbol = Hash.new { |table, name| table[name] = Set.new([]) }
      grammar.productions_by_name = {}
      # The end symbol is created before the start symbol.
      grammar.end_symbol = grammar.symbols[END_SYMBOL_NAME]
      grammar.start_symbol = grammar.symbols[START_SYMBOL_NAME]
      grammar.__first_cache = {}
      grammar.production_index = 0
    end

    def spontaneous_channel(start_item, end_item)
      SpontaneousChannel.new(self, start_item, end_item)
    end

    # Fixed-point helper shared by +closure+ and +first+. Starting from
    # +initial+, it yields the closure hash and each of its current keys to
    # the block; the block grows the closure by looking up new elements in the
    # hash, which marks it dirty. Passes repeat until one leaves the hash
    # clean. Returns the closure hash.
    def compute_closure(initial)
      closure_hash = ClosureHash.new do |hash, item|
        hash.dirty = true
        hash[item] = item
      end
      closure_hash.load_set(initial)

      loop do
        # Iterate over a snapshot of the keys: the block may add new ones.
        closure_hash.keys.each { |element| yield closure_hash, element }
        break unless closure_hash.dirty
        closure_hash.dirty = false
      end

      closure_hash
    end
  end
end
|
235
|
+
|
236
|
+
end
|