foreverman-dhaka 2.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +64 -0
- data/lib/dhaka.rb +62 -0
- data/lib/dhaka/dot/dot.rb +29 -0
- data/lib/dhaka/evaluator/evaluator.rb +133 -0
- data/lib/dhaka/grammar/closure_hash.rb +15 -0
- data/lib/dhaka/grammar/grammar.rb +236 -0
- data/lib/dhaka/grammar/grammar_symbol.rb +27 -0
- data/lib/dhaka/grammar/precedence.rb +19 -0
- data/lib/dhaka/grammar/production.rb +36 -0
- data/lib/dhaka/lexer/accept_actions.rb +36 -0
- data/lib/dhaka/lexer/alphabet.rb +21 -0
- data/lib/dhaka/lexer/compiled_lexer.rb +46 -0
- data/lib/dhaka/lexer/dfa.rb +121 -0
- data/lib/dhaka/lexer/lexeme.rb +32 -0
- data/lib/dhaka/lexer/lexer.rb +70 -0
- data/lib/dhaka/lexer/lexer_run.rb +78 -0
- data/lib/dhaka/lexer/regex_grammar.rb +393 -0
- data/lib/dhaka/lexer/regex_parser.rb +2010 -0
- data/lib/dhaka/lexer/regex_tokenizer.rb +14 -0
- data/lib/dhaka/lexer/specification.rb +96 -0
- data/lib/dhaka/lexer/state.rb +68 -0
- data/lib/dhaka/lexer/state_machine.rb +37 -0
- data/lib/dhaka/parser/action.rb +55 -0
- data/lib/dhaka/parser/channel.rb +58 -0
- data/lib/dhaka/parser/compiled_parser.rb +51 -0
- data/lib/dhaka/parser/conflict.rb +54 -0
- data/lib/dhaka/parser/item.rb +43 -0
- data/lib/dhaka/parser/parse_result.rb +50 -0
- data/lib/dhaka/parser/parse_tree.rb +66 -0
- data/lib/dhaka/parser/parser.rb +165 -0
- data/lib/dhaka/parser/parser_methods.rb +11 -0
- data/lib/dhaka/parser/parser_run.rb +39 -0
- data/lib/dhaka/parser/parser_state.rb +74 -0
- data/lib/dhaka/parser/token.rb +22 -0
- data/lib/dhaka/runtime.rb +51 -0
- data/lib/dhaka/tokenizer/tokenizer.rb +190 -0
- data/test/all_tests.rb +5 -0
- data/test/arithmetic/arithmetic_evaluator.rb +64 -0
- data/test/arithmetic/arithmetic_evaluator_test.rb +43 -0
- data/test/arithmetic/arithmetic_grammar.rb +41 -0
- data/test/arithmetic/arithmetic_grammar_test.rb +9 -0
- data/test/arithmetic/arithmetic_test_methods.rb +9 -0
- data/test/arithmetic/arithmetic_tokenizer.rb +39 -0
- data/test/arithmetic/arithmetic_tokenizer_test.rb +38 -0
- data/test/arithmetic_precedence/arithmetic_precedence_evaluator.rb +43 -0
- data/test/arithmetic_precedence/arithmetic_precedence_grammar.rb +24 -0
- data/test/arithmetic_precedence/arithmetic_precedence_grammar_test.rb +30 -0
- data/test/arithmetic_precedence/arithmetic_precedence_lexer_specification.rb +23 -0
- data/test/arithmetic_precedence/arithmetic_precedence_parser_test.rb +33 -0
- data/test/brackets/bracket_grammar.rb +23 -0
- data/test/brackets/bracket_tokenizer.rb +22 -0
- data/test/brackets/brackets_test.rb +28 -0
- data/test/chittagong/chittagong_driver.rb +46 -0
- data/test/chittagong/chittagong_driver_test.rb +276 -0
- data/test/chittagong/chittagong_evaluator.rb +284 -0
- data/test/chittagong/chittagong_evaluator_test.rb +38 -0
- data/test/chittagong/chittagong_grammar.rb +104 -0
- data/test/chittagong/chittagong_lexer.rb +109 -0
- data/test/chittagong/chittagong_lexer_specification.rb +37 -0
- data/test/chittagong/chittagong_lexer_test.rb +58 -0
- data/test/chittagong/chittagong_parser.rb +879 -0
- data/test/chittagong/chittagong_parser_test.rb +55 -0
- data/test/chittagong/chittagong_test.rb +170 -0
- data/test/core/another_lalr_but_not_slr_grammar.rb +20 -0
- data/test/core/compiled_parser_test.rb +44 -0
- data/test/core/dfa_test.rb +170 -0
- data/test/core/evaluator_test.rb +22 -0
- data/test/core/grammar_test.rb +83 -0
- data/test/core/lalr_but_not_slr_grammar.rb +19 -0
- data/test/core/lexer_test.rb +139 -0
- data/test/core/malformed_grammar.rb +7 -0
- data/test/core/malformed_grammar_test.rb +8 -0
- data/test/core/nullable_grammar.rb +21 -0
- data/test/core/parse_result_test.rb +44 -0
- data/test/core/parser_state_test.rb +24 -0
- data/test/core/parser_test.rb +131 -0
- data/test/core/precedence_grammar.rb +17 -0
- data/test/core/precedence_grammar_test.rb +9 -0
- data/test/core/rr_conflict_grammar.rb +21 -0
- data/test/core/simple_grammar.rb +22 -0
- data/test/core/sr_conflict_grammar.rb +16 -0
- data/test/dhaka_test_helper.rb +18 -0
- data/test/fake_logger.rb +17 -0
- metadata +137 -0
data/Rakefile
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
require 'rake/rdoctask'
|
|
2
|
+
require 'rake/gempackagetask'
|
|
3
|
+
require 'rake/testtask'
|
|
4
|
+
require 'rubygems'
|
|
5
|
+
|
|
6
|
+
# Generates the RDoc documentation into ../doc, with README as the main page.
Rake::RDocTask.new do |rdoc|
  rdoc.rdoc_files.include('README', 'lib/**/*.rb')
  # The regex parser is written by the :generate_regex_parser task, so it is
  # excluded from the docs. The path has to be spelled the same way the include
  # glob above produces it (relative to this Rakefile, under lib/dhaka/lexer).
  rdoc.rdoc_files.exclude('lib/dhaka/lexer/regex_parser.rb')
  rdoc.main = "README"
  rdoc.rdoc_dir = '../doc'
end
|
|
12
|
+
|
|
13
|
+
spec = Gem::Specification.new do |s|
|
|
14
|
+
s.name = "dhaka"
|
|
15
|
+
s.author = "Mushfeq Khan"
|
|
16
|
+
s.email = "mushfeq dot khan at gmail dot com"
|
|
17
|
+
s.version = ENV['VERSION'] || "0.0.0"
|
|
18
|
+
s.platform = Gem::Platform::RUBY
|
|
19
|
+
s.summary = "An LALR1 parser generator written in Ruby"
|
|
20
|
+
s.files = Dir.glob("{lib,test}/**/*").select {|file| file.include?('.rb') || file.include?('.txt')} + ['Rakefile']
|
|
21
|
+
s.require_path = 'lib'
|
|
22
|
+
s.autorequire = 'dhaka'
|
|
23
|
+
s.has_rdoc = true
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
Rake::GemPackageTask.new(spec) do |pkg|
|
|
27
|
+
pkg.package_dir = "../gems"
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
Rake::TestTask.new do |t|
|
|
31
|
+
t.libs << "test"
|
|
32
|
+
t.test_files = FileList['test/**/*test.rb']
|
|
33
|
+
t.verbose = true
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
task :generate_regex_parser do
|
|
37
|
+
require 'lib/dhaka'
|
|
38
|
+
File.open('lib/dhaka/lexer/regex_parser.rb', 'w') do |file|
|
|
39
|
+
file << Dhaka::Parser.new(Dhaka::LexerSupport::RegexGrammar).compile_to_ruby_source_as('Dhaka::LexerSupport::RegexParser')
|
|
40
|
+
end
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
task :gem => [:test, :generate_regex_parser]
|
|
44
|
+
|
|
45
|
+
task :default => :test
|
|
46
|
+
|
|
47
|
+
task :test => [:generate_chittagong_parser, :generate_chittagong_lexer]
|
|
48
|
+
|
|
49
|
+
task :generate_chittagong_parser do
|
|
50
|
+
require 'lib/dhaka'
|
|
51
|
+
require 'test/chittagong/chittagong_grammar'
|
|
52
|
+
require 'test/fake_logger'
|
|
53
|
+
File.open('test/chittagong/chittagong_parser.rb', 'w') do |file|
|
|
54
|
+
file << Dhaka::Parser.new(ChittagongGrammar, FakeLogger.new).compile_to_ruby_source_as(:ChittagongParser)
|
|
55
|
+
end
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
task :generate_chittagong_lexer do
|
|
59
|
+
require 'lib/dhaka'
|
|
60
|
+
require 'test/chittagong/chittagong_lexer_specification'
|
|
61
|
+
File.open('test/chittagong/chittagong_lexer.rb', 'w') do |file|
|
|
62
|
+
file << Dhaka::Lexer.new(ChittagongLexerSpecification).compile_to_ruby_source_as(:ChittagongLexer)
|
|
63
|
+
end
|
|
64
|
+
end
|
data/lib/dhaka.rb
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
#--
|
|
2
|
+
# Copyright (c) 2006, 2007 Mushfeq Khan
|
|
3
|
+
#
|
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
|
5
|
+
# a copy of this software and associated documentation files (the
|
|
6
|
+
# "Software"), to deal in the Software without restriction, including
|
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
|
10
|
+
# the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be
|
|
13
|
+
# included in all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
18
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
|
19
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
|
20
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
|
21
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
22
|
+
#++
|
|
23
|
+
|
|
24
|
+
require 'set'
|
|
25
|
+
require 'logger'
|
|
26
|
+
require 'delegate'
|
|
27
|
+
|
|
28
|
+
%w[
|
|
29
|
+
dot/dot
|
|
30
|
+
grammar/grammar_symbol
|
|
31
|
+
grammar/production
|
|
32
|
+
grammar/closure_hash
|
|
33
|
+
grammar/grammar
|
|
34
|
+
grammar/precedence
|
|
35
|
+
parser/parse_tree
|
|
36
|
+
parser/parse_result
|
|
37
|
+
parser/item
|
|
38
|
+
parser/channel
|
|
39
|
+
parser/parser_methods
|
|
40
|
+
parser/parser_state
|
|
41
|
+
parser/conflict
|
|
42
|
+
parser/token
|
|
43
|
+
parser/action
|
|
44
|
+
parser/parser_run
|
|
45
|
+
parser/parser
|
|
46
|
+
parser/compiled_parser
|
|
47
|
+
tokenizer/tokenizer
|
|
48
|
+
evaluator/evaluator
|
|
49
|
+
lexer/accept_actions
|
|
50
|
+
lexer/alphabet
|
|
51
|
+
lexer/regex_grammar
|
|
52
|
+
lexer/regex_tokenizer
|
|
53
|
+
lexer/regex_parser
|
|
54
|
+
lexer/state_machine
|
|
55
|
+
lexer/dfa
|
|
56
|
+
lexer/state
|
|
57
|
+
lexer/specification
|
|
58
|
+
lexer/lexeme
|
|
59
|
+
lexer/lexer_run
|
|
60
|
+
lexer/lexer
|
|
61
|
+
lexer/compiled_lexer
|
|
62
|
+
].each {|path| require File.join(File.dirname(__FILE__), 'dhaka/' + path)}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
module Dhaka
  module Dot #:nodoc:
    # Collects the lines of a Graphviz "digraph" description.
    #
    # The instance is yielded to the (optional) block passed to +new+ so that
    # nodes and edges can be added; the closing brace is appended once the block
    # returns. Nodes are identified in the output by the +object_id+ of the Ruby
    # object that was passed in.
    class Digraph #:nodoc:
      # +node_attributes+ are emitted as the default attributes for all nodes.
      def initialize(node_attributes = {})
        @result = ["digraph x {"]
        @result << %(node #{dotify_hash(node_attributes)})
        # A graph without a block is simply empty; don't raise LocalJumpError.
        yield(self) if block_given?
        @result << '}'
      end

      # Adds a node statement for +obj+ with the given attributes.
      def node(obj, attributes = {})
        @result << "#{obj.object_id} #{dotify_hash(attributes)}"
      end

      # Adds a directed edge statement from +src+ to +dest+ with the given attributes.
      def edge(src, dest, attributes = {})
        @result << "#{src.object_id} -> #{dest.object_id} #{dotify_hash(attributes)}"
      end

      # Renders +hash+ as a DOT attribute list such as <tt>[label="x" shape="box"]</tt>.
      # Keys and values are converted to strings, pairs are sorted so the output is
      # deterministic, and values are quoted. An empty hash renders as an empty string.
      def dotify_hash(hash)
        return "" if hash.empty?
        pairs = hash.collect {|key, value| [key.to_s, value.to_s]}.sort
        '[' + pairs.collect {|key, value| "#{key}=#{value.inspect}"}.join(' ') + ']'
      end

      # Returns the accumulated statements as a single newline-separated string.
      def to_dot
        @result.join("\n")
      end
    end
  end
end
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
module Dhaka
|
|
2
|
+
# Abstract base class for evaluators.
|
|
3
|
+
#
|
|
4
|
+
# Defining an evaluator is an easy way to perform syntax-directed evaluation without having to generate an abstract
|
|
5
|
+
# syntax tree representation of the input.
|
|
6
|
+
#
|
|
7
|
+
# An evaluation rule for a given production named +bar+ is defined by calling +for_bar+ with
|
|
8
|
+
# a block that performs the evaluation. For detailed examples, see the evaluators in the
|
|
9
|
+
# test suite.
|
|
10
|
+
#
|
|
11
|
+
# The following is an evaluator for arithmetic expressions. When a parse tree node is encountered that
|
|
12
|
+
# corresponds to the production named +addition+, the block passed to +for_addition+ is invoked. The +evaluate+
|
|
13
|
+
# method is then recursively called on the child nodes, in this case the operands to the addition operation. The
|
|
14
|
+
# result is obtained by adding the evaluation results of the child nodes.
|
|
15
|
+
#
|
|
16
|
+
# class ArithmeticPrecedenceEvaluator < Dhaka::Evaluator
|
|
17
|
+
#
|
|
18
|
+
# self.grammar = ArithmeticPrecedenceGrammar
|
|
19
|
+
#
|
|
20
|
+
# define_evaluation_rules do
|
|
21
|
+
#
|
|
22
|
+
# for_subtraction do
|
|
23
|
+
# evaluate(child_nodes[0]) - evaluate(child_nodes[2])
|
|
24
|
+
# end
|
|
25
|
+
#
|
|
26
|
+
# for_addition do
|
|
27
|
+
# evaluate(child_nodes[0]) + evaluate(child_nodes[2])
|
|
28
|
+
# end
|
|
29
|
+
#
|
|
30
|
+
# for_division do
|
|
31
|
+
# evaluate(child_nodes[0]).to_f/evaluate(child_nodes[2])
|
|
32
|
+
# end
|
|
33
|
+
#
|
|
34
|
+
# for_multiplication do
|
|
35
|
+
# evaluate(child_nodes[0]) * evaluate(child_nodes[2])
|
|
36
|
+
# end
|
|
37
|
+
#
|
|
38
|
+
# for_literal do
|
|
39
|
+
# child_nodes[0].token.value.to_i
|
|
40
|
+
# end
|
|
41
|
+
#
|
|
42
|
+
# for_parenthetized_expression do
|
|
43
|
+
# evaluate(child_nodes[1])
|
|
44
|
+
# end
|
|
45
|
+
#
|
|
46
|
+
# for_negated_expression do
|
|
47
|
+
# -evaluate(child_nodes[1])
|
|
48
|
+
# end
|
|
49
|
+
#
|
|
50
|
+
# for_power do
|
|
51
|
+
# evaluate(child_nodes[0])**evaluate(child_nodes[2])
|
|
52
|
+
# end
|
|
53
|
+
#
|
|
54
|
+
# end
|
|
55
|
+
#
|
|
56
|
+
# end
|
|
57
|
+
|
|
58
|
+
class Evaluator < SimpleDelegator
  class << self
    # Define evaluation rules within a block passed to this method. The evaluator will define
    # default evaluation rules for pass-through productions (i.e. productions with expansions
    # consisting of exactly one grammar symbol). The default evaluation rule for such productions
    # is to simply return the result of calling +evaluate+ on the unique child node. Setting the
    # <tt>:raise_error</tt> option to true tells the evaluator to throw an exception if you neglect
    # to define a rule for a non-pass-through production (one where the expansion consists of
    # multiple symbols), listing all the productions that absolutely need to be defined before you
    # can continue.
    def define_evaluation_rules(options = {})
      yield
      check_definitions(options)
    end

    private

    # Installs the default rule for every single-symbol production that has no explicit
    # rule, then (only when <tt>options[:raise_error]</tt> is set) raises
    # EvaluatorDefinitionError naming the remaining productions that lack a rule.
    def check_definitions(options)
      filter = lambda {|productions| productions.map {|production| production.name} - actions}
      pass_through_productions_without_rules = filter[grammar.productions.select {|production| production.expansion.size == 1}]
      pass_through_productions_without_rules.each do |rule_name|
        send(:define_method, rule_name) do
          evaluate(child_nodes.first)
        end
      end
      non_trivial_productions_with_rules_undefined = filter[grammar.productions.select {|production| production.expansion.size != 1}]
      if options[:raise_error] && !non_trivial_productions_with_rules_undefined.empty?
        raise EvaluatorDefinitionError.new(non_trivial_productions_with_rules_undefined)
      end
    end

    # Gives every subclass its own +grammar+ and +actions+ class-level accessors;
    # +actions+ starts out as an empty list of rule names.
    def inherited(evaluator)
      class << evaluator
        attr_accessor :grammar, :actions
      end
      evaluator.actions = []
    end

    # Implements the <tt>for_<production name></tt> DSL: records the rule name in
    # +actions+ and defines an instance method of that name from the given block.
    # Raises if the grammar has no production with that name.
    def method_missing(method_name, *args, &blk)
      name = method_name.to_s
      if name =~ /^for_(.+)$/
        rule_name = $1
        raise "Attempted to define evaluation rule for non-existent production '#{rule_name}'" unless grammar.production_named(rule_name)
        actions << rule_name
        send(:define_method, rule_name, &blk)
      else
        super
      end
    end

    # Keeps +respond_to?+ in agreement with +method_missing+: true for
    # <tt>for_<name></tt> when a grammar is set and has a production called +name+.
    def respond_to_missing?(method_name, include_private = false)
      rule_name = method_name.to_s[/^for_(.+)$/, 1]
      # The grammar accessor only exists on subclasses (see +inherited+).
      return super unless rule_name && respond_to?(:grammar) && grammar
      grammar.production_named(rule_name) ? true : super
    end
  end

  # Evaluate a parse tree node.
  #
  # The node becomes the delegation target while its rule runs, so rule bodies can
  # call the node's methods (e.g. +child_nodes+) directly. The previous node is
  # restored afterwards, even when the rule raises, so a failed evaluation does not
  # leave stale entries on the node stack.
  def evaluate(node)
    @node_stack ||= []
    @node_stack << node
    __setobj__(node)
    begin
      send(node.production.name)
    ensure
      @node_stack.pop
      __setobj__(@node_stack.last)
    end
  end

  # Deliberately does not call super: there is no node to delegate to until
  # +evaluate+ is called.
  def initialize
  end
end
|
|
122
|
+
|
|
123
|
+
# Raised when non-pass-through productions have no evaluation rule defined.
class EvaluatorDefinitionError < StandardError #:nodoc:
  # +non_trivial_productions_with_rules_undefined+ is the list of production
  # names that are missing an evaluation rule.
  def initialize(non_trivial_productions_with_rules_undefined)
    @non_trivial_productions_with_rules_undefined = non_trivial_productions_with_rules_undefined
  end

  # Returns the error message: a header line followed by one production name per line.
  # Built by concatenation rather than by appending to the literal, so it also works
  # when string literals are frozen.
  def to_s
    "The following non-trivial productions do not have any evaluation rules defined:\n" +
      @non_trivial_productions_with_rules_undefined.join("\n")
  end
end
|
|
133
|
+
end
|
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
module Dhaka
|
|
2
|
+
|
|
3
|
+
# Reserved name for the start symbol for all grammars.
|
|
4
|
+
START_SYMBOL_NAME = "_Start_"
|
|
5
|
+
END_SYMBOL_NAME = "_End_" #:nodoc:
|
|
6
|
+
|
|
7
|
+
# Productions for specific grammar symbols are defined in the context of this class.
|
|
8
|
+
class ProductionBuilder
  # +symbol+ is the grammar symbol that productions are being defined for.
  def initialize(grammar, symbol)
    @grammar = grammar
    @symbol  = symbol
  end

  # Creates a new production for +symbol+ with an expansion of +expansion+. The options hash can include
  # a directive <tt>:prec</tt>, the value of which is a grammar symbol name. The precedence of the production is then
  # set to the precedence of the grammar symbol corresponding to that name.
  #
  # Raises if a production with the same name has already been defined. The check is made
  # before the grammar's production index and lookup tables are touched, so a rejected
  # duplicate leaves them unchanged.
  #
  # See the arithmetic precedence grammar in the test suites for an example.
  def method_missing(production_name, expansion, options = {}, &blk)
    # Objects responding to +lines+ (e.g. a String) are split into one symbol name per line.
    expansion_symbols = (expansion.respond_to?('lines') ? expansion.lines : expansion).collect {|name| @grammar.symbols[name]}
    production_args = [@symbol, expansion_symbols, production_name.to_s, blk, @grammar.production_index]
    if (precedence_symbol_name = options[:prec])
      production_args << @grammar.symbol_for_name(precedence_symbol_name).precedence
    end

    production = Production.new(*production_args)
    raise "Duplicate production named #{production.name}" if @grammar.productions_by_name[production.name]

    @grammar.production_index += 1
    # An empty expansion means the symbol can derive the empty string.
    @symbol.nullable = true if expansion_symbols.empty?
    @grammar.productions_by_symbol[production.symbol] << production
    @grammar.productions_by_name[production.name] = production
  end
end
|
|
36
|
+
|
|
37
|
+
# The precedence builder defines three methods, +left+, +right+ and +nonassoc+. These accept arrays of grammar
|
|
38
|
+
# symbols all of which have the same precedence level and associativity.
|
|
39
|
+
#
|
|
40
|
+
# See the arithmetic precedence grammar in the test suites for an example of how this works.
|
|
41
|
+
class PrecedenceBuilder
  def initialize(grammar) #:nodoc:
    @grammar = grammar
    @precedence_level = 0
  end

  # Declares +symbols+ as left-associative at the next precedence level.
  def left(symbols)
    assign_precedences(:left, symbols)
  end

  # Declares +symbols+ as right-associative at the next precedence level.
  def right(symbols)
    assign_precedences(:right, symbols)
  end

  # Declares +symbols+ as non-associative at the next precedence level.
  def nonassoc(symbols)
    assign_precedences(:nonassoc, symbols)
  end

  private

  # Gives every named symbol a Precedence with the current level and the given
  # associativity, then moves on to the next level.
  def assign_precedences(associativity, symbol_names)
    symbol_names.each do |symbol_name|
      @grammar.symbols[symbol_name].precedence = Precedence.new(@precedence_level, associativity)
    end
    @precedence_level += 1
  end
end
|
|
62
|
+
|
|
63
|
+
# Abstract base class for grammar specifications.
|
|
64
|
+
#
|
|
65
|
+
# The following is a grammar specification for simple arithmetic. Precedences are specified as in Yacc -
|
|
66
|
+
# in ascending order of binding strength, with equal-strength symbols on the same level.
|
|
67
|
+
# Production rules are specified for each symbol by specifying the name of the production (used when
|
|
68
|
+
# encoding the Evaluator) and the expansion for that particular production. For example, the production named
|
|
69
|
+
# +addition+ expands the symbol <tt>'E'</tt> to the list of symbols <tt>['E', '+', 'E']</tt>.
|
|
70
|
+
#
|
|
71
|
+
# class ArithmeticPrecedenceGrammar < Dhaka::Grammar
|
|
72
|
+
# precedences do
|
|
73
|
+
# left ['+', '-']
|
|
74
|
+
# left ['*', '/']
|
|
75
|
+
# nonassoc ['^']
|
|
76
|
+
# end
|
|
77
|
+
#
|
|
78
|
+
# for_symbol(Dhaka::START_SYMBOL_NAME) do
|
|
79
|
+
# expression ['E']
|
|
80
|
+
# end
|
|
81
|
+
#
|
|
82
|
+
# for_symbol('E') do
|
|
83
|
+
# addition ['E', '+', 'E']
|
|
84
|
+
# subtraction ['E', '-', 'E']
|
|
85
|
+
# multiplication ['E', '*', 'E']
|
|
86
|
+
# division ['E', '/', 'E']
|
|
87
|
+
# power ['E', '^', 'E']
|
|
88
|
+
# literal ['n']
|
|
89
|
+
# parenthetized_expression ['(', 'E', ')']
|
|
90
|
+
# negated_expression ['-', 'E'], :prec => '*'
|
|
91
|
+
# end
|
|
92
|
+
# end
|
|
93
|
+
#
|
|
94
|
+
# In the above grammar, the symbols <tt>+</tt> and <tt>-</tt> are declared as being +left+-associative, meaning that
|
|
95
|
+
# 1 + 2 + 3 is parsed as (1 + 2) + 3 as opposed to 1 + (2 + 3) (+right+-associativity). The symbol <tt>^</tt> is declared
|
|
96
|
+
# +nonassoc+ which means that expressions such as 2 ^ 3 ^ 4 are not allowed (non-associative). <tt>+</tt> and <tt>-</tt> are listed
|
|
97
|
+
# before <tt>^</tt> which means that they bind lower, and an expression such as 2 + 3 ^ 5 will always be parsed as
|
|
98
|
+
# 2 + (3 ^ 5) and not (2 + 3) ^ 5.
|
|
99
|
+
class Grammar
|
|
100
|
+
class << self
|
|
101
|
+
# Used for defining the Production-s for the symbol with name +symbol+. The block +blk+ is
|
|
102
|
+
# evaluated in the context of a ProductionBuilder.
|
|
103
|
+
def for_symbol symbol, &blk
|
|
104
|
+
symbol = symbols[symbol]
|
|
105
|
+
symbol.non_terminal = true
|
|
106
|
+
ProductionBuilder.new(self, symbol).instance_eval(&blk)
|
|
107
|
+
end
|
|
108
|
+
|
|
109
|
+
# Used for defining the precedences and associativities of symbols. The block +blk+ is
|
|
110
|
+
# evaluated in the context of a PrecedenceBuilder.
|
|
111
|
+
def precedences &blk
|
|
112
|
+
PrecedenceBuilder.new(self).instance_eval(&blk)
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
# Returns the grammar symbol identified by +name+
|
|
116
|
+
def symbol_for_name(name)
|
|
117
|
+
if symbols.has_key? name
|
|
118
|
+
symbols[name]
|
|
119
|
+
else
|
|
120
|
+
raise "No symbol with name #{name} found"
|
|
121
|
+
end
|
|
122
|
+
end
|
|
123
|
+
|
|
124
|
+
# Returns a list of all the Production-s in this grammar.
|
|
125
|
+
def productions
|
|
126
|
+
productions_by_name.values
|
|
127
|
+
end
|
|
128
|
+
|
|
129
|
+
def productions_for_symbol(symbol) #:nodoc:
|
|
130
|
+
productions_by_symbol[symbol]
|
|
131
|
+
end
|
|
132
|
+
|
|
133
|
+
def closure(kernel) #:nodoc:
|
|
134
|
+
channels = Hash.new {|hash, start_item| hash[start_item] = Set.new}
|
|
135
|
+
result = compute_closure(kernel) do |hash, item|
|
|
136
|
+
if item.next_symbol and item.next_symbol.non_terminal
|
|
137
|
+
productions_by_symbol[item.next_symbol].each do |production|
|
|
138
|
+
new_channel = spontaneous_channel(item, hash[Item.new(production, 0)])
|
|
139
|
+
channels[item] << new_channel
|
|
140
|
+
end
|
|
141
|
+
end
|
|
142
|
+
end
|
|
143
|
+
[result, channels]
|
|
144
|
+
end
|
|
145
|
+
|
|
146
|
+
def passive_channel(start_item, end_item) #:nodoc:
|
|
147
|
+
PassiveChannel.new(self, start_item, end_item)
|
|
148
|
+
end
|
|
149
|
+
|
|
150
|
+
def first(given_symbol) #:nodoc:
|
|
151
|
+
cached_result = __first_cache[given_symbol]
|
|
152
|
+
return cached_result if cached_result
|
|
153
|
+
result = compute_closure([given_symbol]) do |hash, symbol|
|
|
154
|
+
productions_by_symbol[symbol].each do |production|
|
|
155
|
+
symbol_index = 0
|
|
156
|
+
while next_symbol = production.expansion[symbol_index]
|
|
157
|
+
hash[next_symbol]
|
|
158
|
+
break unless next_symbol.nullable
|
|
159
|
+
symbol_index += 1
|
|
160
|
+
end
|
|
161
|
+
end if symbol.non_terminal
|
|
162
|
+
end.values.select {|symbol| symbol.terminal}.to_set
|
|
163
|
+
__first_cache[given_symbol] = result
|
|
164
|
+
result
|
|
165
|
+
end
|
|
166
|
+
|
|
167
|
+
# Returns the Production identified by +name+.
|
|
168
|
+
def production_named(name)
|
|
169
|
+
productions_by_name[name]
|
|
170
|
+
end
|
|
171
|
+
|
|
172
|
+
# Returns an Array of the terminal symbols in the grammar.
|
|
173
|
+
def terminal_symbols
|
|
174
|
+
symbols.values.select {|symbol| symbol.terminal}
|
|
175
|
+
end
|
|
176
|
+
|
|
177
|
+
# Returns an Array of the non-terminal symbols in the grammar.
|
|
178
|
+
def non_terminal_symbols
|
|
179
|
+
symbols.values.select {|symbol| symbol.non_terminal}
|
|
180
|
+
end
|
|
181
|
+
|
|
182
|
+
# Export the grammar to a BNF-like format
|
|
183
|
+
def to_bnf
|
|
184
|
+
result = []
|
|
185
|
+
last_symbol = nil
|
|
186
|
+
productions.sort.each do |production|
|
|
187
|
+
if production.symbol != last_symbol
|
|
188
|
+
result << ""
|
|
189
|
+
result << "#{production.symbol.name.inspect} :"
|
|
190
|
+
last_symbol = production.symbol
|
|
191
|
+
end
|
|
192
|
+
result << " | #{production.expansion.collect{|symbol| symbol.name.inspect}.join(' ')}"
|
|
193
|
+
end
|
|
194
|
+
result.join("\n")
|
|
195
|
+
end
|
|
196
|
+
|
|
197
|
+
private
|
|
198
|
+
|
|
199
|
+
def inherited(grammar)
|
|
200
|
+
class << grammar
|
|
201
|
+
attr_accessor :symbols, :productions_by_symbol, :productions_by_name, :start_symbol, :end_symbol, :__first_cache, :production_index
|
|
202
|
+
end
|
|
203
|
+
grammar.symbols = Hash.new {|hash, name| hash[name] = GrammarSymbol.new(name)}
|
|
204
|
+
grammar.productions_by_symbol = Hash.new {|hash, name| hash[name] = Set.new([])}
|
|
205
|
+
grammar.productions_by_name = {}
|
|
206
|
+
grammar.end_symbol = grammar.symbols[END_SYMBOL_NAME]
|
|
207
|
+
grammar.start_symbol = grammar.symbols[START_SYMBOL_NAME]
|
|
208
|
+
grammar.__first_cache = {}
|
|
209
|
+
grammar.production_index = 0
|
|
210
|
+
end
|
|
211
|
+
|
|
212
|
+
def spontaneous_channel(start_item, end_item)
|
|
213
|
+
SpontaneousChannel.new(self, start_item, end_item)
|
|
214
|
+
end
|
|
215
|
+
|
|
216
|
+
def compute_closure(initial)
|
|
217
|
+
closure_hash = ClosureHash.new do |hash, item|
|
|
218
|
+
hash.dirty = true
|
|
219
|
+
hash[item] = item
|
|
220
|
+
end
|
|
221
|
+
|
|
222
|
+
closure_hash.load_set(initial)
|
|
223
|
+
|
|
224
|
+
loop do
|
|
225
|
+
closure_hash.keys.each do |element|
|
|
226
|
+
yield closure_hash, element
|
|
227
|
+
end
|
|
228
|
+
break unless closure_hash.dirty
|
|
229
|
+
closure_hash.dirty = false
|
|
230
|
+
end
|
|
231
|
+
closure_hash
|
|
232
|
+
end
|
|
233
|
+
end
|
|
234
|
+
end
|
|
235
|
+
|
|
236
|
+
end
|