dhaka 2.0.1 → 2.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +64 -0
- data/lib/dhaka.rb +12 -0
- data/lib/dot/dot.rb +29 -0
- data/lib/evaluator/evaluator.rb +35 -26
- data/lib/grammar/grammar.rb +42 -17
- data/lib/grammar/grammar_symbol.rb +4 -3
- data/lib/grammar/production.rb +9 -3
- data/lib/lexer/compiled_lexer.rb +46 -0
- data/lib/lexer/dfa.rb +71 -0
- data/lib/lexer/lexeme.rb +33 -0
- data/lib/lexer/lexer.rb +61 -0
- data/lib/lexer/lexer_run.rb +66 -0
- data/lib/lexer/regex_grammar.rb +368 -0
- data/lib/lexer/regex_parser.rb +1888 -0
- data/lib/lexer/regex_tokenizer.rb +14 -0
- data/lib/lexer/specification.rb +69 -0
- data/lib/lexer/state.rb +45 -0
- data/lib/lexer/state_machine.rb +37 -0
- data/lib/parser/action.rb +3 -3
- data/lib/parser/compiled_parser.rb +11 -3
- data/lib/parser/parse_result.rb +3 -5
- data/lib/parser/parse_tree.rb +6 -17
- data/lib/parser/parser.rb +15 -14
- data/lib/parser/parser_run.rb +4 -2
- data/lib/parser/parser_state.rb +16 -8
- data/lib/tokenizer/tokenizer.rb +5 -3
- data/test/arithmetic_precedence/arithmetic_precedence_lexer_specification.rb +23 -0
- data/test/arithmetic_precedence/arithmetic_precedence_parser_test.rb +4 -2
- data/test/chittagong/chittagong_driver.rb +12 -13
- data/test/chittagong/chittagong_driver_test.rb +18 -11
- data/test/chittagong/chittagong_evaluator.rb +7 -16
- data/test/chittagong/chittagong_evaluator_test.rb +7 -4
- data/test/chittagong/chittagong_grammar.rb +0 -6
- data/test/chittagong/chittagong_lexer.rb +109 -0
- data/test/chittagong/chittagong_lexer_specification.rb +39 -0
- data/test/chittagong/{chittagong_tokenizer_test.rb → chittagong_lexer_test.rb} +12 -6
- data/test/chittagong/chittagong_parser.rb +879 -0
- data/test/chittagong/chittagong_parser_test.rb +8 -10
- data/test/chittagong/chittagong_test.rb +17 -13
- data/test/compiled_parser_test.rb +7 -2
- data/test/evaluator_test.rb +0 -1
- data/test/grammar_test.rb +19 -1
- data/test/lexer_test.rb +215 -0
- data/test/parse_result_test.rb +8 -8
- data/test/parser_state_test.rb +0 -12
- metadata +21 -5
- data/test/arithmetic_precedence/arithmetic_precedence_tokenizer.rb +0 -39
- data/test/chittagong/chittagong_tokenizer.rb +0 -88
data/lib/lexer/dfa.rb
ADDED
@@ -0,0 +1,71 @@
module Dhaka
  module LexerSupport
    # Raised when an invalid regular expression pattern is encountered
    # in a LexerSpecification
    class InvalidRegexException < StandardError
    end

    # Deterministic finite automaton built from a single regular expression,
    # using the follow-set (followpos) construction over the regex AST.
    class DFA < StateMachine #:nodoc:
      # Tokenizes and parses +regex+, computes follow sets over the resulting
      # AST, then builds the DFA starting from the item set of the AST's
      # first positions. Raises InvalidRegexException if either the
      # tokenizer or the parser reports an error.
      def initialize(regex)
        @regex = regex

        tokenize_result = RegexTokenizer.tokenize(@regex)
        raise InvalidRegexException.new(tokenize_error_message(tokenize_result)) if tokenize_result.has_error?

        parse_result = RegexParser.parse(tokenize_result)
        raise InvalidRegexException.new(parse_error_message(parse_result)) if parse_result.has_error?

        ast = parse_result
        ast.calculate_follow_sets

        super(ItemSet.new(ast.first))
      end

      # Error message for tokenizer failures: names the offending character
      # and shows the regex with a '>>>' marker inserted at the error index.
      def tokenize_error_message(tokenize_result)
        "Invalid character #{@regex[tokenize_result.unexpected_char_index].chr}: #{@regex.dup.insert(tokenize_result.unexpected_char_index, '>>>')}"
      end

      # Error message for parser failures: distinguishes premature end of
      # input from an unexpected token mid-pattern (marked with '>>>').
      def parse_error_message(parse_result)
        unexpected_token = parse_result.unexpected_token
        if unexpected_token.symbol_name == END_SYMBOL_NAME
          "Unexpected end of regex."
        else
          "Unexpected token #{parse_result.unexpected_token.symbol_name}: #{@regex.dup.insert(parse_result.unexpected_token.input_position, '>>>')}"
        end
      end

      # Union of the follow sets of all positions in +key+ whose character
      # is +char+ — i.e. the item set reached from +key+ on input +char+.
      def dest_key_for key, char
        result = ItemSet.new
        key.each do |position|
          result.merge(position.follow_set) if position.character == char
        end
        result
      end

      # A state is accepting (and tagged with the source regex) when any
      # position in its item set is an accepting position.
      def new_state_for_key key
        accepting = key.detect {|position| position.accepting}
        State.new(self, accepting && @regex)
      end

      # Characters on which transitions leave the item set +key+
      # (accepting positions carry no character and are skipped).
      def transition_characters key
        result = Set.new
        key.each do |node|
          result << node.character unless node.accepting
        end
        result
      end

      # True if the whole of +string+ is matched by this DFA.
      # NOTE(review): iterates bytes via unpack("C*")/chr — assumes
      # single-byte characters (Ruby 1.8-era string semantics); confirm
      # before running under a multibyte-aware Ruby.
      def matches(string)
        curr_state = @start_state
        string.unpack("C*").each do |i|
          dest_state = curr_state.transitions[i.chr]
          return false unless dest_state
          curr_state = dest_state
        end
        return curr_state.accepting?
      end
    end
  end
end
data/lib/lexer/lexeme.rb
ADDED
@@ -0,0 +1,33 @@
module Dhaka
  # Represents a portion of the input string that has been recognized as matching a given lexer pattern.
  class Lexeme
    # The pattern matched by this lexeme.
    attr_accessor :pattern

    # +input_position+ is the index in the input stream that this lexeme starts at.
    attr_reader :input_position

    # The individual characters accumulated so far, in order.
    attr_reader :characters

    def initialize(input_position) #:nodoc:
      @input_position = input_position
      @characters = []
    end

    # The substring of the input stream that this lexeme is comprised of.
    def value
      @characters.join
    end

    # Truthy once a pattern has been assigned to this lexeme.
    def accepted? #:nodoc:
      pattern
    end

    # Appends a single character to the lexeme.
    def << char #:nodoc:
      @characters.push char
    end

    # Appends a whole array of characters to the lexeme.
    def concat chars #:nodoc:
      @characters.concat chars
    end
  end
end
data/lib/lexer/lexer.rb
ADDED
@@ -0,0 +1,61 @@
module Dhaka
  # The lexer generator. To generate a lexer from a lexer specification +MyLexerSpecification+:
  #   lexer = Dhaka::Lexer.new(MyLexerSpecification)
  #
  # To compile this lexer as +MyLexer+ to a string of Ruby source:
  #   lexer.compile_to_ruby_source_as(:MyLexer)
  class Lexer < LexerSupport::StateMachine
    attr_reader :specification

    # Creates a new lexer from a given specification.
    # Builds one DFA per pattern in the specification, then runs the
    # superclass subset construction from the set of all DFA start states.
    def initialize(specification)
      dfas = {}
      @specification = specification
      specification.items.each do |pattern, item|
        dfas[pattern] = LexerSupport::DFA.new(pattern)
      end
      super(ItemSet.new(dfas.values.collect{|dfa| dfa.start_state}))
    end

    # Compiles the lexer to Ruby code that when executed, reloads all the states and actions of the lexer
    # into a class named +lexer_class_name+.
    def compile_to_ruby_source_as lexer_class_name
      result = "class #{lexer_class_name} < Dhaka::CompiledLexer\n\n"
      result << "  self.specification = #{specification.name}\n\n"
      result << "  start_with #{start_state.object_id}\n\n"
      @states.each do |key, state|
        result << "#{state.compile_to_ruby_source}\n\n"
      end
      result << "end"
      result
    end

    # Returns a LexerRun that tokenizes +input+.
    def lex input
      LexerRun.new(self, input)
    end

    # The action block registered in the specification for +pattern+.
    def action_for_pattern pattern #:nodoc
      @specification.items[pattern].action
    end

    private
    # When DFA states for several patterns accept at once, the minimum
    # specification item (per the items' own ordering) wins; the combined
    # state is tagged with that winning pattern (or nil if none accept).
    def new_state_for_key key
      item = key.select {|state| state.accepting?}.collect {|state| @specification.items[state.pattern]}.min
      LexerSupport::State.new(self, item && item.pattern)
    end

    # All characters on which any member state has an outgoing transition.
    def transition_characters states
      states.collect{|state| state.transitions.keys}.flatten.uniq
    end

    # Subset-construction step: the set of states reachable from any member
    # of +states+ on input +char+.
    def dest_key_for states, char
      result = ItemSet.new
      states.each do |state|
        dest_state = state.transitions[char]
        result << dest_state if dest_state
      end
      result
    end
  end
end
@@ -0,0 +1,66 @@
module Dhaka
  # Represents a run of a lexer on a given input string.
  class LexerRun
    include Enumerable

    attr_reader :current_lexeme

    def initialize lexer, input
      @lexer, @input = lexer, input
      @input_position = 0
      # Characters consumed past the last accepting state; rewound and
      # re-scanned if the current lexeme ends before they can be accepted.
      @not_yet_accepted_chars = []
    end

    # Constructs a token of type +symbol_name+ from the +current_lexeme+.
    def create_token(symbol_name)
      Token.new(symbol_name, @current_lexeme.characters.join, @current_lexeme.input_position)
    end

    # Yields each token as it is recognized. Returns a TokenizerErrorResult if an error occurs during tokenization.
    # Implements maximal-munch: keeps consuming while transitions exist,
    # remembering the most recent accepting point, and rewinds to it when
    # the automaton gets stuck. A final end-of-input Token is always yielded.
    def each
      reset_and_rewind
      loop do
        c = curr_char
        # "\0" is the synthetic end-of-input marker produced by curr_char.
        break if (c == "\0" && @not_yet_accepted_chars.empty? && !@current_lexeme.accepted?)
        dest_state = @curr_state.transitions[c]
        unless dest_state
          # Stuck: if nothing has been accepted yet, this is a lex error.
          return TokenizerErrorResult.new(@input_position) unless @current_lexeme.accepted?
          token = get_token
          yield token if token
          reset_and_rewind
        else
          @curr_state = dest_state
          if @curr_state.accepting?
            # New accepting point: fold in any provisionally-consumed chars.
            @current_lexeme.pattern = @curr_state.pattern
            @current_lexeme.concat @not_yet_accepted_chars
            @not_yet_accepted_chars = []
            @current_lexeme << c
          else
            @not_yet_accepted_chars << c
          end
          advance
        end
      end
      yield Token.new(END_SYMBOL_NAME, nil, nil)
    end

    private
    # Rewind the input to just after the last accepted character and start
    # a fresh lexeme from the lexer's start state.
    def reset_and_rewind
      @input_position -= @not_yet_accepted_chars.size
      @current_lexeme = Lexeme.new(@input_position)
      @curr_state = @lexer.start_state
      @not_yet_accepted_chars = []
    end

    # Current input character; "\0" once past the end of input.
    # NOTE(review): relies on String#[] returning a byte and Integer#chr
    # (Ruby 1.8-era semantics) — confirm under newer Rubies.
    def curr_char
      (@input[@input_position] || 0).chr
    end

    def advance
      @input_position += 1
    end

    # Runs the specification's action block for the accepted pattern in the
    # context of this LexerRun (so the block can call create_token).
    def get_token
      instance_eval(&@lexer.action_for_pattern(@current_lexeme.pattern))
    end
  end
end
@@ -0,0 +1,368 @@
module Dhaka
  module LexerSupport #:nodoc:all
    # Character vocabularies used by the regex grammar below.
    DIGITS = ('0'..'9').to_a
    LOWERCASE_LETTERS = ('a'..'z').to_a
    UPPERCASE_LETTERS = ('A'..'Z').to_a
    LETTERS = LOWERCASE_LETTERS + UPPERCASE_LETTERS
    WHITESPACE = [" ", "\n", "\t"]
    SYMBOLS = %w| ~ ` ! @ # % & _ = : ; " ' < , > - |
    # Shorthand classes: \d, \w, \s expansions.
    CLASSES = {'d' => DIGITS, 'w' => LETTERS, 's' => WHITESPACE}

    # Regex metacharacters, mapped to production names for their escaped forms.
    OPERATOR_CHARACTERS = {'(' => 'open_parenth', ')' => 'close_parenth', '[' => 'open_square_bracket',
                           ']' => 'close_square_bracket', '+' => 'plus', '*' => 'asterisk',
                           '?' => 'question_mark', '.' => 'period', '\\' => 'back_slash',
                           '|' => 'pipe', '{' => 'left_curly_brace', '}' => 'right_curly_brace',
                           '/' => 'forward_slash', '^' => 'caret', '$' => 'dollar'}

    # Characters that are operators inside a [...] character set.
    SET_OPERATOR_CHARACTERS = %w| - ^ [ ] \\ |

    ALL_CHARACTERS = DIGITS + LETTERS + SYMBOLS + WHITESPACE + OPERATOR_CHARACTERS.keys

    # Grammar for the regular-expression language accepted by the lexer
    # generator. Productions build the AST node classes defined below.
    class RegexGrammar < Dhaka::Grammar

      for_symbol(Dhaka::START_SYMBOL_NAME) do
        regex %w| Disjunction | do RootNode.new(child_nodes[0]) end
      end

      for_symbol('Disjunction') do
        disjunction %w| Alternative \| Disjunction | do OrNode.new(child_nodes[0], child_nodes[2]) end
        alternative %w| Alternative | do child_nodes[0] end
      end

      for_symbol('Alternative') do
        concatenation %w| Alternative Term | do CatNode.new(child_nodes[0], child_nodes[1]) end
        term %w| Term | do child_nodes[0] end
      end

      for_symbol('Term') do
        zero_or_more %w| Atom * | do ZeroOrMoreNode.new(child_nodes[0]) end
        one_or_more %w| Atom + | do OneOrMoreNode.new(child_nodes[0]) end
        zero_or_one %w| Atom ? | do ZeroOrOneNode.new(child_nodes[0]) end
        atom %w| Atom | do child_nodes[0] end
      end

      for_symbol('Atom') do
        group %w| ( Disjunction ) | do child_nodes[1] end
        char %w| Character | do LeafNode.new(child_nodes[0]) end
        # '.' expands to an alternation over every known character except newline.
        anything %w| . | do OrNode.new(*(ALL_CHARACTERS - ["\n"]).collect {|char| LeafNode.new(char)}) end
        positive_set %w| [ SetContents ] | do OrNode.new(*child_nodes[1].collect{|char| LeafNode.new(char)}) end
        negative_set %w| [ ^ SetContents ] | do OrNode.new(*(ALL_CHARACTERS - child_nodes[2]).collect {|char| LeafNode.new(char)}) end

        # \d, \w, \s expand to an alternation over the class's members.
        CLASSES.each do |char, expansion|
          send("character_class_#{char}", ['\\', char]) do
            OrNode.new(*CLASSES[char].collect {|c| LeafNode.new(c)})
          end
        end

        # Escaped metacharacters are matched literally.
        OPERATOR_CHARACTERS.each do |char, method_name|
          send(method_name, ['\\', char]) do
            LeafNode.new(char)
          end
        end
      end

      for_symbol('Character') do
        letter_character %w| Letter | do child_nodes[0] end
        digit_character %w| Digit | do child_nodes[0] end
        white_space_character %w| Whitespace | do child_nodes[0] end
        symbol_character %w| Symbol | do child_nodes[0] end
      end


      # SetContents accumulates a flat array of characters.
      for_symbol('SetContents') do
        single_item %w| SetItem | do child_nodes[0] end
        multiple_items %w| SetContents SetItem | do child_nodes[0].concat child_nodes[1] end
      end

      for_symbol('SetItem') do
        single_char_item %w| SetCharacter | do [child_nodes[0]] end
        lower_case_letter_range %w| LowercaseLetter - LowercaseLetter | do (child_nodes[0]..child_nodes[2]).to_a end
        upper_case_letter_range %w| UppercaseLetter - UppercaseLetter | do (child_nodes[0]..child_nodes[2]).to_a end
        digit_range %w| Digit - Digit | do (child_nodes[0]..child_nodes[2]).to_a end
      end



      for_symbol('Letter') do
        lower_case_letter %w| LowercaseLetter | do child_nodes[0] end
        upper_case_letter %w| UppercaseLetter | do child_nodes[0] end
      end

      # One terminal production per concrete character, generated in a loop.
      for_symbol('LowercaseLetter') do
        LOWERCASE_LETTERS.each do |letter|
          send("lower_char_letter_#{letter}", letter) do
            letter
          end
        end
      end

      for_symbol('UppercaseLetter') do
        UPPERCASE_LETTERS.each do |letter|
          send("upper_case_letter_#{letter}", letter) do
            letter
          end
        end
      end

      for_symbol('Digit') do
        DIGITS.each do |digit|
          send("digit_#{digit}", digit) do
            digit
          end
        end
      end

      for_symbol('Whitespace') do
        WHITESPACE.each do |whitespace_char|
          send("whitespace_#{whitespace_char[0]}", whitespace_char) do
            whitespace_char
          end
        end
      end

      for_symbol('Symbol') do
        SYMBOLS.each do |symbol_char|
          send("symbol_char_#{symbol_char[0]}", symbol_char) do
            symbol_char
          end
        end
      end

      for_symbol('SetCharacter') do
        (ALL_CHARACTERS - SET_OPERATOR_CHARACTERS).each do |char|
          send("set_character_#{char[0]}", char) do
            char
          end
        end
        SET_OPERATOR_CHARACTERS.each do |char|
          send("set_operator_character_#{char[0]}", ['\\', char]) do
            char
          end
        end
      end
    end


    # Base class for regex AST nodes. Subclasses implement nullable/first/
    # last/calculate_follow_sets for the followpos DFA construction.
    class ASTNode
      def accepting
        false
      end
    end

    # Node with exactly two children (concatenation and its subclasses).
    class BinaryNode < ASTNode
      attr_reader :left, :right
      def initialize left, right
        @left, @right = left, right
      end

      # Renders this subtree into the given dot graph.
      def to_dot(graph)
        graph.node(self, :label => label)
        graph.edge(self, left)
        graph.edge(self, right)
        left.to_dot(graph)
        right.to_dot(graph)
      end

      def calculate_follow_sets
        left.calculate_follow_sets
        right.calculate_follow_sets
      end
    end

    # Alternation over any number of children.
    class OrNode < ASTNode
      attr_reader :children
      def initialize(*children)
        @children = children
      end
      def label
        "|"
      end

      # Nullable if any branch is nullable.
      def nullable
        children.any? {|child| child.nullable}
      end

      # Union of the children's first sets.
      def first
        children.inject(Set.new([])) do |result, child|
          result | child.first
        end
      end

      # Union of the children's last sets.
      def last
        children.inject(Set.new([])) do |result, child|
          result | child.last
        end
      end

      def to_dot(graph)
        graph.node(self, :label => label)
        children.each do |child|
          graph.edge(self, child)
          child.to_dot(graph)
        end
      end

      def calculate_follow_sets
        children.each do |child|
          child.calculate_follow_sets
        end
      end
    end

    # Concatenation of left followed by right.
    class CatNode < BinaryNode
      def label
        "cat"
      end

      def nullable
        left.nullable && right.nullable
      end

      def first
        left.nullable ? (left.first | right.first) : left.first
      end

      def last
        right.nullable ? (left.last | right.last) : right.last
      end

      # followpos rule for concatenation: everything in first(right)
      # follows every position in last(left).
      def calculate_follow_sets
        super
        left.last.each do |leaf_node|
          leaf_node.follow_set.merge right.first
        end
      end
    end

    # Node with a single child (the quantifiers *, +, ?).
    class UnaryNode < ASTNode
      attr_reader :child
      def initialize child
        @child = child
      end

      def to_dot(graph)
        graph.node(self, :label => label)
        graph.edge(self, child)
        child.to_dot(graph)
      end

      def nullable
        child.nullable
      end

      def first
        child.first
      end

      def last
        child.last
      end

      def calculate_follow_sets
        child.calculate_follow_sets
      end
    end

    # Root of the AST: the user's regex concatenated with the synthetic
    # accepting end-marker node.
    class RootNode < CatNode
      def initialize(left)
        super(left, AcceptingNode.new())
      end

      def label
        "start"
      end

      def head_node?
        true
      end
    end

    # Kleene star: zero or more repetitions.
    class ZeroOrMoreNode < UnaryNode
      def label
        "*"
      end

      def nullable
        true
      end

      # followpos rule for repetition: first positions follow last positions.
      def calculate_follow_sets
        super
        last.each do |leaf_node|
          leaf_node.follow_set.merge first
        end
      end
    end

    # '?': zero or one occurrence — nullable, no extra follow edges.
    class ZeroOrOneNode < UnaryNode
      def label
        "?"
      end

      def nullable
        true
      end
    end

    # '+': one or more repetitions — same follow edges as '*', but not nullable.
    class OneOrMoreNode < UnaryNode
      def label
        "+"
      end

      def calculate_follow_sets
        super
        last.each do |leaf_node|
          leaf_node.follow_set.merge first
        end
      end
    end

    # A single concrete character position in the regex.
    class LeafNode < ASTNode
      attr_reader :character, :follow_set
      def initialize character
        @character = character
        @follow_set = Set.new
      end

      def to_dot(graph)
        graph.node(self, :label => character)
      end

      def nullable
        false
      end

      def first
        Set.new([self])
      end

      def last
        Set.new([self])
      end

      def calculate_follow_sets
      end
    end

    # Synthetic end-marker position: reaching it means the regex matched.
    class AcceptingNode < ASTNode
      def accepting
        true
      end

      # No character — the end marker never consumes input.
      def character
      end

      def first
        Set.new([self])
      end

      def calculate_follow_sets
      end

      def to_dot(graph)
        graph.node(self, :label => '#')
      end
    end
  end
end