rley 0.0.04 → 0.0.05
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/parser/chart.rb +2 -4
- data/lib/rley/parser/dotted_item.rb +2 -4
- data/lib/rley/parser/earley_parser.rb +13 -16
- data/lib/rley/parser/parse_state.rb +2 -5
- data/lib/rley/parser/parsing.rb +17 -20
- data/lib/rley/parser/state_set.rb +1 -5
- data/lib/rley/parser/token.rb +0 -4
- data/lib/rley/syntax/grammar.rb +1 -4
- data/lib/rley/syntax/grm_symbol.rb +0 -2
- data/lib/rley/syntax/literal.rb +0 -2
- data/lib/rley/syntax/non_terminal.rb +0 -4
- data/lib/rley/syntax/production.rb +3 -7
- data/lib/rley/syntax/symbol_seq.rb +7 -8
- data/lib/rley/syntax/verbatim_symbol.rb +0 -2
- data/spec/rley/parser/chart_spec.rb +24 -26
- data/spec/rley/parser/dotted_item_spec.rb +83 -88
- data/spec/rley/parser/earley_parser_spec.rb +277 -241
- data/spec/rley/parser/parse_state_spec.rb +66 -66
- data/spec/rley/parser/parsing_spec.rb +89 -90
- data/spec/rley/parser/state_set_spec.rb +54 -56
- data/spec/rley/parser/token_spec.rb +18 -20
- data/spec/rley/syntax/grammar_spec.rb +118 -120
- data/spec/rley/syntax/grm_symbol_spec.rb +12 -15
- data/spec/rley/syntax/literal_spec.rb +16 -18
- data/spec/rley/syntax/non_terminal_spec.rb +12 -15
- data/spec/rley/syntax/production_spec.rb +33 -35
- data/spec/rley/syntax/symbol_seq_spec.rb +51 -52
- data/spec/rley/syntax/terminal_spec.rb +12 -15
- data/spec/rley/syntax/verbatim_symbol_spec.rb +16 -18
- metadata +2 -2
@@ -2,7 +2,6 @@ require_relative 'terminal' # Load superclass
|
|
2
2
|
|
3
3
|
module Rley # This module is used as a namespace
|
4
4
|
module Syntax # This module is used as a namespace
|
5
|
-
|
6
5
|
# A verbatim word is a terminal symbol that represents one unique word
|
7
6
|
# in the language defined by the grammar.
|
8
7
|
class VerbatimSymbol < Terminal
|
@@ -14,7 +13,6 @@ module Rley # This module is used as a namespace
|
|
14
13
|
@text = aText.dup
|
15
14
|
end
|
16
15
|
end # class
|
17
|
-
|
18
16
|
end # module
|
19
17
|
end # module
|
20
18
|
|
@@ -6,42 +6,40 @@ require_relative '../../../lib/rley/parser/chart'
|
|
6
6
|
|
7
7
|
module Rley # Open this namespace to avoid module qualifier prefixes
|
8
8
|
module Parser # Open this namespace to avoid module qualifier prefixes
|
9
|
+
describe Chart do
|
9
10
|
|
10
|
-
|
11
|
+
let(:count_token) { 20 }
|
12
|
+
let(:dotted_rule) { double('fake-dotted-item') }
|
11
13
|
|
12
|
-
|
13
|
-
let(:dotted_rule) { double('fake-dotted-item') }
|
14
|
+
context 'Initialization:' do
|
14
15
|
|
15
|
-
|
16
|
+
# Default instantiation rule
|
17
|
+
subject { Chart.new(dotted_rule, count_token) }
|
16
18
|
|
17
|
-
|
18
|
-
|
19
|
+
it 'should be created with a start dotted rule and a token count' do
|
20
|
+
expect { Chart.new(dotted_rule, count_token) }.not_to raise_error
|
21
|
+
end
|
19
22
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
+
it 'should have a seed state in first state_set' do
|
24
|
+
seed_state = ParseState.new(dotted_rule, 0)
|
25
|
+
expect(subject[0].states).to eq([seed_state])
|
23
26
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
+
# Shorthand syntax
|
28
|
+
expect(subject[0].first).to eq(seed_state)
|
29
|
+
end
|
27
30
|
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
+
it 'should have the correct state_set count' do
|
32
|
+
expect(subject.state_sets.size).to eq(count_token + 1)
|
33
|
+
end
|
31
34
|
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
+
it 'should the start dotted rule' do
|
36
|
+
expect(subject.start_dotted_rule).to eq(dotted_rule)
|
37
|
+
end
|
35
38
|
|
36
|
-
|
37
|
-
expect(subject.start_dotted_rule).to eq(dotted_rule)
|
38
|
-
end
|
39
|
-
|
40
|
-
end # context
|
41
|
-
|
42
|
-
end # describe
|
39
|
+
end # context
|
43
40
|
|
41
|
+
end # describe
|
44
42
|
end # module
|
45
43
|
end # module
|
46
44
|
|
47
|
-
# End of file
|
45
|
+
# End of file
|
@@ -9,100 +9,95 @@ require_relative '../../../lib/rley/parser/dotted_item'
|
|
9
9
|
|
10
10
|
module Rley # Open this namespace to avoid module qualifier prefixes
|
11
11
|
module Parser # Open this namespace to avoid module qualifier prefixes
|
12
|
+
describe DottedItem do
|
12
13
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
let(:nt_sentence) { Syntax::NonTerminal.new('sentence') }
|
18
|
-
|
19
|
-
let(:sample_prod) do
|
20
|
-
Syntax::Production.new(nt_sentence, [t_a, t_b, t_c])
|
21
|
-
end
|
22
|
-
|
23
|
-
let(:other_prod) do
|
24
|
-
Syntax::Production.new(nt_sentence, [t_a])
|
25
|
-
end
|
26
|
-
|
27
|
-
let(:empty_prod) do
|
28
|
-
Syntax::Production.new(nt_sentence,[])
|
29
|
-
end
|
30
|
-
|
31
|
-
|
32
|
-
subject { DottedItem.new(sample_prod, 1) }
|
33
|
-
|
34
|
-
context 'Initialization:' do
|
35
|
-
it 'should be created with a production and an index' do
|
36
|
-
expect { DottedItem.new(sample_prod, 0) }.not_to raise_error
|
37
|
-
expect { DottedItem.new(sample_prod, 3) }.not_to raise_error
|
38
|
-
end
|
39
|
-
|
40
|
-
it 'should complain when the index is out-of-bounds' do
|
41
|
-
err = StandardError
|
42
|
-
msg = 'Out of bound index'
|
43
|
-
expect { DottedItem.new(sample_prod, 4) }.to raise_error(err, msg)
|
44
|
-
end
|
45
|
-
|
46
|
-
it 'should know its production' do
|
47
|
-
expect(subject.production).to eq(sample_prod)
|
48
|
-
end
|
49
|
-
|
50
|
-
it 'should know the lhs of the production' do
|
51
|
-
expect(subject.lhs).to eq(sample_prod.lhs)
|
14
|
+
# Factory method. Builds a production with given left-hand side (LHS)
|
15
|
+
# and given RHS (right-hand side)
|
16
|
+
def build_prod(theLHS, *theRHSSymbols)
|
17
|
+
return Syntax::Production.new(theLHS, theRHSSymbols)
|
52
18
|
end
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
19
|
+
|
20
|
+
let(:t_a) { Syntax::Terminal.new('A') }
|
21
|
+
let(:t_b) { Syntax::Terminal.new('B') }
|
22
|
+
let(:t_c) { Syntax::Terminal.new('C') }
|
23
|
+
let(:nt_sentence) { Syntax::NonTerminal.new('sentence') }
|
24
|
+
let(:sample_prod) { build_prod(nt_sentence, t_a, t_b, t_c) }
|
25
|
+
let(:other_prod) { build_prod(nt_sentence, t_a) }
|
26
|
+
let(:empty_prod) { build_prod(nt_sentence) }
|
27
|
+
|
28
|
+
# Default instantiation rule
|
29
|
+
subject { DottedItem.new(sample_prod, 1) }
|
30
|
+
|
31
|
+
context 'Initialization:' do
|
32
|
+
it 'should be created with a production and an index' do
|
33
|
+
expect { DottedItem.new(sample_prod, 0) }.not_to raise_error
|
34
|
+
expect { DottedItem.new(sample_prod, 3) }.not_to raise_error
|
35
|
+
end
|
36
|
+
|
37
|
+
it 'should complain when the index is out-of-bounds' do
|
38
|
+
err = StandardError
|
39
|
+
msg = 'Out of bound index'
|
40
|
+
expect { DottedItem.new(sample_prod, 4) }.to raise_error(err, msg)
|
41
|
+
end
|
42
|
+
|
43
|
+
it 'should know its production' do
|
44
|
+
expect(subject.production).to eq(sample_prod)
|
45
|
+
end
|
77
46
|
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
47
|
+
it 'should know the lhs of the production' do
|
48
|
+
expect(subject.lhs).to eq(sample_prod.lhs)
|
49
|
+
end
|
50
|
+
|
51
|
+
it 'should know its position' do
|
52
|
+
# At start position
|
53
|
+
instance1 = DottedItem.new(sample_prod, 0)
|
54
|
+
expect(instance1.position).to eq(0)
|
55
|
+
|
56
|
+
# At (before) last symbol
|
57
|
+
instance2 = DottedItem.new(sample_prod, 2)
|
58
|
+
expect(instance2.position).to eq(2)
|
59
|
+
|
60
|
+
# After all symbols in rhs
|
61
|
+
instance3 = DottedItem.new(sample_prod, 3)
|
62
|
+
expect(instance3.position).to eq(-1)
|
63
|
+
|
64
|
+
# At start/end at the same time (production is empty)
|
65
|
+
instance4 = DottedItem.new(build_prod(nt_sentence), 0)
|
66
|
+
expect(instance4.position).to eq(-2)
|
67
|
+
end
|
68
|
+
|
69
|
+
end # context
|
70
|
+
|
71
|
+
context 'Provided service:' do
|
72
|
+
it 'should whether its dot is at start position' do
|
73
|
+
expect(subject).not_to be_at_start
|
74
|
+
|
75
|
+
# At start position
|
76
|
+
instance1 = DottedItem.new(sample_prod, 0)
|
77
|
+
expect(instance1).to be_at_start
|
78
|
+
|
79
|
+
# At start/end at the same time (production is empty)
|
80
|
+
instance2 = DottedItem.new(build_prod(nt_sentence), 0)
|
81
|
+
expect(instance2).to be_at_start
|
82
|
+
end
|
83
|
+
|
84
|
+
it 'should whether it is a reduce item' do
|
85
|
+
expect(subject).not_to be_reduce_item
|
86
|
+
|
87
|
+
first_instance = DottedItem.new(sample_prod, 3)
|
88
|
+
expect(first_instance).to be_reduce_item
|
89
|
+
|
90
|
+
second_instance = DottedItem.new(empty_prod, 0)
|
91
|
+
expect(second_instance).to be_reduce_item
|
92
|
+
end
|
93
|
+
|
94
|
+
it 'should know the symbol after the dot' do
|
95
|
+
expect(subject.next_symbol).to eq(t_b)
|
96
|
+
end
|
85
97
|
end
|
86
98
|
|
87
|
-
|
88
|
-
expect(subject).not_to be_reduce_item
|
89
|
-
|
90
|
-
first_instance = DottedItem.new(sample_prod, 3)
|
91
|
-
expect(first_instance).to be_reduce_item
|
92
|
-
|
93
|
-
second_instance = DottedItem.new(empty_prod, 0)
|
94
|
-
expect(second_instance).to be_reduce_item
|
95
|
-
end
|
96
|
-
|
97
|
-
it 'should know the symbol after the dot' do
|
98
|
-
expect(subject.next_symbol).to eq(t_b)
|
99
|
-
end
|
100
|
-
end
|
101
|
-
|
102
|
-
end # describe
|
103
|
-
|
99
|
+
end # describe
|
104
100
|
end # module
|
105
101
|
end # module
|
106
102
|
|
107
103
|
# End of file
|
108
|
-
|
@@ -9,263 +9,299 @@ require_relative '../../../lib/rley/parser/earley_parser'
|
|
9
9
|
|
10
10
|
module Rley # Open this namespace to avoid module qualifier prefixes
|
11
11
|
module Parser # Open this namespace to avoid module qualifier prefixes
|
12
|
-
|
13
|
-
describe EarleyParser do
|
12
|
+
describe EarleyParser do
|
14
13
|
=begin
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
end
|
22
|
-
let(:string) do
|
23
|
-
string_pattern = /"([^\\"]|\\.)*"/
|
24
|
-
Syntax::Literal('string', string_pattern)
|
25
|
-
end
|
26
|
-
let(:lbracket) { Syntax::VerbatimSymbol('[') }
|
27
|
-
let(:rbracket) { Syntax::VerbatimSymbol(']') }
|
28
|
-
let(:comma) { Syntax::VerbatimSymbol(',') }
|
29
|
-
let(:array) { Syntax::NonTerminal('Array') }
|
30
|
-
let(:object) { Syntax::NonTerminal('Object') }
|
31
|
-
|
32
|
-
let(:array_prod) do
|
33
|
-
Production.new(array, )
|
34
|
-
end
|
35
|
-
=end
|
36
|
-
|
37
|
-
# Grammar 1: A very simple language
|
38
|
-
# S ::= A.
|
39
|
-
# A ::= "a" A "c".
|
40
|
-
# A ::= "b".
|
41
|
-
let(:nt_S) { Syntax::NonTerminal.new('S') }
|
42
|
-
let(:nt_A) { Syntax::NonTerminal.new('A') }
|
43
|
-
let(:a_) { Syntax::VerbatimSymbol.new('a') }
|
44
|
-
let(:b_) { Syntax::VerbatimSymbol.new('b') }
|
45
|
-
let(:c_) { Syntax::VerbatimSymbol.new('c') }
|
46
|
-
let(:prod_S) { Syntax::Production.new(nt_S, [nt_A]) }
|
47
|
-
let(:prod_A1) { Syntax::Production.new(nt_A, [a_, nt_A, c_]) }
|
48
|
-
let(:prod_A2) { Syntax::Production.new(nt_A, [b_]) }
|
49
|
-
let(:grammar_abc) { Syntax::Grammar.new([prod_S, prod_A1, prod_A2]) }
|
50
|
-
|
51
|
-
# Helper method that mimicks the output of a tokenizer
|
52
|
-
# for the language specified by gramma_abc
|
53
|
-
def grm1_tokens()
|
54
|
-
tokens = [
|
55
|
-
Token.new('a', a_),
|
56
|
-
Token.new('a', a_),
|
57
|
-
Token.new('b', b_),
|
58
|
-
Token.new('c', c_),
|
59
|
-
Token.new('c', c_)
|
60
|
-
]
|
61
|
-
|
62
|
-
return tokens
|
63
|
-
end
|
64
|
-
|
65
|
-
|
66
|
-
# Default instantiation rule
|
67
|
-
subject { EarleyParser.new(grammar_abc) }
|
68
|
-
|
69
|
-
context 'Initialization:' do
|
70
|
-
it 'should be created with a grammar' do
|
71
|
-
expect { EarleyParser.new(grammar_abc) }.not_to raise_error
|
14
|
+
let(:kw_true) { Syntax::VerbatimSymbol.new('true') }
|
15
|
+
let(:kw_false) { Syntax::VerbatimSymbol.new('false') }
|
16
|
+
let(:kw_null) { Syntax::VerbatimSymbol.new('null') }
|
17
|
+
let(:number) do
|
18
|
+
number_pattern = /[-+]?[0-9]+(\.[0-9]+)?([eE][-+]?[0-9]+)?/
|
19
|
+
Syntax::Literal.new('number', number_pattern)
|
72
20
|
end
|
73
|
-
|
74
|
-
|
75
|
-
|
21
|
+
let(:string) do
|
22
|
+
string_pattern = /"([^\\"]|\\.)*"/
|
23
|
+
Syntax::Literal.new('string', string_pattern)
|
76
24
|
end
|
77
|
-
|
78
|
-
|
79
|
-
|
25
|
+
let(:lbracket) { Syntax::VerbatimSymbol.new('[') }
|
26
|
+
let(:rbracket) { Syntax::VerbatimSymbol.new(']') }
|
27
|
+
let(:comma) { Syntax::VerbatimSymbol.new(',') }
|
28
|
+
let(:array) { Syntax::NonTerminal.new('Array') }
|
29
|
+
let(:object) { Syntax::NonTerminal.new('Object') }
|
30
|
+
|
31
|
+
let(:array_prod) do
|
32
|
+
Production.new(array, )
|
80
33
|
end
|
34
|
+
=end
|
81
35
|
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
36
|
+
# Grammar 1: A very simple language
|
37
|
+
# S ::= A.
|
38
|
+
# A ::= "a" A "c".
|
39
|
+
# A ::= "b".
|
40
|
+
# Let's create the grammar piece by piece
|
41
|
+
let(:nt_S) { Syntax::NonTerminal.new('S') }
|
42
|
+
let(:nt_A) { Syntax::NonTerminal.new('A') }
|
43
|
+
let(:a_) { Syntax::VerbatimSymbol.new('a') }
|
44
|
+
let(:b_) { Syntax::VerbatimSymbol.new('b') }
|
45
|
+
let(:c_) { Syntax::VerbatimSymbol.new('c') }
|
46
|
+
let(:prod_S) { Syntax::Production.new(nt_S, [nt_A]) }
|
47
|
+
let(:prod_A1) { Syntax::Production.new(nt_A, [a_, nt_A, c_]) }
|
48
|
+
let(:prod_A2) { Syntax::Production.new(nt_A, [b_]) }
|
49
|
+
let(:grammar_abc) { Syntax::Grammar.new([prod_S, prod_A1, prod_A2]) }
|
50
|
+
|
51
|
+
# Helper method that mimics the output of a tokenizer
|
52
|
+
# for the language specified by grammar_abc
|
53
|
+
def grm1_tokens()
|
54
|
+
tokens = [
|
55
|
+
Token.new('a', a_),
|
56
|
+
Token.new('a', a_),
|
57
|
+
Token.new('b', b_),
|
58
|
+
Token.new('c', c_),
|
59
|
+
Token.new('c', c_)
|
60
|
+
]
|
91
61
|
|
92
|
-
|
93
|
-
# than production in grammar.
|
94
|
-
expect(start_items_A[0].production).to eq(prod_A1)
|
95
|
-
expect(start_items_A[1].production).to eq(prod_A2)
|
62
|
+
return tokens
|
96
63
|
end
|
97
64
|
|
98
|
-
it 'should have its next mapping initialized' do
|
99
|
-
expect(subject.next_mapping.size).to eq(5)
|
100
|
-
end
|
101
|
-
end # context
|
102
|
-
|
103
|
-
context 'Parsing: ' do
|
104
|
-
# Helper method. Compare the data from the parse state
|
105
|
-
# with values from expectation hash.
|
106
|
-
def compare_state(aState, expectations)
|
107
|
-
expect(aState.origin).to eq(expectations[:origin])
|
108
|
-
dotted_item = aState.dotted_rule
|
109
|
-
expect(dotted_item.production).to eq(expectations[:production])
|
110
|
-
expect(dotted_item.position).to eq(expectations[:dot])
|
111
|
-
end
|
112
65
|
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
expectations = { origin: 0, production: prod_A1, dot: 1 }
|
140
|
-
compare_state(state_set_1.states[0], expectations)
|
141
|
-
|
142
|
-
expectations = { origin: 1, production: prod_A1, dot: 0 }
|
143
|
-
compare_state(state_set_1.states[1], expectations)
|
144
|
-
|
145
|
-
expectations = { origin: 1, production: prod_A2, dot: 0 }
|
146
|
-
compare_state(state_set_1.states[2], expectations)
|
147
|
-
|
148
|
-
######################
|
149
|
-
state_set_2 = parse_result.chart[2]
|
150
|
-
expect(state_set_2.states.size).to eq(3)
|
151
|
-
# Expectation chart[2]:
|
152
|
-
# 0: A -> "a" . A "c", 1 # scan from S(0) 1
|
153
|
-
# 1: A -> . "a" A "c", 2 # predict from 0
|
154
|
-
# 2: A -> . "b", 2 # predict from 0
|
155
|
-
expectations = { origin: 1, production: prod_A1, dot: 1 }
|
156
|
-
compare_state(state_set_2.states[0], expectations)
|
157
|
-
|
158
|
-
expectations = { origin: 2, production: prod_A1, dot: 0 }
|
159
|
-
compare_state(state_set_2.states[1], expectations)
|
160
|
-
|
161
|
-
expectations = { origin: 2, production: prod_A2, dot: 0 }
|
162
|
-
compare_state(state_set_2.states[2], expectations)
|
163
|
-
|
164
|
-
######################
|
165
|
-
state_set_3 = parse_result.chart[3]
|
166
|
-
expect(state_set_3.states.size).to eq(2)
|
167
|
-
# Expectation chart[3]:
|
168
|
-
# 0: A -> "b" ., 2 # scan from S(2) 2
|
169
|
-
# 1: A -> "a" A . "c", 1 # complete from 0 and S(2) 0
|
170
|
-
expectations = { origin: 2, production: prod_A2, dot: -1 }
|
171
|
-
compare_state(state_set_3.states[0], expectations)
|
172
|
-
|
173
|
-
expectations = { origin: 1, production: prod_A1, dot: 2 }
|
174
|
-
compare_state(state_set_3.states[1], expectations)
|
175
|
-
|
176
|
-
######################
|
177
|
-
state_set_4 = parse_result.chart[4]
|
178
|
-
expect(state_set_4.states.size).to eq(2)
|
179
|
-
# Expectation chart[4]:
|
180
|
-
# 0: A -> "a" A "c" ., 1 # scan from S(3) 1
|
181
|
-
# 1: A -> "a" A . "c", 0 # complete from 0 and S(1) 0
|
182
|
-
expectations = { origin: 1, production: prod_A1, dot: -1 }
|
183
|
-
compare_state(state_set_4.states[0], expectations)
|
184
|
-
|
185
|
-
expectations = { origin: 0, production: prod_A1, dot: 2 }
|
186
|
-
compare_state(state_set_4.states[1], expectations)
|
187
|
-
|
188
|
-
######################
|
189
|
-
state_set_5 = parse_result.chart[5]
|
190
|
-
expect(state_set_5.states.size).to eq(2)
|
191
|
-
# Expectation chart[5]:
|
192
|
-
# 0: A -> "a" A "c" ., 0 # scan from S(4) 1
|
193
|
-
# 1: S -> A ., 0 # complete from 0 and S(0) 0
|
194
|
-
expectations = { origin: 0, production: prod_A1, dot: -1 }
|
195
|
-
compare_state(state_set_5.states[0], expectations)
|
196
|
-
|
197
|
-
expectations = { origin: 0, production: prod_S, dot: -1 }
|
198
|
-
compare_state(state_set_5.states[1], expectations)
|
66
|
+
# Grammar 2: A simple arithmetic expression language
|
67
|
+
# E ::= S.
|
68
|
+
# S ::= S "+" M.
|
69
|
+
# S ::= M.
|
70
|
+
# M ::= M "*" M.
|
71
|
+
# M ::= T.
|
72
|
+
# T ::= an integer number token.
|
73
|
+
# Let's create the grammar piece by piece
|
74
|
+
let(:nt_E) { Syntax::NonTerminal.new('E') }
|
75
|
+
let(:nt_M) { Syntax::NonTerminal.new('M') }
|
76
|
+
let(:nt_T) { Syntax::NonTerminal.new('T') }
|
77
|
+
let(:plus) { Syntax::VerbatimSymbol.new('+') }
|
78
|
+
let(:star) { Syntax::VerbatimSymbol.new('*') }
|
79
|
+
let(:integer) do
|
80
|
+
integer_pattern = /[-+]?[0-9]+/ # Decimal notation
|
81
|
+
Syntax::Literal.new('integer', integer_pattern)
|
82
|
+
end
|
83
|
+
let(:prod_E) { Syntax::Production.new(nt_E, [nt_S]) }
|
84
|
+
let(:prod_S1) { Syntax::Production.new(nt_S, [nt_S, plus, nt_M]) }
|
85
|
+
let(:prod_S2) { Syntax::Production.new(nt_S, [nt_M]) }
|
86
|
+
let(:prod_M1) { Syntax::Production.new(nt_M, [nt_M, star, nt_M]) }
|
87
|
+
let(:prod_M2) { Syntax::Production.new(nt_M, [nt_T]) }
|
88
|
+
let(:prod_T) { Syntax::Production.new(nt_T, [integer]) }
|
89
|
+
let(:grammar_expr) do
|
90
|
+
all_prods = [prod_E, prod_S1, prod_S2, prod_M1, prod_M2, prod_T]
|
91
|
+
Syntax::Grammar.new(all_prods)
|
199
92
|
end
|
200
93
|
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
Token.new('
|
206
|
-
Token.new('
|
207
|
-
Token.new('
|
94
|
+
# Helper method that mimics the output of a tokenizer
|
95
|
+
# for the language specified by grammar_expr
|
96
|
+
def grm2_tokens()
|
97
|
+
tokens = [
|
98
|
+
Token.new('2', integer),
|
99
|
+
Token.new('+', plus),
|
100
|
+
Token.new('3', integer),
|
101
|
+
Token.new('*', star),
|
102
|
+
Token.new('4', integer)
|
208
103
|
]
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
###################### S(0) == . a a c c
|
213
|
-
state_set_0 = parse_result.chart[0]
|
214
|
-
# Expectation chart[0]:
|
215
|
-
# S -> . A, 0 # start rule
|
216
|
-
# A -> . "a" A "c", 0
|
217
|
-
# A -> . "b", 0
|
218
|
-
expectations = { origin: 0, production: prod_S, dot: 0 }
|
219
|
-
compare_state(state_set_0.states[0], expectations)
|
220
|
-
|
221
|
-
expectations = { origin: 0, production: prod_A1, dot: 0 }
|
222
|
-
compare_state(state_set_0.states[1], expectations)
|
223
|
-
|
224
|
-
expectations = { origin: 0, production: prod_A2, dot: 0 }
|
225
|
-
compare_state(state_set_0.states[2], expectations)
|
226
|
-
|
227
|
-
###################### S(1) == a . a c c
|
228
|
-
state_set_1 = parse_result.chart[1]
|
229
|
-
expect(state_set_1.states.size).to eq(3)
|
230
|
-
# Expectation chart[1]:
|
231
|
-
# 0: A -> "a" . A "c", 0 # scan from S(0) 1
|
232
|
-
# 1: A -> . "a" A "c", 1 # predict from 0
|
233
|
-
# 2: A -> . "b", 1 # predict from 0
|
234
|
-
expectations = { origin: 0, production: prod_A1, dot: 1 }
|
235
|
-
compare_state(state_set_1.states[0], expectations)
|
236
|
-
|
237
|
-
expectations = { origin: 1, production: prod_A1, dot: 0 }
|
238
|
-
compare_state(state_set_1.states[1], expectations)
|
239
|
-
|
240
|
-
expectations = { origin: 1, production: prod_A2, dot: 0 }
|
241
|
-
compare_state(state_set_1.states[2], expectations)
|
242
|
-
|
243
|
-
###################### S(2) == a a . c c
|
244
|
-
state_set_2 = parse_result.chart[2]
|
245
|
-
expect(state_set_2.states.size).to eq(3)
|
246
|
-
# Expectation chart[2]:
|
247
|
-
# 0: A -> "a" . A "c", 1 # scan from S(0) 1
|
248
|
-
# 1: A -> . "a" A "c", 2 # predict from 0
|
249
|
-
# 2: A -> . "b", 2 # predict from 0
|
250
|
-
expectations = { origin: 1, production: prod_A1, dot: 1 }
|
251
|
-
compare_state(state_set_2.states[0], expectations)
|
252
|
-
|
253
|
-
expectations = { origin: 2, production: prod_A1, dot: 0 }
|
254
|
-
compare_state(state_set_2.states[1], expectations)
|
255
|
-
|
256
|
-
expectations = { origin: 2, production: prod_A2, dot: 0 }
|
257
|
-
compare_state(state_set_2.states[2], expectations)
|
258
|
-
|
259
|
-
###################### S(3) == a a c? c
|
260
|
-
state_set_3 = parse_result.chart[3]
|
261
|
-
expect(state_set_3.states).to be_empty # This is an error symptom
|
104
|
+
|
105
|
+
return tokens
|
262
106
|
end
|
263
|
-
end # context
|
264
107
|
|
265
|
-
end # describe
|
266
108
|
|
109
|
+
# Default instantiation rule
|
110
|
+
subject { EarleyParser.new(grammar_abc) }
|
111
|
+
|
112
|
+
context 'Initialization:' do
|
113
|
+
it 'should be created with a grammar' do
|
114
|
+
expect { EarleyParser.new(grammar_abc) }.not_to raise_error
|
115
|
+
expect { EarleyParser.new(grammar_expr) }.not_to raise_error
|
116
|
+
end
|
117
|
+
|
118
|
+
it 'should know its grammar' do
|
119
|
+
expect(subject.grammar).to eq(grammar_abc)
|
120
|
+
end
|
121
|
+
|
122
|
+
it 'should know its dotted items' do
|
123
|
+
expect(subject.dotted_items.size).to eq(8)
|
124
|
+
end
|
125
|
+
|
126
|
+
it 'should have its start mapping initialized' do
|
127
|
+
expect(subject.start_mapping.size).to eq(2)
|
128
|
+
|
129
|
+
start_items_S = subject.start_mapping[nt_S]
|
130
|
+
expect(start_items_S.size).to eq(1)
|
131
|
+
expect(start_items_S[0].production).to eq(prod_S)
|
132
|
+
|
133
|
+
start_items_A = subject.start_mapping[nt_A]
|
134
|
+
expect(start_items_A.size).to eq(2)
|
135
|
+
|
136
|
+
# Assuming that dotted_items are created in the same order
|
137
|
+
# as the productions in the grammar.
|
138
|
+
expect(start_items_A[0].production).to eq(prod_A1)
|
139
|
+
expect(start_items_A[1].production).to eq(prod_A2)
|
140
|
+
end
|
141
|
+
|
142
|
+
it 'should have its next mapping initialized' do
|
143
|
+
expect(subject.next_mapping.size).to eq(5)
|
144
|
+
end
|
145
|
+
end # context
|
146
|
+
|
147
|
+
context 'Parsing: ' do
|
148
|
+
# Helper method. Compare the data from the parse state
|
149
|
+
# with values from expectation hash.
|
150
|
+
def compare_state(aState, expectations)
|
151
|
+
expect(aState.origin).to eq(expectations[:origin])
|
152
|
+
dotted_item = aState.dotted_rule
|
153
|
+
expect(dotted_item.production).to eq(expectations[:production])
|
154
|
+
expect(dotted_item.position).to eq(expectations[:dot])
|
155
|
+
end
|
156
|
+
|
157
|
+
# Helper method. Compare the data from all the parse states
|
158
|
+
# of a given StateSet with an array of expectation hashes.
|
159
|
+
def compare_state_set(aStateSet, expectations)
|
160
|
+
(0...expectations.size).each do |i|
|
161
|
+
compare_state(aStateSet.states[i], expectations[i])
|
162
|
+
end
|
163
|
+
end
|
164
|
+
|
165
|
+
it 'should parse a valid simple input' do
|
166
|
+
parse_result = subject.parse(grm1_tokens)
|
167
|
+
expect(parse_result.success?).to eq(true)
|
168
|
+
|
169
|
+
######################
|
170
|
+
# Expectation chart[0]:
|
171
|
+
# S -> . A, 0 # start rule
|
172
|
+
# A -> . "a" A "c", 0 # predict from 0
|
173
|
+
# A -> . "b", 0 # predict from 0
|
174
|
+
expectations = [
|
175
|
+
{ origin: 0, production: prod_S, dot: 0 },
|
176
|
+
{ origin: 0, production: prod_A1, dot: 0 },
|
177
|
+
{ origin: 0, production: prod_A2, dot: 0 }
|
178
|
+
]
|
179
|
+
compare_state_set(parse_result.chart[0], expectations)
|
180
|
+
|
181
|
+
######################
|
182
|
+
state_set_1 = parse_result.chart[1]
|
183
|
+
expect(state_set_1.states.size).to eq(3)
|
184
|
+
# Expectation chart[1]:
|
185
|
+
# 0: A -> "a" . A "c", 0 # scan from S(0) 1
|
186
|
+
# 1: A -> . "a" A "c", 1 # predict from 0
|
187
|
+
# 2: A -> . "b", 1 # predict from 0
|
188
|
+
expectations = [
|
189
|
+
{ origin: 0, production: prod_A1, dot: 1 },
|
190
|
+
{ origin: 1, production: prod_A1, dot: 0 },
|
191
|
+
{ origin: 1, production: prod_A2, dot: 0 }
|
192
|
+
]
|
193
|
+
compare_state_set(state_set_1, expectations)
|
194
|
+
|
195
|
+
######################
|
196
|
+
state_set_2 = parse_result.chart[2]
|
197
|
+
expect(state_set_2.states.size).to eq(3)
|
198
|
+
# Expectation chart[2]:
|
199
|
+
# 0: A -> "a" . A "c", 1 # scan from S(0) 1
|
200
|
+
# 1: A -> . "a" A "c", 2 # predict from 0
|
201
|
+
# 2: A -> . "b", 2 # predict from 0
|
202
|
+
expectations = [
|
203
|
+
{ origin: 1, production: prod_A1, dot: 1 },
|
204
|
+
{ origin: 2, production: prod_A1, dot: 0 },
|
205
|
+
{ origin: 2, production: prod_A2, dot: 0 }
|
206
|
+
]
|
207
|
+
compare_state_set(state_set_2, expectations)
|
208
|
+
|
209
|
+
######################
|
210
|
+
state_set_3 = parse_result.chart[3]
|
211
|
+
expect(state_set_3.states.size).to eq(2)
|
212
|
+
# Expectation chart[3]:
|
213
|
+
# 0: A -> "b" ., 2 # scan from S(2) 2
|
214
|
+
# 1: A -> "a" A . "c", 1 # complete from 0 and S(2) 0
|
215
|
+
expectations = [
|
216
|
+
{ origin: 2, production: prod_A2, dot: -1 },
|
217
|
+
{ origin: 1, production: prod_A1, dot: 2 }
|
218
|
+
]
|
219
|
+
compare_state_set(state_set_3, expectations)
|
220
|
+
|
221
|
+
######################
|
222
|
+
state_set_4 = parse_result.chart[4]
|
223
|
+
expect(state_set_4.states.size).to eq(2)
|
224
|
+
# Expectation chart[4]:
|
225
|
+
# 0: A -> "a" A "c" ., 1 # scan from S(3) 1
|
226
|
+
# 1: A -> "a" A . "c", 0 # complete from 0 and S(1) 0
|
227
|
+
expectations = [
|
228
|
+
{ origin: 1, production: prod_A1, dot: -1 },
|
229
|
+
{ origin: 0, production: prod_A1, dot: 2 }
|
230
|
+
]
|
231
|
+
compare_state_set(state_set_4, expectations)
|
232
|
+
|
233
|
+
######################
|
234
|
+
state_set_5 = parse_result.chart[5]
|
235
|
+
expect(state_set_5.states.size).to eq(2)
|
236
|
+
# Expectation chart[5]:
|
237
|
+
# 0: A -> "a" A "c" ., 0 # scan from S(4) 1
|
238
|
+
# 1: S -> A ., 0 # complete from 0 and S(0) 0
|
239
|
+
expectations = [
|
240
|
+
{ origin: 0, production: prod_A1, dot: -1 },
|
241
|
+
{ origin: 0, production: prod_S, dot: -1 }
|
242
|
+
]
|
243
|
+
compare_state_set(state_set_5, expectations)
|
244
|
+
end
|
245
|
+
|
246
|
+
it 'should parse an invalid simple input' do
|
247
|
+
# Parse an erroneous input (b is missing)
|
248
|
+
wrong = [
|
249
|
+
Token.new('a', a_),
|
250
|
+
Token.new('a', a_),
|
251
|
+
Token.new('c', c_),
|
252
|
+
Token.new('c', c_)
|
253
|
+
]
|
254
|
+
parse_result = subject.parse(wrong)
|
255
|
+
expect(parse_result.success?).to eq(false)
|
256
|
+
|
257
|
+
###################### S(0) == . a a c c
|
258
|
+
# Expectation chart[0]:
|
259
|
+
# S -> . A, 0 # start rule
|
260
|
+
# A -> . "a" A "c", 0
|
261
|
+
# A -> . "b", 0
|
262
|
+
expectations = [
|
263
|
+
{ origin: 0, production: prod_S, dot: 0 },
|
264
|
+
{ origin: 0, production: prod_A1, dot: 0 },
|
265
|
+
{ origin: 0, production: prod_A2, dot: 0 }
|
266
|
+
]
|
267
|
+
compare_state_set(parse_result.chart[0], expectations)
|
268
|
+
|
269
|
+
###################### S(1) == a . a c c
|
270
|
+
state_set_1 = parse_result.chart[1]
|
271
|
+
expect(state_set_1.states.size).to eq(3)
|
272
|
+
# Expectation chart[1]:
|
273
|
+
# 0: A -> "a" . A "c", 0 # scan from S(0) 1
|
274
|
+
# 1: A -> . "a" A "c", 1 # predict from 0
|
275
|
+
# 2: A -> . "b", 1 # predict from 0
|
276
|
+
expectations = [
|
277
|
+
{ origin: 0, production: prod_A1, dot: 1 },
|
278
|
+
{ origin: 1, production: prod_A1, dot: 0 },
|
279
|
+
{ origin: 1, production: prod_A2, dot: 0 }
|
280
|
+
]
|
281
|
+
compare_state_set(state_set_1, expectations)
|
282
|
+
|
283
|
+
###################### S(2) == a a . c c
|
284
|
+
state_set_2 = parse_result.chart[2]
|
285
|
+
expect(state_set_2.states.size).to eq(3)
|
286
|
+
# Expectation chart[2]:
|
287
|
+
# 0: A -> "a" . A "c", 1 # scan from S(0) 1
|
288
|
+
# 1: A -> . "a" A "c", 2 # predict from 0
|
289
|
+
# 2: A -> . "b", 2 # predict from 0
|
290
|
+
expectations = [
|
291
|
+
{ origin: 1, production: prod_A1, dot: 1 },
|
292
|
+
{ origin: 2, production: prod_A1, dot: 0 },
|
293
|
+
{ origin: 2, production: prod_A2, dot: 0 }
|
294
|
+
]
|
295
|
+
compare_state_set(state_set_2, expectations)
|
296
|
+
|
297
|
+
###################### S(3) == a a c? c
|
298
|
+
state_set_3 = parse_result.chart[3]
|
299
|
+
expect(state_set_3.states).to be_empty # This is an error symptom
|
300
|
+
end
|
301
|
+
end # context
|
302
|
+
|
303
|
+
end # describe
|
267
304
|
end # module
|
268
305
|
end # module
|
269
306
|
|
270
307
|
# End of file
|
271
|
-
|