rley 0.4.01 → 0.4.02
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/LICENSE.txt +2 -2
- data/README.md +3 -3
- data/examples/NLP/mini_en_demo.rb +1 -1
- data/examples/data_formats/JSON/JSON_demo.rb +1 -0
- data/examples/data_formats/JSON/JSON_lexer.rb +4 -4
- data/examples/general/calc/calc_lexer.rb +2 -2
- data/lib/rley.rb +1 -1
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/formatter/debug.rb +2 -2
- data/lib/rley/formatter/json.rb +4 -4
- data/lib/rley/parse_tree_visitor.rb +9 -9
- data/lib/rley/parser/base_parser.rb +1 -1
- data/lib/rley/parser/gfg_parsing.rb +9 -0
- data/lib/rley/parser/parse_tree_builder.rb +176 -126
- data/lib/rley/parser/parse_tree_factory.rb +57 -0
- data/lib/rley/ptree/non_terminal_node.rb +10 -9
- data/lib/rley/ptree/parse_tree_node.rb +10 -5
- data/lib/rley/ptree/terminal_node.rb +14 -6
- data/lib/rley/sppf/sppf_node.rb +2 -2
- data/lib/rley/{parser → tokens}/token.rb +1 -4
- data/lib/rley/{ptree → tokens}/token_range.rb +1 -1
- data/spec/rley/formatter/debug_spec.rb +16 -16
- data/spec/rley/formatter/json_spec.rb +8 -8
- data/spec/rley/parse_forest_visitor_spec.rb +1 -1
- data/spec/rley/parse_tree_visitor_spec.rb +28 -28
- data/spec/rley/parser/error_reason_spec.rb +3 -3
- data/spec/rley/parser/gfg_chart_spec.rb +2 -2
- data/spec/rley/parser/gfg_earley_parser_spec.rb +2 -2
- data/spec/rley/parser/gfg_parsing_spec.rb +2 -2
- data/spec/rley/parser/groucho_spec.rb +1 -1
- data/spec/rley/parser/parse_tracer_spec.rb +2 -2
- data/spec/rley/parser/parse_tree_builder_spec.rb +213 -140
- data/spec/rley/parser/parse_tree_factory_spec.rb +85 -0
- data/spec/rley/parser/parse_walker_factory_spec.rb +11 -10
- data/spec/rley/ptree/non_terminal_node_spec.rb +23 -20
- data/spec/rley/ptree/terminal_node_spec.rb +7 -12
- data/spec/rley/sppf/alternative_node_spec.rb +2 -2
- data/spec/rley/sppf/non_terminal_node_spec.rb +2 -2
- data/spec/rley/support/ambiguous_grammar_helper.rb +2 -2
- data/spec/rley/support/expectation_helper.rb +1 -1
- data/spec/rley/support/grammar_ambig01_helper.rb +2 -2
- data/spec/rley/support/grammar_b_expr_helper.rb +2 -2
- data/spec/rley/support/grammar_helper.rb +3 -3
- data/spec/rley/support/grammar_l0_helper.rb +2 -2
- data/spec/rley/support/grammar_pb_helper.rb +2 -2
- data/spec/rley/{ptree → tokens}/token_range_spec.rb +2 -2
- data/spec/rley/{parser → tokens}/token_spec.rb +2 -2
- metadata +11 -17
- data/lib/rley/parser/chart.rb +0 -82
- data/lib/rley/parser/earley_parser.rb +0 -203
- data/lib/rley/parser/parsing.rb +0 -265
- data/spec/rley/parser/chart_spec.rb +0 -120
- data/spec/rley/parser/earley_parser_spec.rb +0 -710
- data/spec/rley/parser/parsing_spec.rb +0 -408
@@ -1,120 +0,0 @@
|
|
1
|
-
require_relative '../../spec_helper'
|
2
|
-
require 'stringio'
|
3
|
-
|
4
|
-
require_relative '../../../lib/rley/syntax/terminal'
|
5
|
-
require_relative '../../../lib/rley/syntax/non_terminal'
|
6
|
-
require_relative '../../../lib/rley/syntax/production'
|
7
|
-
require_relative '../../../lib/rley/parser/token'
|
8
|
-
require_relative '../../../lib/rley/parser/dotted_item'
|
9
|
-
require_relative '../../../lib/rley/parser/parse_state'
|
10
|
-
require_relative '../../../lib/rley/parser/parse_tracer'
|
11
|
-
|
12
|
-
# Load the class under test
|
13
|
-
require_relative '../../../lib/rley/parser/chart'
|
14
|
-
|
15
|
-
module Rley # Open this namespace to avoid module qualifier prefixes
|
16
|
-
module Parser # Open this namespace to avoid module qualifier prefixes
|
17
|
-
describe Chart do
|
18
|
-
let(:count_token) { 20 }
|
19
|
-
let(:sample_start_symbol) { double('fake_non-terminal') }
|
20
|
-
let(:dotted_rule) { double('fake-dotted-item') }
|
21
|
-
|
22
|
-
let(:output) { StringIO.new('', 'w') }
|
23
|
-
|
24
|
-
let(:token_seq) do
|
25
|
-
literals = %w(I saw John with a dog)
|
26
|
-
literals.map { |lexeme| Token.new(lexeme, nil) }
|
27
|
-
end
|
28
|
-
|
29
|
-
let(:sample_tracer) { ParseTracer.new(0, output, token_seq) }
|
30
|
-
|
31
|
-
# Default instantiation rule
|
32
|
-
subject do
|
33
|
-
allow(dotted_rule).to receive(:lhs).and_return(sample_start_symbol)
|
34
|
-
Chart.new([ dotted_rule ], count_token, sample_tracer)
|
35
|
-
end
|
36
|
-
|
37
|
-
context 'Initialization:' do
|
38
|
-
it 'should be created with start dotted rule, token count, tracer' do
|
39
|
-
allow(dotted_rule).to receive(:lhs).and_return(sample_start_symbol)
|
40
|
-
expect { Chart.new([ dotted_rule ], count_token, sample_tracer) }
|
41
|
-
.not_to raise_error
|
42
|
-
end
|
43
|
-
|
44
|
-
it 'should have a seed state in first state_set' do
|
45
|
-
seed_state = ParseState.new(dotted_rule, 0)
|
46
|
-
expect(subject[0].states).to eq([seed_state])
|
47
|
-
|
48
|
-
# Shorthand syntax
|
49
|
-
expect(subject[0].first).to eq(seed_state)
|
50
|
-
end
|
51
|
-
|
52
|
-
it 'should have the correct state_set count' do
|
53
|
-
expect(subject.state_sets.size).to eq(count_token + 1)
|
54
|
-
end
|
55
|
-
|
56
|
-
it 'should know the start dotted rule' do
|
57
|
-
expect(subject.start_dotted_rule).to eq(dotted_rule)
|
58
|
-
end
|
59
|
-
|
60
|
-
it 'should know the start symbol' do
|
61
|
-
expect(subject.start_symbol).to eq(sample_start_symbol)
|
62
|
-
end
|
63
|
-
|
64
|
-
it 'should have at least one non-empty state set' do
|
65
|
-
expect(subject.last_index).to eq(0)
|
66
|
-
end
|
67
|
-
|
68
|
-
it 'should reference a tracer' do
|
69
|
-
expect(subject.tracer).to eq(sample_tracer)
|
70
|
-
end
|
71
|
-
end # context
|
72
|
-
|
73
|
-
context 'Provided services:' do
|
74
|
-
let(:t_a) { Syntax::Terminal.new('A') }
|
75
|
-
let(:t_b) { Syntax::Terminal.new('B') }
|
76
|
-
let(:t_c) { Syntax::Terminal.new('C') }
|
77
|
-
let(:nt_sentence) { Syntax::NonTerminal.new('sentence') }
|
78
|
-
|
79
|
-
let(:sample_prod) do
|
80
|
-
Syntax::Production.new(nt_sentence, [t_a, t_b, t_c])
|
81
|
-
end
|
82
|
-
|
83
|
-
let(:origin_val) { 3 }
|
84
|
-
let(:dotted_rule) { DottedItem.new(sample_prod, 2) }
|
85
|
-
let(:complete_rule) { DottedItem.new(sample_prod, 3) }
|
86
|
-
let(:sample_parse_state) { ParseState.new(dotted_rule, origin_val) }
|
87
|
-
let(:sample_tracer) { ParseTracer.new(1, output, token_seq) }
|
88
|
-
|
89
|
-
# Factory method.
|
90
|
-
def parse_state(origin, aDottedRule)
|
91
|
-
ParseState.new(aDottedRule, origin)
|
92
|
-
end
|
93
|
-
|
94
|
-
|
95
|
-
it 'should trace its initialization' do
|
96
|
-
subject[0] # Force constructor call here
|
97
|
-
expectation = <<-SNIPPET
|
98
|
-
['I', 'saw', 'John', 'with', 'a', 'dog']
|
99
|
-
|. I . saw . John . with . a . dog .|
|
100
|
-
|> . . . . . .| [0:0] sentence => A B . C
|
101
|
-
SNIPPET
|
102
|
-
expect(output.string).to eq(expectation)
|
103
|
-
end
|
104
|
-
|
105
|
-
it 'should trace parse state pushing' do
|
106
|
-
subject[0] # Force constructor call here
|
107
|
-
output.string = ''
|
108
|
-
|
109
|
-
subject.push_state(dotted_rule, 3, 5, :prediction)
|
110
|
-
expectation = <<-SNIPPET
|
111
|
-
|. . . > .| [3:5] sentence => A B . C
|
112
|
-
SNIPPET
|
113
|
-
expect(output.string).to eq(expectation)
|
114
|
-
end
|
115
|
-
end # context
|
116
|
-
end # describe
|
117
|
-
end # module
|
118
|
-
end # module
|
119
|
-
|
120
|
-
# End of file
|
@@ -1,710 +0,0 @@
|
|
1
|
-
require_relative '../../spec_helper'
|
2
|
-
require 'stringio'
|
3
|
-
|
4
|
-
require_relative '../../../lib/rley/syntax/verbatim_symbol'
|
5
|
-
require_relative '../../../lib/rley/syntax/non_terminal'
|
6
|
-
require_relative '../../../lib/rley/syntax/production'
|
7
|
-
require_relative '../../../lib/rley/syntax/grammar_builder'
|
8
|
-
require_relative '../../../lib/rley/parser/token'
|
9
|
-
require_relative '../../../lib/rley/parser/dotted_item'
|
10
|
-
require_relative '../support/ambiguous_grammar_helper'
|
11
|
-
# Load the class under test
|
12
|
-
require_relative '../../../lib/rley/parser/earley_parser'
|
13
|
-
|
14
|
-
module Rley # Open this namespace to avoid module qualifier prefixes
|
15
|
-
module Parser # Open this namespace to avoid module qualifier prefixes
|
16
|
-
describe EarleyParser do
|
17
|
-
=begin
|
18
|
-
let(:kw_true) { Syntax::VerbatimSymbol.new('true') }
|
19
|
-
let(:kw_false) { Syntax::VerbatimSymbol.new('false') }
|
20
|
-
let(:kw_null) { Syntax::VerbatimSymbol.new('null') }
|
21
|
-
let(:number) do
|
22
|
-
number_pattern = /[-+]?[0-9]+(\.[0-9]+)?([eE][-+]?[0-9]+)?/
|
23
|
-
Syntax::Literal.new('number', number_pattern)
|
24
|
-
end
|
25
|
-
let(:string) do
|
26
|
-
string_pattern = /"([^\\"]|\\.)*"/
|
27
|
-
Syntax::Literal('string', string_pattern)
|
28
|
-
end
|
29
|
-
let(:lbracket) { Syntax::VerbatimSymbol.new('[') }
|
30
|
-
let(:rbracket) { Syntax::VerbatimSymbol.new(']') }
|
31
|
-
let(:comma) { Syntax::VerbatimSymbol.new(',') }
|
32
|
-
let(:array) { Syntax::NonTerminal.new('Array') }
|
33
|
-
let(:object) { Syntax::NonTerminal.new('Object') }
|
34
|
-
|
35
|
-
let(:array_prod) do
|
36
|
-
Production.new(array, )
|
37
|
-
end
|
38
|
-
=end
|
39
|
-
|
40
|
-
|
41
|
-
# Grammar 1: A very simple language
|
42
|
-
# (based on example in N. Wirth "Compiler Construction" book, p. 6)
|
43
|
-
# S => A.
|
44
|
-
# A => "a" A "c".
|
45
|
-
# A => "b".
|
46
|
-
# Let's create the grammar piece by piece
|
47
|
-
let(:nt_S) { Syntax::NonTerminal.new('S') }
|
48
|
-
let(:nt_A) { Syntax::NonTerminal.new('A') }
|
49
|
-
let(:a_) { Syntax::VerbatimSymbol.new('a') }
|
50
|
-
let(:b_) { Syntax::VerbatimSymbol.new('b') }
|
51
|
-
let(:c_) { Syntax::VerbatimSymbol.new('c') }
|
52
|
-
let(:prod_S) { Syntax::Production.new(nt_S, [nt_A]) }
|
53
|
-
let(:prod_A1) { Syntax::Production.new(nt_A, [a_, nt_A, c_]) }
|
54
|
-
let(:prod_A2) { Syntax::Production.new(nt_A, [b_]) }
|
55
|
-
let(:grammar_abc) { Syntax::Grammar.new([prod_S, prod_A1, prod_A2]) }
|
56
|
-
|
57
|
-
# Helper method that mimicks the output of a tokenizer
|
58
|
-
# for the language specified by grammar_abc
|
59
|
-
def grm1_tokens()
|
60
|
-
tokens = [
|
61
|
-
Token.new('a', a_),
|
62
|
-
Token.new('a', a_),
|
63
|
-
Token.new('b', b_),
|
64
|
-
Token.new('c', c_),
|
65
|
-
Token.new('c', c_)
|
66
|
-
]
|
67
|
-
|
68
|
-
return tokens
|
69
|
-
end
|
70
|
-
|
71
|
-
|
72
|
-
# Grammar 2: A simple arithmetic expression language
|
73
|
-
# (based on example in article on Earley's algorithm in Wikipedia)
|
74
|
-
# P ::= S.
|
75
|
-
# S ::= S "+" M.
|
76
|
-
# S ::= M.
|
77
|
-
# M ::= M "*" M.
|
78
|
-
# M ::= T.
|
79
|
-
# T ::= an integer number token.
|
80
|
-
# Let's create the grammar piece by piece
|
81
|
-
let(:nt_P) { Syntax::NonTerminal.new('P') }
|
82
|
-
let(:nt_M) { Syntax::NonTerminal.new('M') }
|
83
|
-
let(:nt_T) { Syntax::NonTerminal.new('T') }
|
84
|
-
let(:plus) { Syntax::VerbatimSymbol.new('+') }
|
85
|
-
let(:star) { Syntax::VerbatimSymbol.new('*') }
|
86
|
-
let(:integer) do
|
87
|
-
integer_pattern = /[-+]?[0-9]+/ # Decimal notation
|
88
|
-
Syntax::Literal.new('integer', integer_pattern)
|
89
|
-
end
|
90
|
-
let(:prod_P) { Syntax::Production.new(nt_P, [nt_S]) }
|
91
|
-
let(:prod_S1) { Syntax::Production.new(nt_S, [nt_S, plus, nt_M]) }
|
92
|
-
let(:prod_S2) { Syntax::Production.new(nt_S, [nt_M]) }
|
93
|
-
let(:prod_M1) { Syntax::Production.new(nt_M, [nt_M, star, nt_T]) }
|
94
|
-
let(:prod_M2) { Syntax::Production.new(nt_M, [nt_T]) }
|
95
|
-
let(:prod_T) { Syntax::Production.new(nt_T, [integer]) }
|
96
|
-
let(:grammar_expr) do
|
97
|
-
all_prods = [prod_P, prod_S1, prod_S2, prod_M1, prod_M2, prod_T]
|
98
|
-
Syntax::Grammar.new(all_prods)
|
99
|
-
end
|
100
|
-
|
101
|
-
# Helper method that mimicks the output of a tokenizer
|
102
|
-
# for the language specified by grammar_expr
|
103
|
-
def grm2_tokens()
|
104
|
-
tokens = [
|
105
|
-
Token.new('2', integer),
|
106
|
-
Token.new('+', plus),
|
107
|
-
Token.new('3', integer),
|
108
|
-
Token.new('*', star),
|
109
|
-
Token.new('4', integer)
|
110
|
-
]
|
111
|
-
|
112
|
-
return tokens
|
113
|
-
end
|
114
|
-
|
115
|
-
|
116
|
-
# Default instantiation rule
|
117
|
-
subject { EarleyParser.new(grammar_abc) }
|
118
|
-
|
119
|
-
context 'Initialization:' do
|
120
|
-
it 'should be created with a grammar' do
|
121
|
-
expect { EarleyParser.new(grammar_abc) }.not_to raise_error
|
122
|
-
expect { EarleyParser.new(grammar_expr) }.not_to raise_error
|
123
|
-
end
|
124
|
-
|
125
|
-
it 'should know its grammar' do
|
126
|
-
expect(subject.grammar).to eq(grammar_abc)
|
127
|
-
end
|
128
|
-
|
129
|
-
it 'should know its dotted items' do
|
130
|
-
expect(subject.dotted_items.size).to eq(8)
|
131
|
-
end
|
132
|
-
|
133
|
-
it 'should have its start mapping initialized' do
|
134
|
-
expect(subject.start_mapping.size).to eq(2)
|
135
|
-
|
136
|
-
start_items_S = subject.start_mapping[nt_S]
|
137
|
-
expect(start_items_S.size).to eq(1)
|
138
|
-
expect(start_items_S[0].production).to eq(prod_S)
|
139
|
-
|
140
|
-
start_items_A = subject.start_mapping[nt_A]
|
141
|
-
expect(start_items_A.size).to eq(2)
|
142
|
-
|
143
|
-
# Assuming that dotted_items are created in same order
|
144
|
-
# than production in grammar.
|
145
|
-
expect(start_items_A[0].production).to eq(prod_A1)
|
146
|
-
expect(start_items_A[1].production).to eq(prod_A2)
|
147
|
-
end
|
148
|
-
|
149
|
-
it 'should have its next mapping initialized' do
|
150
|
-
expect(subject.next_mapping.size).to eq(5)
|
151
|
-
end
|
152
|
-
end # context
|
153
|
-
|
154
|
-
context 'Parsing: ' do
|
155
|
-
# Helper method. Compare the data from all the parse states
|
156
|
-
# of a given StateSet with an array of expectation string.
|
157
|
-
def compare_state_texts(aStateSet, expectations)
|
158
|
-
(0...expectations.size).each do |i|
|
159
|
-
expect(aStateSet.states[i].to_s).to eq(expectations[i])
|
160
|
-
end
|
161
|
-
end
|
162
|
-
|
163
|
-
it 'should parse a valid simple input' do
|
164
|
-
parse_result = subject.parse(grm1_tokens)
|
165
|
-
expect(parse_result.success?).to eq(true)
|
166
|
-
expect(parse_result.ambiguous?).to eq(false)
|
167
|
-
|
168
|
-
######################
|
169
|
-
# Expectation chart[0]:
|
170
|
-
expected = [
|
171
|
-
'S => . A | 0', # start rule
|
172
|
-
"A => . 'a' A 'c' | 0", # predict from 0
|
173
|
-
"A => . 'b' | 0" # predict from 0
|
174
|
-
]
|
175
|
-
compare_state_texts(parse_result.chart[0], expected)
|
176
|
-
|
177
|
-
######################
|
178
|
-
# Expectation chart[1]:
|
179
|
-
expected = [
|
180
|
-
"A => 'a' . A 'c' | 0", # scan from S(0) 1
|
181
|
-
"A => . 'a' A 'c' | 1", # predict from 0
|
182
|
-
"A => . 'b' | 1" # predict from 0
|
183
|
-
]
|
184
|
-
state_set_1 = parse_result.chart[1]
|
185
|
-
expect(state_set_1.states.size).to eq(3)
|
186
|
-
compare_state_texts(state_set_1, expected)
|
187
|
-
|
188
|
-
######################
|
189
|
-
# Expectation chart[2]:
|
190
|
-
expected = [
|
191
|
-
"A => 'a' . A 'c' | 1", # scan from S(0) 1
|
192
|
-
"A => . 'a' A 'c' | 2", # predict from 0
|
193
|
-
"A => . 'b' | 2" # predict from 0
|
194
|
-
]
|
195
|
-
state_set_2 = parse_result.chart[2]
|
196
|
-
expect(state_set_2.states.size).to eq(3)
|
197
|
-
compare_state_texts(state_set_2, expected)
|
198
|
-
|
199
|
-
######################
|
200
|
-
# Expectation chart[3]:
|
201
|
-
expected = [
|
202
|
-
"A => 'b' . | 2", # scan from S(2) 2
|
203
|
-
"A => 'a' A . 'c' | 1" # complete from 0 and S(2) 0
|
204
|
-
]
|
205
|
-
state_set_3 = parse_result.chart[3]
|
206
|
-
expect(state_set_3.states.size).to eq(2)
|
207
|
-
compare_state_texts(state_set_3, expected)
|
208
|
-
|
209
|
-
|
210
|
-
######################
|
211
|
-
# Expectation chart[4]:
|
212
|
-
expected = [
|
213
|
-
"A => 'a' A 'c' . | 1", # scan from S(3) 1
|
214
|
-
"A => 'a' A . 'c' | 0" # complete from 0 and S(1) 0
|
215
|
-
]
|
216
|
-
state_set_4 = parse_result.chart[4]
|
217
|
-
expect(state_set_4.states.size).to eq(2)
|
218
|
-
compare_state_texts(state_set_4, expected)
|
219
|
-
|
220
|
-
######################
|
221
|
-
# Expectation chart[5]:
|
222
|
-
expected = [
|
223
|
-
"A => 'a' A 'c' . | 0", # scan from S(4) 1
|
224
|
-
'S => A . | 0' # complete from 0 and S(0) 0
|
225
|
-
]
|
226
|
-
state_set_5 = parse_result.chart[5]
|
227
|
-
expect(state_set_5.states.size).to eq(2)
|
228
|
-
compare_state_texts(state_set_5, expected)
|
229
|
-
end
|
230
|
-
|
231
|
-
it 'should trace a parse with level 1' do
|
232
|
-
# Substitute temporarily $stdout by a StringIO
|
233
|
-
prev_ostream = $stdout
|
234
|
-
$stdout = StringIO.new('', 'w')
|
235
|
-
|
236
|
-
trace_level = 1
|
237
|
-
subject.parse(grm1_tokens, trace_level)
|
238
|
-
expectations = <<-SNIPPET
|
239
|
-
['a', 'a', 'b', 'c', 'c']
|
240
|
-
|. a . a . b . c . c .|
|
241
|
-
|> . . . . .| [0:0] S => . A
|
242
|
-
|> . . . . .| [0:0] A => . 'a' A 'c'
|
243
|
-
|> . . . . .| [0:0] A => . 'b'
|
244
|
-
|[---] . . . .| [0:1] A => 'a' . A 'c'
|
245
|
-
|. > . . . .| [1:1] A => . 'a' A 'c'
|
246
|
-
|. > . . . .| [1:1] A => . 'b'
|
247
|
-
|. [---] . . .| [1:2] A => 'a' . A 'c'
|
248
|
-
|. . > . . .| [2:2] A => . 'a' A 'c'
|
249
|
-
|. . > . . .| [2:2] A => . 'b'
|
250
|
-
|. . [---] . .| [2:3] A => 'b' .
|
251
|
-
|. [-------> . .| [1:3] A => 'a' A . 'c'
|
252
|
-
|. . . [---] .| [3:4] A => 'a' A 'c' .
|
253
|
-
|[---------------> .| [0:4] A => 'a' A . 'c'
|
254
|
-
|. . . . [---]| [4:5] A => 'a' A 'c' .
|
255
|
-
|[===================]| [0:5] S => A .
|
256
|
-
SNIPPET
|
257
|
-
expect($stdout.string).to eq(expectations)
|
258
|
-
|
259
|
-
# Restore standard ouput stream
|
260
|
-
$stdout = prev_ostream
|
261
|
-
end
|
262
|
-
|
263
|
-
it 'should parse a valid simple expression' do
|
264
|
-
instance = EarleyParser.new(grammar_expr)
|
265
|
-
parse_result = instance.parse(grm2_tokens)
|
266
|
-
expect(parse_result.success?).to eq(true)
|
267
|
-
expect(parse_result.ambiguous?).to eq(false)
|
268
|
-
|
269
|
-
###################### S(0): . 2 + 3 * 4
|
270
|
-
# Expectation chart[0]:
|
271
|
-
expected = [
|
272
|
-
'P => . S | 0', # start rule
|
273
|
-
"S => . S '+' M | 0", # predict from (1)
|
274
|
-
'S => . M | 0', # predict from (1)
|
275
|
-
"M => . M '*' T | 0", # predict from (4)
|
276
|
-
'M => . T | 0', # predict from (4)
|
277
|
-
'T => . integer | 0' # predict from (4)
|
278
|
-
]
|
279
|
-
compare_state_texts(parse_result.chart[0], expected)
|
280
|
-
|
281
|
-
|
282
|
-
###################### S(1): 2 . + 3 * 4
|
283
|
-
# Expectation chart[1]:
|
284
|
-
expected = [
|
285
|
-
'T => integer . | 0', # scan from S(0) 6
|
286
|
-
'M => T . | 0', # complete from (1) and S(0) 5
|
287
|
-
'S => M . | 0', # complete from (2) and S(0) 3
|
288
|
-
"M => M . '*' T | 0", # complete from (2) and S(0) 4
|
289
|
-
'P => S . | 0', # complete from (4) and S(0) 1
|
290
|
-
"S => S . '+' M | 0" # complete from (4) and S(0) 2
|
291
|
-
]
|
292
|
-
compare_state_texts(parse_result.chart[1], expected)
|
293
|
-
|
294
|
-
|
295
|
-
###################### S(2): 2 + . 3 * 4
|
296
|
-
# Expectation chart[2]:
|
297
|
-
expected = [
|
298
|
-
"S => S '+' . M | 0", # scan from S(1) 6
|
299
|
-
"M => . M '*' T | 2", # predict from (1)
|
300
|
-
'M => . T | 2', # predict from (1)
|
301
|
-
'T => . integer | 2' # predict from (3)
|
302
|
-
]
|
303
|
-
compare_state_texts(parse_result.chart[2], expected)
|
304
|
-
|
305
|
-
|
306
|
-
###################### S(3): 2 + 3 . * 4
|
307
|
-
# Expectation chart[3]:
|
308
|
-
expected = [
|
309
|
-
'T => integer . | 2', # scan from S(2) 4
|
310
|
-
'M => T . | 2', # complete from (1) and S(2) 3
|
311
|
-
"S => S '+' M . | 0", # complete from (1) and S(2) 1
|
312
|
-
"M => M . '*' T | 2", # complete from (2) and S(2) 2
|
313
|
-
'P => S . | 0' # complete from (4) and S(0) 1
|
314
|
-
]
|
315
|
-
compare_state_texts(parse_result.chart[3], expected)
|
316
|
-
|
317
|
-
###################### S(4): 2 + 3 * . 4
|
318
|
-
# Expectation chart[4]:
|
319
|
-
expected = [
|
320
|
-
"M => M '*' . T | 2", # scan from S(3) 4
|
321
|
-
'T => . integer | 4' # predict from (1)
|
322
|
-
]
|
323
|
-
compare_state_texts(parse_result.chart[4], expected)
|
324
|
-
|
325
|
-
###################### S(5): 2 + 3 * 4 .
|
326
|
-
# Expectation chart[5]:
|
327
|
-
expected = [
|
328
|
-
'T => integer . | 4', # scan from S(4) 2
|
329
|
-
"M => M '*' T . | 2", # complete from (1) and S(4) 1
|
330
|
-
"S => S '+' M . | 0", # complete from (2) and S(2) 1
|
331
|
-
"M => M . '*' T | 2", # complete from (2) and S(2) 2
|
332
|
-
'P => S . | 0' # complete from (3) and S(2) 2
|
333
|
-
]
|
334
|
-
compare_state_texts(parse_result.chart[5], expected)
|
335
|
-
end
|
336
|
-
|
337
|
-
it 'should parse a nullable grammar' do
|
338
|
-
# Simple but problematic grammar for the original Earley parser
|
339
|
-
# (based on example in D. Grune, C. Jacobs "Parsing Techniques" book)
|
340
|
-
# Ss => A A 'x';
|
341
|
-
# A => ;
|
342
|
-
t_x = Syntax::VerbatimSymbol.new('x')
|
343
|
-
|
344
|
-
builder = Syntax::GrammarBuilder.new
|
345
|
-
builder.add_terminals(t_x)
|
346
|
-
builder.add_production('Ss' => %w(A A x))
|
347
|
-
builder.add_production('A' => [])
|
348
|
-
tokens = [ Token.new('x', t_x) ]
|
349
|
-
|
350
|
-
instance = EarleyParser.new(builder.grammar)
|
351
|
-
expect { instance.parse(tokens) }.not_to raise_error
|
352
|
-
parse_result = instance.parse(tokens)
|
353
|
-
expect(parse_result.success?).to eq(true)
|
354
|
-
###################### S(0): . x
|
355
|
-
# Expectation chart[0]:
|
356
|
-
expected = [
|
357
|
-
"Ss => . A A 'x' | 0", # Start rule
|
358
|
-
'A => . | 0', # predict from (1)
|
359
|
-
"Ss => A . A 'x' | 0", # modified predict from (1)
|
360
|
-
"Ss => A A . 'x' | 0" # modified predict from (1)
|
361
|
-
]
|
362
|
-
compare_state_texts(parse_result.chart[0], expected)
|
363
|
-
|
364
|
-
###################### S(1): x .
|
365
|
-
# Expectation chart[1]:
|
366
|
-
expected = [
|
367
|
-
"Ss => A A 'x' . | 0" # scan from S(0) 4
|
368
|
-
]
|
369
|
-
compare_state_texts(parse_result.chart[1], expected)
|
370
|
-
end
|
371
|
-
|
372
|
-
it 'should parse an ambiguous grammar (I)' do
|
373
|
-
# Grammar 3: A ambiguous arithmetic expression language
|
374
|
-
# (based on example in article on Earley's algorithm in Wikipedia)
|
375
|
-
# P => S.
|
376
|
-
# S => S "+" S.
|
377
|
-
# S => S "*" S.
|
378
|
-
# S => L.
|
379
|
-
# L => an integer number token.
|
380
|
-
t_int = Syntax::Literal.new('integer', /[-+]?\d+/)
|
381
|
-
t_plus = Syntax::VerbatimSymbol.new('+')
|
382
|
-
t_star = Syntax::VerbatimSymbol.new('*')
|
383
|
-
|
384
|
-
builder = Syntax::GrammarBuilder.new
|
385
|
-
builder.add_terminals(t_int, t_plus, t_star)
|
386
|
-
builder.add_production('P' => 'S')
|
387
|
-
builder.add_production('S' => %w(S + S))
|
388
|
-
builder.add_production('S' => %w(S * S))
|
389
|
-
builder.add_production('S' => 'L')
|
390
|
-
builder.add_production('L' => 'integer')
|
391
|
-
tokens = [
|
392
|
-
Token.new('2', t_int),
|
393
|
-
Token.new('+', t_plus),
|
394
|
-
Token.new('3', t_int),
|
395
|
-
Token.new('*', t_star),
|
396
|
-
Token.new('4', t_int)
|
397
|
-
]
|
398
|
-
instance = EarleyParser.new(builder.grammar)
|
399
|
-
expect { instance.parse(tokens) }.not_to raise_error
|
400
|
-
parse_result = instance.parse(tokens)
|
401
|
-
expect(parse_result.success?).to eq(true)
|
402
|
-
expect(parse_result.ambiguous?).to eq(true)
|
403
|
-
|
404
|
-
###################### S(0): . 2 + 3 * 4
|
405
|
-
# Expectation chart[0]:
|
406
|
-
expected = [
|
407
|
-
'P => . S | 0', # Start rule
|
408
|
-
"S => . S '+' S | 0", # predict from (1)
|
409
|
-
"S => . S '*' S | 0", # predict from (1)
|
410
|
-
'S => . L | 0', # predict from (1)
|
411
|
-
'L => . integer | 0' # predict from (4)
|
412
|
-
]
|
413
|
-
compare_state_texts(parse_result.chart[0], expected)
|
414
|
-
|
415
|
-
###################### S(1): 2 . + 3 * 4
|
416
|
-
# Expectation chart[1]:
|
417
|
-
expected = [
|
418
|
-
'L => integer . | 0', # scan from S(0) 4
|
419
|
-
'S => L . | 0', # complete from (1) and S(0) 4
|
420
|
-
'P => S . | 0', # complete from (2) and S(0) 1
|
421
|
-
"S => S . '+' S | 0", # complete from (2) and S(0) 2
|
422
|
-
"S => S . '*' S | 0", # complete from (2) and S(0) 3
|
423
|
-
]
|
424
|
-
compare_state_texts(parse_result.chart[1], expected)
|
425
|
-
|
426
|
-
###################### S(2): 2 + . 3 * 4
|
427
|
-
# Expectation chart[2]:
|
428
|
-
expected = [
|
429
|
-
"S => S '+' . S | 0", # scan from S(1) 4
|
430
|
-
"S => . S '+' S | 2", # predict from (1)
|
431
|
-
"S => . S '*' S | 2", # predict from (1)
|
432
|
-
'S => . L | 2', # predict from (1)
|
433
|
-
'L => . integer | 2' # predict from (4)
|
434
|
-
]
|
435
|
-
compare_state_texts(parse_result.chart[2], expected)
|
436
|
-
|
437
|
-
###################### S(3): 2 + 3 . * 4
|
438
|
-
# Expectation chart[3]:
|
439
|
-
expected = [
|
440
|
-
'L => integer . | 2', # scan from S(2) 5
|
441
|
-
'S => L . | 2', # complete from (1) and S(2) 4
|
442
|
-
"S => S '+' S . | 0", # complete from (2) and S(2) 1
|
443
|
-
"S => S . '+' S | 2", # complete from (2) and S(2) 2
|
444
|
-
"S => S . '*' S | 2", # complete from (2) and S(2) 3
|
445
|
-
'P => S . | 0', # complete from (2) and S(0) 1
|
446
|
-
"S => S . '+' S | 0", # complete from (2) and S(0) 2
|
447
|
-
"S => S . '*' S | 0", # complete from (2) and S(0) 3
|
448
|
-
]
|
449
|
-
compare_state_texts(parse_result.chart[3], expected)
|
450
|
-
|
451
|
-
###################### S(4): 2 + 3 * . 4
|
452
|
-
# Expectation chart[4]:
|
453
|
-
expected = [
|
454
|
-
"S => S '*' . S | 2", # scan from S(3) 5
|
455
|
-
"S => S '*' . S | 0", # scan from S(3) 8
|
456
|
-
"S => . S '+' S | 4", # predict from (1)
|
457
|
-
"S => . S '*' S | 4", # predict from (1)
|
458
|
-
'S => . L | 4', # predict from (1)
|
459
|
-
'L => . integer | 4' # predict from (4)
|
460
|
-
]
|
461
|
-
compare_state_texts(parse_result.chart[4], expected)
|
462
|
-
|
463
|
-
###################### S(5): 2 + 3 * 4 .
|
464
|
-
# Expectation chart[5]:
|
465
|
-
expected = [
|
466
|
-
'L => integer . | 4', # scan from S(4) 6
|
467
|
-
'S => L . | 4', # complete from (1) and S(4) 5
|
468
|
-
"S => S '*' S . | 2", # complete from (2) and S(4) 1
|
469
|
-
"S => S '*' S . | 0", # complete from (2) and S(4) 2
|
470
|
-
"S => S . '+' S | 4", # complete from (2) and S(4) 3
|
471
|
-
"S => S . '*' S | 4", # complete from (2) and S(4) 4
|
472
|
-
"S => S '+' S . | 0", # complete from (2) and S(2) 1
|
473
|
-
"S => S . '+' S | 2", # complete from (2) and S(2) 2
|
474
|
-
"S => S . '*' S | 2", # complete from (2) and S(2) 3
|
475
|
-
'P => S . | 0', # complete from (2) and S(0) 1
|
476
|
-
"S => S . '+' S | 0", # complete from (2) and S(0) 2
|
477
|
-
"S => S . '*' S | 0" # complete from (2) and S(0) 3
|
478
|
-
]
|
479
|
-
compare_state_texts(parse_result.chart[5], expected)
|
480
|
-
end
|
481
|
-
|
482
|
-
it 'should parse an ambiguous grammar (II)' do
|
483
|
-
extend(AmbiguousGrammarHelper)
|
484
|
-
grammar = grammar_builder.grammar
|
485
|
-
instance = EarleyParser.new(grammar)
|
486
|
-
tokens = tokenize('abc + def + ghi', grammar)
|
487
|
-
expect { instance.parse(tokens) }.not_to raise_error
|
488
|
-
parse_result = instance.parse(tokens)
|
489
|
-
expect(parse_result.success?).to eq(true)
|
490
|
-
expect(parse_result.ambiguous?).to eq(true)
|
491
|
-
|
492
|
-
###################### S(0): . abc + def + ghi
|
493
|
-
# Expectation chart[0]:
|
494
|
-
expected = [
|
495
|
-
'S => . E | 0', # Start rule
|
496
|
-
'E => . E + E | 0', # predict from (1)
|
497
|
-
'E => . id | 0' # predict from (1)
|
498
|
-
]
|
499
|
-
compare_state_texts(parse_result.chart[0], expected)
|
500
|
-
|
501
|
-
###################### S(1): abc . + def + ghi
|
502
|
-
# Expectation chart[1]:
|
503
|
-
expected = [
|
504
|
-
'E => id . | 0', # scan from S(0) 3
|
505
|
-
'S => E . | 0', # complete from (1) and S(0) 2
|
506
|
-
'E => E . + E | 0' # complete from (1) and S(0) 3
|
507
|
-
]
|
508
|
-
compare_state_texts(parse_result.chart[1], expected)
|
509
|
-
|
510
|
-
###################### S(2): abc + . def + ghi
|
511
|
-
# Expectation chart[2]:
|
512
|
-
expected = [
|
513
|
-
'E => E + . E | 0', # Scan from S(1) 3
|
514
|
-
'E => . E + E | 2', # predict from (1)
|
515
|
-
'E => . id | 2' # predict from (1)
|
516
|
-
]
|
517
|
-
compare_state_texts(parse_result.chart[2], expected)
|
518
|
-
|
519
|
-
###################### S(3): abc + def . + ghi
|
520
|
-
# Expectation chart[3]:
|
521
|
-
expected = [
|
522
|
-
'E => id . | 2', # Scan from S(2) 3
|
523
|
-
'E => E + E . | 0', # complete from (1) and S(2) 1
|
524
|
-
'E => E . + E | 2', # complete from (1) and S(2) 2
|
525
|
-
'S => E . | 0', # complete from (1) and S(0) 1
|
526
|
-
'E => E . + E | 0' # complete from (1) and S(0) 2
|
527
|
-
]
|
528
|
-
compare_state_texts(parse_result.chart[3], expected)
|
529
|
-
|
530
|
-
###################### S(4): abc + def + . ghi
|
531
|
-
# Expectation chart[4]:
|
532
|
-
expected = [
|
533
|
-
'E => E + . E | 2', # Scan from S(3) 3
|
534
|
-
'E => E + . E | 0', # Scan from S(3) 5
|
535
|
-
'E => . E + E | 4', # predict from (1)
|
536
|
-
'E => . id | 4' # predict from (1)
|
537
|
-
]
|
538
|
-
compare_state_texts(parse_result.chart[4], expected)
|
539
|
-
|
540
|
-
###################### S(5): abc + def + ghi .
|
541
|
-
# Expectation chart[5]:
|
542
|
-
expected = [
|
543
|
-
'E => id . | 4', # Scan from S(4) 4
|
544
|
-
'E => E + E . | 2', # complete from (1) and S(4) 1
|
545
|
-
'E => E + E . | 0', # complete from (1) and S(4) 2
|
546
|
-
'E => E . + E | 4', # complete from (1) and S(4) 3
|
547
|
-
'E => E . + E | 2', # complete from (1) and S(2) 2
|
548
|
-
'S => E . | 0', # complete from (1) and S(0) 1
|
549
|
-
'E => E . + E | 0', # complete from (1) and S(0) 2
|
550
|
-
]
|
551
|
-
compare_state_texts(parse_result.chart[5], expected)
|
552
|
-
end
|
553
|
-
|
554
|
-
|
555
|
-
|
556
|
-
it 'should parse an invalid simple input' do
|
557
|
-
# Parse an erroneous input (b is missing)
|
558
|
-
wrong = [
|
559
|
-
Token.new('a', a_),
|
560
|
-
Token.new('a', a_),
|
561
|
-
Token.new('c', c_),
|
562
|
-
Token.new('c', c_)
|
563
|
-
]
|
564
|
-
err_msg = <<-MSG
|
565
|
-
Syntax error at or near token 3>>>c<<<:
|
566
|
-
Expected one of: ['a', 'b'], found a 'c' instead.
|
567
|
-
MSG
|
568
|
-
err = StandardError
|
569
|
-
expect { subject.parse(wrong) }
|
570
|
-
.to raise_error(err, err_msg.chomp)
|
571
|
-
=begin
|
572
|
-
# This code is never reached (because of exception)
|
573
|
-
###################### S(0) == . a a c c
|
574
|
-
# Expectation chart[0]:
|
575
|
-
expected = [
|
576
|
-
'S => . A | 0', # start rule
|
577
|
-
"A => . 'a' A 'c' | 0", # predict from 0
|
578
|
-
"A => . 'b' | 0" # predict from 0
|
579
|
-
]
|
580
|
-
compare_state_texts(parse_result.chart[0], expected)
|
581
|
-
|
582
|
-
###################### S(1) == a . a c c
|
583
|
-
expected = [
|
584
|
-
"A => 'a' . A 'c' | 0", # scan from S(0) 1
|
585
|
-
"A => . 'a' A 'c' | 1", # predict from 0
|
586
|
-
"A => . 'b' | 1" # predict from 0
|
587
|
-
]
|
588
|
-
compare_state_texts(parse_result.chart[1], expected)
|
589
|
-
|
590
|
-
###################### S(2) == a a . c c
|
591
|
-
expected = [
|
592
|
-
"A => 'a' . A 'c' | 1", # scan from S(0) 1
|
593
|
-
"A => . 'a' A 'c' | 2", # predict from 0
|
594
|
-
"A => . 'b' | 2" # predict from 0
|
595
|
-
]
|
596
|
-
compare_state_texts(parse_result.chart[2], expected)
|
597
|
-
|
598
|
-
###################### S(3) == a a c? c
|
599
|
-
state_set_3 = parse_result.chart[3]
|
600
|
-
expect(state_set_3.states).to be_empty # This is an error symptom
|
601
|
-
=end
|
602
|
-
end
|
603
|
-
|
604
|
-
it 'should parse a grammar with nullable nonterminals' do
|
605
|
-
# Grammar 4: A grammar with nullable nonterminal
|
606
|
-
# based on example in "Parsing Techniques" book (D. Grune, C. Jabobs)
|
607
|
-
# Z ::= E.
|
608
|
-
# E ::= E Q F.
|
609
|
-
# E ::= F.
|
610
|
-
# F ::= a.
|
611
|
-
# Q ::= *.
|
612
|
-
# Q ::= /.
|
613
|
-
# Q ::=.
|
614
|
-
t_a = Syntax::VerbatimSymbol.new('a')
|
615
|
-
t_star = Syntax::VerbatimSymbol.new('*')
|
616
|
-
t_slash = Syntax::VerbatimSymbol.new('/')
|
617
|
-
|
618
|
-
builder = Syntax::GrammarBuilder.new
|
619
|
-
builder.add_terminals(t_a, t_star, t_slash)
|
620
|
-
builder.add_production('Z' => 'E')
|
621
|
-
builder.add_production('E' => %w(E Q F))
|
622
|
-
builder.add_production('E' => 'F')
|
623
|
-
builder.add_production('F' => t_a)
|
624
|
-
builder.add_production('Q' => t_star)
|
625
|
-
builder.add_production('Q' => t_slash)
|
626
|
-
builder.add_production('Q' => []) # Empty production
|
627
|
-
tokens = [
|
628
|
-
Token.new('a', t_a),
|
629
|
-
Token.new('a', t_a),
|
630
|
-
Token.new('/', t_slash),
|
631
|
-
Token.new('a', t_a)
|
632
|
-
]
|
633
|
-
|
634
|
-
instance = EarleyParser.new(builder.grammar)
|
635
|
-
expect { instance.parse(tokens) }.not_to raise_error
|
636
|
-
parse_result = instance.parse(tokens)
|
637
|
-
expect(parse_result.success?).to eq(true)
|
638
|
-
|
639
|
-
###################### S(0) == . a a / a
|
640
|
-
# Expectation chart[0]:
|
641
|
-
expected = [
|
642
|
-
'Z => . E | 0', # start rule
|
643
|
-
'E => . E Q F | 0', # predict from (1)
|
644
|
-
'E => . F | 0', # predict from (1)
|
645
|
-
"F => . 'a' | 0" # predict from (3)
|
646
|
-
]
|
647
|
-
compare_state_texts(parse_result.chart[0], expected)
|
648
|
-
|
649
|
-
###################### S(1) == a . a / a
|
650
|
-
# Expectation chart[1]:
|
651
|
-
expected = [
|
652
|
-
"F => 'a' . | 0", # scan from S(0) 4
|
653
|
-
'E => F . | 0', # complete from (1) and S(0) 3
|
654
|
-
'Z => E . | 0', # complete from (2) and S(0) 1
|
655
|
-
'E => E . Q F | 0', # complete from (2) and S(0) 2
|
656
|
-
"Q => . '*' | 1", # Predict from (4)
|
657
|
-
"Q => . '/' | 1", # Predict from (4)
|
658
|
-
'Q => . | 1', # Predict from (4)
|
659
|
-
'E => E Q . F | 0', # Modified predict from (4)
|
660
|
-
"F => . 'a' | 1" # Predict from (8)
|
661
|
-
]
|
662
|
-
compare_state_texts(parse_result.chart[1], expected)
|
663
|
-
|
664
|
-
###################### S(2) == a a . / a
|
665
|
-
# Expectation chart[2]:
|
666
|
-
expected = [
|
667
|
-
"F => 'a' . | 1", # scan from S(1) 9
|
668
|
-
'E => E Q F . | 0', # complete from (1) and S(1) 8
|
669
|
-
'Z => E . | 0', # complete from (1) and S(0) 1
|
670
|
-
'E => E . Q F | 0', # complete from (1) and S(0) 2
|
671
|
-
"Q => . '*' | 2", # Predict from (4)
|
672
|
-
"Q => . '/' | 2", # Predict from (4)
|
673
|
-
'Q => . | 2', # Predict from (4)
|
674
|
-
'E => E Q . F | 0', # Complete from (5) and S(1) 4
|
675
|
-
"F => . 'a' | 2" # Predict from (8)
|
676
|
-
]
|
677
|
-
compare_state_texts(parse_result.chart[2], expected)
|
678
|
-
|
679
|
-
|
680
|
-
###################### S(3) == a a / . a
|
681
|
-
# Expectation chart[3]:
|
682
|
-
expected = [
|
683
|
-
"Q => '/' . | 2", # scan from S(2) 6
|
684
|
-
'E => E Q . F | 0', # complete from (1) and S(1) 4
|
685
|
-
"F => . 'a' | 3" # Predict from (2)
|
686
|
-
]
|
687
|
-
compare_state_texts(parse_result.chart[3], expected)
|
688
|
-
|
689
|
-
|
690
|
-
###################### S(4) == a a / a .
|
691
|
-
# Expectation chart[4]:
|
692
|
-
expected = [
|
693
|
-
"F => 'a' . | 3", # scan from S(3) 3
|
694
|
-
'E => E Q F . | 0', # complete from (1) and S(3) 2
|
695
|
-
'Z => E . | 0', # complete from (2) and S(0) 1
|
696
|
-
'E => E . Q F | 0', # complete from (2) and S(0) 2
|
697
|
-
"Q => . '*' | 4", # Predict from (4)
|
698
|
-
"Q => . '/' | 4", # Predict from (4)
|
699
|
-
'Q => . | 4', # Predict from (4)
|
700
|
-
'E => E Q . F | 0', # Modified predict from (4)
|
701
|
-
"F => . 'a' | 4" # Predict from (8)
|
702
|
-
]
|
703
|
-
compare_state_texts(parse_result.chart[4], expected)
|
704
|
-
end
|
705
|
-
end # context
|
706
|
-
end # describe
|
707
|
-
end # module
|
708
|
-
end # module
|
709
|
-
|
710
|
-
# End of file
|