rley 0.4.01 → 0.4.02
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/LICENSE.txt +2 -2
- data/README.md +3 -3
- data/examples/NLP/mini_en_demo.rb +1 -1
- data/examples/data_formats/JSON/JSON_demo.rb +1 -0
- data/examples/data_formats/JSON/JSON_lexer.rb +4 -4
- data/examples/general/calc/calc_lexer.rb +2 -2
- data/lib/rley.rb +1 -1
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/formatter/debug.rb +2 -2
- data/lib/rley/formatter/json.rb +4 -4
- data/lib/rley/parse_tree_visitor.rb +9 -9
- data/lib/rley/parser/base_parser.rb +1 -1
- data/lib/rley/parser/gfg_parsing.rb +9 -0
- data/lib/rley/parser/parse_tree_builder.rb +176 -126
- data/lib/rley/parser/parse_tree_factory.rb +57 -0
- data/lib/rley/ptree/non_terminal_node.rb +10 -9
- data/lib/rley/ptree/parse_tree_node.rb +10 -5
- data/lib/rley/ptree/terminal_node.rb +14 -6
- data/lib/rley/sppf/sppf_node.rb +2 -2
- data/lib/rley/{parser → tokens}/token.rb +1 -4
- data/lib/rley/{ptree → tokens}/token_range.rb +1 -1
- data/spec/rley/formatter/debug_spec.rb +16 -16
- data/spec/rley/formatter/json_spec.rb +8 -8
- data/spec/rley/parse_forest_visitor_spec.rb +1 -1
- data/spec/rley/parse_tree_visitor_spec.rb +28 -28
- data/spec/rley/parser/error_reason_spec.rb +3 -3
- data/spec/rley/parser/gfg_chart_spec.rb +2 -2
- data/spec/rley/parser/gfg_earley_parser_spec.rb +2 -2
- data/spec/rley/parser/gfg_parsing_spec.rb +2 -2
- data/spec/rley/parser/groucho_spec.rb +1 -1
- data/spec/rley/parser/parse_tracer_spec.rb +2 -2
- data/spec/rley/parser/parse_tree_builder_spec.rb +213 -140
- data/spec/rley/parser/parse_tree_factory_spec.rb +85 -0
- data/spec/rley/parser/parse_walker_factory_spec.rb +11 -10
- data/spec/rley/ptree/non_terminal_node_spec.rb +23 -20
- data/spec/rley/ptree/terminal_node_spec.rb +7 -12
- data/spec/rley/sppf/alternative_node_spec.rb +2 -2
- data/spec/rley/sppf/non_terminal_node_spec.rb +2 -2
- data/spec/rley/support/ambiguous_grammar_helper.rb +2 -2
- data/spec/rley/support/expectation_helper.rb +1 -1
- data/spec/rley/support/grammar_ambig01_helper.rb +2 -2
- data/spec/rley/support/grammar_b_expr_helper.rb +2 -2
- data/spec/rley/support/grammar_helper.rb +3 -3
- data/spec/rley/support/grammar_l0_helper.rb +2 -2
- data/spec/rley/support/grammar_pb_helper.rb +2 -2
- data/spec/rley/{ptree → tokens}/token_range_spec.rb +2 -2
- data/spec/rley/{parser → tokens}/token_spec.rb +2 -2
- metadata +11 -17
- data/lib/rley/parser/chart.rb +0 -82
- data/lib/rley/parser/earley_parser.rb +0 -203
- data/lib/rley/parser/parsing.rb +0 -265
- data/spec/rley/parser/chart_spec.rb +0 -120
- data/spec/rley/parser/earley_parser_spec.rb +0 -710
- data/spec/rley/parser/parsing_spec.rb +0 -408
--- data/spec/rley/parser/chart_spec.rb
+++ /dev/null
@@ -1,120 +0,0 @@
-require_relative '../../spec_helper'
-require 'stringio'
-
-require_relative '../../../lib/rley/syntax/terminal'
-require_relative '../../../lib/rley/syntax/non_terminal'
-require_relative '../../../lib/rley/syntax/production'
-require_relative '../../../lib/rley/parser/token'
-require_relative '../../../lib/rley/parser/dotted_item'
-require_relative '../../../lib/rley/parser/parse_state'
-require_relative '../../../lib/rley/parser/parse_tracer'
-
-# Load the class under test
-require_relative '../../../lib/rley/parser/chart'
-
-module Rley # Open this namespace to avoid module qualifier prefixes
-  module Parser # Open this namespace to avoid module qualifier prefixes
-    describe Chart do
-      let(:count_token) { 20 }
-      let(:sample_start_symbol) { double('fake_non-terminal') }
-      let(:dotted_rule) { double('fake-dotted-item') }
-
-      let(:output) { StringIO.new('', 'w') }
-
-      let(:token_seq) do
-        literals = %w(I saw John with a dog)
-        literals.map { |lexeme| Token.new(lexeme, nil) }
-      end
-
-      let(:sample_tracer) { ParseTracer.new(0, output, token_seq) }
-
-      # Default instantiation rule
-      subject do
-        allow(dotted_rule).to receive(:lhs).and_return(sample_start_symbol)
-        Chart.new([ dotted_rule ], count_token, sample_tracer)
-      end
-
-      context 'Initialization:' do
-        it 'should be created with start dotted rule, token count, tracer' do
-          allow(dotted_rule).to receive(:lhs).and_return(sample_start_symbol)
-          expect { Chart.new([ dotted_rule ], count_token, sample_tracer) }
-            .not_to raise_error
-        end
-
-        it 'should have a seed state in first state_set' do
-          seed_state = ParseState.new(dotted_rule, 0)
-          expect(subject[0].states).to eq([seed_state])
-
-          # Shorthand syntax
-          expect(subject[0].first).to eq(seed_state)
-        end
-
-        it 'should have the correct state_set count' do
-          expect(subject.state_sets.size).to eq(count_token + 1)
-        end
-
-        it 'should know the start dotted rule' do
-          expect(subject.start_dotted_rule).to eq(dotted_rule)
-        end
-
-        it 'should know the start symbol' do
-          expect(subject.start_symbol).to eq(sample_start_symbol)
-        end
-
-        it 'should have at least one non-empty state set' do
-          expect(subject.last_index).to eq(0)
-        end
-
-        it 'should reference a tracer' do
-          expect(subject.tracer).to eq(sample_tracer)
-        end
-      end # context
-
-      context 'Provided services:' do
-        let(:t_a) { Syntax::Terminal.new('A') }
-        let(:t_b) { Syntax::Terminal.new('B') }
-        let(:t_c) { Syntax::Terminal.new('C') }
-        let(:nt_sentence) { Syntax::NonTerminal.new('sentence') }
-
-        let(:sample_prod) do
-          Syntax::Production.new(nt_sentence, [t_a, t_b, t_c])
-        end
-
-        let(:origin_val) { 3 }
-        let(:dotted_rule) { DottedItem.new(sample_prod, 2) }
-        let(:complete_rule) { DottedItem.new(sample_prod, 3) }
-        let(:sample_parse_state) { ParseState.new(dotted_rule, origin_val) }
-        let(:sample_tracer) { ParseTracer.new(1, output, token_seq) }
-
-        # Factory method.
-        def parse_state(origin, aDottedRule)
-          ParseState.new(aDottedRule, origin)
-        end
-
-
-        it 'should trace its initialization' do
-          subject[0] # Force constructor call here
-          expectation = <<-SNIPPET
-['I', 'saw', 'John', 'with', 'a', 'dog']
-|. I . saw . John . with . a . dog .|
-|> . . . . . .| [0:0] sentence => A B . C
-SNIPPET
-          expect(output.string).to eq(expectation)
-        end
-
-        it 'should trace parse state pushing' do
-          subject[0] # Force constructor call here
-          output.string = ''
-
-          subject.push_state(dotted_rule, 3, 5, :prediction)
-          expectation = <<-SNIPPET
-|. . . > .| [3:5] sentence => A B . C
-SNIPPET
-          expect(output.string).to eq(expectation)
-        end
-      end # context
-    end # describe
-  end # module
-end # module
-
-# End of file
--- data/spec/rley/parser/earley_parser_spec.rb
+++ /dev/null
@@ -1,710 +0,0 @@
-require_relative '../../spec_helper'
-require 'stringio'
-
-require_relative '../../../lib/rley/syntax/verbatim_symbol'
-require_relative '../../../lib/rley/syntax/non_terminal'
-require_relative '../../../lib/rley/syntax/production'
-require_relative '../../../lib/rley/syntax/grammar_builder'
-require_relative '../../../lib/rley/parser/token'
-require_relative '../../../lib/rley/parser/dotted_item'
-require_relative '../support/ambiguous_grammar_helper'
-# Load the class under test
-require_relative '../../../lib/rley/parser/earley_parser'
-
-module Rley # Open this namespace to avoid module qualifier prefixes
-  module Parser # Open this namespace to avoid module qualifier prefixes
-    describe EarleyParser do
-=begin
-      let(:kw_true) { Syntax::VerbatimSymbol.new('true') }
-      let(:kw_false) { Syntax::VerbatimSymbol.new('false') }
-      let(:kw_null) { Syntax::VerbatimSymbol.new('null') }
-      let(:number) do
-        number_pattern = /[-+]?[0-9]+(\.[0-9]+)?([eE][-+]?[0-9]+)?/
-        Syntax::Literal.new('number', number_pattern)
-      end
-      let(:string) do
-        string_pattern = /"([^\\"]|\\.)*"/
-        Syntax::Literal('string', string_pattern)
-      end
-      let(:lbracket) { Syntax::VerbatimSymbol.new('[') }
-      let(:rbracket) { Syntax::VerbatimSymbol.new(']') }
-      let(:comma) { Syntax::VerbatimSymbol.new(',') }
-      let(:array) { Syntax::NonTerminal.new('Array') }
-      let(:object) { Syntax::NonTerminal.new('Object') }
-
-      let(:array_prod) do
-        Production.new(array, )
-      end
-=end
-
-
-      # Grammar 1: A very simple language
-      # (based on example in N. Wirth "Compiler Construction" book, p. 6)
-      # S => A.
-      # A => "a" A "c".
-      # A => "b".
-      # Let's create the grammar piece by piece
-      let(:nt_S) { Syntax::NonTerminal.new('S') }
-      let(:nt_A) { Syntax::NonTerminal.new('A') }
-      let(:a_) { Syntax::VerbatimSymbol.new('a') }
-      let(:b_) { Syntax::VerbatimSymbol.new('b') }
-      let(:c_) { Syntax::VerbatimSymbol.new('c') }
-      let(:prod_S) { Syntax::Production.new(nt_S, [nt_A]) }
-      let(:prod_A1) { Syntax::Production.new(nt_A, [a_, nt_A, c_]) }
-      let(:prod_A2) { Syntax::Production.new(nt_A, [b_]) }
-      let(:grammar_abc) { Syntax::Grammar.new([prod_S, prod_A1, prod_A2]) }
-
-      # Helper method that mimicks the output of a tokenizer
-      # for the language specified by grammar_abc
-      def grm1_tokens()
-        tokens = [
-          Token.new('a', a_),
-          Token.new('a', a_),
-          Token.new('b', b_),
-          Token.new('c', c_),
-          Token.new('c', c_)
-        ]
-
-        return tokens
-      end
-
-
-      # Grammar 2: A simple arithmetic expression language
-      # (based on example in article on Earley's algorithm in Wikipedia)
-      # P ::= S.
-      # S ::= S "+" M.
-      # S ::= M.
-      # M ::= M "*" M.
-      # M ::= T.
-      # T ::= an integer number token.
-      # Let's create the grammar piece by piece
-      let(:nt_P) { Syntax::NonTerminal.new('P') }
-      let(:nt_M) { Syntax::NonTerminal.new('M') }
-      let(:nt_T) { Syntax::NonTerminal.new('T') }
-      let(:plus) { Syntax::VerbatimSymbol.new('+') }
-      let(:star) { Syntax::VerbatimSymbol.new('*') }
-      let(:integer) do
-        integer_pattern = /[-+]?[0-9]+/ # Decimal notation
-        Syntax::Literal.new('integer', integer_pattern)
-      end
-      let(:prod_P) { Syntax::Production.new(nt_P, [nt_S]) }
-      let(:prod_S1) { Syntax::Production.new(nt_S, [nt_S, plus, nt_M]) }
-      let(:prod_S2) { Syntax::Production.new(nt_S, [nt_M]) }
-      let(:prod_M1) { Syntax::Production.new(nt_M, [nt_M, star, nt_T]) }
-      let(:prod_M2) { Syntax::Production.new(nt_M, [nt_T]) }
-      let(:prod_T) { Syntax::Production.new(nt_T, [integer]) }
-      let(:grammar_expr) do
-        all_prods = [prod_P, prod_S1, prod_S2, prod_M1, prod_M2, prod_T]
-        Syntax::Grammar.new(all_prods)
-      end
-
-      # Helper method that mimicks the output of a tokenizer
-      # for the language specified by grammar_expr
-      def grm2_tokens()
-        tokens = [
-          Token.new('2', integer),
-          Token.new('+', plus),
-          Token.new('3', integer),
-          Token.new('*', star),
-          Token.new('4', integer)
-        ]
-
-        return tokens
-      end
-
-
-      # Default instantiation rule
-      subject { EarleyParser.new(grammar_abc) }
-
-      context 'Initialization:' do
-        it 'should be created with a grammar' do
-          expect { EarleyParser.new(grammar_abc) }.not_to raise_error
-          expect { EarleyParser.new(grammar_expr) }.not_to raise_error
-        end
-
-        it 'should know its grammar' do
-          expect(subject.grammar).to eq(grammar_abc)
-        end
-
-        it 'should know its dotted items' do
-          expect(subject.dotted_items.size).to eq(8)
-        end
-
-        it 'should have its start mapping initialized' do
-          expect(subject.start_mapping.size).to eq(2)
-
-          start_items_S = subject.start_mapping[nt_S]
-          expect(start_items_S.size).to eq(1)
-          expect(start_items_S[0].production).to eq(prod_S)
-
-          start_items_A = subject.start_mapping[nt_A]
-          expect(start_items_A.size).to eq(2)
-
-          # Assuming that dotted_items are created in same order
-          # than production in grammar.
-          expect(start_items_A[0].production).to eq(prod_A1)
-          expect(start_items_A[1].production).to eq(prod_A2)
-        end
-
-        it 'should have its next mapping initialized' do
-          expect(subject.next_mapping.size).to eq(5)
-        end
-      end # context
-
-      context 'Parsing: ' do
-        # Helper method. Compare the data from all the parse states
-        # of a given StateSet with an array of expectation string.
-        def compare_state_texts(aStateSet, expectations)
-          (0...expectations.size).each do |i|
-            expect(aStateSet.states[i].to_s).to eq(expectations[i])
-          end
-        end
-
-        it 'should parse a valid simple input' do
-          parse_result = subject.parse(grm1_tokens)
-          expect(parse_result.success?).to eq(true)
-          expect(parse_result.ambiguous?).to eq(false)
-
-          ######################
-          # Expectation chart[0]:
-          expected = [
-            'S => . A | 0', # start rule
-            "A => . 'a' A 'c' | 0", # predict from 0
-            "A => . 'b' | 0" # predict from 0
-          ]
-          compare_state_texts(parse_result.chart[0], expected)
-
-          ######################
-          # Expectation chart[1]:
-          expected = [
-            "A => 'a' . A 'c' | 0", # scan from S(0) 1
-            "A => . 'a' A 'c' | 1", # predict from 0
-            "A => . 'b' | 1" # predict from 0
-          ]
-          state_set_1 = parse_result.chart[1]
-          expect(state_set_1.states.size).to eq(3)
-          compare_state_texts(state_set_1, expected)
-
-          ######################
-          # Expectation chart[2]:
-          expected = [
-            "A => 'a' . A 'c' | 1", # scan from S(0) 1
-            "A => . 'a' A 'c' | 2", # predict from 0
-            "A => . 'b' | 2" # predict from 0
-          ]
-          state_set_2 = parse_result.chart[2]
-          expect(state_set_2.states.size).to eq(3)
-          compare_state_texts(state_set_2, expected)
-
-          ######################
-          # Expectation chart[3]:
-          expected = [
-            "A => 'b' . | 2", # scan from S(2) 2
-            "A => 'a' A . 'c' | 1" # complete from 0 and S(2) 0
-          ]
-          state_set_3 = parse_result.chart[3]
-          expect(state_set_3.states.size).to eq(2)
-          compare_state_texts(state_set_3, expected)
-
-
-          ######################
-          # Expectation chart[4]:
-          expected = [
-            "A => 'a' A 'c' . | 1", # scan from S(3) 1
-            "A => 'a' A . 'c' | 0" # complete from 0 and S(1) 0
-          ]
-          state_set_4 = parse_result.chart[4]
-          expect(state_set_4.states.size).to eq(2)
-          compare_state_texts(state_set_4, expected)
-
-          ######################
-          # Expectation chart[5]:
-          expected = [
-            "A => 'a' A 'c' . | 0", # scan from S(4) 1
-            'S => A . | 0' # complete from 0 and S(0) 0
-          ]
-          state_set_5 = parse_result.chart[5]
-          expect(state_set_5.states.size).to eq(2)
-          compare_state_texts(state_set_5, expected)
-        end
-
-        it 'should trace a parse with level 1' do
-          # Substitute temporarily $stdout by a StringIO
-          prev_ostream = $stdout
-          $stdout = StringIO.new('', 'w')
-
-          trace_level = 1
-          subject.parse(grm1_tokens, trace_level)
-          expectations = <<-SNIPPET
-['a', 'a', 'b', 'c', 'c']
-|. a . a . b . c . c .|
-|> . . . . .| [0:0] S => . A
-|> . . . . .| [0:0] A => . 'a' A 'c'
-|> . . . . .| [0:0] A => . 'b'
-|[---] . . . .| [0:1] A => 'a' . A 'c'
-|. > . . . .| [1:1] A => . 'a' A 'c'
-|. > . . . .| [1:1] A => . 'b'
-|. [---] . . .| [1:2] A => 'a' . A 'c'
-|. . > . . .| [2:2] A => . 'a' A 'c'
-|. . > . . .| [2:2] A => . 'b'
-|. . [---] . .| [2:3] A => 'b' .
-|. [-------> . .| [1:3] A => 'a' A . 'c'
-|. . . [---] .| [3:4] A => 'a' A 'c' .
-|[---------------> .| [0:4] A => 'a' A . 'c'
-|. . . . [---]| [4:5] A => 'a' A 'c' .
-|[===================]| [0:5] S => A .
-SNIPPET
-          expect($stdout.string).to eq(expectations)
-
-          # Restore standard ouput stream
-          $stdout = prev_ostream
-        end
-
-        it 'should parse a valid simple expression' do
-          instance = EarleyParser.new(grammar_expr)
-          parse_result = instance.parse(grm2_tokens)
-          expect(parse_result.success?).to eq(true)
-          expect(parse_result.ambiguous?).to eq(false)
-
-          ###################### S(0): . 2 + 3 * 4
-          # Expectation chart[0]:
-          expected = [
-            'P => . S | 0', # start rule
-            "S => . S '+' M | 0", # predict from (1)
-            'S => . M | 0', # predict from (1)
-            "M => . M '*' T | 0", # predict from (4)
-            'M => . T | 0', # predict from (4)
-            'T => . integer | 0' # predict from (4)
-          ]
-          compare_state_texts(parse_result.chart[0], expected)
-
-
-          ###################### S(1): 2 . + 3 * 4
-          # Expectation chart[1]:
-          expected = [
-            'T => integer . | 0', # scan from S(0) 6
-            'M => T . | 0', # complete from (1) and S(0) 5
-            'S => M . | 0', # complete from (2) and S(0) 3
-            "M => M . '*' T | 0", # complete from (2) and S(0) 4
-            'P => S . | 0', # complete from (4) and S(0) 1
-            "S => S . '+' M | 0" # complete from (4) and S(0) 2
-          ]
-          compare_state_texts(parse_result.chart[1], expected)
-
-
-          ###################### S(2): 2 + . 3 * 4
-          # Expectation chart[2]:
-          expected = [
-            "S => S '+' . M | 0", # scan from S(1) 6
-            "M => . M '*' T | 2", # predict from (1)
-            'M => . T | 2', # predict from (1)
-            'T => . integer | 2' # predict from (3)
-          ]
-          compare_state_texts(parse_result.chart[2], expected)
-
-
-          ###################### S(3): 2 + 3 . * 4
-          # Expectation chart[3]:
-          expected = [
-            'T => integer . | 2', # scan from S(2) 4
-            'M => T . | 2', # complete from (1) and S(2) 3
-            "S => S '+' M . | 0", # complete from (1) and S(2) 1
-            "M => M . '*' T | 2", # complete from (2) and S(2) 2
-            'P => S . | 0' # complete from (4) and S(0) 1
-          ]
-          compare_state_texts(parse_result.chart[3], expected)
-
-          ###################### S(4): 2 + 3 * . 4
-          # Expectation chart[4]:
-          expected = [
-            "M => M '*' . T | 2", # scan from S(3) 4
-            'T => . integer | 4' # predict from (1)
-          ]
-          compare_state_texts(parse_result.chart[4], expected)
-
-          ###################### S(5): 2 + 3 * 4 .
-          # Expectation chart[5]:
-          expected = [
-            'T => integer . | 4', # scan from S(4) 2
-            "M => M '*' T . | 2", # complete from (1) and S(4) 1
-            "S => S '+' M . | 0", # complete from (2) and S(2) 1
-            "M => M . '*' T | 2", # complete from (2) and S(2) 2
-            'P => S . | 0' # complete from (3) and S(2) 2
-          ]
-          compare_state_texts(parse_result.chart[5], expected)
-        end
-
-        it 'should parse a nullable grammar' do
-          # Simple but problematic grammar for the original Earley parser
-          # (based on example in D. Grune, C. Jacobs "Parsing Techniques" book)
-          # Ss => A A 'x';
-          # A => ;
-          t_x = Syntax::VerbatimSymbol.new('x')
-
-          builder = Syntax::GrammarBuilder.new
-          builder.add_terminals(t_x)
-          builder.add_production('Ss' => %w(A A x))
-          builder.add_production('A' => [])
-          tokens = [ Token.new('x', t_x) ]
-
-          instance = EarleyParser.new(builder.grammar)
-          expect { instance.parse(tokens) }.not_to raise_error
-          parse_result = instance.parse(tokens)
-          expect(parse_result.success?).to eq(true)
-          ###################### S(0): . x
-          # Expectation chart[0]:
-          expected = [
-            "Ss => . A A 'x' | 0", # Start rule
-            'A => . | 0', # predict from (1)
-            "Ss => A . A 'x' | 0", # modified predict from (1)
-            "Ss => A A . 'x' | 0" # modified predict from (1)
-          ]
-          compare_state_texts(parse_result.chart[0], expected)
-
-          ###################### S(1): x .
-          # Expectation chart[1]:
-          expected = [
-            "Ss => A A 'x' . | 0" # scan from S(0) 4
-          ]
-          compare_state_texts(parse_result.chart[1], expected)
-        end
-
-        it 'should parse an ambiguous grammar (I)' do
-          # Grammar 3: A ambiguous arithmetic expression language
-          # (based on example in article on Earley's algorithm in Wikipedia)
-          # P => S.
-          # S => S "+" S.
-          # S => S "*" S.
-          # S => L.
-          # L => an integer number token.
-          t_int = Syntax::Literal.new('integer', /[-+]?\d+/)
-          t_plus = Syntax::VerbatimSymbol.new('+')
-          t_star = Syntax::VerbatimSymbol.new('*')
-
-          builder = Syntax::GrammarBuilder.new
-          builder.add_terminals(t_int, t_plus, t_star)
-          builder.add_production('P' => 'S')
-          builder.add_production('S' => %w(S + S))
-          builder.add_production('S' => %w(S * S))
-          builder.add_production('S' => 'L')
-          builder.add_production('L' => 'integer')
-          tokens = [
-            Token.new('2', t_int),
-            Token.new('+', t_plus),
-            Token.new('3', t_int),
-            Token.new('*', t_star),
-            Token.new('4', t_int)
-          ]
-          instance = EarleyParser.new(builder.grammar)
-          expect { instance.parse(tokens) }.not_to raise_error
-          parse_result = instance.parse(tokens)
-          expect(parse_result.success?).to eq(true)
-          expect(parse_result.ambiguous?).to eq(true)
-
-          ###################### S(0): . 2 + 3 * 4
-          # Expectation chart[0]:
-          expected = [
-            'P => . S | 0', # Start rule
-            "S => . S '+' S | 0", # predict from (1)
-            "S => . S '*' S | 0", # predict from (1)
-            'S => . L | 0', # predict from (1)
-            'L => . integer | 0' # predict from (4)
-          ]
-          compare_state_texts(parse_result.chart[0], expected)
-
-          ###################### S(1): 2 . + 3 * 4
-          # Expectation chart[1]:
-          expected = [
-            'L => integer . | 0', # scan from S(0) 4
-            'S => L . | 0', # complete from (1) and S(0) 4
-            'P => S . | 0', # complete from (2) and S(0) 1
-            "S => S . '+' S | 0", # complete from (2) and S(0) 2
-            "S => S . '*' S | 0", # complete from (2) and S(0) 3
-          ]
-          compare_state_texts(parse_result.chart[1], expected)
-
-          ###################### S(2): 2 + . 3 * 4
-          # Expectation chart[2]:
-          expected = [
-            "S => S '+' . S | 0", # scan from S(1) 4
-            "S => . S '+' S | 2", # predict from (1)
-            "S => . S '*' S | 2", # predict from (1)
-            'S => . L | 2', # predict from (1)
-            'L => . integer | 2' # predict from (4)
-          ]
-          compare_state_texts(parse_result.chart[2], expected)
-
-          ###################### S(3): 2 + 3 . * 4
-          # Expectation chart[3]:
-          expected = [
-            'L => integer . | 2', # scan from S(2) 5
-            'S => L . | 2', # complete from (1) and S(2) 4
-            "S => S '+' S . | 0", # complete from (2) and S(2) 1
-            "S => S . '+' S | 2", # complete from (2) and S(2) 2
-            "S => S . '*' S | 2", # complete from (2) and S(2) 3
-            'P => S . | 0', # complete from (2) and S(0) 1
-            "S => S . '+' S | 0", # complete from (2) and S(0) 2
-            "S => S . '*' S | 0", # complete from (2) and S(0) 3
-          ]
-          compare_state_texts(parse_result.chart[3], expected)
-
-          ###################### S(4): 2 + 3 * . 4
-          # Expectation chart[4]:
-          expected = [
-            "S => S '*' . S | 2", # scan from S(3) 5
-            "S => S '*' . S | 0", # scan from S(3) 8
-            "S => . S '+' S | 4", # predict from (1)
-            "S => . S '*' S | 4", # predict from (1)
-            'S => . L | 4', # predict from (1)
-            'L => . integer | 4' # predict from (4)
-          ]
-          compare_state_texts(parse_result.chart[4], expected)
-
-          ###################### S(5): 2 + 3 * 4 .
-          # Expectation chart[5]:
-          expected = [
-            'L => integer . | 4', # scan from S(4) 6
-            'S => L . | 4', # complete from (1) and S(4) 5
-            "S => S '*' S . | 2", # complete from (2) and S(4) 1
-            "S => S '*' S . | 0", # complete from (2) and S(4) 2
-            "S => S . '+' S | 4", # complete from (2) and S(4) 3
-            "S => S . '*' S | 4", # complete from (2) and S(4) 4
-            "S => S '+' S . | 0", # complete from (2) and S(2) 1
-            "S => S . '+' S | 2", # complete from (2) and S(2) 2
-            "S => S . '*' S | 2", # complete from (2) and S(2) 3
-            'P => S . | 0', # complete from (2) and S(0) 1
-            "S => S . '+' S | 0", # complete from (2) and S(0) 2
-            "S => S . '*' S | 0" # complete from (2) and S(0) 3
-          ]
-          compare_state_texts(parse_result.chart[5], expected)
-        end
-
-        it 'should parse an ambiguous grammar (II)' do
-          extend(AmbiguousGrammarHelper)
-          grammar = grammar_builder.grammar
-          instance = EarleyParser.new(grammar)
-          tokens = tokenize('abc + def + ghi', grammar)
-          expect { instance.parse(tokens) }.not_to raise_error
-          parse_result = instance.parse(tokens)
-          expect(parse_result.success?).to eq(true)
-          expect(parse_result.ambiguous?).to eq(true)
-
-          ###################### S(0): . abc + def + ghi
-          # Expectation chart[0]:
-          expected = [
-            'S => . E | 0', # Start rule
-            'E => . E + E | 0', # predict from (1)
-            'E => . id | 0' # predict from (1)
-          ]
-          compare_state_texts(parse_result.chart[0], expected)
-
-          ###################### S(1): abc . + def + ghi
-          # Expectation chart[1]:
-          expected = [
-            'E => id . | 0', # scan from S(0) 3
-            'S => E . | 0', # complete from (1) and S(0) 2
-            'E => E . + E | 0' # complete from (1) and S(0) 3
-          ]
-          compare_state_texts(parse_result.chart[1], expected)
-
-          ###################### S(2): abc + . def + ghi
-          # Expectation chart[2]:
-          expected = [
-            'E => E + . E | 0', # Scan from S(1) 3
-            'E => . E + E | 2', # predict from (1)
-            'E => . id | 2' # predict from (1)
-          ]
-          compare_state_texts(parse_result.chart[2], expected)
-
-          ###################### S(3): abc + def . + ghi
-          # Expectation chart[3]:
-          expected = [
-            'E => id . | 2', # Scan from S(2) 3
-            'E => E + E . | 0', # complete from (1) and S(2) 1
-            'E => E . + E | 2', # complete from (1) and S(2) 2
-            'S => E . | 0', # complete from (1) and S(0) 1
-            'E => E . + E | 0' # complete from (1) and S(0) 2
-          ]
-          compare_state_texts(parse_result.chart[3], expected)
-
-          ###################### S(4): abc + def + . ghi
-          # Expectation chart[4]:
-          expected = [
-            'E => E + . E | 2', # Scan from S(3) 3
-            'E => E + . E | 0', # Scan from S(3) 5
-            'E => . E + E | 4', # predict from (1)
-            'E => . id | 4' # predict from (1)
-          ]
-          compare_state_texts(parse_result.chart[4], expected)
-
-          ###################### S(5): abc + def + ghi .
-          # Expectation chart[5]:
-          expected = [
-            'E => id . | 4', # Scan from S(4) 4
-            'E => E + E . | 2', # complete from (1) and S(4) 1
-            'E => E + E . | 0', # complete from (1) and S(4) 2
-            'E => E . + E | 4', # complete from (1) and S(4) 3
-            'E => E . + E | 2', # complete from (1) and S(2) 2
-            'S => E . | 0', # complete from (1) and S(0) 1
-            'E => E . + E | 0', # complete from (1) and S(0) 2
-          ]
-          compare_state_texts(parse_result.chart[5], expected)
-        end
-
-
-
-        it 'should parse an invalid simple input' do
-          # Parse an erroneous input (b is missing)
-          wrong = [
-            Token.new('a', a_),
-            Token.new('a', a_),
-            Token.new('c', c_),
-            Token.new('c', c_)
-          ]
-          err_msg = <<-MSG
-Syntax error at or near token 3>>>c<<<:
-Expected one of: ['a', 'b'], found a 'c' instead.
-MSG
-          err = StandardError
-          expect { subject.parse(wrong) }
-            .to raise_error(err, err_msg.chomp)
-=begin
-          # This code is never reached (because of exception)
-          ###################### S(0) == . a a c c
-          # Expectation chart[0]:
-          expected = [
-            'S => . A | 0', # start rule
-            "A => . 'a' A 'c' | 0", # predict from 0
-            "A => . 'b' | 0" # predict from 0
-          ]
-          compare_state_texts(parse_result.chart[0], expected)
-
-          ###################### S(1) == a . a c c
-          expected = [
-            "A => 'a' . A 'c' | 0", # scan from S(0) 1
-            "A => . 'a' A 'c' | 1", # predict from 0
-            "A => . 'b' | 1" # predict from 0
-          ]
-          compare_state_texts(parse_result.chart[1], expected)
-
-          ###################### S(2) == a a . c c
-          expected = [
-            "A => 'a' . A 'c' | 1", # scan from S(0) 1
-            "A => . 'a' A 'c' | 2", # predict from 0
-            "A => . 'b' | 2" # predict from 0
-          ]
-          compare_state_texts(parse_result.chart[2], expected)
-
-          ###################### S(3) == a a c? c
-          state_set_3 = parse_result.chart[3]
-          expect(state_set_3.states).to be_empty # This is an error symptom
-=end
-        end
-
-        it 'should parse a grammar with nullable nonterminals' do
-          # Grammar 4: A grammar with nullable nonterminal
-          # based on example in "Parsing Techniques" book (D. Grune, C. Jabobs)
-          # Z ::= E.
-          # E ::= E Q F.
-          # E ::= F.
-          # F ::= a.
-          # Q ::= *.
-          # Q ::= /.
-          # Q ::=.
-          t_a = Syntax::VerbatimSymbol.new('a')
-          t_star = Syntax::VerbatimSymbol.new('*')
-          t_slash = Syntax::VerbatimSymbol.new('/')
-
-          builder = Syntax::GrammarBuilder.new
-          builder.add_terminals(t_a, t_star, t_slash)
-          builder.add_production('Z' => 'E')
-          builder.add_production('E' => %w(E Q F))
-          builder.add_production('E' => 'F')
-          builder.add_production('F' => t_a)
-          builder.add_production('Q' => t_star)
-          builder.add_production('Q' => t_slash)
-          builder.add_production('Q' => []) # Empty production
-          tokens = [
-            Token.new('a', t_a),
-            Token.new('a', t_a),
-            Token.new('/', t_slash),
-            Token.new('a', t_a)
-          ]
-
-          instance = EarleyParser.new(builder.grammar)
-          expect { instance.parse(tokens) }.not_to raise_error
-          parse_result = instance.parse(tokens)
-          expect(parse_result.success?).to eq(true)
-
-          ###################### S(0) == . a a / a
-          # Expectation chart[0]:
-          expected = [
-            'Z => . E | 0', # start rule
-            'E => . E Q F | 0', # predict from (1)
-            'E => . F | 0', # predict from (1)
-            "F => . 'a' | 0" # predict from (3)
-          ]
-          compare_state_texts(parse_result.chart[0], expected)
-
-          ###################### S(1) == a . a / a
-          # Expectation chart[1]:
-          expected = [
-            "F => 'a' . | 0", # scan from S(0) 4
-            'E => F . | 0', # complete from (1) and S(0) 3
-            'Z => E . | 0', # complete from (2) and S(0) 1
-            'E => E . Q F | 0', # complete from (2) and S(0) 2
-            "Q => . '*' | 1", # Predict from (4)
-            "Q => . '/' | 1", # Predict from (4)
-            'Q => . | 1', # Predict from (4)
-            'E => E Q . F | 0', # Modified predict from (4)
-            "F => . 'a' | 1" # Predict from (8)
-          ]
-          compare_state_texts(parse_result.chart[1], expected)
-
-          ###################### S(2) == a a . / a
-          # Expectation chart[2]:
-          expected = [
-            "F => 'a' . | 1", # scan from S(1) 9
-            'E => E Q F . | 0', # complete from (1) and S(1) 8
-            'Z => E . | 0', # complete from (1) and S(0) 1
-            'E => E . Q F | 0', # complete from (1) and S(0) 2
-            "Q => . '*' | 2", # Predict from (4)
-            "Q => . '/' | 2", # Predict from (4)
-            'Q => . | 2', # Predict from (4)
-            'E => E Q . F | 0', # Complete from (5) and S(1) 4
-            "F => . 'a' | 2" # Predict from (8)
-          ]
-          compare_state_texts(parse_result.chart[2], expected)
-
-
-          ###################### S(3) == a a / . a
-          # Expectation chart[3]:
-          expected = [
-            "Q => '/' . | 2", # scan from S(2) 6
-            'E => E Q . F | 0', # complete from (1) and S(1) 4
-            "F => . 'a' | 3" # Predict from (2)
-          ]
-          compare_state_texts(parse_result.chart[3], expected)
-
-
-          ###################### S(4) == a a / a .
-          # Expectation chart[4]:
-          expected = [
-            "F => 'a' . | 3", # scan from S(3) 3
-            'E => E Q F . | 0', # complete from (1) and S(3) 2
-            'Z => E . | 0', # complete from (2) and S(0) 1
-            'E => E . Q F | 0', # complete from (2) and S(0) 2
-            "Q => . '*' | 4", # Predict from (4)
-            "Q => . '/' | 4", # Predict from (4)
-            'Q => . | 4', # Predict from (4)
-            'E => E Q . F | 0', # Modified predict from (4)
-            "F => . 'a' | 4" # Predict from (8)
-          ]
-          compare_state_texts(parse_result.chart[4], expected)
-        end
-      end # context
-    end # describe
-  end # module
-end # module
-
-# End of file