rley 0.4.01 → 0.4.02

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/LICENSE.txt +2 -2
  4. data/README.md +3 -3
  5. data/examples/NLP/mini_en_demo.rb +1 -1
  6. data/examples/data_formats/JSON/JSON_demo.rb +1 -0
  7. data/examples/data_formats/JSON/JSON_lexer.rb +4 -4
  8. data/examples/general/calc/calc_lexer.rb +2 -2
  9. data/lib/rley.rb +1 -1
  10. data/lib/rley/constants.rb +1 -1
  11. data/lib/rley/formatter/debug.rb +2 -2
  12. data/lib/rley/formatter/json.rb +4 -4
  13. data/lib/rley/parse_tree_visitor.rb +9 -9
  14. data/lib/rley/parser/base_parser.rb +1 -1
  15. data/lib/rley/parser/gfg_parsing.rb +9 -0
  16. data/lib/rley/parser/parse_tree_builder.rb +176 -126
  17. data/lib/rley/parser/parse_tree_factory.rb +57 -0
  18. data/lib/rley/ptree/non_terminal_node.rb +10 -9
  19. data/lib/rley/ptree/parse_tree_node.rb +10 -5
  20. data/lib/rley/ptree/terminal_node.rb +14 -6
  21. data/lib/rley/sppf/sppf_node.rb +2 -2
  22. data/lib/rley/{parser → tokens}/token.rb +1 -4
  23. data/lib/rley/{ptree → tokens}/token_range.rb +1 -1
  24. data/spec/rley/formatter/debug_spec.rb +16 -16
  25. data/spec/rley/formatter/json_spec.rb +8 -8
  26. data/spec/rley/parse_forest_visitor_spec.rb +1 -1
  27. data/spec/rley/parse_tree_visitor_spec.rb +28 -28
  28. data/spec/rley/parser/error_reason_spec.rb +3 -3
  29. data/spec/rley/parser/gfg_chart_spec.rb +2 -2
  30. data/spec/rley/parser/gfg_earley_parser_spec.rb +2 -2
  31. data/spec/rley/parser/gfg_parsing_spec.rb +2 -2
  32. data/spec/rley/parser/groucho_spec.rb +1 -1
  33. data/spec/rley/parser/parse_tracer_spec.rb +2 -2
  34. data/spec/rley/parser/parse_tree_builder_spec.rb +213 -140
  35. data/spec/rley/parser/parse_tree_factory_spec.rb +85 -0
  36. data/spec/rley/parser/parse_walker_factory_spec.rb +11 -10
  37. data/spec/rley/ptree/non_terminal_node_spec.rb +23 -20
  38. data/spec/rley/ptree/terminal_node_spec.rb +7 -12
  39. data/spec/rley/sppf/alternative_node_spec.rb +2 -2
  40. data/spec/rley/sppf/non_terminal_node_spec.rb +2 -2
  41. data/spec/rley/support/ambiguous_grammar_helper.rb +2 -2
  42. data/spec/rley/support/expectation_helper.rb +1 -1
  43. data/spec/rley/support/grammar_ambig01_helper.rb +2 -2
  44. data/spec/rley/support/grammar_b_expr_helper.rb +2 -2
  45. data/spec/rley/support/grammar_helper.rb +3 -3
  46. data/spec/rley/support/grammar_l0_helper.rb +2 -2
  47. data/spec/rley/support/grammar_pb_helper.rb +2 -2
  48. data/spec/rley/{ptree → tokens}/token_range_spec.rb +2 -2
  49. data/spec/rley/{parser → tokens}/token_spec.rb +2 -2
  50. metadata +11 -17
  51. data/lib/rley/parser/chart.rb +0 -82
  52. data/lib/rley/parser/earley_parser.rb +0 -203
  53. data/lib/rley/parser/parsing.rb +0 -265
  54. data/spec/rley/parser/chart_spec.rb +0 -120
  55. data/spec/rley/parser/earley_parser_spec.rb +0 -710
  56. data/spec/rley/parser/parsing_spec.rb +0 -408
@@ -1,120 +0,0 @@
1
- require_relative '../../spec_helper'
2
- require 'stringio'
3
-
4
- require_relative '../../../lib/rley/syntax/terminal'
5
- require_relative '../../../lib/rley/syntax/non_terminal'
6
- require_relative '../../../lib/rley/syntax/production'
7
- require_relative '../../../lib/rley/parser/token'
8
- require_relative '../../../lib/rley/parser/dotted_item'
9
- require_relative '../../../lib/rley/parser/parse_state'
10
- require_relative '../../../lib/rley/parser/parse_tracer'
11
-
12
- # Load the class under test
13
- require_relative '../../../lib/rley/parser/chart'
14
-
15
- module Rley # Open this namespace to avoid module qualifier prefixes
16
- module Parser # Open this namespace to avoid module qualifier prefixes
17
- describe Chart do
18
- let(:count_token) { 20 }
19
- let(:sample_start_symbol) { double('fake_non-terminal') }
20
- let(:dotted_rule) { double('fake-dotted-item') }
21
-
22
- let(:output) { StringIO.new('', 'w') }
23
-
24
- let(:token_seq) do
25
- literals = %w(I saw John with a dog)
26
- literals.map { |lexeme| Token.new(lexeme, nil) }
27
- end
28
-
29
- let(:sample_tracer) { ParseTracer.new(0, output, token_seq) }
30
-
31
- # Default instantiation rule
32
- subject do
33
- allow(dotted_rule).to receive(:lhs).and_return(sample_start_symbol)
34
- Chart.new([ dotted_rule ], count_token, sample_tracer)
35
- end
36
-
37
- context 'Initialization:' do
38
- it 'should be created with start dotted rule, token count, tracer' do
39
- allow(dotted_rule).to receive(:lhs).and_return(sample_start_symbol)
40
- expect { Chart.new([ dotted_rule ], count_token, sample_tracer) }
41
- .not_to raise_error
42
- end
43
-
44
- it 'should have a seed state in first state_set' do
45
- seed_state = ParseState.new(dotted_rule, 0)
46
- expect(subject[0].states).to eq([seed_state])
47
-
48
- # Shorthand syntax
49
- expect(subject[0].first).to eq(seed_state)
50
- end
51
-
52
- it 'should have the correct state_set count' do
53
- expect(subject.state_sets.size).to eq(count_token + 1)
54
- end
55
-
56
- it 'should know the start dotted rule' do
57
- expect(subject.start_dotted_rule).to eq(dotted_rule)
58
- end
59
-
60
- it 'should know the start symbol' do
61
- expect(subject.start_symbol).to eq(sample_start_symbol)
62
- end
63
-
64
- it 'should have at least one non-empty state set' do
65
- expect(subject.last_index).to eq(0)
66
- end
67
-
68
- it 'should reference a tracer' do
69
- expect(subject.tracer).to eq(sample_tracer)
70
- end
71
- end # context
72
-
73
- context 'Provided services:' do
74
- let(:t_a) { Syntax::Terminal.new('A') }
75
- let(:t_b) { Syntax::Terminal.new('B') }
76
- let(:t_c) { Syntax::Terminal.new('C') }
77
- let(:nt_sentence) { Syntax::NonTerminal.new('sentence') }
78
-
79
- let(:sample_prod) do
80
- Syntax::Production.new(nt_sentence, [t_a, t_b, t_c])
81
- end
82
-
83
- let(:origin_val) { 3 }
84
- let(:dotted_rule) { DottedItem.new(sample_prod, 2) }
85
- let(:complete_rule) { DottedItem.new(sample_prod, 3) }
86
- let(:sample_parse_state) { ParseState.new(dotted_rule, origin_val) }
87
- let(:sample_tracer) { ParseTracer.new(1, output, token_seq) }
88
-
89
- # Factory method.
90
- def parse_state(origin, aDottedRule)
91
- ParseState.new(aDottedRule, origin)
92
- end
93
-
94
-
95
- it 'should trace its initialization' do
96
- subject[0] # Force constructor call here
97
- expectation = <<-SNIPPET
98
- ['I', 'saw', 'John', 'with', 'a', 'dog']
99
- |. I . saw . John . with . a . dog .|
100
- |> . . . . . .| [0:0] sentence => A B . C
101
- SNIPPET
102
- expect(output.string).to eq(expectation)
103
- end
104
-
105
- it 'should trace parse state pushing' do
106
- subject[0] # Force constructor call here
107
- output.string = ''
108
-
109
- subject.push_state(dotted_rule, 3, 5, :prediction)
110
- expectation = <<-SNIPPET
111
- |. . . > .| [3:5] sentence => A B . C
112
- SNIPPET
113
- expect(output.string).to eq(expectation)
114
- end
115
- end # context
116
- end # describe
117
- end # module
118
- end # module
119
-
120
- # End of file
@@ -1,710 +0,0 @@
1
- require_relative '../../spec_helper'
2
- require 'stringio'
3
-
4
- require_relative '../../../lib/rley/syntax/verbatim_symbol'
5
- require_relative '../../../lib/rley/syntax/non_terminal'
6
- require_relative '../../../lib/rley/syntax/production'
7
- require_relative '../../../lib/rley/syntax/grammar_builder'
8
- require_relative '../../../lib/rley/parser/token'
9
- require_relative '../../../lib/rley/parser/dotted_item'
10
- require_relative '../support/ambiguous_grammar_helper'
11
- # Load the class under test
12
- require_relative '../../../lib/rley/parser/earley_parser'
13
-
14
- module Rley # Open this namespace to avoid module qualifier prefixes
15
- module Parser # Open this namespace to avoid module qualifier prefixes
16
- describe EarleyParser do
17
- =begin
18
- let(:kw_true) { Syntax::VerbatimSymbol.new('true') }
19
- let(:kw_false) { Syntax::VerbatimSymbol.new('false') }
20
- let(:kw_null) { Syntax::VerbatimSymbol.new('null') }
21
- let(:number) do
22
- number_pattern = /[-+]?[0-9]+(\.[0-9]+)?([eE][-+]?[0-9]+)?/
23
- Syntax::Literal.new('number', number_pattern)
24
- end
25
- let(:string) do
26
- string_pattern = /"([^\\"]|\\.)*"/
27
- Syntax::Literal('string', string_pattern)
28
- end
29
- let(:lbracket) { Syntax::VerbatimSymbol.new('[') }
30
- let(:rbracket) { Syntax::VerbatimSymbol.new(']') }
31
- let(:comma) { Syntax::VerbatimSymbol.new(',') }
32
- let(:array) { Syntax::NonTerminal.new('Array') }
33
- let(:object) { Syntax::NonTerminal.new('Object') }
34
-
35
- let(:array_prod) do
36
- Production.new(array, )
37
- end
38
- =end
39
-
40
-
41
- # Grammar 1: A very simple language
42
- # (based on example in N. Wirth "Compiler Construction" book, p. 6)
43
- # S => A.
44
- # A => "a" A "c".
45
- # A => "b".
46
- # Let's create the grammar piece by piece
47
- let(:nt_S) { Syntax::NonTerminal.new('S') }
48
- let(:nt_A) { Syntax::NonTerminal.new('A') }
49
- let(:a_) { Syntax::VerbatimSymbol.new('a') }
50
- let(:b_) { Syntax::VerbatimSymbol.new('b') }
51
- let(:c_) { Syntax::VerbatimSymbol.new('c') }
52
- let(:prod_S) { Syntax::Production.new(nt_S, [nt_A]) }
53
- let(:prod_A1) { Syntax::Production.new(nt_A, [a_, nt_A, c_]) }
54
- let(:prod_A2) { Syntax::Production.new(nt_A, [b_]) }
55
- let(:grammar_abc) { Syntax::Grammar.new([prod_S, prod_A1, prod_A2]) }
56
-
57
- # Helper method that mimics the output of a tokenizer
58
- # for the language specified by grammar_abc
59
- def grm1_tokens()
60
- tokens = [
61
- Token.new('a', a_),
62
- Token.new('a', a_),
63
- Token.new('b', b_),
64
- Token.new('c', c_),
65
- Token.new('c', c_)
66
- ]
67
-
68
- return tokens
69
- end
70
-
71
-
72
- # Grammar 2: A simple arithmetic expression language
73
- # (based on example in article on Earley's algorithm in Wikipedia)
74
- # P ::= S.
75
- # S ::= S "+" M.
76
- # S ::= M.
77
- # M ::= M "*" M.
78
- # M ::= T.
79
- # T ::= an integer number token.
80
- # Let's create the grammar piece by piece
81
- let(:nt_P) { Syntax::NonTerminal.new('P') }
82
- let(:nt_M) { Syntax::NonTerminal.new('M') }
83
- let(:nt_T) { Syntax::NonTerminal.new('T') }
84
- let(:plus) { Syntax::VerbatimSymbol.new('+') }
85
- let(:star) { Syntax::VerbatimSymbol.new('*') }
86
- let(:integer) do
87
- integer_pattern = /[-+]?[0-9]+/ # Decimal notation
88
- Syntax::Literal.new('integer', integer_pattern)
89
- end
90
- let(:prod_P) { Syntax::Production.new(nt_P, [nt_S]) }
91
- let(:prod_S1) { Syntax::Production.new(nt_S, [nt_S, plus, nt_M]) }
92
- let(:prod_S2) { Syntax::Production.new(nt_S, [nt_M]) }
93
- let(:prod_M1) { Syntax::Production.new(nt_M, [nt_M, star, nt_T]) }
94
- let(:prod_M2) { Syntax::Production.new(nt_M, [nt_T]) }
95
- let(:prod_T) { Syntax::Production.new(nt_T, [integer]) }
96
- let(:grammar_expr) do
97
- all_prods = [prod_P, prod_S1, prod_S2, prod_M1, prod_M2, prod_T]
98
- Syntax::Grammar.new(all_prods)
99
- end
100
-
101
- # Helper method that mimics the output of a tokenizer
102
- # for the language specified by grammar_expr
103
- def grm2_tokens()
104
- tokens = [
105
- Token.new('2', integer),
106
- Token.new('+', plus),
107
- Token.new('3', integer),
108
- Token.new('*', star),
109
- Token.new('4', integer)
110
- ]
111
-
112
- return tokens
113
- end
114
-
115
-
116
- # Default instantiation rule
117
- subject { EarleyParser.new(grammar_abc) }
118
-
119
- context 'Initialization:' do
120
- it 'should be created with a grammar' do
121
- expect { EarleyParser.new(grammar_abc) }.not_to raise_error
122
- expect { EarleyParser.new(grammar_expr) }.not_to raise_error
123
- end
124
-
125
- it 'should know its grammar' do
126
- expect(subject.grammar).to eq(grammar_abc)
127
- end
128
-
129
- it 'should know its dotted items' do
130
- expect(subject.dotted_items.size).to eq(8)
131
- end
132
-
133
- it 'should have its start mapping initialized' do
134
- expect(subject.start_mapping.size).to eq(2)
135
-
136
- start_items_S = subject.start_mapping[nt_S]
137
- expect(start_items_S.size).to eq(1)
138
- expect(start_items_S[0].production).to eq(prod_S)
139
-
140
- start_items_A = subject.start_mapping[nt_A]
141
- expect(start_items_A.size).to eq(2)
142
-
143
- # Assuming that dotted_items are created in same order
144
- # as productions in the grammar.
145
- expect(start_items_A[0].production).to eq(prod_A1)
146
- expect(start_items_A[1].production).to eq(prod_A2)
147
- end
148
-
149
- it 'should have its next mapping initialized' do
150
- expect(subject.next_mapping.size).to eq(5)
151
- end
152
- end # context
153
-
154
- context 'Parsing: ' do
155
- # Helper method. Compare the data from all the parse states
156
- # of a given StateSet with an array of expectation string.
157
- def compare_state_texts(aStateSet, expectations)
158
- (0...expectations.size).each do |i|
159
- expect(aStateSet.states[i].to_s).to eq(expectations[i])
160
- end
161
- end
162
-
163
- it 'should parse a valid simple input' do
164
- parse_result = subject.parse(grm1_tokens)
165
- expect(parse_result.success?).to eq(true)
166
- expect(parse_result.ambiguous?).to eq(false)
167
-
168
- ######################
169
- # Expectation chart[0]:
170
- expected = [
171
- 'S => . A | 0', # start rule
172
- "A => . 'a' A 'c' | 0", # predict from 0
173
- "A => . 'b' | 0" # predict from 0
174
- ]
175
- compare_state_texts(parse_result.chart[0], expected)
176
-
177
- ######################
178
- # Expectation chart[1]:
179
- expected = [
180
- "A => 'a' . A 'c' | 0", # scan from S(0) 1
181
- "A => . 'a' A 'c' | 1", # predict from 0
182
- "A => . 'b' | 1" # predict from 0
183
- ]
184
- state_set_1 = parse_result.chart[1]
185
- expect(state_set_1.states.size).to eq(3)
186
- compare_state_texts(state_set_1, expected)
187
-
188
- ######################
189
- # Expectation chart[2]:
190
- expected = [
191
- "A => 'a' . A 'c' | 1", # scan from S(0) 1
192
- "A => . 'a' A 'c' | 2", # predict from 0
193
- "A => . 'b' | 2" # predict from 0
194
- ]
195
- state_set_2 = parse_result.chart[2]
196
- expect(state_set_2.states.size).to eq(3)
197
- compare_state_texts(state_set_2, expected)
198
-
199
- ######################
200
- # Expectation chart[3]:
201
- expected = [
202
- "A => 'b' . | 2", # scan from S(2) 2
203
- "A => 'a' A . 'c' | 1" # complete from 0 and S(2) 0
204
- ]
205
- state_set_3 = parse_result.chart[3]
206
- expect(state_set_3.states.size).to eq(2)
207
- compare_state_texts(state_set_3, expected)
208
-
209
-
210
- ######################
211
- # Expectation chart[4]:
212
- expected = [
213
- "A => 'a' A 'c' . | 1", # scan from S(3) 1
214
- "A => 'a' A . 'c' | 0" # complete from 0 and S(1) 0
215
- ]
216
- state_set_4 = parse_result.chart[4]
217
- expect(state_set_4.states.size).to eq(2)
218
- compare_state_texts(state_set_4, expected)
219
-
220
- ######################
221
- # Expectation chart[5]:
222
- expected = [
223
- "A => 'a' A 'c' . | 0", # scan from S(4) 1
224
- 'S => A . | 0' # complete from 0 and S(0) 0
225
- ]
226
- state_set_5 = parse_result.chart[5]
227
- expect(state_set_5.states.size).to eq(2)
228
- compare_state_texts(state_set_5, expected)
229
- end
230
-
231
- it 'should trace a parse with level 1' do
232
- # Substitute temporarily $stdout by a StringIO
233
- prev_ostream = $stdout
234
- $stdout = StringIO.new('', 'w')
235
-
236
- trace_level = 1
237
- subject.parse(grm1_tokens, trace_level)
238
- expectations = <<-SNIPPET
239
- ['a', 'a', 'b', 'c', 'c']
240
- |. a . a . b . c . c .|
241
- |> . . . . .| [0:0] S => . A
242
- |> . . . . .| [0:0] A => . 'a' A 'c'
243
- |> . . . . .| [0:0] A => . 'b'
244
- |[---] . . . .| [0:1] A => 'a' . A 'c'
245
- |. > . . . .| [1:1] A => . 'a' A 'c'
246
- |. > . . . .| [1:1] A => . 'b'
247
- |. [---] . . .| [1:2] A => 'a' . A 'c'
248
- |. . > . . .| [2:2] A => . 'a' A 'c'
249
- |. . > . . .| [2:2] A => . 'b'
250
- |. . [---] . .| [2:3] A => 'b' .
251
- |. [-------> . .| [1:3] A => 'a' A . 'c'
252
- |. . . [---] .| [3:4] A => 'a' A 'c' .
253
- |[---------------> .| [0:4] A => 'a' A . 'c'
254
- |. . . . [---]| [4:5] A => 'a' A 'c' .
255
- |[===================]| [0:5] S => A .
256
- SNIPPET
257
- expect($stdout.string).to eq(expectations)
258
-
259
- # Restore standard output stream
260
- $stdout = prev_ostream
261
- end
262
-
263
- it 'should parse a valid simple expression' do
264
- instance = EarleyParser.new(grammar_expr)
265
- parse_result = instance.parse(grm2_tokens)
266
- expect(parse_result.success?).to eq(true)
267
- expect(parse_result.ambiguous?).to eq(false)
268
-
269
- ###################### S(0): . 2 + 3 * 4
270
- # Expectation chart[0]:
271
- expected = [
272
- 'P => . S | 0', # start rule
273
- "S => . S '+' M | 0", # predict from (1)
274
- 'S => . M | 0', # predict from (1)
275
- "M => . M '*' T | 0", # predict from (4)
276
- 'M => . T | 0', # predict from (4)
277
- 'T => . integer | 0' # predict from (4)
278
- ]
279
- compare_state_texts(parse_result.chart[0], expected)
280
-
281
-
282
- ###################### S(1): 2 . + 3 * 4
283
- # Expectation chart[1]:
284
- expected = [
285
- 'T => integer . | 0', # scan from S(0) 6
286
- 'M => T . | 0', # complete from (1) and S(0) 5
287
- 'S => M . | 0', # complete from (2) and S(0) 3
288
- "M => M . '*' T | 0", # complete from (2) and S(0) 4
289
- 'P => S . | 0', # complete from (4) and S(0) 1
290
- "S => S . '+' M | 0" # complete from (4) and S(0) 2
291
- ]
292
- compare_state_texts(parse_result.chart[1], expected)
293
-
294
-
295
- ###################### S(2): 2 + . 3 * 4
296
- # Expectation chart[2]:
297
- expected = [
298
- "S => S '+' . M | 0", # scan from S(1) 6
299
- "M => . M '*' T | 2", # predict from (1)
300
- 'M => . T | 2', # predict from (1)
301
- 'T => . integer | 2' # predict from (3)
302
- ]
303
- compare_state_texts(parse_result.chart[2], expected)
304
-
305
-
306
- ###################### S(3): 2 + 3 . * 4
307
- # Expectation chart[3]:
308
- expected = [
309
- 'T => integer . | 2', # scan from S(2) 4
310
- 'M => T . | 2', # complete from (1) and S(2) 3
311
- "S => S '+' M . | 0", # complete from (1) and S(2) 1
312
- "M => M . '*' T | 2", # complete from (2) and S(2) 2
313
- 'P => S . | 0' # complete from (4) and S(0) 1
314
- ]
315
- compare_state_texts(parse_result.chart[3], expected)
316
-
317
- ###################### S(4): 2 + 3 * . 4
318
- # Expectation chart[4]:
319
- expected = [
320
- "M => M '*' . T | 2", # scan from S(3) 4
321
- 'T => . integer | 4' # predict from (1)
322
- ]
323
- compare_state_texts(parse_result.chart[4], expected)
324
-
325
- ###################### S(5): 2 + 3 * 4 .
326
- # Expectation chart[5]:
327
- expected = [
328
- 'T => integer . | 4', # scan from S(4) 2
329
- "M => M '*' T . | 2", # complete from (1) and S(4) 1
330
- "S => S '+' M . | 0", # complete from (2) and S(2) 1
331
- "M => M . '*' T | 2", # complete from (2) and S(2) 2
332
- 'P => S . | 0' # complete from (3) and S(2) 2
333
- ]
334
- compare_state_texts(parse_result.chart[5], expected)
335
- end
336
-
337
- it 'should parse a nullable grammar' do
338
- # Simple but problematic grammar for the original Earley parser
339
- # (based on example in D. Grune, C. Jacobs "Parsing Techniques" book)
340
- # Ss => A A 'x';
341
- # A => ;
342
- t_x = Syntax::VerbatimSymbol.new('x')
343
-
344
- builder = Syntax::GrammarBuilder.new
345
- builder.add_terminals(t_x)
346
- builder.add_production('Ss' => %w(A A x))
347
- builder.add_production('A' => [])
348
- tokens = [ Token.new('x', t_x) ]
349
-
350
- instance = EarleyParser.new(builder.grammar)
351
- expect { instance.parse(tokens) }.not_to raise_error
352
- parse_result = instance.parse(tokens)
353
- expect(parse_result.success?).to eq(true)
354
- ###################### S(0): . x
355
- # Expectation chart[0]:
356
- expected = [
357
- "Ss => . A A 'x' | 0", # Start rule
358
- 'A => . | 0', # predict from (1)
359
- "Ss => A . A 'x' | 0", # modified predict from (1)
360
- "Ss => A A . 'x' | 0" # modified predict from (1)
361
- ]
362
- compare_state_texts(parse_result.chart[0], expected)
363
-
364
- ###################### S(1): x .
365
- # Expectation chart[1]:
366
- expected = [
367
- "Ss => A A 'x' . | 0" # scan from S(0) 4
368
- ]
369
- compare_state_texts(parse_result.chart[1], expected)
370
- end
371
-
372
- it 'should parse an ambiguous grammar (I)' do
373
- # Grammar 3: An ambiguous arithmetic expression language
374
- # (based on example in article on Earley's algorithm in Wikipedia)
375
- # P => S.
376
- # S => S "+" S.
377
- # S => S "*" S.
378
- # S => L.
379
- # L => an integer number token.
380
- t_int = Syntax::Literal.new('integer', /[-+]?\d+/)
381
- t_plus = Syntax::VerbatimSymbol.new('+')
382
- t_star = Syntax::VerbatimSymbol.new('*')
383
-
384
- builder = Syntax::GrammarBuilder.new
385
- builder.add_terminals(t_int, t_plus, t_star)
386
- builder.add_production('P' => 'S')
387
- builder.add_production('S' => %w(S + S))
388
- builder.add_production('S' => %w(S * S))
389
- builder.add_production('S' => 'L')
390
- builder.add_production('L' => 'integer')
391
- tokens = [
392
- Token.new('2', t_int),
393
- Token.new('+', t_plus),
394
- Token.new('3', t_int),
395
- Token.new('*', t_star),
396
- Token.new('4', t_int)
397
- ]
398
- instance = EarleyParser.new(builder.grammar)
399
- expect { instance.parse(tokens) }.not_to raise_error
400
- parse_result = instance.parse(tokens)
401
- expect(parse_result.success?).to eq(true)
402
- expect(parse_result.ambiguous?).to eq(true)
403
-
404
- ###################### S(0): . 2 + 3 * 4
405
- # Expectation chart[0]:
406
- expected = [
407
- 'P => . S | 0', # Start rule
408
- "S => . S '+' S | 0", # predict from (1)
409
- "S => . S '*' S | 0", # predict from (1)
410
- 'S => . L | 0', # predict from (1)
411
- 'L => . integer | 0' # predict from (4)
412
- ]
413
- compare_state_texts(parse_result.chart[0], expected)
414
-
415
- ###################### S(1): 2 . + 3 * 4
416
- # Expectation chart[1]:
417
- expected = [
418
- 'L => integer . | 0', # scan from S(0) 4
419
- 'S => L . | 0', # complete from (1) and S(0) 4
420
- 'P => S . | 0', # complete from (2) and S(0) 1
421
- "S => S . '+' S | 0", # complete from (2) and S(0) 2
422
- "S => S . '*' S | 0", # complete from (2) and S(0) 3
423
- ]
424
- compare_state_texts(parse_result.chart[1], expected)
425
-
426
- ###################### S(2): 2 + . 3 * 4
427
- # Expectation chart[2]:
428
- expected = [
429
- "S => S '+' . S | 0", # scan from S(1) 4
430
- "S => . S '+' S | 2", # predict from (1)
431
- "S => . S '*' S | 2", # predict from (1)
432
- 'S => . L | 2', # predict from (1)
433
- 'L => . integer | 2' # predict from (4)
434
- ]
435
- compare_state_texts(parse_result.chart[2], expected)
436
-
437
- ###################### S(3): 2 + 3 . * 4
438
- # Expectation chart[3]:
439
- expected = [
440
- 'L => integer . | 2', # scan from S(2) 5
441
- 'S => L . | 2', # complete from (1) and S(2) 4
442
- "S => S '+' S . | 0", # complete from (2) and S(2) 1
443
- "S => S . '+' S | 2", # complete from (2) and S(2) 2
444
- "S => S . '*' S | 2", # complete from (2) and S(2) 3
445
- 'P => S . | 0', # complete from (2) and S(0) 1
446
- "S => S . '+' S | 0", # complete from (2) and S(0) 2
447
- "S => S . '*' S | 0", # complete from (2) and S(0) 3
448
- ]
449
- compare_state_texts(parse_result.chart[3], expected)
450
-
451
- ###################### S(4): 2 + 3 * . 4
452
- # Expectation chart[4]:
453
- expected = [
454
- "S => S '*' . S | 2", # scan from S(3) 5
455
- "S => S '*' . S | 0", # scan from S(3) 8
456
- "S => . S '+' S | 4", # predict from (1)
457
- "S => . S '*' S | 4", # predict from (1)
458
- 'S => . L | 4', # predict from (1)
459
- 'L => . integer | 4' # predict from (4)
460
- ]
461
- compare_state_texts(parse_result.chart[4], expected)
462
-
463
- ###################### S(5): 2 + 3 * 4 .
464
- # Expectation chart[5]:
465
- expected = [
466
- 'L => integer . | 4', # scan from S(4) 6
467
- 'S => L . | 4', # complete from (1) and S(4) 5
468
- "S => S '*' S . | 2", # complete from (2) and S(4) 1
469
- "S => S '*' S . | 0", # complete from (2) and S(4) 2
470
- "S => S . '+' S | 4", # complete from (2) and S(4) 3
471
- "S => S . '*' S | 4", # complete from (2) and S(4) 4
472
- "S => S '+' S . | 0", # complete from (2) and S(2) 1
473
- "S => S . '+' S | 2", # complete from (2) and S(2) 2
474
- "S => S . '*' S | 2", # complete from (2) and S(2) 3
475
- 'P => S . | 0', # complete from (2) and S(0) 1
476
- "S => S . '+' S | 0", # complete from (2) and S(0) 2
477
- "S => S . '*' S | 0" # complete from (2) and S(0) 3
478
- ]
479
- compare_state_texts(parse_result.chart[5], expected)
480
- end
481
-
482
- it 'should parse an ambiguous grammar (II)' do
483
- extend(AmbiguousGrammarHelper)
484
- grammar = grammar_builder.grammar
485
- instance = EarleyParser.new(grammar)
486
- tokens = tokenize('abc + def + ghi', grammar)
487
- expect { instance.parse(tokens) }.not_to raise_error
488
- parse_result = instance.parse(tokens)
489
- expect(parse_result.success?).to eq(true)
490
- expect(parse_result.ambiguous?).to eq(true)
491
-
492
- ###################### S(0): . abc + def + ghi
493
- # Expectation chart[0]:
494
- expected = [
495
- 'S => . E | 0', # Start rule
496
- 'E => . E + E | 0', # predict from (1)
497
- 'E => . id | 0' # predict from (1)
498
- ]
499
- compare_state_texts(parse_result.chart[0], expected)
500
-
501
- ###################### S(1): abc . + def + ghi
502
- # Expectation chart[1]:
503
- expected = [
504
- 'E => id . | 0', # scan from S(0) 3
505
- 'S => E . | 0', # complete from (1) and S(0) 2
506
- 'E => E . + E | 0' # complete from (1) and S(0) 3
507
- ]
508
- compare_state_texts(parse_result.chart[1], expected)
509
-
510
- ###################### S(2): abc + . def + ghi
511
- # Expectation chart[2]:
512
- expected = [
513
- 'E => E + . E | 0', # Scan from S(1) 3
514
- 'E => . E + E | 2', # predict from (1)
515
- 'E => . id | 2' # predict from (1)
516
- ]
517
- compare_state_texts(parse_result.chart[2], expected)
518
-
519
- ###################### S(3): abc + def . + ghi
520
- # Expectation chart[3]:
521
- expected = [
522
- 'E => id . | 2', # Scan from S(2) 3
523
- 'E => E + E . | 0', # complete from (1) and S(2) 1
524
- 'E => E . + E | 2', # complete from (1) and S(2) 2
525
- 'S => E . | 0', # complete from (1) and S(0) 1
526
- 'E => E . + E | 0' # complete from (1) and S(0) 2
527
- ]
528
- compare_state_texts(parse_result.chart[3], expected)
529
-
530
- ###################### S(4): abc + def + . ghi
531
- # Expectation chart[4]:
532
- expected = [
533
- 'E => E + . E | 2', # Scan from S(3) 3
534
- 'E => E + . E | 0', # Scan from S(3) 5
535
- 'E => . E + E | 4', # predict from (1)
536
- 'E => . id | 4' # predict from (1)
537
- ]
538
- compare_state_texts(parse_result.chart[4], expected)
539
-
540
- ###################### S(5): abc + def + ghi .
541
- # Expectation chart[5]:
542
- expected = [
543
- 'E => id . | 4', # Scan from S(4) 4
544
- 'E => E + E . | 2', # complete from (1) and S(4) 1
545
- 'E => E + E . | 0', # complete from (1) and S(4) 2
546
- 'E => E . + E | 4', # complete from (1) and S(4) 3
547
- 'E => E . + E | 2', # complete from (1) and S(2) 2
548
- 'S => E . | 0', # complete from (1) and S(0) 1
549
- 'E => E . + E | 0', # complete from (1) and S(0) 2
550
- ]
551
- compare_state_texts(parse_result.chart[5], expected)
552
- end
553
-
554
-
555
-
556
- it 'should parse an invalid simple input' do
557
- # Parse an erroneous input (b is missing)
558
- wrong = [
559
- Token.new('a', a_),
560
- Token.new('a', a_),
561
- Token.new('c', c_),
562
- Token.new('c', c_)
563
- ]
564
- err_msg = <<-MSG
565
- Syntax error at or near token 3>>>c<<<:
566
- Expected one of: ['a', 'b'], found a 'c' instead.
567
- MSG
568
- err = StandardError
569
- expect { subject.parse(wrong) }
570
- .to raise_error(err, err_msg.chomp)
571
- =begin
572
- # This code is never reached (because of exception)
573
- ###################### S(0) == . a a c c
574
- # Expectation chart[0]:
575
- expected = [
576
- 'S => . A | 0', # start rule
577
- "A => . 'a' A 'c' | 0", # predict from 0
578
- "A => . 'b' | 0" # predict from 0
579
- ]
580
- compare_state_texts(parse_result.chart[0], expected)
581
-
582
- ###################### S(1) == a . a c c
583
- expected = [
584
- "A => 'a' . A 'c' | 0", # scan from S(0) 1
585
- "A => . 'a' A 'c' | 1", # predict from 0
586
- "A => . 'b' | 1" # predict from 0
587
- ]
588
- compare_state_texts(parse_result.chart[1], expected)
589
-
590
- ###################### S(2) == a a . c c
591
- expected = [
592
- "A => 'a' . A 'c' | 1", # scan from S(0) 1
593
- "A => . 'a' A 'c' | 2", # predict from 0
594
- "A => . 'b' | 2" # predict from 0
595
- ]
596
- compare_state_texts(parse_result.chart[2], expected)
597
-
598
- ###################### S(3) == a a c? c
599
- state_set_3 = parse_result.chart[3]
600
- expect(state_set_3.states).to be_empty # This is an error symptom
601
- =end
602
- end
603
-
604
- it 'should parse a grammar with nullable nonterminals' do
605
- # Grammar 4: A grammar with nullable nonterminal
606
- # based on example in "Parsing Techniques" book (D. Grune, C. Jacobs)
607
- # Z ::= E.
608
- # E ::= E Q F.
609
- # E ::= F.
610
- # F ::= a.
611
- # Q ::= *.
612
- # Q ::= /.
613
- # Q ::=.
614
- t_a = Syntax::VerbatimSymbol.new('a')
615
- t_star = Syntax::VerbatimSymbol.new('*')
616
- t_slash = Syntax::VerbatimSymbol.new('/')
617
-
618
- builder = Syntax::GrammarBuilder.new
619
- builder.add_terminals(t_a, t_star, t_slash)
620
- builder.add_production('Z' => 'E')
621
- builder.add_production('E' => %w(E Q F))
622
- builder.add_production('E' => 'F')
623
- builder.add_production('F' => t_a)
624
- builder.add_production('Q' => t_star)
625
- builder.add_production('Q' => t_slash)
626
- builder.add_production('Q' => []) # Empty production
627
- tokens = [
628
- Token.new('a', t_a),
629
- Token.new('a', t_a),
630
- Token.new('/', t_slash),
631
- Token.new('a', t_a)
632
- ]
633
-
634
- instance = EarleyParser.new(builder.grammar)
635
- expect { instance.parse(tokens) }.not_to raise_error
636
- parse_result = instance.parse(tokens)
637
- expect(parse_result.success?).to eq(true)
638
-
639
- ###################### S(0) == . a a / a
640
- # Expectation chart[0]:
641
- expected = [
642
- 'Z => . E | 0', # start rule
643
- 'E => . E Q F | 0', # predict from (1)
644
- 'E => . F | 0', # predict from (1)
645
- "F => . 'a' | 0" # predict from (3)
646
- ]
647
- compare_state_texts(parse_result.chart[0], expected)
648
-
649
- ###################### S(1) == a . a / a
650
- # Expectation chart[1]:
651
- expected = [
652
- "F => 'a' . | 0", # scan from S(0) 4
653
- 'E => F . | 0', # complete from (1) and S(0) 3
654
- 'Z => E . | 0', # complete from (2) and S(0) 1
655
- 'E => E . Q F | 0', # complete from (2) and S(0) 2
656
- "Q => . '*' | 1", # Predict from (4)
657
- "Q => . '/' | 1", # Predict from (4)
658
- 'Q => . | 1', # Predict from (4)
659
- 'E => E Q . F | 0', # Modified predict from (4)
660
- "F => . 'a' | 1" # Predict from (8)
661
- ]
662
- compare_state_texts(parse_result.chart[1], expected)
663
-
664
- ###################### S(2) == a a . / a
665
- # Expectation chart[2]:
666
- expected = [
667
- "F => 'a' . | 1", # scan from S(1) 9
668
- 'E => E Q F . | 0', # complete from (1) and S(1) 8
669
- 'Z => E . | 0', # complete from (1) and S(0) 1
670
- 'E => E . Q F | 0', # complete from (1) and S(0) 2
671
- "Q => . '*' | 2", # Predict from (4)
672
- "Q => . '/' | 2", # Predict from (4)
673
- 'Q => . | 2', # Predict from (4)
674
- 'E => E Q . F | 0', # Complete from (5) and S(1) 4
675
- "F => . 'a' | 2" # Predict from (8)
676
- ]
677
- compare_state_texts(parse_result.chart[2], expected)
678
-
679
-
680
- ###################### S(3) == a a / . a
681
- # Expectation chart[3]:
682
- expected = [
683
- "Q => '/' . | 2", # scan from S(2) 6
684
- 'E => E Q . F | 0', # complete from (1) and S(1) 4
685
- "F => . 'a' | 3" # Predict from (2)
686
- ]
687
- compare_state_texts(parse_result.chart[3], expected)
688
-
689
-
690
- ###################### S(4) == a a / a .
691
- # Expectation chart[4]:
692
- expected = [
693
- "F => 'a' . | 3", # scan from S(3) 3
694
- 'E => E Q F . | 0', # complete from (1) and S(3) 2
695
- 'Z => E . | 0', # complete from (2) and S(0) 1
696
- 'E => E . Q F | 0', # complete from (2) and S(0) 2
697
- "Q => . '*' | 4", # Predict from (4)
698
- "Q => . '/' | 4", # Predict from (4)
699
- 'Q => . | 4', # Predict from (4)
700
- 'E => E Q . F | 0', # Modified predict from (4)
701
- "F => . 'a' | 4" # Predict from (8)
702
- ]
703
- compare_state_texts(parse_result.chart[4], expected)
704
- end
705
- end # context
706
- end # describe
707
- end # module
708
- end # module
709
-
710
- # End of file