rley 0.2.15 → 0.3.00

Sign up to get free protection for your applications and to get access to all the features.
Files changed (72) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/lib/rley/constants.rb +1 -1
  4. data/lib/rley/gfg/call_edge.rb +30 -0
  5. data/lib/rley/gfg/edge.rb +4 -0
  6. data/lib/rley/gfg/end_vertex.rb +1 -1
  7. data/lib/rley/gfg/epsilon_edge.rb +0 -4
  8. data/lib/rley/gfg/grm_flow_graph.rb +32 -7
  9. data/lib/rley/gfg/item_vertex.rb +71 -25
  10. data/lib/rley/gfg/non_terminal_vertex.rb +10 -1
  11. data/lib/rley/gfg/return_edge.rb +31 -0
  12. data/lib/rley/gfg/scan_edge.rb +2 -1
  13. data/lib/rley/gfg/shortcut_edge.rb +26 -0
  14. data/lib/rley/gfg/start_vertex.rb +2 -2
  15. data/lib/rley/gfg/vertex.rb +27 -1
  16. data/lib/rley/parse_forest_visitor.rb +115 -0
  17. data/lib/rley/parser/base_parser.rb +27 -0
  18. data/lib/rley/parser/dotted_item.rb +11 -0
  19. data/lib/rley/parser/earley_parser.rb +3 -15
  20. data/lib/rley/parser/gfg_chart.rb +106 -0
  21. data/lib/rley/parser/gfg_earley_parser.rb +139 -0
  22. data/lib/rley/parser/gfg_parsing.rb +384 -0
  23. data/lib/rley/parser/parse_entry.rb +148 -0
  24. data/lib/rley/parser/parse_entry_set.rb +104 -0
  25. data/lib/rley/parser/parse_entry_tracker.rb +56 -0
  26. data/lib/rley/parser/parse_forest_builder.rb +229 -0
  27. data/lib/rley/parser/parse_forest_factory.rb +54 -0
  28. data/lib/rley/parser/parse_walker_factory.rb +237 -0
  29. data/lib/rley/ptree/token_range.rb +14 -1
  30. data/lib/rley/sppf/alternative_node.rb +34 -0
  31. data/lib/rley/sppf/composite_node.rb +27 -0
  32. data/lib/rley/sppf/epsilon_node.rb +27 -0
  33. data/lib/rley/sppf/leaf_node.rb +12 -0
  34. data/lib/rley/sppf/non_terminal_node.rb +38 -0
  35. data/lib/rley/sppf/parse_forest.rb +48 -0
  36. data/lib/rley/sppf/sppf_node.rb +24 -0
  37. data/lib/rley/sppf/token_node.rb +29 -0
  38. data/lib/rley/syntax/grammar_builder.rb +16 -12
  39. data/lib/rley/syntax/grm_symbol.rb +6 -0
  40. data/lib/rley/syntax/terminal.rb +5 -0
  41. data/spec/rley/gfg/call_edge_spec.rb +51 -0
  42. data/spec/rley/gfg/end_vertex_spec.rb +1 -0
  43. data/spec/rley/gfg/grm_flow_graph_spec.rb +24 -2
  44. data/spec/rley/gfg/item_vertex_spec.rb +75 -6
  45. data/spec/rley/gfg/non_terminal_vertex_spec.rb +14 -0
  46. data/spec/rley/gfg/return_edge_spec.rb +51 -0
  47. data/spec/rley/gfg/shortcut_edge_spec.rb +43 -0
  48. data/spec/rley/gfg/vertex_spec.rb +52 -37
  49. data/spec/rley/parse_forest_visitor_spec.rb +238 -0
  50. data/spec/rley/parser/dotted_item_spec.rb +29 -8
  51. data/spec/rley/parser/gfg_chart_spec.rb +138 -0
  52. data/spec/rley/parser/gfg_earley_parser_spec.rb +918 -0
  53. data/spec/rley/parser/gfg_parsing_spec.rb +565 -0
  54. data/spec/rley/parser/parse_entry_set_spec.rb +179 -0
  55. data/spec/rley/parser/parse_entry_spec.rb +208 -0
  56. data/spec/rley/parser/parse_forest_builder_spec.rb +382 -0
  57. data/spec/rley/parser/parse_forest_factory_spec.rb +81 -0
  58. data/spec/rley/parser/parse_walker_factory_spec.rb +235 -0
  59. data/spec/rley/parser/state_set_spec.rb +4 -0
  60. data/spec/rley/sppf/alternative_node_spec.rb +72 -0
  61. data/spec/rley/sppf/antecedence_graph.rb +87 -0
  62. data/spec/rley/sppf/forest_representation.rb +136 -0
  63. data/spec/rley/sppf/gfg_representation.rb +111 -0
  64. data/spec/rley/sppf/non_terminal_node_spec.rb +64 -0
  65. data/spec/rley/support/ambiguous_grammar_helper.rb +36 -36
  66. data/spec/rley/support/expectation_helper.rb +36 -0
  67. data/spec/rley/support/grammar_helper.rb +28 -0
  68. data/spec/rley/support/grammar_sppf_helper.rb +25 -0
  69. data/spec/rley/syntax/grammar_builder_spec.rb +5 -0
  70. data/spec/rley/syntax/non_terminal_spec.rb +4 -0
  71. data/spec/rley/syntax/terminal_spec.rb +4 -0
  72. metadata +58 -2
@@ -0,0 +1,918 @@
1
+ require_relative '../../spec_helper'
2
+ require 'stringio'
3
+ require_relative '../../../lib/rley/syntax/verbatim_symbol'
4
+ require_relative '../../../lib/rley/syntax/non_terminal'
5
+ require_relative '../../../lib/rley/syntax/production'
6
+ require_relative '../../../lib/rley/syntax/grammar_builder'
7
+ require_relative '../../../lib/rley/parser/token'
8
+ require_relative '../../../lib/rley/parser/dotted_item'
9
+ require_relative '../../../lib/rley/parser/gfg_parsing'
10
+ require_relative '../support/grammar_abc_helper'
11
+ require_relative '../support/ambiguous_grammar_helper'
12
+ require_relative '../support/grammar_helper'
13
+ require_relative '../support/expectation_helper'
14
+
15
+ # Load the class under test
16
+ require_relative '../../../lib/rley/parser/gfg_earley_parser'
17
+
18
+ module Rley # Open this namespace to avoid module qualifier prefixes
19
+ module Parser # Open this namespace to avoid module qualifier prefixes
20
+ describe GFGEarleyParser do
21
+ include GrammarABCHelper # Mix-in module with builder for grammar abc
22
+ include GrammarHelper # Mix-in with method for creating token sequence
23
+ include ExpectationHelper # Mix-in with expectation on parse entry sets
24
+
25
+ # Grammar 1: the "abc" grammar, obtained from the builder
26
+ # supplied by the GrammarABCHelper mix-in.
27
+ let(:grammar_abc) do
28
+ builder = grammar_abc_builder
29
+ builder.grammar
30
+ end
31
+
32
+ let(:grm1_tokens) do
33
+ build_token_sequence(%w(a a b c c), grammar_abc)
34
+ end
35
+
36
+
37
+ # Grammar 2: A simple arithmetic expression language
38
+ # (based on example in article on Earley's algorithm in Wikipedia)
39
+ # P ::= S.
40
+ # S ::= S "+" M.
41
+ # S ::= M.
42
+ # M ::= M "*" T.
43
+ # M ::= T.
44
+ # T ::= an integer number token.
45
+ # Let's create the grammar piece by piece
46
+ let(:nt_P) { Syntax::NonTerminal.new('P') }
47
+ let(:nt_S) { Syntax::NonTerminal.new('S') }
48
+ let(:nt_M) { Syntax::NonTerminal.new('M') }
49
+ let(:nt_T) { Syntax::NonTerminal.new('T') }
50
+ let(:plus) { Syntax::VerbatimSymbol.new('+') }
51
+ let(:star) { Syntax::VerbatimSymbol.new('*') }
52
+ let(:integer) do
53
+ integer_pattern = /[-+]?[0-9]+/ # Decimal notation
54
+ Syntax::Literal.new('integer', integer_pattern)
55
+ end
56
+ let(:prod_P) { Syntax::Production.new(nt_P, [nt_S]) }
57
+ let(:prod_S1) { Syntax::Production.new(nt_S, [nt_S, plus, nt_M]) }
58
+ let(:prod_S2) { Syntax::Production.new(nt_S, [nt_M]) }
59
+ let(:prod_M1) { Syntax::Production.new(nt_M, [nt_M, star, nt_T]) }
60
+ let(:prod_M2) { Syntax::Production.new(nt_M, [nt_T]) }
61
+ let(:prod_T) { Syntax::Production.new(nt_T, [integer]) }
62
+ let(:grammar_expr) do
63
+ all_prods = [prod_P, prod_S1, prod_S2, prod_M1, prod_M2, prod_T]
64
+ Syntax::Grammar.new(all_prods)
65
+ end
66
+
67
+ # Helper method that mimics the output of a tokenizer
68
+ # for the language specified by grammar_expr
69
+ def grm2_tokens()
70
+ input_sequence = [ {'2' => 'integer'}, '+', {'3' => 'integer'},
71
+ '*', {'4' => 'integer'}
72
+ ]
73
+ return build_token_sequence(input_sequence, grammar_expr)
74
+ end
75
+
76
+ # Default instantiation rule
77
+ subject { GFGEarleyParser.new(grammar_abc) }
78
+
79
+ context 'Initialization:' do
80
+ it 'should be created with a grammar' do
81
+ expect { GFGEarleyParser.new(grammar_abc) }.not_to raise_error
82
+ end
83
+
84
+ it 'should know its grammar' do
85
+ expect(subject.grammar).to eq(grammar_abc)
86
+ end
87
+
88
+ it 'should know its dotted items' do
89
+ expect(subject.dotted_items.size).to eq(8)
90
+ end
91
+
92
+ it 'should know its flow graph' do
93
+ expect(subject.gf_graph).to be_kind_of(GFG::GrmFlowGraph)
94
+ end
95
+ end # context
96
+
97
+ context 'Parsing: ' do
98
+ it 'should parse a valid simple input' do
99
+ parse_result = subject.parse(grm1_tokens)
100
+ expect(parse_result.success?).to eq(true)
101
+ # expect(parse_result.ambiguous?).to eq(false)
102
+ ######################
103
+ # Expectation chart[0]:
104
+ expected = [
105
+ '.S | 0', # initialization
106
+ 'S => . A | 0', # start rule
107
+ '.A | 0', # call rule
108
+ 'A => . a A c | 0', # start rule
109
+ 'A => . b | 0' # start rule
110
+ ]
111
+ compare_entry_texts(parse_result.chart[0], expected)
112
+ expected_terminals(parse_result.chart[0], %w(a b))
113
+
114
+ ######################
115
+ # Expectation chart[1]:
116
+ expected = [
117
+ 'A => a . A c | 0', # scan 'a'
118
+ '.A | 1', # call rule
119
+ 'A => . a A c | 1', # start rule
120
+ 'A => . b | 1' # start rule
121
+ ]
122
+ entry_set_1 = parse_result.chart[1]
123
+ expect(entry_set_1.entries.size).to eq(4)
124
+ compare_entry_texts(entry_set_1, expected)
125
+ expected_terminals(parse_result.chart[1], %w(a b))
126
+
127
+ ######################
128
+ # Expectation chart[2]:
129
+ expected = [
130
+ 'A => a . A c | 1', # scan 'a'
131
+ '.A | 2', # call rule
132
+ 'A => . a A c | 2', # start rule
133
+ 'A => . b | 2' # start rule
134
+ ]
135
+ entry_set_2 = parse_result.chart[2]
136
+ expect(entry_set_2.entries.size).to eq(4)
137
+ compare_entry_texts(entry_set_2, expected)
138
+ expected_terminals(parse_result.chart[2], %w(a b))
139
+
140
+ ######################
141
+ # Expectation chart[3]:
142
+ expected = [
143
+ 'A => b . | 2', # scan 'b'
144
+ 'A. | 2', # exit rule
145
+ 'A => a A . c | 1', # end rule
146
+ ]
147
+ entry_set_3 = parse_result.chart[3]
148
+ expect(entry_set_3.entries.size).to eq(3)
149
+ compare_entry_texts(entry_set_3, expected)
150
+ expected_terminals(parse_result.chart[3], %w(c))
151
+
152
+
153
+ ######################
154
+ # Expectation chart[4]:
155
+ expected = [
156
+ 'A => a A c . | 1', # scan 'c'
157
+ 'A. | 1', # exit rule
158
+ 'A => a A . c | 0' # end rule
159
+ ]
160
+ entry_set_4 = parse_result.chart[4]
161
+ expect(entry_set_4.entries.size).to eq(3)
162
+ compare_entry_texts(entry_set_4, expected)
163
+ expected_terminals(parse_result.chart[4], %w(c))
164
+
165
+ ######################
166
+ # Expectation chart[5]:
167
+ expected = [
168
+ 'A => a A c . | 0', # scan 'c'
169
+ 'A. | 0', # exit rule
170
+ 'S => A . | 0', # end rule
171
+ 'S. | 0' # exit rule
172
+ ]
173
+ entry_set_5 = parse_result.chart[5]
174
+ expect(entry_set_5.entries.size).to eq(4)
175
+ compare_entry_texts(entry_set_5, expected)
176
+ end
177
+ =begin
178
+ it 'should trace a parse with level 1' do
179
+ # Substitute temporarily $stdout by a StringIO
180
+ prev_ostream = $stdout
181
+ $stdout = StringIO.new('', 'w')
182
+
183
+ trace_level = 1
184
+ subject.parse(grm1_tokens, trace_level)
185
+ expectations = <<-SNIPPET
186
+ ['a', 'a', 'b', 'c', 'c']
187
+ |. a . a . b . c . c .|
188
+ |> . . . . .| [0:0] S => . A
189
+ |> . . . . .| [0:0] A => . 'a' A 'c'
190
+ |> . . . . .| [0:0] A => . 'b'
191
+ |[---] . . . .| [0:1] A => 'a' . A 'c'
192
+ |. > . . . .| [1:1] A => . 'a' A 'c'
193
+ |. > . . . .| [1:1] A => . 'b'
194
+ |. [---] . . .| [1:2] A => 'a' . A 'c'
195
+ |. . > . . .| [2:2] A => . 'a' A 'c'
196
+ |. . > . . .| [2:2] A => . 'b'
197
+ |. . [---] . .| [2:3] A => 'b' .
198
+ |. [-------> . .| [1:3] A => 'a' A . 'c'
199
+ |. . . [---] .| [3:4] A => 'a' A 'c' .
200
+ |[---------------> .| [0:4] A => 'a' A . 'c'
201
+ |. . . . [---]| [4:5] A => 'a' A 'c' .
202
+ |[===================]| [0:5] S => A .
203
+ SNIPPET
204
+ expect($stdout.string).to eq(expectations)
205
+
206
+ # Restore standard output stream
207
+ $stdout = prev_ostream
208
+ end
209
+ =end
210
+
211
+ it 'should parse a valid simple expression' do
212
+ instance = GFGEarleyParser.new(grammar_expr)
213
+ parse_result = instance.parse(grm2_tokens)
214
+ expect(parse_result.success?).to eq(true)
215
+ # expect(parse_result.ambiguous?).to eq(false)
216
+
217
+ ###################### S(0): . 2 + 3 * 4
218
+ # Expectation chart[0]:
219
+ expected = [
220
+ '.P | 0', # Initialization
221
+ 'P => . S | 0', # start rule
222
+ '.S | 0', # call rule
223
+ "S => . S '+' M | 0", # start rule
224
+ 'S => . M | 0', # start rule
225
+ '.M | 0', # call rule
226
+ "M => . M '*' T | 0", # start rule
227
+ 'M => . T | 0', # start rule
228
+ '.T | 0', # call rule
229
+ 'T => . integer | 0' # start rule
230
+ ]
231
+ compare_entry_texts(parse_result.chart[0], expected)
232
+
233
+
234
+ ###################### S(1): 2 . + 3 * 4
235
+ # Expectation chart[1]:
236
+ expected = [
237
+ 'T => integer . | 0', # scan '2'
238
+ 'T. | 0', # exit rule
239
+ 'M => T . | 0', # end rule
240
+ 'M. | 0', # exit rule
241
+ 'S => M . | 0', # end rule
242
+ "M => M . '*' T | 0", # end rule
243
+ 'S. | 0', # exit rule
244
+ 'P => S . | 0', # end rule
245
+ "S => S . '+' M | 0", # end rule
246
+ 'P. | 0' # exit rule
247
+ ]
248
+ compare_entry_texts(parse_result.chart[1], expected)
249
+
250
+
251
+ ###################### S(2): 2 + . 3 * 4
252
+ # Expectation chart[2]:
253
+ expected = [
254
+ "S => S '+' . M | 0", # scan '+'
255
+ '.M | 2', # call rule
256
+ "M => . M '*' T | 2", # start rule
257
+ 'M => . T | 2', # start rule
258
+ '.T | 2', # call rule
259
+ 'T => . integer | 2' # start rule
260
+ ]
261
+ compare_entry_texts(parse_result.chart[2], expected)
262
+
263
+
264
+ ###################### S(3): 2 + 3 . * 4
265
+ # Expectation chart[3]:
266
+ expected = [
267
+ 'T => integer . | 2', # scan '3'
268
+ 'T. | 2', # exit rule
269
+ 'M => T . | 2', # end rule
270
+ 'M. | 2', # exit rule
271
+ "S => S '+' M . | 0", # end rule
272
+ "M => M . '*' T | 2", # end rule
273
+ 'S. | 0', # exit rule
274
+ 'P => S . | 0', # end rule
275
+ "S => S . '+' M | 0", # end rule
276
+ 'P. | 0' # exit rule
277
+ ]
278
+ compare_entry_texts(parse_result.chart[3], expected)
279
+
280
+ ###################### S(4): 2 + 3 * . 4
281
+ # Expectation chart[4]:
282
+ expected = [
283
+ "M => M '*' . T | 2", # scan '*'
284
+ '.T | 4', # call rule
285
+ 'T => . integer | 4' # entry rule
286
+ ]
287
+ compare_entry_texts(parse_result.chart[4], expected)
288
+
289
+ ###################### S(5): 2 + 3 * 4 .
290
+ # Expectation chart[5]:
291
+ expected = [
292
+ 'T => integer . | 4', # scan '4'
293
+ 'T. | 4', # exit rule
294
+ "M => M '*' T . | 2", # end rule
295
+ 'M. | 2', # exit rule
296
+ "S => S '+' M . | 0", # end rule
297
+ "M => M . '*' T | 2", # end rule
298
+ 'S. | 0', # exit rule
299
+ 'P => S . | 0', # end rule
300
+ "S => S . '+' M | 0", # end rule
301
+ 'P. | 0' # exit rule
302
+ ]
303
+ compare_entry_texts(parse_result.chart[5], expected)
304
+ end
305
+
306
+ it 'should parse a nullable grammar' do
307
+ # Simple but problematic grammar for the original Earley parser
308
+ # (based on example in D. Grune, C. Jacobs "Parsing Techniques" book)
309
+ # Ss => A A 'x';
310
+ # A => ;
311
+ t_x = Syntax::VerbatimSymbol.new('x')
312
+
313
+ builder = Syntax::GrammarBuilder.new
314
+ builder.add_terminals(t_x)
315
+ builder.add_production('Ss' => %w(A A x))
316
+ builder.add_production('A' => [])
317
+ tokens = [ Token.new('x', t_x) ]
318
+
319
+ instance = GFGEarleyParser.new(builder.grammar)
320
+ expect { instance.parse(tokens) }.not_to raise_error
321
+ parse_result = instance.parse(tokens)
322
+ expect(parse_result.success?).to eq(true)
323
+ ###################### S(0): . x
324
+ # Expectation chart[0]:
325
+ expected = [
326
+ '.Ss | 0', # Initialization
327
+ "Ss => . A A 'x' | 0", # start rule
328
+ '.A | 0', # call rule
329
+ 'A => . | 0', # start rule
330
+ 'A. | 0', # exit rule
331
+ "Ss => A . A 'x' | 0", # end rule
332
+ "Ss => A A . 'x' | 0" # end rule
333
+ ]
334
+ compare_entry_texts(parse_result.chart[0], expected)
335
+
336
+ ###################### S(1): x .
337
+ # Expectation chart[1]:
338
+ expected = [
339
+ "Ss => A A 'x' . | 0", # scan 'x'
340
+ 'Ss. | 0' # exit rule
341
+ ]
342
+ compare_entry_texts(parse_result.chart[1], expected)
343
+ end
344
+
345
+ it 'should parse an ambiguous grammar (I)' do
346
+ # Grammar 3: An ambiguous arithmetic expression language
347
+ # (based on example in article on Earley's algorithm in Wikipedia)
348
+ # P => S.
349
+ # S => S "+" S.
350
+ # S => S "*" S.
351
+ # S => L.
352
+ # L => an integer number token.
353
+ t_int = Syntax::Literal.new('integer', /[-+]?\d+/)
354
+ t_plus = Syntax::VerbatimSymbol.new('+')
355
+ t_star = Syntax::VerbatimSymbol.new('*')
356
+
357
+ builder = Syntax::GrammarBuilder.new
358
+ builder.add_terminals(t_int, t_plus, t_star)
359
+ builder.add_production('P' => 'S')
360
+ builder.add_production('S' => %w(S + S))
361
+ builder.add_production('S' => %w(S * S))
362
+ builder.add_production('S' => 'L')
363
+ builder.add_production('L' => 'integer')
364
+ input_sequence = [
365
+ {'2' => 'integer'},
366
+ '+',
367
+ {'3' => 'integer'},
368
+ '*',
369
+ {'4' => 'integer'}
370
+ ]
371
+ tokens = build_token_sequence(input_sequence, builder.grammar)
372
+ instance = GFGEarleyParser.new(builder.grammar)
373
+ expect { instance.parse(tokens) }.not_to raise_error
374
+ parse_result = instance.parse(tokens)
375
+ expect(parse_result.success?).to eq(true)
376
+ # expect(parse_result.ambiguous?).to eq(true)
377
+
378
+ ###################### S(0): . 2 + 3 * 4
379
+ # Expectation chart[0]:
380
+ expected = [
381
+ '.P | 0', # Initialization
382
+ 'P => . S | 0', # start rule
383
+ '.S | 0', # call rule
384
+ "S => . S '+' S | 0", # entry rule
385
+ "S => . S '*' S | 0", # entry rule
386
+ 'S => . L | 0', # entry rule
387
+ '.L | 0', # call rule
388
+ 'L => . integer | 0' # entry rule
389
+ ]
390
+ compare_entry_texts(parse_result.chart[0], expected)
391
+
392
+ ###################### S(1): 2 . + 3 * 4
393
+ # Expectation chart[1]:
394
+ expected = [
395
+ 'L => integer . | 0', # scan '2'
396
+ 'L. | 0', # exit rule
397
+ 'S => L . | 0', # end rule
398
+ 'S. | 0', # exit rule
399
+ 'P => S . | 0', # end rule
400
+ "S => S . '+' S | 0", # end rule
401
+ "S => S . '*' S | 0", # end rule
402
+ 'P. | 0' # exit rule
403
+ ]
404
+ compare_entry_texts(parse_result.chart[1], expected)
405
+
406
+ ###################### S(2): 2 + . 3 * 4
407
+ # Expectation chart[2]:
408
+ expected = [
409
+ "S => S '+' . S | 0", # scan '+'
410
+ '.S | 2', # call rule
411
+ "S => . S '+' S | 2", # entry rule
412
+ "S => . S '*' S | 2", # entry rule
413
+ 'S => . L | 2', # entry rule
414
+ '.L | 2', # call rule
415
+ 'L => . integer | 2' # entry rule
416
+ ]
417
+ compare_entry_texts(parse_result.chart[2], expected)
418
+
419
+ ###################### S(3): 2 + 3 . * 4
420
+ # Expectation chart[3]:
421
+ expected = [
422
+ 'L => integer . | 2', # scan '3'
423
+ 'L. | 2', # exit rule
424
+ 'S => L . | 2', # end rule
425
+ 'S. | 2', # exit rule
426
+ "S => S '+' S . | 0", # end rule
427
+ "S => S . '+' S | 2", # end rule
428
+ "S => S . '*' S | 2", # end rule
429
+ 'S. | 0', # exit rule
430
+ 'P => S . | 0', # end rule
431
+ "S => S . '+' S | 0", # end rule
432
+ "S => S . '*' S | 0", # end rule
433
+ 'P. | 0', # exit rule
434
+ ]
435
+ compare_entry_texts(parse_result.chart[3], expected)
436
+
437
+ ###################### S(4): 2 + 3 * . 4
438
+ # Expectation chart[4]:
439
+ expected = [
440
+ "S => S '*' . S | 2", # scan '*'
441
+ "S => S '*' . S | 0", # scan '*'
442
+ '.S | 4', # call rule
443
+ "S => . S '+' S | 4", # entry rule
444
+ "S => . S '*' S | 4", # entry rule
445
+ 'S => . L | 4', # entry rule
446
+ '.L | 4', # call rule
447
+ 'L => . integer | 4' # entry rule
448
+ ]
449
+ compare_entry_texts(parse_result.chart[4], expected)
450
+
451
+ ###################### S(5): 2 + 3 * 4 .
452
+ # Expectation chart[5]:
453
+ expected = [
454
+ 'L => integer . | 4', # scan '4'
455
+ 'L. | 4', # exit rule
456
+ 'S => L . | 4', # end rule
457
+ 'S. | 4', # exit rule
458
+ "S => S '*' S . | 2", # end rule
459
+ "S => S '*' S . | 0", # end rule
460
+ "S => S . '+' S | 4", # end rule
461
+ "S => S . '*' S | 4", # end rule
462
+ 'S. | 2', # exit rule
463
+ 'S. | 0', # exit rule
464
+ "S => S '+' S . | 0", # end rule
465
+ "S => S . '+' S | 2", # end rule
466
+ "S => S . '*' S | 2", # end rule
467
+ 'P => S . | 0', # end rule
468
+ "S => S . '+' S | 0", # end rule
469
+ "S => S . '*' S | 0", # end rule
470
+ 'P. | 0' # exit rule
471
+ ]
472
+ compare_entry_texts(parse_result.chart[5], expected)
473
+
474
+ expected_antecedents = {
475
+ 'L => integer . | 4' => ['L => . integer | 4'],
476
+ 'L. | 4' => ['L => integer . | 4'],
477
+ 'S => L . | 4' => ['L. | 4'],
478
+ 'S. | 4' => ['S => L . | 4'],
479
+ "S => S '*' S . | 2" => ['S. | 4'],
480
+ "S => S '*' S . | 0" => ['S. | 4'],
481
+ "S => S . '+' S | 4" => ['S. | 4'],
482
+ "S => S . '*' S | 4" => ['S. | 4'],
483
+ 'S. | 2' => ["S => S '*' S . | 2"],
484
+ 'S. | 0' => ["S => S '*' S . | 0", "S => S '+' S . | 0"],
485
+ "S => S '+' S . | 0" => ['S. | 2'],
486
+ "S => S . '+' S | 2" => ['S. | 2'],
487
+ "S => S . '*' S | 2" => ['S. | 2'],
488
+ 'P => S . | 0' => ['S. | 0'],
489
+ "S => S . '+' S | 0" => ['S. | 0'],
490
+ "S => S . '*' S | 0" => ['S. | 0'],
491
+ 'P. | 0' => ['P => S . | 0']
492
+ }
493
+ check_antecedence(parse_result, 5, expected_antecedents)
494
+ end
495
+
496
+ it 'should parse an ambiguous grammar (II)' do
497
+ extend(AmbiguousGrammarHelper)
498
+ grammar = grammar_builder.grammar
499
+ instance = GFGEarleyParser.new(grammar)
500
+ tokens = tokenize('abc + def + ghi', grammar)
501
+ expect { instance.parse(tokens) }.not_to raise_error
502
+ parse_result = instance.parse(tokens)
503
+ expect(parse_result.success?).to eq(true)
504
+ # expect(parse_result.ambiguous?).to eq(true)
505
+
506
+ ###################### S(0): . abc + def + ghi
507
+ # Expectation chart[0]:
508
+ expected = [
509
+ '.S | 0', # Initialization
510
+ 'S => . E | 0', # start rule
511
+ '.E | 0', # call rule
512
+ 'E => . E + E | 0', # start rule
513
+ 'E => . id | 0' # start rule
514
+ ]
515
+ compare_entry_texts(parse_result.chart[0], expected)
516
+
517
+ ###################### S(1): abc . + def + ghi
518
+ # Expectation chart[1]:
519
+ expected = [
520
+ 'E => id . | 0', # scan 'abc'
521
+ 'E. | 0', # exit rule
522
+ 'S => E . | 0', # end rule
523
+ 'E => E . + E | 0', # end rule
524
+ 'S. | 0' # exit rule
525
+ ]
526
+ compare_entry_texts(parse_result.chart[1], expected)
527
+
528
+ ###################### S(2): abc + . def + ghi
529
+ # Expectation chart[2]:
530
+ expected = [
531
+ 'E => E + . E | 0', # Scan '+'
532
+ '.E | 2', # call rule
533
+ 'E => . E + E | 2', # entry rule
534
+ 'E => . id | 2' # entry rule
535
+ ]
536
+ compare_entry_texts(parse_result.chart[2], expected)
537
+
538
+ ###################### S(3): abc + def . + ghi
539
+ # Expectation chart[3]:
540
+ expected = [
541
+ 'E => id . | 2', # Scan 'def'
542
+ 'E. | 2', # exit rule
543
+ 'E => E + E . | 0', # end rule
544
+ 'E => E . + E | 2', # end rule
545
+ 'E. | 0', # exit rule
546
+ 'S => E . | 0', # end rule
547
+ 'E => E . + E | 0', # end rule
548
+ 'S. | 0' # exit rule
549
+ ]
550
+ compare_entry_texts(parse_result.chart[3], expected)
551
+
552
+ ###################### S(4): abc + def + . ghi
553
+ # Expectation chart[4]:
554
+ expected = [
555
+ 'E => E + . E | 2', # Scan '+'
556
+ 'E => E + . E | 0', # Scan '+'
557
+ '.E | 4', # call rule
558
+ 'E => . E + E | 4', # start rule
559
+ 'E => . id | 4' # start rule
560
+ ]
561
+ compare_entry_texts(parse_result.chart[4], expected)
562
+
563
+ ###################### S(5): abc + def + ghi .
564
+ # Expectation chart[5]:
565
+ expected = [
566
+ 'E => id . | 4', # Scan 'ghi'
567
+ 'E. | 4', # exit rule
568
+ 'E => E + E . | 2', # end rule
569
+ 'E => E + E . | 0', # end rule
570
+ 'E => E . + E | 4', # end rule
571
+ 'E. | 2', # exit rule
572
+ 'E. | 0', # exit rule
573
+ 'E => E . + E | 2', # end rule
574
+ 'S => E . | 0', # end rule
575
+ 'E => E . + E | 0', # end rule
576
+ 'S. | 0', # exit rule
577
+ ]
578
+ compare_entry_texts(parse_result.chart[5], expected)
579
+ end
580
+
581
+ it 'should parse an invalid simple input' do
582
+ # Parse an erroneous input (b is missing)
583
+ wrong = build_token_sequence(%w(a a c c), grammar_abc)
584
+
585
+ err_msg = <<-MSG
586
+ Syntax error at or near token 3>>>c<<<:
587
+ Expected one of: ['a', 'b'], found a 'c' instead.
588
+ MSG
589
+ err = StandardError
590
+ expect { subject.parse(wrong) }
591
+ .to raise_error(err, err_msg.chomp)
592
+ end
593
+
594
+ it 'should parse a common sample' do
595
+ # Grammar based on example found in paper of K. Pingali, G. Bilardi:
596
+ # "A Graphical Model for Context-Free Grammar Parsing"
597
+ t_int = Syntax::Literal.new('int', /[-+]?\d+/)
598
+ t_plus = Syntax::VerbatimSymbol.new('+')
599
+ t_lparen = Syntax::VerbatimSymbol.new('(')
600
+ t_rparen = Syntax::VerbatimSymbol.new(')')
601
+
602
+ builder = Syntax::GrammarBuilder.new
603
+ builder.add_terminals(t_int, t_plus, t_lparen, t_rparen)
604
+ builder.add_production('S' => 'E')
605
+ builder.add_production('E' => 'int')
606
+ builder.add_production('E' => %w[( E + E )])
607
+ builder.add_production('E' => %w( E + E ))
608
+ input_sequence = [
609
+ {'7' => 'int'},
610
+ '+',
611
+ {'8' => 'int'},
612
+ '+',
613
+ {'9' => 'int'}
614
+ ]
615
+ tokens = build_token_sequence(input_sequence, builder.grammar)
616
+ instance = GFGEarleyParser.new(builder.grammar)
617
+ parse_result = instance.parse(tokens)
618
+ expect(parse_result.success?).to eq(true)
619
+ ###################### S(0) == . 7 + 8 + 9
620
+ # Expectation chart[0]:
621
+ expected = [
622
+ '.S | 0', # initialization
623
+ 'S => . E | 0', # start rule
624
+ '.E | 0', # call rule
625
+ 'E => . int | 0', # start rule
626
+ "E => . '(' E '+' E ')' | 0", # start rule
627
+ "E => . E '+' E | 0" # start rule
628
+ ]
629
+ compare_entry_texts(parse_result.chart[0], expected)
630
+
631
+ ###################### S(1) == 7 . + 8 + 9
632
+ # Expectation chart[1]:
633
+ expected = [
634
+ 'E => int . | 0', # scan '7'
635
+ 'E. | 0', # exit rule
636
+ 'S => E . | 0', # end rule
637
+ "E => E . '+' E | 0", # end rule
638
+ 'S. | 0' # exit rule
639
+ ]
640
+ compare_entry_texts(parse_result.chart[1], expected)
641
+
642
+ ###################### S(2) == 7 + . 8 + 9
643
+ # Expectation chart[2]:
644
+ expected = [
645
+ "E => E '+' . E | 0", # scan '+'
646
+ '.E | 2', # call rule
647
+ 'E => . int | 2', # start rule
648
+ "E => . '(' E '+' E ')' | 2", # start rule
649
+ "E => . E '+' E | 2" # start rule
650
+ ]
651
+ compare_entry_texts(parse_result.chart[2], expected)
652
+
653
+ ###################### S(3) == 7 + 8 . + 9
654
+ # Expectation chart[3]:
655
+ expected = [
656
+ 'E => int . | 2', # scan '8'
657
+ 'E. | 2', # exit rule
658
+ "E => E '+' E . | 0", # end rule
659
+ "E => E . '+' E | 2", # end rule
660
+ 'E. | 0', # exit rule
661
+ 'S => E . | 0', # end rule
662
+ "E => E . '+' E | 0", # end rule
663
+ 'S. | 0' # exit rule
664
+ ]
665
+ compare_entry_texts(parse_result.chart[3], expected)
666
+
667
+ ###################### S(4) == 7 + 8 + . 9
668
+ # Expectation chart[4]:
669
+ expected = [
670
+ "E => E '+' . E | 2", # scan '+'
671
+ "E => E '+' . E | 0", # scan '+'
672
+ '.E | 4', # call rule
673
+ 'E => . int | 4', # start rule
674
+ "E => . '(' E '+' E ')' | 4", # start rule
675
+ "E => . E '+' E | 4" # start rule
676
+ ]
677
+ compare_entry_texts(parse_result.chart[4], expected)
678
+
679
+ ###################### S(5) == 7 + 8 + 9 .
680
+ # Expectation chart[5]:
681
+ expected = [
682
+ 'E => int . | 4', # scan '9'
683
+ 'E. | 4', # exit rule
684
+ "E => E '+' E . | 2", # end rule
685
+ "E => E '+' E . | 0", # end rule
686
+ "E => E . '+' E | 4", # end rule (not shown in paper)
687
+ 'E. | 2', # exit rule
688
+ 'E. | 0', # exit rule
689
+ "E => E . '+' E | 2", # end rule
690
+ 'S => E . | 0', # end rule
691
+ "E => E . '+' E | 0", # end rule
692
+ 'S. | 0'
693
+ ]
694
+ compare_entry_texts(parse_result.chart[5], expected)
695
+ end
696
+
697
+ it 'should parse a grammar with nullable nonterminals' do
698
+ # Grammar 4: A grammar with nullable nonterminal
699
+ # based on example from "Parsing Techniques" book
700
+ # (D. Grune, C. Jacobs)
701
+ # Z ::= E.
702
+ # E ::= E Q F.
703
+ # E ::= F.
704
+ # F ::= a.
705
+ # Q ::= *.
706
+ # Q ::= /.
707
+ # Q ::=.
708
+ t_a = Syntax::VerbatimSymbol.new('a')
709
+ t_star = Syntax::VerbatimSymbol.new('*')
710
+ t_slash = Syntax::VerbatimSymbol.new('/')
711
+
712
+ builder = Syntax::GrammarBuilder.new
713
+ builder.add_terminals(t_a, t_star, t_slash)
714
+ builder.add_production('Z' => 'E')
715
+ builder.add_production('E' => %w(E Q F))
716
+ builder.add_production('E' => 'F')
717
+ builder.add_production('F' => t_a)
718
+ builder.add_production('Q' => t_star)
719
+ builder.add_production('Q' => t_slash)
720
+ builder.add_production('Q' => []) # Empty production
721
+
722
+ tokens = build_token_sequence(%w(a a / a), builder.grammar)
723
+ instance = GFGEarleyParser.new(builder.grammar)
724
+ expect { instance.parse(tokens) }.not_to raise_error
725
+ parse_result = instance.parse(tokens)
726
+ expect(parse_result.success?).to eq(true)
727
+
728
+ ###################### S(0) == . a a / a
729
+ # Expectation chart[0]:
730
+ expected = [
731
+ '.Z | 0', # initialization
732
+ 'Z => . E | 0', # start rule
733
+ '.E | 0', # call rule
734
+ 'E => . E Q F | 0', # start rule
735
+ 'E => . F | 0', # start rule
736
+ '.F | 0', # call rule
737
+ "F => . 'a' | 0" # start rule
738
+ ]
739
+ compare_entry_texts(parse_result.chart[0], expected)
740
+
741
+ ###################### S(1) == a . a / a
742
+ # Expectation chart[1]:
743
+ expected = [
744
+ "F => 'a' . | 0", # scan 'a'
745
+ 'F. | 0', # exit rule
746
+ 'E => F . | 0', # end rule
747
+ 'E. | 0', # exit rule
748
+ 'Z => E . | 0', # end rule
749
+ 'E => E . Q F | 0', # end rule
750
+ 'Z. | 0', # exit rule
751
+ '.Q | 1', # call rule
752
+ "Q => . '*' | 1", # start rule
753
+ "Q => . '/' | 1", # start rule
754
+ 'Q => . | 1', # start rule
755
+ 'Q. | 1', # exit rule
756
+ 'E => E Q . F | 0', # end rule
757
+ '.F | 1', # call rule
758
+ "F => . 'a' | 1" # start rule
759
+ ]
760
+ compare_entry_texts(parse_result.chart[1], expected)
761
+
762
+ ###################### S(2) == a a . / a
763
+ # Expectation chart[2]:
764
+ expected = [
765
+ "F => 'a' . | 1", # scan 'a'
766
+ 'F. | 1', # exit rule
767
+ 'E => E Q F . | 0', # end rule
768
+ 'E. | 0', # exit rule
769
+ 'Z => E . | 0', # end rule
770
+ 'E => E . Q F | 0', # end rule
771
+ 'Z. | 0', # exit rule
772
+ '.Q | 2', # call rule
773
+ "Q => . '*' | 2", # start rule
774
+ "Q => . '/' | 2", # start rule
775
+ 'Q => . | 2', # start rule
776
+ 'Q. | 2', # exit rule
777
+ 'E => E Q . F | 0', # end rule
778
+ '.F | 2', # call rule
779
+ "F => . 'a' | 2" # start rule
780
+ ]
781
+ compare_entry_texts(parse_result.chart[2], expected)
782
+
783
+
784
+ ###################### S(3) == a a / . a
785
+ # Expectation chart[3]:
786
+ expected = [
787
+ "Q => '/' . | 2", # scan '/'
788
+ 'Q. | 2', # exit rule
789
+ 'E => E Q . F | 0', # end rule
790
+ '.F | 3', # call rule
791
+ "F => . 'a' | 3" # entry rule
792
+ ]
793
+ compare_entry_texts(parse_result.chart[3], expected)
794
+
795
+
796
+ ###################### S(4) == a a / a .
797
+ # Expectation chart[4]:
798
+ expected = [
799
+ "F => 'a' . | 3", # scan 'a'
800
+ 'F. | 3', # exit rule
801
+ 'E => E Q F . | 0', # end rule
802
+ 'E. | 0', # exit rule
803
+ 'Z => E . | 0', # end rule
804
+ 'E => E . Q F | 0', # end rule
805
+ 'Z. | 0', # exit rule
806
+ '.Q | 4', # call rule
807
+ "Q => . '*' | 4", # start rule
808
+ "Q => . '/' | 4", # start rule
809
+ 'Q => . | 4', # start rule
810
+ 'Q. | 4', # exit rule
811
+ 'E => E Q . F | 0', # end rule
812
+ '.F | 4', # call rule
813
+ "F => . 'a' | 4" # entry rule
814
+ ]
815
+ compare_entry_texts(parse_result.chart[4], expected)
816
+ end
817
+
818
+ it 'should parse a right recursive grammar' do
819
+ # Simple right recursive grammar
820
+ # based on example in D. Grune, C. Jacobs "Parsing Techniques" book
821
+ # pp. 224 et sq.
822
+ # S => a S;
823
+ # S => ;
824
+ # This grammar requires a time that is quadratic in the number of
825
+ # input tokens
826
+
827
+ t_x = Syntax::VerbatimSymbol.new('x')
828
+
829
+ builder = Syntax::GrammarBuilder.new
830
+ builder.add_terminals('a')
831
+ builder.add_production('S' => %w(a S))
832
+ builder.add_production('S' => [])
833
+ grammar = builder.grammar
834
+ tokens = build_token_sequence(%w(a a a a), grammar)
835
+
836
+ instance = GFGEarleyParser.new(grammar)
837
+ parse_result = instance.parse(tokens)
838
+ expect(parse_result.success?).to eq(true)
839
+ ###################### S(0): . a a a a
840
+ # Expectation chart[0]:
841
+ expected = [
842
+ '.S | 0', # Initialization
843
+ 'S => . a S | 0', # start rule
844
+ 'S => . | 0', # start rule
845
+ 'S. | 0' # exit rule
846
+ ]
847
+ compare_entry_texts(parse_result.chart[0], expected)
848
+
849
+ ###################### S(1): a . a a a
850
+ # Expectation chart[1]:
851
+ expected = [
852
+ 'S => a . S | 0', # scan 'a'
853
+ '.S | 1', # call rule
854
+ 'S => . a S | 1', # start rule
855
+ 'S => . | 1', # start rule
856
+ 'S. | 1', # exit rule
857
+ 'S => a S . | 0' # end rule
858
+ ]
859
+ compare_entry_texts(parse_result.chart[1], expected)
860
+
861
+ ###################### S(2): a a . a a
862
+ # Expectation chart[2]:
863
+ expected = [
864
+ 'S => a . S | 1', # scan 'a'
865
+ '.S | 2', # call rule
866
+ 'S => . a S | 2', # start rule
867
+ 'S => . | 2', # start rule
868
+ 'S. | 2', # exit rule
869
+ 'S => a S . | 1', # end rule
870
+ 'S. | 1', # exit rule
871
+ 'S => a S . | 0', # end rule
872
+ 'S. | 0' # exit rule
873
+ ]
874
+ compare_entry_texts(parse_result.chart[2], expected)
875
+
876
+ ###################### S(3): a a a . a
877
+ # Expectation chart[3]:
878
+ expected = [
879
+ 'S => a . S | 2', # scan 'a'
880
+ '.S | 3', # call rule
881
+ 'S => . a S | 3', # start rule
882
+ 'S => . | 3', # start rule
883
+ 'S. | 3', # exit rule
884
+ 'S => a S . | 2', # end rule
885
+ 'S. | 2', # exit rule
886
+ 'S => a S . | 1', # end rule
887
+ 'S. | 1', # exit rule
888
+ 'S => a S . | 0', # end rule
889
+ 'S. | 0' # exit rule
890
+ ]
891
+ compare_entry_texts(parse_result.chart[3], expected)
892
+
893
+ ###################### S(4): a a a a .
894
+ # Expectation chart[4]:
895
+ expected = [
896
+ 'S => a . S | 3', # scan 'a'
897
+ '.S | 4', # call rule
898
+ 'S => . a S | 4', # start rule
899
+ 'S => . | 4', # start rule
900
+ 'S. | 4', # exit rule
901
+ 'S => a S . | 3', # end rule
902
+ 'S. | 3', # exit rule
903
+ 'S => a S . | 2', # end rule
904
+ 'S. | 2', # exit rule
905
+ 'S => a S . | 1', # end rule
906
+ 'S. | 1', # exit rule
907
+ 'S => a S . | 0', # end rule
908
+ 'S. | 0' # exit rule
909
+ ]
910
+ compare_entry_texts(parse_result.chart[4], expected)
911
+ end
912
+
913
+ end # context
914
+ end # describe
915
+ end # module
916
+ end # module
917
+
918
+ # End of file