rley 0.7.06 → 0.8.01

Sign up to get free protection for your applications and to get access to all the features.
Files changed (167) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +362 -62
  3. data/.travis.yml +6 -6
  4. data/CHANGELOG.md +20 -4
  5. data/LICENSE.txt +1 -1
  6. data/README.md +7 -7
  7. data/examples/NLP/engtagger.rb +193 -190
  8. data/examples/NLP/nano_eng/nano_en_demo.rb +7 -11
  9. data/examples/NLP/nano_eng/nano_grammar.rb +21 -21
  10. data/examples/NLP/pico_en_demo.rb +2 -2
  11. data/examples/data_formats/JSON/cli_options.rb +1 -1
  12. data/examples/data_formats/JSON/json_ast_builder.rb +21 -27
  13. data/examples/data_formats/JSON/json_ast_nodes.rb +12 -21
  14. data/examples/data_formats/JSON/json_demo.rb +1 -2
  15. data/examples/data_formats/JSON/json_grammar.rb +13 -13
  16. data/examples/data_formats/JSON/json_lexer.rb +8 -8
  17. data/examples/data_formats/JSON/json_minifier.rb +1 -1
  18. data/examples/general/calc_iter1/calc_ast_builder.rb +13 -10
  19. data/examples/general/calc_iter1/calc_ast_nodes.rb +23 -37
  20. data/examples/general/calc_iter1/calc_grammar.rb +7 -6
  21. data/examples/general/calc_iter1/calc_lexer.rb +6 -4
  22. data/examples/general/calc_iter1/spec/calculator_spec.rb +5 -5
  23. data/examples/general/calc_iter2/calc_ast_builder.rb +5 -3
  24. data/examples/general/calc_iter2/calc_ast_nodes.rb +27 -43
  25. data/examples/general/calc_iter2/calc_grammar.rb +12 -12
  26. data/examples/general/calc_iter2/calc_lexer.rb +11 -10
  27. data/examples/general/calc_iter2/spec/calculator_spec.rb +26 -26
  28. data/examples/general/left.rb +2 -2
  29. data/examples/general/right.rb +2 -2
  30. data/lib/rley.rb +1 -1
  31. data/lib/rley/base/dotted_item.rb +28 -31
  32. data/lib/rley/base/grm_items_builder.rb +6 -0
  33. data/lib/rley/constants.rb +2 -2
  34. data/lib/rley/engine.rb +22 -25
  35. data/lib/rley/formatter/asciitree.rb +3 -3
  36. data/lib/rley/formatter/bracket_notation.rb +1 -8
  37. data/lib/rley/formatter/debug.rb +6 -6
  38. data/lib/rley/formatter/json.rb +2 -2
  39. data/lib/rley/gfg/call_edge.rb +1 -1
  40. data/lib/rley/gfg/edge.rb +5 -5
  41. data/lib/rley/gfg/end_vertex.rb +2 -6
  42. data/lib/rley/gfg/epsilon_edge.rb +1 -5
  43. data/lib/rley/gfg/grm_flow_graph.rb +27 -23
  44. data/lib/rley/gfg/item_vertex.rb +10 -10
  45. data/lib/rley/gfg/non_terminal_vertex.rb +4 -4
  46. data/lib/rley/gfg/scan_edge.rb +1 -1
  47. data/lib/rley/gfg/shortcut_edge.rb +2 -2
  48. data/lib/rley/gfg/start_vertex.rb +4 -8
  49. data/lib/rley/gfg/vertex.rb +43 -39
  50. data/lib/rley/interface.rb +16 -0
  51. data/lib/rley/lexical/token_range.rb +6 -6
  52. data/lib/rley/notation/all_notation_nodes.rb +2 -0
  53. data/lib/rley/notation/ast_builder.rb +191 -0
  54. data/lib/rley/notation/ast_node.rb +44 -0
  55. data/lib/rley/notation/ast_visitor.rb +113 -0
  56. data/lib/rley/notation/grammar.rb +49 -0
  57. data/lib/rley/notation/grammar_builder.rb +504 -0
  58. data/lib/rley/notation/grouping_node.rb +23 -0
  59. data/lib/rley/notation/parser.rb +56 -0
  60. data/lib/rley/notation/sequence_node.rb +35 -0
  61. data/lib/rley/notation/symbol_node.rb +29 -0
  62. data/lib/rley/notation/tokenizer.rb +192 -0
  63. data/lib/rley/parse_forest_visitor.rb +5 -5
  64. data/lib/rley/parse_rep/ast_base_builder.rb +48 -11
  65. data/lib/rley/parse_rep/cst_builder.rb +5 -6
  66. data/lib/rley/parse_rep/parse_forest_builder.rb +22 -18
  67. data/lib/rley/parse_rep/parse_forest_factory.rb +3 -3
  68. data/lib/rley/parse_rep/parse_rep_creator.rb +14 -16
  69. data/lib/rley/parse_rep/parse_tree_builder.rb +4 -4
  70. data/lib/rley/parse_rep/parse_tree_factory.rb +27 -27
  71. data/lib/rley/parse_tree_visitor.rb +1 -1
  72. data/lib/rley/parser/error_reason.rb +4 -5
  73. data/lib/rley/parser/gfg_chart.rb +118 -26
  74. data/lib/rley/parser/gfg_parsing.rb +22 -33
  75. data/lib/rley/parser/parse_entry.rb +25 -31
  76. data/lib/rley/parser/parse_entry_set.rb +19 -16
  77. data/lib/rley/parser/parse_entry_tracker.rb +4 -4
  78. data/lib/rley/parser/parse_tracer.rb +13 -13
  79. data/lib/rley/parser/parse_walker_factory.rb +23 -28
  80. data/lib/rley/ptree/non_terminal_node.rb +7 -5
  81. data/lib/rley/ptree/parse_tree.rb +3 -3
  82. data/lib/rley/ptree/parse_tree_node.rb +5 -5
  83. data/lib/rley/ptree/terminal_node.rb +7 -7
  84. data/lib/rley/rley_error.rb +12 -12
  85. data/lib/rley/sppf/alternative_node.rb +6 -6
  86. data/lib/rley/sppf/composite_node.rb +7 -7
  87. data/lib/rley/sppf/epsilon_node.rb +3 -3
  88. data/lib/rley/sppf/leaf_node.rb +3 -3
  89. data/lib/rley/sppf/parse_forest.rb +16 -16
  90. data/lib/rley/sppf/sppf_node.rb +7 -8
  91. data/lib/rley/sppf/token_node.rb +3 -3
  92. data/lib/rley/syntax/{grammar_builder.rb → base_grammar_builder.rb} +61 -23
  93. data/lib/rley/syntax/grammar.rb +5 -5
  94. data/lib/rley/syntax/grm_symbol.rb +7 -7
  95. data/lib/rley/syntax/match_closest.rb +43 -0
  96. data/lib/rley/syntax/non_terminal.rb +9 -15
  97. data/lib/rley/syntax/production.rb +16 -10
  98. data/lib/rley/syntax/symbol_seq.rb +7 -9
  99. data/lib/rley/syntax/terminal.rb +4 -5
  100. data/lib/rley/syntax/verbatim_symbol.rb +3 -3
  101. data/lib/support/base_tokenizer.rb +19 -18
  102. data/spec/rley/base/dotted_item_spec.rb +2 -2
  103. data/spec/rley/engine_spec.rb +23 -21
  104. data/spec/rley/formatter/asciitree_spec.rb +7 -7
  105. data/spec/rley/formatter/bracket_notation_spec.rb +13 -13
  106. data/spec/rley/formatter/json_spec.rb +1 -1
  107. data/spec/rley/gfg/end_vertex_spec.rb +5 -5
  108. data/spec/rley/gfg/grm_flow_graph_spec.rb +2 -2
  109. data/spec/rley/gfg/item_vertex_spec.rb +10 -10
  110. data/spec/rley/gfg/non_terminal_vertex_spec.rb +3 -3
  111. data/spec/rley/gfg/shortcut_edge_spec.rb +1 -1
  112. data/spec/rley/gfg/start_vertex_spec.rb +5 -5
  113. data/spec/rley/gfg/vertex_spec.rb +3 -3
  114. data/spec/rley/lexical/token_range_spec.rb +16 -16
  115. data/spec/rley/lexical/token_spec.rb +2 -2
  116. data/spec/rley/notation/grammar_builder_spec.rb +302 -0
  117. data/spec/rley/notation/parser_spec.rb +184 -0
  118. data/spec/rley/notation/tokenizer_spec.rb +370 -0
  119. data/spec/rley/parse_forest_visitor_spec.rb +165 -163
  120. data/spec/rley/parse_rep/ambiguous_parse_spec.rb +44 -44
  121. data/spec/rley/parse_rep/ast_builder_spec.rb +6 -7
  122. data/spec/rley/parse_rep/cst_builder_spec.rb +5 -5
  123. data/spec/rley/parse_rep/groucho_spec.rb +24 -26
  124. data/spec/rley/parse_rep/parse_forest_builder_spec.rb +27 -27
  125. data/spec/rley/parse_rep/parse_forest_factory_spec.rb +8 -8
  126. data/spec/rley/parse_rep/parse_tree_factory_spec.rb +3 -3
  127. data/spec/rley/parse_tree_visitor_spec.rb +10 -8
  128. data/spec/rley/parser/dangling_else_spec.rb +445 -0
  129. data/spec/rley/parser/error_reason_spec.rb +6 -6
  130. data/spec/rley/parser/gfg_earley_parser_spec.rb +120 -12
  131. data/spec/rley/parser/gfg_parsing_spec.rb +6 -13
  132. data/spec/rley/parser/parse_entry_spec.rb +19 -19
  133. data/spec/rley/parser/parse_walker_factory_spec.rb +10 -10
  134. data/spec/rley/ptree/non_terminal_node_spec.rb +5 -3
  135. data/spec/rley/ptree/parse_tree_node_spec.rb +4 -4
  136. data/spec/rley/ptree/terminal_node_spec.rb +6 -6
  137. data/spec/rley/sppf/alternative_node_spec.rb +6 -6
  138. data/spec/rley/sppf/non_terminal_node_spec.rb +3 -3
  139. data/spec/rley/sppf/token_node_spec.rb +4 -4
  140. data/spec/rley/support/ambiguous_grammar_helper.rb +4 -5
  141. data/spec/rley/support/grammar_abc_helper.rb +3 -5
  142. data/spec/rley/support/grammar_ambig01_helper.rb +5 -6
  143. data/spec/rley/support/grammar_arr_int_helper.rb +5 -6
  144. data/spec/rley/support/grammar_b_expr_helper.rb +5 -6
  145. data/spec/rley/support/grammar_int_seq_helper.rb +51 -0
  146. data/spec/rley/support/grammar_l0_helper.rb +14 -17
  147. data/spec/rley/support/grammar_pb_helper.rb +8 -7
  148. data/spec/rley/support/grammar_sppf_helper.rb +3 -3
  149. data/spec/rley/syntax/{grammar_builder_spec.rb → base_grammar_builder_spec.rb} +35 -16
  150. data/spec/rley/syntax/grammar_spec.rb +6 -6
  151. data/spec/rley/syntax/grm_symbol_spec.rb +1 -1
  152. data/spec/rley/syntax/match_closest_spec.rb +46 -0
  153. data/spec/rley/syntax/non_terminal_spec.rb +8 -8
  154. data/spec/rley/syntax/production_spec.rb +17 -13
  155. data/spec/rley/syntax/symbol_seq_spec.rb +2 -2
  156. data/spec/rley/syntax/terminal_spec.rb +5 -5
  157. data/spec/rley/syntax/verbatim_symbol_spec.rb +1 -1
  158. data/spec/spec_helper.rb +0 -12
  159. data/spec/support/base_tokenizer_spec.rb +7 -2
  160. metadata +48 -74
  161. data/.simplecov +0 -7
  162. data/lib/rley/parser/parse_state.rb +0 -83
  163. data/lib/rley/parser/parse_state_tracker.rb +0 -59
  164. data/lib/rley/parser/state_set.rb +0 -101
  165. data/spec/rley/parser/parse_state_spec.rb +0 -125
  166. data/spec/rley/parser/parse_tracer_spec.rb +0 -200
  167. data/spec/rley/parser/state_set_spec.rb +0 -130
@@ -4,7 +4,7 @@ require_relative '../../spec_helper'
4
4
 
5
5
  require_relative '../../../lib/rley/parser/gfg_earley_parser'
6
6
 
7
- require_relative '../../../lib/rley/syntax/grammar_builder'
7
+ require_relative '../../../lib/rley/syntax/base_grammar_builder'
8
8
  require_relative '../support/grammar_helper'
9
9
  require_relative '../support/expectation_helper'
10
10
 
@@ -22,15 +22,15 @@ module Rley # Open this namespace to avoid module qualifier prefixes
22
22
  # "SPPF-Style Parsing From Earley Recognizers" in
23
23
  # Notes in Theoretical Computer Science 203, (2008), pp. 53-67
24
24
  # contains a hidden left recursion and a cycle
25
- builder = Syntax::GrammarBuilder.new do
25
+ builder = Syntax::BaseGrammarBuilder.new do
26
26
  add_terminals('a', 'b')
27
27
  rule 'Phi' => 'S'
28
- rule 'S' => %w[A T]
29
- rule 'S' => %w[a T]
28
+ rule 'S' => 'A T'
29
+ rule 'S' => 'a T'
30
30
  rule 'A' => 'a'
31
- rule 'A' => %w[B A]
31
+ rule 'A' => 'B A'
32
32
  rule 'B' => []
33
- rule 'T' => %w[b b b]
33
+ rule 'T' => 'b b b'
34
34
  end
35
35
  builder.grammar
36
36
  end
@@ -49,11 +49,11 @@ module Rley # Open this namespace to avoid module qualifier prefixes
49
49
  end
50
50
 
51
51
  # Emit a text representation of the current path.
52
- def path_to_s()
52
+ def path_to_s
53
53
  text_parts = subject.curr_path.map do |path_element|
54
54
  path_element.to_string(0)
55
55
  end
56
- return text_parts.join('/')
56
+ text_parts.join('/')
57
57
  end
58
58
 
59
59
  context 'Initialization:' do
@@ -3,7 +3,7 @@
3
3
  require_relative '../../spec_helper'
4
4
 
5
5
  require_relative '../../../lib/rley/parser/gfg_earley_parser'
6
- require_relative '../../../lib/rley/syntax/grammar_builder'
6
+ require_relative '../../../lib/rley/syntax/base_grammar_builder'
7
7
  require_relative '../support/grammar_helper'
8
8
  require_relative '../support/grammar_abc_helper'
9
9
  require_relative '../support/expectation_helper'
@@ -38,11 +38,11 @@ module Rley # Open this namespace to avoid module qualifier prefixes
38
38
  end
39
39
 
40
40
  # Emit a text representation of the current path.
41
- def path_to_s()
41
+ def path_to_s
42
42
  text_parts = subject.curr_path.map do |path_element|
43
43
  path_element.to_string(0)
44
44
  end
45
- return text_parts.join('/')
45
+ text_parts.join('/')
46
46
  end
47
47
 
48
48
 
@@ -11,7 +11,7 @@ require_relative './support/grammar_sppf_helper'
11
11
  require_relative '../../lib/rley/parse_tree_visitor'
12
12
 
13
13
  module Rley # Open this namespace to avoid module qualifier prefixes
14
- describe ParseTreeVisitor do
14
+ describe ParseTreeVisitor do
15
15
  let(:grammar_abc) do
16
16
  sandbox = Object.new
17
17
  sandbox.extend(GrammarABCHelper)
@@ -45,7 +45,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
45
45
  # Capital letters represent non-terminal nodes
46
46
  let(:grm_abc_ptree1) do
47
47
  engine = Rley::Engine.new
48
- engine.use_grammar(grammar_abc)
48
+ engine.use_grammar(grammar_abc)
49
49
  parse_result = engine.parse(grm_abc_tokens1)
50
50
  ptree = engine.convert(parse_result)
51
51
  ptree
@@ -161,6 +161,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
161
161
  subject.end_visit_ptree(grm_abc_ptree1)
162
162
  end
163
163
 
164
+ # rubocop: disable Naming/VariableNumber
164
165
  it 'should begin the visit when requested' do
165
166
  # Reminder: parse tree structure is
166
167
  # S[0,5]
@@ -209,11 +210,11 @@ module Rley # Open this namespace to avoid module qualifier prefixes
209
210
  expectations.each do |(msg, args)|
210
211
  expect(listener1).to receive(msg).with(*args).ordered
211
212
  end
212
-
213
+
213
214
  # Here we go...
214
215
  subject.start
215
216
  end
216
-
217
+
217
218
  it 'should also visit in pre-order' do
218
219
  # Reminder: parse tree structure is
219
220
  # S[0,5]
@@ -228,7 +229,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
228
229
  root = grm_abc_ptree1.root
229
230
  # Here we defeat encapsulation for the good cause
230
231
  subject.instance_variable_set(:@traversal, :pre_order)
231
-
232
+
232
233
  children = root.subnodes
233
234
  big_a_1 = children[0]
234
235
  big_a_1_children = big_a_1.subnodes
@@ -239,7 +240,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
239
240
  expectations = [
240
241
  [:before_ptree, [grm_abc_ptree1]]
241
242
  # TODO: fix this test
242
- # [:before_subnodes, [root, children]],
243
+ # [:before_subnodes, [root, children]],
243
244
  # [:before_non_terminal, [root]],
244
245
 
245
246
  # [:before_non_terminal, [big_a_1]],
@@ -267,10 +268,11 @@ module Rley # Open this namespace to avoid module qualifier prefixes
267
268
  expectations.each do |(msg, args)|
268
269
  expect(listener1).to receive(msg).with(*args).ordered
269
270
  end
270
-
271
+
271
272
  # Here we go...
272
273
  subject.start
273
- end
274
+ end
275
+ # rubocop: enable Naming/VariableNumber
274
276
  end # context
275
277
  end # describe
276
278
  end # module
@@ -0,0 +1,445 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../../spec_helper'
4
+ require 'stringio'
5
+ require_relative '../../../lib/rley/syntax/match_closest'
6
+ require_relative '../../../lib/rley/syntax/non_terminal'
7
+ require_relative '../../../lib/rley/syntax/production'
8
+ require_relative '../../../lib/rley/syntax/base_grammar_builder'
9
+ require_relative '../../../lib/rley/lexical/token'
10
+ require_relative '../../../lib/rley/base/dotted_item'
11
+ require_relative '../../../lib/rley/parser/gfg_parsing'
12
+
13
+ require_relative '../support/expectation_helper'
14
+
15
+ # Load the class under test
16
+ require_relative '../../../lib/rley/parser/gfg_earley_parser'
17
+
18
+ module Rley # Open this namespace to avoid module qualifier prefixes
19
+ module Parser # Open this namespace to avoid module qualifier prefixes
20
+ describe GFGEarleyParser do
21
+ include ExpectationHelper # Mix-in with expectation on parse entry sets
22
+
23
+ Keyword = {
24
+ 'else' => 'ELSE',
25
+ 'false' => 'FALSE',
26
+ 'if' => 'IF',
27
+ 'then' => 'THEN',
28
+ 'true' => 'TRUE'
29
+ }.freeze
30
+
31
+ def tokenizer(aTextToParse)
32
+ scanner = StringScanner.new(aTextToParse)
33
+ tokens = []
34
+
35
+ loop do
36
+ scanner.skip(/\s+/)
37
+ break if scanner.eos?
38
+ curr_pos = scanner.pos
39
+ lexeme = scanner.scan(/\S+/)
40
+
41
+ term_name = Keyword[lexeme]
42
+ unless term_name
43
+ if lexeme =~ /\d+/
44
+ term_name = 'INTEGER'
45
+ else
46
+ err_msg = "Unknown token '#{lexeme}'"
47
+ raise StandardError, err_msg
48
+ end
49
+ end
50
+ pos = Rley::Lexical::Position.new(1, curr_pos + 1)
51
+ tokens << Rley::Lexical::Token.new(lexeme, term_name, pos)
52
+ end
53
+
54
+ tokens
55
+ end
56
+
57
+ let(:input) { 'if false then if true then 1 else 2' }
58
+
59
+ context 'Ambiguous parse: ' do
60
+ # Factory method. Creates a grammar builder for a simple grammar.
61
+ def grammar_if_else_amb
62
+ builder = Rley::Syntax::BaseGrammarBuilder.new do
63
+ add_terminals('IF', 'THEN', 'ELSE')
64
+ add_terminals('FALSE', 'TRUE', 'INTEGER')
65
+
66
+ rule 'program' => 'stmt'
67
+ rule 'stmt' => 'IF boolean THEN stmt'
68
+ rule 'stmt' => 'IF boolean THEN stmt ELSE stmt'
69
+ rule 'stmt' => 'literal'
70
+ rule 'literal' => 'boolean'
71
+ rule 'literal' => 'INTEGER'
72
+ rule 'boolean' => 'FALSE'
73
+ rule 'boolean' => 'TRUE'
74
+ end
75
+
76
+ builder.grammar
77
+ end
78
+
79
+ subject { GFGEarleyParser.new(grammar_if_else_amb) }
80
+
81
+ it 'should parse a valid simple input' do
82
+ tokens = tokenizer(input)
83
+ parse_result = subject.parse(tokens)
84
+ expect(parse_result.success?).to eq(true)
85
+ expect(parse_result.ambiguous?).to eq(true)
86
+ ######################
87
+ # Expectation chart[0]:
88
+ expected = [
89
+ '.program | 0', # initialization
90
+ 'program => . stmt | 0', # start rule
91
+ '.stmt | 0', # call rule
92
+ 'stmt => . IF boolean THEN stmt | 0', # start rule
93
+ 'stmt => . IF boolean THEN stmt ELSE stmt | 0', # start rule
94
+ 'stmt => . literal | 0', # start rule
95
+ '.literal | 0', # call rule
96
+ 'literal => . boolean | 0', # start rule
97
+ 'literal => . INTEGER | 0', # start rule
98
+ '.boolean | 0', # call rule
99
+ 'boolean => . FALSE | 0', # start rule
100
+ 'boolean => . TRUE | 0' # start rule
101
+ ]
102
+ compare_entry_texts(parse_result.chart[0], expected)
103
+ expected_terminals(parse_result.chart[0], %w[FALSE IF INTEGER TRUE])
104
+
105
+ ######################
106
+ # Expectation chart[1]:
107
+ expected = [
108
+ 'stmt => IF . boolean THEN stmt | 0', # start rule
109
+ 'stmt => IF . boolean THEN stmt ELSE stmt | 0', # start rule
110
+ '.boolean | 1',
111
+ 'boolean => . FALSE | 1', # start rule
112
+ 'boolean => . TRUE | 1' # start rule
113
+ ]
114
+ result1 = parse_result.chart[1]
115
+ expect(result1.entries.size).to eq(5)
116
+ compare_entry_texts(result1, expected)
117
+ expected_terminals(result1, %w[FALSE TRUE])
118
+
119
+ ######################
120
+ # Expectation chart[2]:
121
+ expected = [
122
+ 'boolean => FALSE . | 1',
123
+ 'boolean. | 1',
124
+ 'stmt => IF boolean . THEN stmt | 0',
125
+ 'stmt => IF boolean . THEN stmt ELSE stmt | 0'
126
+ ]
127
+ result2 = parse_result.chart[2]
128
+ expect(result2.entries.size).to eq(4)
129
+ compare_entry_texts(result2, expected)
130
+ expected_terminals(result2, %w[THEN])
131
+
132
+ ######################
133
+ # Expectation chart[3]:
134
+ expected = [
135
+ 'stmt => IF boolean THEN . stmt | 0',
136
+ 'stmt => IF boolean THEN . stmt ELSE stmt | 0',
137
+ '.stmt | 3',
138
+ 'stmt => . IF boolean THEN stmt | 3',
139
+ 'stmt => . IF boolean THEN stmt ELSE stmt | 3',
140
+ 'stmt => . literal | 3',
141
+ '.literal | 3',
142
+ 'literal => . boolean | 3',
143
+ 'literal => . INTEGER | 3',
144
+ '.boolean | 3',
145
+ 'boolean => . FALSE | 3',
146
+ 'boolean => . TRUE | 3'
147
+ ]
148
+ result3 = parse_result.chart[3]
149
+ expect(result3.entries.size).to eq(12)
150
+ compare_entry_texts(result3, expected)
151
+ expected_terminals(result3, %w[FALSE IF INTEGER TRUE])
152
+
153
+
154
+ ######################
155
+ # Expectation chart[4]:
156
+ expected = [
157
+ 'stmt => IF . boolean THEN stmt | 3',
158
+ 'stmt => IF . boolean THEN stmt ELSE stmt | 3',
159
+ '.boolean | 4',
160
+ 'boolean => . FALSE | 4',
161
+ 'boolean => . TRUE | 4'
162
+ ]
163
+ result4 = parse_result.chart[4]
164
+ expect(result4.entries.size).to eq(5)
165
+ compare_entry_texts(result4, expected)
166
+ expected_terminals(result4, %w[FALSE TRUE])
167
+
168
+ ######################
169
+ # Expectation chart[5]:
170
+ expected = [
171
+ 'boolean => TRUE . | 4',
172
+ 'boolean. | 4',
173
+ 'stmt => IF boolean . THEN stmt | 3',
174
+ 'stmt => IF boolean . THEN stmt ELSE stmt | 3'
175
+ ]
176
+ result5 = parse_result.chart[5]
177
+ expect(result5.entries.size).to eq(4)
178
+ compare_entry_texts(result5, expected)
179
+ expected_terminals(result5, %w[THEN])
180
+
181
+ ######################
182
+ # Expectation chart[6]:
183
+ expected = [
184
+ 'stmt => IF boolean THEN . stmt | 3',
185
+ 'stmt => IF boolean THEN . stmt ELSE stmt | 3',
186
+ '.stmt | 6',
187
+ 'stmt => . IF boolean THEN stmt | 6',
188
+ 'stmt => . IF boolean THEN stmt ELSE stmt | 6',
189
+ 'stmt => . literal | 6',
190
+ '.literal | 6',
191
+ 'literal => . boolean | 6',
192
+ 'literal => . INTEGER | 6',
193
+ '.boolean | 6',
194
+ 'boolean => . FALSE | 6',
195
+ 'boolean => . TRUE | 6'
196
+ ]
197
+ result6 = parse_result.chart[6]
198
+ expect(result6.entries.size).to eq(12)
199
+ compare_entry_texts(result6, expected)
200
+ expected_terminals(result6, %w[FALSE IF INTEGER TRUE])
201
+
202
+ ######################
203
+ # Expectation chart[7]:
204
+ expected = [
205
+ 'literal => INTEGER . | 6',
206
+ 'literal. | 6',
207
+ 'stmt => literal . | 6',
208
+ 'stmt. | 6',
209
+ 'stmt => IF boolean THEN stmt . | 3',
210
+ 'stmt => IF boolean THEN stmt . ELSE stmt | 3',
211
+ 'stmt. | 3',
212
+ 'stmt => IF boolean THEN stmt . | 0',
213
+ 'stmt => IF boolean THEN stmt . ELSE stmt | 0',
214
+ 'stmt. | 0',
215
+ 'program => stmt . | 0',
216
+ 'program. | 0'
217
+ ]
218
+ result7 = parse_result.chart[7]
219
+ expect(result7.entries.size).to eq(12)
220
+ compare_entry_texts(result7, expected)
221
+ expected_terminals(result7, %w[ELSE])
222
+
223
+ # Expectation chart[8]:
224
+ expected = [
225
+ 'stmt => IF boolean THEN stmt ELSE . stmt | 3',
226
+ 'stmt => IF boolean THEN stmt ELSE . stmt | 0',
227
+ '.stmt | 8',
228
+ 'stmt => . IF boolean THEN stmt | 8',
229
+ 'stmt => . IF boolean THEN stmt ELSE stmt | 8',
230
+ 'stmt => . literal | 8',
231
+ '.literal | 8',
232
+ 'literal => . boolean | 8',
233
+ 'literal => . INTEGER | 8',
234
+ '.boolean | 8',
235
+ 'boolean => . FALSE | 8',
236
+ 'boolean => . TRUE | 8'
237
+ ]
238
+ result8 = parse_result.chart[8]
239
+ expect(result8.entries.size).to eq(12)
240
+ compare_entry_texts(result8, expected)
241
+ expected_terminals(result8, %w[FALSE IF INTEGER TRUE])
242
+
243
+ ######################
244
+ # Expectation chart[9]:
245
+ expected = [
246
+ 'literal => INTEGER . | 8',
247
+ 'literal. | 8',
248
+ 'stmt => literal . | 8',
249
+ 'stmt. | 8',
250
+ 'stmt => IF boolean THEN stmt ELSE stmt . | 3',
251
+ 'stmt => IF boolean THEN stmt ELSE stmt . | 0',
252
+ 'stmt. | 3',
253
+ 'stmt. | 0',
254
+ 'stmt => IF boolean THEN stmt . | 0',
255
+ 'stmt => IF boolean THEN stmt . ELSE stmt | 0',
256
+ 'program => stmt . | 0',
257
+ 'program. | 0'
258
+ ]
259
+ result9 = parse_result.chart[9]
260
+ expect(result9.entries.size).to eq(12)
261
+ compare_entry_texts(result9, expected)
262
+ expected_terminals(result9, %w[ELSE])
263
+
264
+ ######################
265
+ # Expectation chart[10]:
266
+ result10 = parse_result.chart[10]
267
+ expect(result10).to be_nil
268
+
269
+ # The parse is ambiguous since there is more than one dotted item
270
+ # that matches the stmt. | 0 exit node on chart[9]:
271
+ # stmt => IF boolean THEN stmt ELSE stmt . | 0'
272
+ # stmt => IF boolean THEN stmt . | 0'
273
+ #
274
+ # This is related to the "dangling else problem"
275
+ end
276
+ end # context
277
+
278
+ context 'Disambiguated parse: ' do
279
+ def match_else_with_if(grammar)
280
+ # Brittle code
281
+ prod = grammar.rules[2]
282
+ constraint = Syntax::MatchClosest.new(prod.rhs.members, 4, 'IF')
283
+ prod.constraints << constraint
284
+ end
285
+
286
+ # Factory method. Creates a grammar builder for a simple grammar.
287
+ def grammar_if_else
288
+ builder = Rley::Syntax::BaseGrammarBuilder.new do
289
+ add_terminals('IF', 'THEN', 'ELSE')
290
+ add_terminals('FALSE', 'TRUE', 'INTEGER')
291
+
292
+ rule 'program' => 'stmt'
293
+ rule 'stmt' => 'IF boolean THEN stmt'
294
+
295
+ # To prevent the dangling else issue, the ELSE must match the closest preceding IF
296
+ # rule 'stmt' => 'IF boolean THEN stmt ELSE{closest IF} stmt'
297
+ rule 'stmt' => 'IF boolean THEN stmt ELSE stmt'
298
+ rule 'stmt' => 'literal'
299
+ rule 'literal' => 'boolean'
300
+ rule 'literal' => 'INTEGER'
301
+ rule 'boolean' => 'FALSE'
302
+ rule 'boolean' => 'TRUE'
303
+ end
304
+
305
+ grm = builder.grammar
306
+ match_else_with_if(grm)
307
+
308
+ grm
309
+ end
310
+
311
+ subject { GFGEarleyParser.new(grammar_if_else) }
312
+
313
+ it 'should cope with dangling else problem' do
314
+ tokens = tokenizer(input)
315
+ parse_result = subject.parse(tokens)
316
+ expect(parse_result.success?).to eq(true)
317
+ expect(parse_result.ambiguous?).to eq(true)
318
+ ######################
319
+ # Expectation chart[0]:
320
+ expected = [
321
+ '.program | 0', # initialization
322
+ 'program => . stmt | 0', # start rule
323
+ '.stmt | 0', # call rule
324
+ 'stmt => . IF boolean THEN stmt | 0', # start rule
325
+ 'stmt => . IF boolean THEN stmt ELSE stmt | 0', # start rule
326
+ 'stmt => . literal | 0', # start rule
327
+ '.literal | 0', # call rule
328
+ 'literal => . boolean | 0', # start rule
329
+ 'literal => . INTEGER | 0', # start rule
330
+ '.boolean | 0', # call rule
331
+ 'boolean => . FALSE | 0', # start rule
332
+ 'boolean => . TRUE | 0' # start rule
333
+ ]
334
+ compare_entry_texts(parse_result.chart[0], expected)
335
+ expected_terminals(parse_result.chart[0], %w[FALSE IF INTEGER TRUE])
336
+
337
+ # The parser should work as the previous version...
338
+ # we skip chart[1] through chart[3]
339
+ ######################
340
+ # Expectation chart[4]:
341
+ expected = [
342
+ 'stmt => IF . boolean THEN stmt | 3',
343
+ 'stmt => IF . boolean THEN stmt ELSE stmt | 3',
344
+ '.boolean | 4',
345
+ 'boolean => . FALSE | 4',
346
+ 'boolean => . TRUE | 4'
347
+ ]
348
+ result4 = parse_result.chart[4]
349
+ expect(result4.entries.size).to eq(5)
350
+ compare_entry_texts(result4, expected)
351
+ expected_terminals(result4, %w[FALSE TRUE])
352
+
353
+ ######################
354
+ # Before reading ELSE
355
+ # Expectation chart[7]:
356
+ expected = [
357
+ 'literal => INTEGER . | 6',
358
+ 'literal. | 6',
359
+ 'stmt => literal . | 6',
360
+ 'stmt. | 6',
361
+ 'stmt => IF boolean THEN stmt . | 3',
362
+ 'stmt => IF boolean THEN stmt . ELSE stmt | 3',
363
+ 'stmt. | 3',
364
+ 'stmt => IF boolean THEN stmt . | 0',
365
+ 'stmt => IF boolean THEN stmt . ELSE stmt | 0',
366
+ 'stmt. | 0',
367
+ 'program => stmt . | 0',
368
+ 'program. | 0'
369
+ ]
370
+ result7 = parse_result.chart[7]
371
+ expect(result7.entries.size).to eq(12)
372
+ compare_entry_texts(result7, expected)
373
+ expected_terminals(result7, %w[ELSE])
374
+
375
+ ######################
376
+ # After reading ELSE
377
+ # Expectation chart[8]:
378
+ expected = [
379
+ 'stmt => IF boolean THEN stmt ELSE . stmt | 3',
380
+ # 'stmt => IF boolean THEN stmt ELSE . stmt | 0', # Excluded
381
+ '.stmt | 8',
382
+ 'stmt => . IF boolean THEN stmt | 8',
383
+ 'stmt => . IF boolean THEN stmt ELSE stmt | 8',
384
+ 'stmt => . literal | 8',
385
+ '.literal | 8',
386
+ 'literal => . boolean | 8',
387
+ 'literal => . INTEGER | 8',
388
+ '.boolean | 8',
389
+ 'boolean => . FALSE | 8',
390
+ 'boolean => . TRUE | 8'
391
+ ]
392
+ result8 = parse_result.chart[8]
393
+ found = parse_result.chart.search_entries(4, {before: 'IF'})
394
+ expect(result8.entries.size).to eq(11)
395
+ compare_entry_texts(result8, expected)
396
+ expected_terminals(result8, %w[FALSE IF INTEGER TRUE])
397
+
398
+ # How does it work?
399
+ # ELSE was just read at position 7
400
+ # We look backwards to nearest IF; there is one at position 3
401
+ # In chart[8], we should exclude the dotted item:
402
+ # 'stmt => IF boolean THEN stmt ELSE . stmt | 0'
403
+ # Reasoning?
404
+ # On chart[4], we find two entries for the IF .:
405
+ # 'stmt => IF . boolean THEN stmt | 3',
406
+ # 'stmt => IF . boolean THEN stmt ELSE stmt | 3'
407
+ # Only those productions that still apply at 8 must be retained
408
+ # 'stmt => IF boolean THEN stmt ELSE . stmt | 3',
409
+ # 'stmt => IF boolean THEN stmt ELSE . stmt | 0', # To exclude
410
+ # Where to place the check?
411
+ # At the dotted item?
412
+ # call, return scan nodes
413
+ # So if one has an annotated production rule:
414
+ # stmt => IF boolean THEN stmt ELSE{ closest: IF } stmt
415
+ # then the dotted item:
416
+ # stmt => IF boolean THEN stmt ELSE . stmt
417
+ # should bear the constraint
418
+
419
+ ######################
420
+ # Expectation chart[9]:
421
+ expected = [
422
+ 'literal => INTEGER . | 8',
423
+ 'literal. | 8',
424
+ 'stmt => literal . | 8',
425
+ 'stmt. | 8',
426
+ 'stmt => IF boolean THEN stmt ELSE stmt . | 3',
427
+ # 'stmt => IF boolean THEN stmt ELSE stmt . | 0', # Excluded
428
+ 'stmt. | 3',
429
+ 'stmt => IF boolean THEN stmt . | 0',
430
+ 'stmt => IF boolean THEN stmt . ELSE stmt | 0',
431
+ 'stmt. | 0',
432
+ 'program => stmt . | 0',
433
+ 'program. | 0'
434
+ ]
435
+ result9 = parse_result.chart[9]
436
+ expect(result9.entries.size).to eq(11)
437
+ compare_entry_texts(result9, expected)
438
+ expected_terminals(result9, ['ELSE'])
439
+ end
440
+ end # context
441
+ end # describe
442
+ end # module
443
+ end # module
444
+
445
+