rley 0.7.06 → 0.8.01

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (167) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +362 -62
  3. data/.travis.yml +6 -6
  4. data/CHANGELOG.md +20 -4
  5. data/LICENSE.txt +1 -1
  6. data/README.md +7 -7
  7. data/examples/NLP/engtagger.rb +193 -190
  8. data/examples/NLP/nano_eng/nano_en_demo.rb +7 -11
  9. data/examples/NLP/nano_eng/nano_grammar.rb +21 -21
  10. data/examples/NLP/pico_en_demo.rb +2 -2
  11. data/examples/data_formats/JSON/cli_options.rb +1 -1
  12. data/examples/data_formats/JSON/json_ast_builder.rb +21 -27
  13. data/examples/data_formats/JSON/json_ast_nodes.rb +12 -21
  14. data/examples/data_formats/JSON/json_demo.rb +1 -2
  15. data/examples/data_formats/JSON/json_grammar.rb +13 -13
  16. data/examples/data_formats/JSON/json_lexer.rb +8 -8
  17. data/examples/data_formats/JSON/json_minifier.rb +1 -1
  18. data/examples/general/calc_iter1/calc_ast_builder.rb +13 -10
  19. data/examples/general/calc_iter1/calc_ast_nodes.rb +23 -37
  20. data/examples/general/calc_iter1/calc_grammar.rb +7 -6
  21. data/examples/general/calc_iter1/calc_lexer.rb +6 -4
  22. data/examples/general/calc_iter1/spec/calculator_spec.rb +5 -5
  23. data/examples/general/calc_iter2/calc_ast_builder.rb +5 -3
  24. data/examples/general/calc_iter2/calc_ast_nodes.rb +27 -43
  25. data/examples/general/calc_iter2/calc_grammar.rb +12 -12
  26. data/examples/general/calc_iter2/calc_lexer.rb +11 -10
  27. data/examples/general/calc_iter2/spec/calculator_spec.rb +26 -26
  28. data/examples/general/left.rb +2 -2
  29. data/examples/general/right.rb +2 -2
  30. data/lib/rley.rb +1 -1
  31. data/lib/rley/base/dotted_item.rb +28 -31
  32. data/lib/rley/base/grm_items_builder.rb +6 -0
  33. data/lib/rley/constants.rb +2 -2
  34. data/lib/rley/engine.rb +22 -25
  35. data/lib/rley/formatter/asciitree.rb +3 -3
  36. data/lib/rley/formatter/bracket_notation.rb +1 -8
  37. data/lib/rley/formatter/debug.rb +6 -6
  38. data/lib/rley/formatter/json.rb +2 -2
  39. data/lib/rley/gfg/call_edge.rb +1 -1
  40. data/lib/rley/gfg/edge.rb +5 -5
  41. data/lib/rley/gfg/end_vertex.rb +2 -6
  42. data/lib/rley/gfg/epsilon_edge.rb +1 -5
  43. data/lib/rley/gfg/grm_flow_graph.rb +27 -23
  44. data/lib/rley/gfg/item_vertex.rb +10 -10
  45. data/lib/rley/gfg/non_terminal_vertex.rb +4 -4
  46. data/lib/rley/gfg/scan_edge.rb +1 -1
  47. data/lib/rley/gfg/shortcut_edge.rb +2 -2
  48. data/lib/rley/gfg/start_vertex.rb +4 -8
  49. data/lib/rley/gfg/vertex.rb +43 -39
  50. data/lib/rley/interface.rb +16 -0
  51. data/lib/rley/lexical/token_range.rb +6 -6
  52. data/lib/rley/notation/all_notation_nodes.rb +2 -0
  53. data/lib/rley/notation/ast_builder.rb +191 -0
  54. data/lib/rley/notation/ast_node.rb +44 -0
  55. data/lib/rley/notation/ast_visitor.rb +113 -0
  56. data/lib/rley/notation/grammar.rb +49 -0
  57. data/lib/rley/notation/grammar_builder.rb +504 -0
  58. data/lib/rley/notation/grouping_node.rb +23 -0
  59. data/lib/rley/notation/parser.rb +56 -0
  60. data/lib/rley/notation/sequence_node.rb +35 -0
  61. data/lib/rley/notation/symbol_node.rb +29 -0
  62. data/lib/rley/notation/tokenizer.rb +192 -0
  63. data/lib/rley/parse_forest_visitor.rb +5 -5
  64. data/lib/rley/parse_rep/ast_base_builder.rb +48 -11
  65. data/lib/rley/parse_rep/cst_builder.rb +5 -6
  66. data/lib/rley/parse_rep/parse_forest_builder.rb +22 -18
  67. data/lib/rley/parse_rep/parse_forest_factory.rb +3 -3
  68. data/lib/rley/parse_rep/parse_rep_creator.rb +14 -16
  69. data/lib/rley/parse_rep/parse_tree_builder.rb +4 -4
  70. data/lib/rley/parse_rep/parse_tree_factory.rb +27 -27
  71. data/lib/rley/parse_tree_visitor.rb +1 -1
  72. data/lib/rley/parser/error_reason.rb +4 -5
  73. data/lib/rley/parser/gfg_chart.rb +118 -26
  74. data/lib/rley/parser/gfg_parsing.rb +22 -33
  75. data/lib/rley/parser/parse_entry.rb +25 -31
  76. data/lib/rley/parser/parse_entry_set.rb +19 -16
  77. data/lib/rley/parser/parse_entry_tracker.rb +4 -4
  78. data/lib/rley/parser/parse_tracer.rb +13 -13
  79. data/lib/rley/parser/parse_walker_factory.rb +23 -28
  80. data/lib/rley/ptree/non_terminal_node.rb +7 -5
  81. data/lib/rley/ptree/parse_tree.rb +3 -3
  82. data/lib/rley/ptree/parse_tree_node.rb +5 -5
  83. data/lib/rley/ptree/terminal_node.rb +7 -7
  84. data/lib/rley/rley_error.rb +12 -12
  85. data/lib/rley/sppf/alternative_node.rb +6 -6
  86. data/lib/rley/sppf/composite_node.rb +7 -7
  87. data/lib/rley/sppf/epsilon_node.rb +3 -3
  88. data/lib/rley/sppf/leaf_node.rb +3 -3
  89. data/lib/rley/sppf/parse_forest.rb +16 -16
  90. data/lib/rley/sppf/sppf_node.rb +7 -8
  91. data/lib/rley/sppf/token_node.rb +3 -3
  92. data/lib/rley/syntax/{grammar_builder.rb → base_grammar_builder.rb} +61 -23
  93. data/lib/rley/syntax/grammar.rb +5 -5
  94. data/lib/rley/syntax/grm_symbol.rb +7 -7
  95. data/lib/rley/syntax/match_closest.rb +43 -0
  96. data/lib/rley/syntax/non_terminal.rb +9 -15
  97. data/lib/rley/syntax/production.rb +16 -10
  98. data/lib/rley/syntax/symbol_seq.rb +7 -9
  99. data/lib/rley/syntax/terminal.rb +4 -5
  100. data/lib/rley/syntax/verbatim_symbol.rb +3 -3
  101. data/lib/support/base_tokenizer.rb +19 -18
  102. data/spec/rley/base/dotted_item_spec.rb +2 -2
  103. data/spec/rley/engine_spec.rb +23 -21
  104. data/spec/rley/formatter/asciitree_spec.rb +7 -7
  105. data/spec/rley/formatter/bracket_notation_spec.rb +13 -13
  106. data/spec/rley/formatter/json_spec.rb +1 -1
  107. data/spec/rley/gfg/end_vertex_spec.rb +5 -5
  108. data/spec/rley/gfg/grm_flow_graph_spec.rb +2 -2
  109. data/spec/rley/gfg/item_vertex_spec.rb +10 -10
  110. data/spec/rley/gfg/non_terminal_vertex_spec.rb +3 -3
  111. data/spec/rley/gfg/shortcut_edge_spec.rb +1 -1
  112. data/spec/rley/gfg/start_vertex_spec.rb +5 -5
  113. data/spec/rley/gfg/vertex_spec.rb +3 -3
  114. data/spec/rley/lexical/token_range_spec.rb +16 -16
  115. data/spec/rley/lexical/token_spec.rb +2 -2
  116. data/spec/rley/notation/grammar_builder_spec.rb +302 -0
  117. data/spec/rley/notation/parser_spec.rb +184 -0
  118. data/spec/rley/notation/tokenizer_spec.rb +370 -0
  119. data/spec/rley/parse_forest_visitor_spec.rb +165 -163
  120. data/spec/rley/parse_rep/ambiguous_parse_spec.rb +44 -44
  121. data/spec/rley/parse_rep/ast_builder_spec.rb +6 -7
  122. data/spec/rley/parse_rep/cst_builder_spec.rb +5 -5
  123. data/spec/rley/parse_rep/groucho_spec.rb +24 -26
  124. data/spec/rley/parse_rep/parse_forest_builder_spec.rb +27 -27
  125. data/spec/rley/parse_rep/parse_forest_factory_spec.rb +8 -8
  126. data/spec/rley/parse_rep/parse_tree_factory_spec.rb +3 -3
  127. data/spec/rley/parse_tree_visitor_spec.rb +10 -8
  128. data/spec/rley/parser/dangling_else_spec.rb +445 -0
  129. data/spec/rley/parser/error_reason_spec.rb +6 -6
  130. data/spec/rley/parser/gfg_earley_parser_spec.rb +120 -12
  131. data/spec/rley/parser/gfg_parsing_spec.rb +6 -13
  132. data/spec/rley/parser/parse_entry_spec.rb +19 -19
  133. data/spec/rley/parser/parse_walker_factory_spec.rb +10 -10
  134. data/spec/rley/ptree/non_terminal_node_spec.rb +5 -3
  135. data/spec/rley/ptree/parse_tree_node_spec.rb +4 -4
  136. data/spec/rley/ptree/terminal_node_spec.rb +6 -6
  137. data/spec/rley/sppf/alternative_node_spec.rb +6 -6
  138. data/spec/rley/sppf/non_terminal_node_spec.rb +3 -3
  139. data/spec/rley/sppf/token_node_spec.rb +4 -4
  140. data/spec/rley/support/ambiguous_grammar_helper.rb +4 -5
  141. data/spec/rley/support/grammar_abc_helper.rb +3 -5
  142. data/spec/rley/support/grammar_ambig01_helper.rb +5 -6
  143. data/spec/rley/support/grammar_arr_int_helper.rb +5 -6
  144. data/spec/rley/support/grammar_b_expr_helper.rb +5 -6
  145. data/spec/rley/support/grammar_int_seq_helper.rb +51 -0
  146. data/spec/rley/support/grammar_l0_helper.rb +14 -17
  147. data/spec/rley/support/grammar_pb_helper.rb +8 -7
  148. data/spec/rley/support/grammar_sppf_helper.rb +3 -3
  149. data/spec/rley/syntax/{grammar_builder_spec.rb → base_grammar_builder_spec.rb} +35 -16
  150. data/spec/rley/syntax/grammar_spec.rb +6 -6
  151. data/spec/rley/syntax/grm_symbol_spec.rb +1 -1
  152. data/spec/rley/syntax/match_closest_spec.rb +46 -0
  153. data/spec/rley/syntax/non_terminal_spec.rb +8 -8
  154. data/spec/rley/syntax/production_spec.rb +17 -13
  155. data/spec/rley/syntax/symbol_seq_spec.rb +2 -2
  156. data/spec/rley/syntax/terminal_spec.rb +5 -5
  157. data/spec/rley/syntax/verbatim_symbol_spec.rb +1 -1
  158. data/spec/spec_helper.rb +0 -12
  159. data/spec/support/base_tokenizer_spec.rb +7 -2
  160. metadata +48 -74
  161. data/.simplecov +0 -7
  162. data/lib/rley/parser/parse_state.rb +0 -83
  163. data/lib/rley/parser/parse_state_tracker.rb +0 -59
  164. data/lib/rley/parser/state_set.rb +0 -101
  165. data/spec/rley/parser/parse_state_spec.rb +0 -125
  166. data/spec/rley/parser/parse_tracer_spec.rb +0 -200
  167. data/spec/rley/parser/state_set_spec.rb +0 -130
@@ -4,7 +4,7 @@ require_relative '../../spec_helper'
4
4
 
5
5
  require_relative '../../../lib/rley/parser/gfg_earley_parser'
6
6
 
7
- require_relative '../../../lib/rley/syntax/grammar_builder'
7
+ require_relative '../../../lib/rley/syntax/base_grammar_builder'
8
8
  require_relative '../support/grammar_helper'
9
9
  require_relative '../support/expectation_helper'
10
10
 
@@ -22,15 +22,15 @@ module Rley # Open this namespace to avoid module qualifier prefixes
22
22
  # "SPPF-Style Parsing From Earley Recognizers" in
23
23
  # Notes in Theoretical Computer Science 203, (2008), pp. 53-67
24
24
  # contains a hidden left recursion and a cycle
25
- builder = Syntax::GrammarBuilder.new do
25
+ builder = Syntax::BaseGrammarBuilder.new do
26
26
  add_terminals('a', 'b')
27
27
  rule 'Phi' => 'S'
28
- rule 'S' => %w[A T]
29
- rule 'S' => %w[a T]
28
+ rule 'S' => 'A T'
29
+ rule 'S' => 'a T'
30
30
  rule 'A' => 'a'
31
- rule 'A' => %w[B A]
31
+ rule 'A' => 'B A'
32
32
  rule 'B' => []
33
- rule 'T' => %w[b b b]
33
+ rule 'T' => 'b b b'
34
34
  end
35
35
  builder.grammar
36
36
  end
@@ -49,11 +49,11 @@ module Rley # Open this namespace to avoid module qualifier prefixes
49
49
  end
50
50
 
51
51
  # Emit a text representation of the current path.
52
- def path_to_s()
52
+ def path_to_s
53
53
  text_parts = subject.curr_path.map do |path_element|
54
54
  path_element.to_string(0)
55
55
  end
56
- return text_parts.join('/')
56
+ text_parts.join('/')
57
57
  end
58
58
 
59
59
  context 'Initialization:' do
@@ -3,7 +3,7 @@
3
3
  require_relative '../../spec_helper'
4
4
 
5
5
  require_relative '../../../lib/rley/parser/gfg_earley_parser'
6
- require_relative '../../../lib/rley/syntax/grammar_builder'
6
+ require_relative '../../../lib/rley/syntax/base_grammar_builder'
7
7
  require_relative '../support/grammar_helper'
8
8
  require_relative '../support/grammar_abc_helper'
9
9
  require_relative '../support/expectation_helper'
@@ -38,11 +38,11 @@ module Rley # Open this namespace to avoid module qualifier prefixes
38
38
  end
39
39
 
40
40
  # Emit a text representation of the current path.
41
- def path_to_s()
41
+ def path_to_s
42
42
  text_parts = subject.curr_path.map do |path_element|
43
43
  path_element.to_string(0)
44
44
  end
45
- return text_parts.join('/')
45
+ text_parts.join('/')
46
46
  end
47
47
 
48
48
 
@@ -11,7 +11,7 @@ require_relative './support/grammar_sppf_helper'
11
11
  require_relative '../../lib/rley/parse_tree_visitor'
12
12
 
13
13
  module Rley # Open this namespace to avoid module qualifier prefixes
14
- describe ParseTreeVisitor do
14
+ describe ParseTreeVisitor do
15
15
  let(:grammar_abc) do
16
16
  sandbox = Object.new
17
17
  sandbox.extend(GrammarABCHelper)
@@ -45,7 +45,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
45
45
  # Capital letters represent non-terminal nodes
46
46
  let(:grm_abc_ptree1) do
47
47
  engine = Rley::Engine.new
48
- engine.use_grammar(grammar_abc)
48
+ engine.use_grammar(grammar_abc)
49
49
  parse_result = engine.parse(grm_abc_tokens1)
50
50
  ptree = engine.convert(parse_result)
51
51
  ptree
@@ -161,6 +161,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
161
161
  subject.end_visit_ptree(grm_abc_ptree1)
162
162
  end
163
163
 
164
+ # rubocop: disable Naming/VariableNumber
164
165
  it 'should begin the visit when requested' do
165
166
  # Reminder: parse tree structure is
166
167
  # S[0,5]
@@ -209,11 +210,11 @@ module Rley # Open this namespace to avoid module qualifier prefixes
209
210
  expectations.each do |(msg, args)|
210
211
  expect(listener1).to receive(msg).with(*args).ordered
211
212
  end
212
-
213
+
213
214
  # Here we go...
214
215
  subject.start
215
216
  end
216
-
217
+
217
218
  it 'should also visit in pre-order' do
218
219
  # Reminder: parse tree structure is
219
220
  # S[0,5]
@@ -228,7 +229,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
228
229
  root = grm_abc_ptree1.root
229
230
  # Here we defeat encapsulation for the good cause
230
231
  subject.instance_variable_set(:@traversal, :pre_order)
231
-
232
+
232
233
  children = root.subnodes
233
234
  big_a_1 = children[0]
234
235
  big_a_1_children = big_a_1.subnodes
@@ -239,7 +240,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
239
240
  expectations = [
240
241
  [:before_ptree, [grm_abc_ptree1]]
241
242
  # TODO: fix this test
242
- # [:before_subnodes, [root, children]],
243
+ # [:before_subnodes, [root, children]],
243
244
  # [:before_non_terminal, [root]],
244
245
 
245
246
  # [:before_non_terminal, [big_a_1]],
@@ -267,10 +268,11 @@ module Rley # Open this namespace to avoid module qualifier prefixes
267
268
  expectations.each do |(msg, args)|
268
269
  expect(listener1).to receive(msg).with(*args).ordered
269
270
  end
270
-
271
+
271
272
  # Here we go...
272
273
  subject.start
273
- end
274
+ end
275
+ # rubocop: enable Naming/VariableNumber
274
276
  end # context
275
277
  end # describe
276
278
  end # module
@@ -0,0 +1,445 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../../spec_helper'
4
+ require 'stringio'
5
+ require_relative '../../../lib/rley/syntax/match_closest'
6
+ require_relative '../../../lib/rley/syntax/non_terminal'
7
+ require_relative '../../../lib/rley/syntax/production'
8
+ require_relative '../../../lib/rley/syntax/base_grammar_builder'
9
+ require_relative '../../../lib/rley/lexical/token'
10
+ require_relative '../../../lib/rley/base/dotted_item'
11
+ require_relative '../../../lib/rley/parser/gfg_parsing'
12
+
13
+ require_relative '../support/expectation_helper'
14
+
15
+ # Load the class under test
16
+ require_relative '../../../lib/rley/parser/gfg_earley_parser'
17
+
18
+ module Rley # Open this namespace to avoid module qualifier prefixes
19
+ module Parser # Open this namespace to avoid module qualifier prefixes
20
+ describe GFGEarleyParser do
21
+ include ExpectationHelper # Mix-in with expectation on parse entry sets
22
+
23
+ Keyword = {
24
+ 'else' => 'ELSE',
25
+ 'false' => 'FALSE',
26
+ 'if' => 'IF',
27
+ 'then' => 'THEN',
28
+ 'true' => 'TRUE'
29
+ }.freeze
30
+
31
+ def tokenizer(aTextToParse)
32
+ scanner = StringScanner.new(aTextToParse)
33
+ tokens = []
34
+
35
+ loop do
36
+ scanner.skip(/\s+/)
37
+ break if scanner.eos?
38
+ curr_pos = scanner.pos
39
+ lexeme = scanner.scan(/\S+/)
40
+
41
+ term_name = Keyword[lexeme]
42
+ unless term_name
43
+ if lexeme =~ /\d+/
44
+ term_name = 'INTEGER'
45
+ else
46
+ err_msg = "Unknown token '#{lexeme}'"
47
+ raise StandardError, err_msg
48
+ end
49
+ end
50
+ pos = Rley::Lexical::Position.new(1, curr_pos + 1)
51
+ tokens << Rley::Lexical::Token.new(lexeme, term_name, pos)
52
+ end
53
+
54
+ tokens
55
+ end
56
+
57
+ let(:input) { 'if false then if true then 1 else 2' }
58
+
59
+ context 'Ambiguous parse: ' do
60
+ # Factory method. Builds and returns a simple (ambiguous) if-then-else grammar.
61
+ def grammar_if_else_amb
62
+ builder = Rley::Syntax::BaseGrammarBuilder.new do
63
+ add_terminals('IF', 'THEN', 'ELSE')
64
+ add_terminals('FALSE', 'TRUE', 'INTEGER')
65
+
66
+ rule 'program' => 'stmt'
67
+ rule 'stmt' => 'IF boolean THEN stmt'
68
+ rule 'stmt' => 'IF boolean THEN stmt ELSE stmt'
69
+ rule 'stmt' => 'literal'
70
+ rule 'literal' => 'boolean'
71
+ rule 'literal' => 'INTEGER'
72
+ rule 'boolean' => 'FALSE'
73
+ rule 'boolean' => 'TRUE'
74
+ end
75
+
76
+ builder.grammar
77
+ end
78
+
79
+ subject { GFGEarleyParser.new(grammar_if_else_amb) }
80
+
81
+ it 'should parse a valid simple input' do
82
+ tokens = tokenizer(input)
83
+ parse_result = subject.parse(tokens)
84
+ expect(parse_result.success?).to eq(true)
85
+ expect(parse_result.ambiguous?).to eq(true)
86
+ ######################
87
+ # Expectation chart[0]:
88
+ expected = [
89
+ '.program | 0', # initialization
90
+ 'program => . stmt | 0', # start rule
91
+ '.stmt | 0', # call rule
92
+ 'stmt => . IF boolean THEN stmt | 0', # start rule
93
+ 'stmt => . IF boolean THEN stmt ELSE stmt | 0', # start rule
94
+ 'stmt => . literal | 0', # start rule
95
+ '.literal | 0', # call rule
96
+ 'literal => . boolean | 0', # start rule
97
+ 'literal => . INTEGER | 0', # start rule
98
+ '.boolean | 0', # call rule
99
+ 'boolean => . FALSE | 0', # start rule
100
+ 'boolean => . TRUE | 0' # start rule
101
+ ]
102
+ compare_entry_texts(parse_result.chart[0], expected)
103
+ expected_terminals(parse_result.chart[0], %w[FALSE IF INTEGER TRUE])
104
+
105
+ ######################
106
+ # Expectation chart[1]:
107
+ expected = [
108
+ 'stmt => IF . boolean THEN stmt | 0', # start rule
109
+ 'stmt => IF . boolean THEN stmt ELSE stmt | 0', # start rule
110
+ '.boolean | 1',
111
+ 'boolean => . FALSE | 1', # start rule
112
+ 'boolean => . TRUE | 1' # start rule
113
+ ]
114
+ result1 = parse_result.chart[1]
115
+ expect(result1.entries.size).to eq(5)
116
+ compare_entry_texts(result1, expected)
117
+ expected_terminals(result1, %w[FALSE TRUE])
118
+
119
+ ######################
120
+ # Expectation chart[2]:
121
+ expected = [
122
+ 'boolean => FALSE . | 1',
123
+ 'boolean. | 1',
124
+ 'stmt => IF boolean . THEN stmt | 0',
125
+ 'stmt => IF boolean . THEN stmt ELSE stmt | 0'
126
+ ]
127
+ result2 = parse_result.chart[2]
128
+ expect(result2.entries.size).to eq(4)
129
+ compare_entry_texts(result2, expected)
130
+ expected_terminals(result2, %w[THEN])
131
+
132
+ ######################
133
+ # Expectation chart[3]:
134
+ expected = [
135
+ 'stmt => IF boolean THEN . stmt | 0',
136
+ 'stmt => IF boolean THEN . stmt ELSE stmt | 0',
137
+ '.stmt | 3',
138
+ 'stmt => . IF boolean THEN stmt | 3',
139
+ 'stmt => . IF boolean THEN stmt ELSE stmt | 3',
140
+ 'stmt => . literal | 3',
141
+ '.literal | 3',
142
+ 'literal => . boolean | 3',
143
+ 'literal => . INTEGER | 3',
144
+ '.boolean | 3',
145
+ 'boolean => . FALSE | 3',
146
+ 'boolean => . TRUE | 3'
147
+ ]
148
+ result3 = parse_result.chart[3]
149
+ expect(result3.entries.size).to eq(12)
150
+ compare_entry_texts(result3, expected)
151
+ expected_terminals(result3, %w[FALSE IF INTEGER TRUE])
152
+
153
+
154
+ ######################
155
+ # Expectation chart[4]:
156
+ expected = [
157
+ 'stmt => IF . boolean THEN stmt | 3',
158
+ 'stmt => IF . boolean THEN stmt ELSE stmt | 3',
159
+ '.boolean | 4',
160
+ 'boolean => . FALSE | 4',
161
+ 'boolean => . TRUE | 4'
162
+ ]
163
+ result4 = parse_result.chart[4]
164
+ expect(result4.entries.size).to eq(5)
165
+ compare_entry_texts(result4, expected)
166
+ expected_terminals(result4, %w[FALSE TRUE])
167
+
168
+ ######################
169
+ # Expectation chart[5]:
170
+ expected = [
171
+ 'boolean => TRUE . | 4',
172
+ 'boolean. | 4',
173
+ 'stmt => IF boolean . THEN stmt | 3',
174
+ 'stmt => IF boolean . THEN stmt ELSE stmt | 3'
175
+ ]
176
+ result5 = parse_result.chart[5]
177
+ expect(result5.entries.size).to eq(4)
178
+ compare_entry_texts(result5, expected)
179
+ expected_terminals(result5, %w[THEN])
180
+
181
+ ######################
182
+ # Expectation chart[6]:
183
+ expected = [
184
+ 'stmt => IF boolean THEN . stmt | 3',
185
+ 'stmt => IF boolean THEN . stmt ELSE stmt | 3',
186
+ '.stmt | 6',
187
+ 'stmt => . IF boolean THEN stmt | 6',
188
+ 'stmt => . IF boolean THEN stmt ELSE stmt | 6',
189
+ 'stmt => . literal | 6',
190
+ '.literal | 6',
191
+ 'literal => . boolean | 6',
192
+ 'literal => . INTEGER | 6',
193
+ '.boolean | 6',
194
+ 'boolean => . FALSE | 6',
195
+ 'boolean => . TRUE | 6'
196
+ ]
197
+ result6 = parse_result.chart[6]
198
+ expect(result6.entries.size).to eq(12)
199
+ compare_entry_texts(result6, expected)
200
+ expected_terminals(result6, %w[FALSE IF INTEGER TRUE])
201
+
202
+ ######################
203
+ # Expectation chart[7]:
204
+ expected = [
205
+ 'literal => INTEGER . | 6',
206
+ 'literal. | 6',
207
+ 'stmt => literal . | 6',
208
+ 'stmt. | 6',
209
+ 'stmt => IF boolean THEN stmt . | 3',
210
+ 'stmt => IF boolean THEN stmt . ELSE stmt | 3',
211
+ 'stmt. | 3',
212
+ 'stmt => IF boolean THEN stmt . | 0',
213
+ 'stmt => IF boolean THEN stmt . ELSE stmt | 0',
214
+ 'stmt. | 0',
215
+ 'program => stmt . | 0',
216
+ 'program. | 0'
217
+ ]
218
+ result7 = parse_result.chart[7]
219
+ expect(result7.entries.size).to eq(12)
220
+ compare_entry_texts(result7, expected)
221
+ expected_terminals(result7, %w[ELSE])
222
+
223
+ # Expectation chart[8]:
224
+ expected = [
225
+ 'stmt => IF boolean THEN stmt ELSE . stmt | 3',
226
+ 'stmt => IF boolean THEN stmt ELSE . stmt | 0',
227
+ '.stmt | 8',
228
+ 'stmt => . IF boolean THEN stmt | 8',
229
+ 'stmt => . IF boolean THEN stmt ELSE stmt | 8',
230
+ 'stmt => . literal | 8',
231
+ '.literal | 8',
232
+ 'literal => . boolean | 8',
233
+ 'literal => . INTEGER | 8',
234
+ '.boolean | 8',
235
+ 'boolean => . FALSE | 8',
236
+ 'boolean => . TRUE | 8'
237
+ ]
238
+ result8 = parse_result.chart[8]
239
+ expect(result8.entries.size).to eq(12)
240
+ compare_entry_texts(result8, expected)
241
+ expected_terminals(result8, %w[FALSE IF INTEGER TRUE])
242
+
243
+ ######################
244
+ # Expectation chart[9]:
245
+ expected = [
246
+ 'literal => INTEGER . | 8',
247
+ 'literal. | 8',
248
+ 'stmt => literal . | 8',
249
+ 'stmt. | 8',
250
+ 'stmt => IF boolean THEN stmt ELSE stmt . | 3',
251
+ 'stmt => IF boolean THEN stmt ELSE stmt . | 0',
252
+ 'stmt. | 3',
253
+ 'stmt. | 0',
254
+ 'stmt => IF boolean THEN stmt . | 0',
255
+ 'stmt => IF boolean THEN stmt . ELSE stmt | 0',
256
+ 'program => stmt . | 0',
257
+ 'program. | 0'
258
+ ]
259
+ result9 = parse_result.chart[9]
260
+ expect(result9.entries.size).to eq(12)
261
+ compare_entry_texts(result9, expected)
262
+ expected_terminals(result9, %w[ELSE])
263
+
264
+ ######################
265
+ # Expectation chart[10]:
266
+ result10 = parse_result.chart[10]
267
+ expect(result10).to be_nil
268
+
269
+ # The parse is ambiguous since there is more than one dotted item
270
+ # that matches the stmt. | 0 exit node on chart[9]:
271
+ # stmt => IF boolean THEN stmt ELSE stmt . | 0'
272
+ # stmt => IF boolean THEN stmt . | 0'
273
+ #
274
+ # This is related to the "dangling else problem"
275
+ end
276
+ end # context
277
+
278
+ context 'Disambiguated parse: ' do
279
+ def match_else_with_if(grammar)
280
+ # Brittle code
281
+ prod = grammar.rules[2]
282
+ constraint = Syntax::MatchClosest.new(prod.rhs.members, 4, 'IF')
283
+ prod.constraints << constraint
284
+ end
285
+
286
+ # Factory method. Builds and returns a simple if-then-else grammar.
287
+ def grammar_if_else
288
+ builder = Rley::Syntax::BaseGrammarBuilder.new do
289
+ add_terminals('IF', 'THEN', 'ELSE')
290
+ add_terminals('FALSE', 'TRUE', 'INTEGER')
291
+
292
+ rule 'program' => 'stmt'
293
+ rule 'stmt' => 'IF boolean THEN stmt'
294
+
295
+ # To prevent the dangling else issue, the ELSE must match the closest preceding IF
296
+ # rule 'stmt' => 'IF boolean THEN stmt ELSE{closest IF} stmt'
297
+ rule 'stmt' => 'IF boolean THEN stmt ELSE stmt'
298
+ rule 'stmt' => 'literal'
299
+ rule 'literal' => 'boolean'
300
+ rule 'literal' => 'INTEGER'
301
+ rule 'boolean' => 'FALSE'
302
+ rule 'boolean' => 'TRUE'
303
+ end
304
+
305
+ grm = builder.grammar
306
+ match_else_with_if(grm)
307
+
308
+ grm
309
+ end
310
+
311
+ subject { GFGEarleyParser.new(grammar_if_else) }
312
+
313
+ it 'should cope with dangling else problem' do
314
+ tokens = tokenizer(input)
315
+ parse_result = subject.parse(tokens)
316
+ expect(parse_result.success?).to eq(true)
317
+ expect(parse_result.ambiguous?).to eq(true)
318
+ ######################
319
+ # Expectation chart[0]:
320
+ expected = [
321
+ '.program | 0', # initialization
322
+ 'program => . stmt | 0', # start rule
323
+ '.stmt | 0', # call rule
324
+ 'stmt => . IF boolean THEN stmt | 0', # start rule
325
+ 'stmt => . IF boolean THEN stmt ELSE stmt | 0', # start rule
326
+ 'stmt => . literal | 0', # start rule
327
+ '.literal | 0', # call rule
328
+ 'literal => . boolean | 0', # start rule
329
+ 'literal => . INTEGER | 0', # start rule
330
+ '.boolean | 0', # call rule
331
+ 'boolean => . FALSE | 0', # start rule
332
+ 'boolean => . TRUE | 0' # start rule
333
+ ]
334
+ compare_entry_texts(parse_result.chart[0], expected)
335
+ expected_terminals(parse_result.chart[0], %w[FALSE IF INTEGER TRUE])
336
+
337
+ # The parser should work as the previous version...
338
+ # we skip chart[1], chart[2] and chart[3]
339
+ ######################
340
+ # Expectation chart[4]:
341
+ expected = [
342
+ 'stmt => IF . boolean THEN stmt | 3',
343
+ 'stmt => IF . boolean THEN stmt ELSE stmt | 3',
344
+ '.boolean | 4',
345
+ 'boolean => . FALSE | 4',
346
+ 'boolean => . TRUE | 4'
347
+ ]
348
+ result4 = parse_result.chart[4]
349
+ expect(result4.entries.size).to eq(5)
350
+ compare_entry_texts(result4, expected)
351
+ expected_terminals(result4, %w[FALSE TRUE])
352
+
353
+ ######################
354
+ # Before reading ELSE
355
+ # Expectation chart[7]:
356
+ expected = [
357
+ 'literal => INTEGER . | 6',
358
+ 'literal. | 6',
359
+ 'stmt => literal . | 6',
360
+ 'stmt. | 6',
361
+ 'stmt => IF boolean THEN stmt . | 3',
362
+ 'stmt => IF boolean THEN stmt . ELSE stmt | 3',
363
+ 'stmt. | 3',
364
+ 'stmt => IF boolean THEN stmt . | 0',
365
+ 'stmt => IF boolean THEN stmt . ELSE stmt | 0',
366
+ 'stmt. | 0',
367
+ 'program => stmt . | 0',
368
+ 'program. | 0'
369
+ ]
370
+ result7 = parse_result.chart[7]
371
+ expect(result7.entries.size).to eq(12)
372
+ compare_entry_texts(result7, expected)
373
+ expected_terminals(result7, %w[ELSE])
374
+
375
+ ######################
376
+ # After reading ELSE
377
+ # Expectation chart[8]:
378
+ expected = [
379
+ 'stmt => IF boolean THEN stmt ELSE . stmt | 3',
380
+ # 'stmt => IF boolean THEN stmt ELSE . stmt | 0', # Excluded
381
+ '.stmt | 8',
382
+ 'stmt => . IF boolean THEN stmt | 8',
383
+ 'stmt => . IF boolean THEN stmt ELSE stmt | 8',
384
+ 'stmt => . literal | 8',
385
+ '.literal | 8',
386
+ 'literal => . boolean | 8',
387
+ 'literal => . INTEGER | 8',
388
+ '.boolean | 8',
389
+ 'boolean => . FALSE | 8',
390
+ 'boolean => . TRUE | 8'
391
+ ]
392
+ result8 = parse_result.chart[8]
393
+ found = parse_result.chart.search_entries(4, {before: 'IF'})
394
+ expect(result8.entries.size).to eq(11)
395
+ compare_entry_texts(result8, expected)
396
+ expected_terminals(result8, %w[FALSE IF INTEGER TRUE])
397
+
398
+ # How does it work?
399
+ # ELSE was just read at position 7
400
+ # We look backwards to nearest IF; there is one at position 3
401
+ # In chart[8], we should exclude the dotted item:
402
+ # 'stmt => IF boolean THEN stmt ELSE . stmt | 0'
403
+ # Reasoning?
404
+ # On chart[4], we find two entries for the IF .:
405
+ # 'stmt => IF . boolean THEN stmt | 3',
406
+ # 'stmt => IF . boolean THEN stmt ELSE stmt | 3'
407
+ # Only those productions that still apply at 8 must be retained
408
+ # 'stmt => IF boolean THEN stmt ELSE . stmt | 3',
409
+ # 'stmt => IF boolean THEN stmt ELSE . stmt | 0', # To exclude
410
+ # Where to place the check?
411
+ # At the dotted item?
412
+ # call, return scan nodes
413
+ # So if one has an annotated production rule:
414
+ # stmt => IF boolean THEN stmt ELSE{ closest: IF } stmt
415
+ # then the dotted item:
416
+ # stmt => IF boolean THEN stmt ELSE . stmt
417
+ # should bear the constraint
418
+
419
+ ######################
420
+ # Expectation chart[9]:
421
+ expected = [
422
+ 'literal => INTEGER . | 8',
423
+ 'literal. | 8',
424
+ 'stmt => literal . | 8',
425
+ 'stmt. | 8',
426
+ 'stmt => IF boolean THEN stmt ELSE stmt . | 3',
427
+ # 'stmt => IF boolean THEN stmt ELSE stmt . | 0', # Excluded
428
+ 'stmt. | 3',
429
+ 'stmt => IF boolean THEN stmt . | 0',
430
+ 'stmt => IF boolean THEN stmt . ELSE stmt | 0',
431
+ 'stmt. | 0',
432
+ 'program => stmt . | 0',
433
+ 'program. | 0'
434
+ ]
435
+ result9 = parse_result.chart[9]
436
+ expect(result9.entries.size).to eq(11)
437
+ compare_entry_texts(result9, expected)
438
+ expected_terminals(result9, ['ELSE'])
439
+ end
440
+ end # context
441
+ end # describe
442
+ end # module
443
+ end # module
444
+
445
+