rley 0.7.06 → 0.8.01
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +362 -62
- data/.travis.yml +6 -6
- data/CHANGELOG.md +20 -4
- data/LICENSE.txt +1 -1
- data/README.md +7 -7
- data/examples/NLP/engtagger.rb +193 -190
- data/examples/NLP/nano_eng/nano_en_demo.rb +7 -11
- data/examples/NLP/nano_eng/nano_grammar.rb +21 -21
- data/examples/NLP/pico_en_demo.rb +2 -2
- data/examples/data_formats/JSON/cli_options.rb +1 -1
- data/examples/data_formats/JSON/json_ast_builder.rb +21 -27
- data/examples/data_formats/JSON/json_ast_nodes.rb +12 -21
- data/examples/data_formats/JSON/json_demo.rb +1 -2
- data/examples/data_formats/JSON/json_grammar.rb +13 -13
- data/examples/data_formats/JSON/json_lexer.rb +8 -8
- data/examples/data_formats/JSON/json_minifier.rb +1 -1
- data/examples/general/calc_iter1/calc_ast_builder.rb +13 -10
- data/examples/general/calc_iter1/calc_ast_nodes.rb +23 -37
- data/examples/general/calc_iter1/calc_grammar.rb +7 -6
- data/examples/general/calc_iter1/calc_lexer.rb +6 -4
- data/examples/general/calc_iter1/spec/calculator_spec.rb +5 -5
- data/examples/general/calc_iter2/calc_ast_builder.rb +5 -3
- data/examples/general/calc_iter2/calc_ast_nodes.rb +27 -43
- data/examples/general/calc_iter2/calc_grammar.rb +12 -12
- data/examples/general/calc_iter2/calc_lexer.rb +11 -10
- data/examples/general/calc_iter2/spec/calculator_spec.rb +26 -26
- data/examples/general/left.rb +2 -2
- data/examples/general/right.rb +2 -2
- data/lib/rley.rb +1 -1
- data/lib/rley/base/dotted_item.rb +28 -31
- data/lib/rley/base/grm_items_builder.rb +6 -0
- data/lib/rley/constants.rb +2 -2
- data/lib/rley/engine.rb +22 -25
- data/lib/rley/formatter/asciitree.rb +3 -3
- data/lib/rley/formatter/bracket_notation.rb +1 -8
- data/lib/rley/formatter/debug.rb +6 -6
- data/lib/rley/formatter/json.rb +2 -2
- data/lib/rley/gfg/call_edge.rb +1 -1
- data/lib/rley/gfg/edge.rb +5 -5
- data/lib/rley/gfg/end_vertex.rb +2 -6
- data/lib/rley/gfg/epsilon_edge.rb +1 -5
- data/lib/rley/gfg/grm_flow_graph.rb +27 -23
- data/lib/rley/gfg/item_vertex.rb +10 -10
- data/lib/rley/gfg/non_terminal_vertex.rb +4 -4
- data/lib/rley/gfg/scan_edge.rb +1 -1
- data/lib/rley/gfg/shortcut_edge.rb +2 -2
- data/lib/rley/gfg/start_vertex.rb +4 -8
- data/lib/rley/gfg/vertex.rb +43 -39
- data/lib/rley/interface.rb +16 -0
- data/lib/rley/lexical/token_range.rb +6 -6
- data/lib/rley/notation/all_notation_nodes.rb +2 -0
- data/lib/rley/notation/ast_builder.rb +191 -0
- data/lib/rley/notation/ast_node.rb +44 -0
- data/lib/rley/notation/ast_visitor.rb +113 -0
- data/lib/rley/notation/grammar.rb +49 -0
- data/lib/rley/notation/grammar_builder.rb +504 -0
- data/lib/rley/notation/grouping_node.rb +23 -0
- data/lib/rley/notation/parser.rb +56 -0
- data/lib/rley/notation/sequence_node.rb +35 -0
- data/lib/rley/notation/symbol_node.rb +29 -0
- data/lib/rley/notation/tokenizer.rb +192 -0
- data/lib/rley/parse_forest_visitor.rb +5 -5
- data/lib/rley/parse_rep/ast_base_builder.rb +48 -11
- data/lib/rley/parse_rep/cst_builder.rb +5 -6
- data/lib/rley/parse_rep/parse_forest_builder.rb +22 -18
- data/lib/rley/parse_rep/parse_forest_factory.rb +3 -3
- data/lib/rley/parse_rep/parse_rep_creator.rb +14 -16
- data/lib/rley/parse_rep/parse_tree_builder.rb +4 -4
- data/lib/rley/parse_rep/parse_tree_factory.rb +27 -27
- data/lib/rley/parse_tree_visitor.rb +1 -1
- data/lib/rley/parser/error_reason.rb +4 -5
- data/lib/rley/parser/gfg_chart.rb +118 -26
- data/lib/rley/parser/gfg_parsing.rb +22 -33
- data/lib/rley/parser/parse_entry.rb +25 -31
- data/lib/rley/parser/parse_entry_set.rb +19 -16
- data/lib/rley/parser/parse_entry_tracker.rb +4 -4
- data/lib/rley/parser/parse_tracer.rb +13 -13
- data/lib/rley/parser/parse_walker_factory.rb +23 -28
- data/lib/rley/ptree/non_terminal_node.rb +7 -5
- data/lib/rley/ptree/parse_tree.rb +3 -3
- data/lib/rley/ptree/parse_tree_node.rb +5 -5
- data/lib/rley/ptree/terminal_node.rb +7 -7
- data/lib/rley/rley_error.rb +12 -12
- data/lib/rley/sppf/alternative_node.rb +6 -6
- data/lib/rley/sppf/composite_node.rb +7 -7
- data/lib/rley/sppf/epsilon_node.rb +3 -3
- data/lib/rley/sppf/leaf_node.rb +3 -3
- data/lib/rley/sppf/parse_forest.rb +16 -16
- data/lib/rley/sppf/sppf_node.rb +7 -8
- data/lib/rley/sppf/token_node.rb +3 -3
- data/lib/rley/syntax/{grammar_builder.rb → base_grammar_builder.rb} +61 -23
- data/lib/rley/syntax/grammar.rb +5 -5
- data/lib/rley/syntax/grm_symbol.rb +7 -7
- data/lib/rley/syntax/match_closest.rb +43 -0
- data/lib/rley/syntax/non_terminal.rb +9 -15
- data/lib/rley/syntax/production.rb +16 -10
- data/lib/rley/syntax/symbol_seq.rb +7 -9
- data/lib/rley/syntax/terminal.rb +4 -5
- data/lib/rley/syntax/verbatim_symbol.rb +3 -3
- data/lib/support/base_tokenizer.rb +19 -18
- data/spec/rley/base/dotted_item_spec.rb +2 -2
- data/spec/rley/engine_spec.rb +23 -21
- data/spec/rley/formatter/asciitree_spec.rb +7 -7
- data/spec/rley/formatter/bracket_notation_spec.rb +13 -13
- data/spec/rley/formatter/json_spec.rb +1 -1
- data/spec/rley/gfg/end_vertex_spec.rb +5 -5
- data/spec/rley/gfg/grm_flow_graph_spec.rb +2 -2
- data/spec/rley/gfg/item_vertex_spec.rb +10 -10
- data/spec/rley/gfg/non_terminal_vertex_spec.rb +3 -3
- data/spec/rley/gfg/shortcut_edge_spec.rb +1 -1
- data/spec/rley/gfg/start_vertex_spec.rb +5 -5
- data/spec/rley/gfg/vertex_spec.rb +3 -3
- data/spec/rley/lexical/token_range_spec.rb +16 -16
- data/spec/rley/lexical/token_spec.rb +2 -2
- data/spec/rley/notation/grammar_builder_spec.rb +302 -0
- data/spec/rley/notation/parser_spec.rb +184 -0
- data/spec/rley/notation/tokenizer_spec.rb +370 -0
- data/spec/rley/parse_forest_visitor_spec.rb +165 -163
- data/spec/rley/parse_rep/ambiguous_parse_spec.rb +44 -44
- data/spec/rley/parse_rep/ast_builder_spec.rb +6 -7
- data/spec/rley/parse_rep/cst_builder_spec.rb +5 -5
- data/spec/rley/parse_rep/groucho_spec.rb +24 -26
- data/spec/rley/parse_rep/parse_forest_builder_spec.rb +27 -27
- data/spec/rley/parse_rep/parse_forest_factory_spec.rb +8 -8
- data/spec/rley/parse_rep/parse_tree_factory_spec.rb +3 -3
- data/spec/rley/parse_tree_visitor_spec.rb +10 -8
- data/spec/rley/parser/dangling_else_spec.rb +445 -0
- data/spec/rley/parser/error_reason_spec.rb +6 -6
- data/spec/rley/parser/gfg_earley_parser_spec.rb +120 -12
- data/spec/rley/parser/gfg_parsing_spec.rb +6 -13
- data/spec/rley/parser/parse_entry_spec.rb +19 -19
- data/spec/rley/parser/parse_walker_factory_spec.rb +10 -10
- data/spec/rley/ptree/non_terminal_node_spec.rb +5 -3
- data/spec/rley/ptree/parse_tree_node_spec.rb +4 -4
- data/spec/rley/ptree/terminal_node_spec.rb +6 -6
- data/spec/rley/sppf/alternative_node_spec.rb +6 -6
- data/spec/rley/sppf/non_terminal_node_spec.rb +3 -3
- data/spec/rley/sppf/token_node_spec.rb +4 -4
- data/spec/rley/support/ambiguous_grammar_helper.rb +4 -5
- data/spec/rley/support/grammar_abc_helper.rb +3 -5
- data/spec/rley/support/grammar_ambig01_helper.rb +5 -6
- data/spec/rley/support/grammar_arr_int_helper.rb +5 -6
- data/spec/rley/support/grammar_b_expr_helper.rb +5 -6
- data/spec/rley/support/grammar_int_seq_helper.rb +51 -0
- data/spec/rley/support/grammar_l0_helper.rb +14 -17
- data/spec/rley/support/grammar_pb_helper.rb +8 -7
- data/spec/rley/support/grammar_sppf_helper.rb +3 -3
- data/spec/rley/syntax/{grammar_builder_spec.rb → base_grammar_builder_spec.rb} +35 -16
- data/spec/rley/syntax/grammar_spec.rb +6 -6
- data/spec/rley/syntax/grm_symbol_spec.rb +1 -1
- data/spec/rley/syntax/match_closest_spec.rb +46 -0
- data/spec/rley/syntax/non_terminal_spec.rb +8 -8
- data/spec/rley/syntax/production_spec.rb +17 -13
- data/spec/rley/syntax/symbol_seq_spec.rb +2 -2
- data/spec/rley/syntax/terminal_spec.rb +5 -5
- data/spec/rley/syntax/verbatim_symbol_spec.rb +1 -1
- data/spec/spec_helper.rb +0 -12
- data/spec/support/base_tokenizer_spec.rb +7 -2
- metadata +48 -74
- data/.simplecov +0 -7
- data/lib/rley/parser/parse_state.rb +0 -83
- data/lib/rley/parser/parse_state_tracker.rb +0 -59
- data/lib/rley/parser/state_set.rb +0 -101
- data/spec/rley/parser/parse_state_spec.rb +0 -125
- data/spec/rley/parser/parse_tracer_spec.rb +0 -200
- data/spec/rley/parser/state_set_spec.rb +0 -130
@@ -4,7 +4,7 @@ require_relative '../../spec_helper'
|
|
4
4
|
|
5
5
|
require_relative '../../../lib/rley/parser/gfg_earley_parser'
|
6
6
|
|
7
|
-
require_relative '../../../lib/rley/syntax/
|
7
|
+
require_relative '../../../lib/rley/syntax/base_grammar_builder'
|
8
8
|
require_relative '../support/grammar_helper'
|
9
9
|
require_relative '../support/expectation_helper'
|
10
10
|
|
@@ -22,15 +22,15 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
22
22
|
# "SPPF-Style Parsing From Earley Recognizers" in
|
23
23
|
# Notes in Theoretical Computer Science 203, (2008), pp. 53-67
|
24
24
|
# contains a hidden left recursion and a cycle
|
25
|
-
builder = Syntax::
|
25
|
+
builder = Syntax::BaseGrammarBuilder.new do
|
26
26
|
add_terminals('a', 'b')
|
27
27
|
rule 'Phi' => 'S'
|
28
|
-
rule 'S' =>
|
29
|
-
rule 'S' =>
|
28
|
+
rule 'S' => 'A T'
|
29
|
+
rule 'S' => 'a T'
|
30
30
|
rule 'A' => 'a'
|
31
|
-
rule 'A' =>
|
31
|
+
rule 'A' => 'B A'
|
32
32
|
rule 'B' => []
|
33
|
-
rule 'T' =>
|
33
|
+
rule 'T' => 'b b b'
|
34
34
|
end
|
35
35
|
builder.grammar
|
36
36
|
end
|
@@ -49,11 +49,11 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
49
49
|
end
|
50
50
|
|
51
51
|
# Emit a text representation of the current path.
|
52
|
-
def path_to_s
|
52
|
+
def path_to_s
|
53
53
|
text_parts = subject.curr_path.map do |path_element|
|
54
54
|
path_element.to_string(0)
|
55
55
|
end
|
56
|
-
|
56
|
+
text_parts.join('/')
|
57
57
|
end
|
58
58
|
|
59
59
|
context 'Initialization:' do
|
@@ -3,7 +3,7 @@
|
|
3
3
|
require_relative '../../spec_helper'
|
4
4
|
|
5
5
|
require_relative '../../../lib/rley/parser/gfg_earley_parser'
|
6
|
-
require_relative '../../../lib/rley/syntax/
|
6
|
+
require_relative '../../../lib/rley/syntax/base_grammar_builder'
|
7
7
|
require_relative '../support/grammar_helper'
|
8
8
|
require_relative '../support/grammar_abc_helper'
|
9
9
|
require_relative '../support/expectation_helper'
|
@@ -38,11 +38,11 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
38
38
|
end
|
39
39
|
|
40
40
|
# Emit a text representation of the current path.
|
41
|
-
def path_to_s
|
41
|
+
def path_to_s
|
42
42
|
text_parts = subject.curr_path.map do |path_element|
|
43
43
|
path_element.to_string(0)
|
44
44
|
end
|
45
|
-
|
45
|
+
text_parts.join('/')
|
46
46
|
end
|
47
47
|
|
48
48
|
|
@@ -11,7 +11,7 @@ require_relative './support/grammar_sppf_helper'
|
|
11
11
|
require_relative '../../lib/rley/parse_tree_visitor'
|
12
12
|
|
13
13
|
module Rley # Open this namespace to avoid module qualifier prefixes
|
14
|
-
describe ParseTreeVisitor do
|
14
|
+
describe ParseTreeVisitor do
|
15
15
|
let(:grammar_abc) do
|
16
16
|
sandbox = Object.new
|
17
17
|
sandbox.extend(GrammarABCHelper)
|
@@ -45,7 +45,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
45
45
|
# Capital letters represent non-terminal nodes
|
46
46
|
let(:grm_abc_ptree1) do
|
47
47
|
engine = Rley::Engine.new
|
48
|
-
engine.use_grammar(grammar_abc)
|
48
|
+
engine.use_grammar(grammar_abc)
|
49
49
|
parse_result = engine.parse(grm_abc_tokens1)
|
50
50
|
ptree = engine.convert(parse_result)
|
51
51
|
ptree
|
@@ -161,6 +161,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
161
161
|
subject.end_visit_ptree(grm_abc_ptree1)
|
162
162
|
end
|
163
163
|
|
164
|
+
# rubocop: disable Naming/VariableNumber
|
164
165
|
it 'should begin the visit when requested' do
|
165
166
|
# Reminder: parse tree structure is
|
166
167
|
# S[0,5]
|
@@ -209,11 +210,11 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
209
210
|
expectations.each do |(msg, args)|
|
210
211
|
expect(listener1).to receive(msg).with(*args).ordered
|
211
212
|
end
|
212
|
-
|
213
|
+
|
213
214
|
# Here we go...
|
214
215
|
subject.start
|
215
216
|
end
|
216
|
-
|
217
|
+
|
217
218
|
it 'should also visit in pre-order' do
|
218
219
|
# Reminder: parse tree structure is
|
219
220
|
# S[0,5]
|
@@ -228,7 +229,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
228
229
|
root = grm_abc_ptree1.root
|
229
230
|
# Here we defeat encapsulation for the good cause
|
230
231
|
subject.instance_variable_set(:@traversal, :pre_order)
|
231
|
-
|
232
|
+
|
232
233
|
children = root.subnodes
|
233
234
|
big_a_1 = children[0]
|
234
235
|
big_a_1_children = big_a_1.subnodes
|
@@ -239,7 +240,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
239
240
|
expectations = [
|
240
241
|
[:before_ptree, [grm_abc_ptree1]]
|
241
242
|
# TODO: fix this test
|
242
|
-
# [:before_subnodes, [root, children]],
|
243
|
+
# [:before_subnodes, [root, children]],
|
243
244
|
# [:before_non_terminal, [root]],
|
244
245
|
|
245
246
|
# [:before_non_terminal, [big_a_1]],
|
@@ -267,10 +268,11 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
267
268
|
expectations.each do |(msg, args)|
|
268
269
|
expect(listener1).to receive(msg).with(*args).ordered
|
269
270
|
end
|
270
|
-
|
271
|
+
|
271
272
|
# Here we go...
|
272
273
|
subject.start
|
273
|
-
end
|
274
|
+
end
|
275
|
+
# rubocop: enable Naming/VariableNumber
|
274
276
|
end # context
|
275
277
|
end # describe
|
276
278
|
end # module
|
@@ -0,0 +1,445 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative '../../spec_helper'
|
4
|
+
require 'stringio'
|
5
|
+
require_relative '../../../lib/rley/syntax/match_closest'
|
6
|
+
require_relative '../../../lib/rley/syntax/non_terminal'
|
7
|
+
require_relative '../../../lib/rley/syntax/production'
|
8
|
+
require_relative '../../../lib/rley/syntax/base_grammar_builder'
|
9
|
+
require_relative '../../../lib/rley/lexical/token'
|
10
|
+
require_relative '../../../lib/rley/base/dotted_item'
|
11
|
+
require_relative '../../../lib/rley/parser/gfg_parsing'
|
12
|
+
|
13
|
+
require_relative '../support/expectation_helper'
|
14
|
+
|
15
|
+
# Load the class under test
|
16
|
+
require_relative '../../../lib/rley/parser/gfg_earley_parser'
|
17
|
+
|
18
|
+
module Rley # Open this namespace to avoid module qualifier prefixes
|
19
|
+
module Parser # Open this namespace to avoid module qualifier prefixes
|
20
|
+
describe GFGEarleyParser do
|
21
|
+
include ExpectationHelper # Mix-in with expectation on parse entry sets
|
22
|
+
|
23
|
+
Keyword = {
|
24
|
+
'else' => 'ELSE',
|
25
|
+
'false' => 'FALSE',
|
26
|
+
'if' => 'IF',
|
27
|
+
'then' => 'THEN',
|
28
|
+
'true' => 'TRUE'
|
29
|
+
}.freeze
|
30
|
+
|
31
|
+
def tokenizer(aTextToParse)
|
32
|
+
scanner = StringScanner.new(aTextToParse)
|
33
|
+
tokens = []
|
34
|
+
|
35
|
+
loop do
|
36
|
+
scanner.skip(/\s+/)
|
37
|
+
break if scanner.eos?
|
38
|
+
curr_pos = scanner.pos
|
39
|
+
lexeme = scanner.scan(/\S+/)
|
40
|
+
|
41
|
+
term_name = Keyword[lexeme]
|
42
|
+
unless term_name
|
43
|
+
if lexeme =~ /\d+/
|
44
|
+
term_name = 'INTEGER'
|
45
|
+
else
|
46
|
+
err_msg = "Unknown token '#{lexeme}'"
|
47
|
+
raise StandardError, err_msg
|
48
|
+
end
|
49
|
+
end
|
50
|
+
pos = Rley::Lexical::Position.new(1, curr_pos + 1)
|
51
|
+
tokens << Rley::Lexical::Token.new(lexeme, term_name, pos)
|
52
|
+
end
|
53
|
+
|
54
|
+
tokens
|
55
|
+
end
|
56
|
+
|
57
|
+
let(:input) { 'if false then if true then 1 else 2' }
|
58
|
+
|
59
|
+
context 'Ambiguous parse: ' do
|
60
|
+
# Factory method. Builds a simple if-then-else grammar that is ambiguous (dangling else).
|
61
|
+
def grammar_if_else_amb
|
62
|
+
builder = Rley::Syntax::BaseGrammarBuilder.new do
|
63
|
+
add_terminals('IF', 'THEN', 'ELSE')
|
64
|
+
add_terminals('FALSE', 'TRUE', 'INTEGER')
|
65
|
+
|
66
|
+
rule 'program' => 'stmt'
|
67
|
+
rule 'stmt' => 'IF boolean THEN stmt'
|
68
|
+
rule 'stmt' => 'IF boolean THEN stmt ELSE stmt'
|
69
|
+
rule 'stmt' => 'literal'
|
70
|
+
rule 'literal' => 'boolean'
|
71
|
+
rule 'literal' => 'INTEGER'
|
72
|
+
rule 'boolean' => 'FALSE'
|
73
|
+
rule 'boolean' => 'TRUE'
|
74
|
+
end
|
75
|
+
|
76
|
+
builder.grammar
|
77
|
+
end
|
78
|
+
|
79
|
+
subject { GFGEarleyParser.new(grammar_if_else_amb) }
|
80
|
+
|
81
|
+
it 'should parse a valid simple input' do
|
82
|
+
tokens = tokenizer(input)
|
83
|
+
parse_result = subject.parse(tokens)
|
84
|
+
expect(parse_result.success?).to eq(true)
|
85
|
+
expect(parse_result.ambiguous?).to eq(true)
|
86
|
+
######################
|
87
|
+
# Expectation chart[0]:
|
88
|
+
expected = [
|
89
|
+
'.program | 0', # initialization
|
90
|
+
'program => . stmt | 0', # start rule
|
91
|
+
'.stmt | 0', # call rule
|
92
|
+
'stmt => . IF boolean THEN stmt | 0', # start rule
|
93
|
+
'stmt => . IF boolean THEN stmt ELSE stmt | 0', # start rule
|
94
|
+
'stmt => . literal | 0', # start rule
|
95
|
+
'.literal | 0', # call rule
|
96
|
+
'literal => . boolean | 0', # start rule
|
97
|
+
'literal => . INTEGER | 0', # start rule
|
98
|
+
'.boolean | 0', # call rule
|
99
|
+
'boolean => . FALSE | 0', # start rule
|
100
|
+
'boolean => . TRUE | 0' # start rule
|
101
|
+
]
|
102
|
+
compare_entry_texts(parse_result.chart[0], expected)
|
103
|
+
expected_terminals(parse_result.chart[0], %w[FALSE IF INTEGER TRUE])
|
104
|
+
|
105
|
+
######################
|
106
|
+
# Expectation chart[1]:
|
107
|
+
expected = [
|
108
|
+
'stmt => IF . boolean THEN stmt | 0', # start rule
|
109
|
+
'stmt => IF . boolean THEN stmt ELSE stmt | 0', # start rule
|
110
|
+
'.boolean | 1',
|
111
|
+
'boolean => . FALSE | 1', # start rule
|
112
|
+
'boolean => . TRUE | 1' # start rule
|
113
|
+
]
|
114
|
+
result1 = parse_result.chart[1]
|
115
|
+
expect(result1.entries.size).to eq(5)
|
116
|
+
compare_entry_texts(result1, expected)
|
117
|
+
expected_terminals(result1, %w[FALSE TRUE])
|
118
|
+
|
119
|
+
######################
|
120
|
+
# Expectation chart[2]:
|
121
|
+
expected = [
|
122
|
+
'boolean => FALSE . | 1',
|
123
|
+
'boolean. | 1',
|
124
|
+
'stmt => IF boolean . THEN stmt | 0',
|
125
|
+
'stmt => IF boolean . THEN stmt ELSE stmt | 0'
|
126
|
+
]
|
127
|
+
result2 = parse_result.chart[2]
|
128
|
+
expect(result2.entries.size).to eq(4)
|
129
|
+
compare_entry_texts(result2, expected)
|
130
|
+
expected_terminals(result2, %w[THEN])
|
131
|
+
|
132
|
+
######################
|
133
|
+
# Expectation chart[3]:
|
134
|
+
expected = [
|
135
|
+
'stmt => IF boolean THEN . stmt | 0',
|
136
|
+
'stmt => IF boolean THEN . stmt ELSE stmt | 0',
|
137
|
+
'.stmt | 3',
|
138
|
+
'stmt => . IF boolean THEN stmt | 3',
|
139
|
+
'stmt => . IF boolean THEN stmt ELSE stmt | 3',
|
140
|
+
'stmt => . literal | 3',
|
141
|
+
'.literal | 3',
|
142
|
+
'literal => . boolean | 3',
|
143
|
+
'literal => . INTEGER | 3',
|
144
|
+
'.boolean | 3',
|
145
|
+
'boolean => . FALSE | 3',
|
146
|
+
'boolean => . TRUE | 3'
|
147
|
+
]
|
148
|
+
result3 = parse_result.chart[3]
|
149
|
+
expect(result3.entries.size).to eq(12)
|
150
|
+
compare_entry_texts(result3, expected)
|
151
|
+
expected_terminals(result3, %w[FALSE IF INTEGER TRUE])
|
152
|
+
|
153
|
+
|
154
|
+
######################
|
155
|
+
# Expectation chart[4]:
|
156
|
+
expected = [
|
157
|
+
'stmt => IF . boolean THEN stmt | 3',
|
158
|
+
'stmt => IF . boolean THEN stmt ELSE stmt | 3',
|
159
|
+
'.boolean | 4',
|
160
|
+
'boolean => . FALSE | 4',
|
161
|
+
'boolean => . TRUE | 4'
|
162
|
+
]
|
163
|
+
result4 = parse_result.chart[4]
|
164
|
+
expect(result4.entries.size).to eq(5)
|
165
|
+
compare_entry_texts(result4, expected)
|
166
|
+
expected_terminals(result4, %w[FALSE TRUE])
|
167
|
+
|
168
|
+
######################
|
169
|
+
# Expectation chart[5]:
|
170
|
+
expected = [
|
171
|
+
'boolean => TRUE . | 4',
|
172
|
+
'boolean. | 4',
|
173
|
+
'stmt => IF boolean . THEN stmt | 3',
|
174
|
+
'stmt => IF boolean . THEN stmt ELSE stmt | 3'
|
175
|
+
]
|
176
|
+
result5 = parse_result.chart[5]
|
177
|
+
expect(result5.entries.size).to eq(4)
|
178
|
+
compare_entry_texts(result5, expected)
|
179
|
+
expected_terminals(result5, %w[THEN])
|
180
|
+
|
181
|
+
######################
|
182
|
+
# Expectation chart[6]:
|
183
|
+
expected = [
|
184
|
+
'stmt => IF boolean THEN . stmt | 3',
|
185
|
+
'stmt => IF boolean THEN . stmt ELSE stmt | 3',
|
186
|
+
'.stmt | 6',
|
187
|
+
'stmt => . IF boolean THEN stmt | 6',
|
188
|
+
'stmt => . IF boolean THEN stmt ELSE stmt | 6',
|
189
|
+
'stmt => . literal | 6',
|
190
|
+
'.literal | 6',
|
191
|
+
'literal => . boolean | 6',
|
192
|
+
'literal => . INTEGER | 6',
|
193
|
+
'.boolean | 6',
|
194
|
+
'boolean => . FALSE | 6',
|
195
|
+
'boolean => . TRUE | 6'
|
196
|
+
]
|
197
|
+
result6 = parse_result.chart[6]
|
198
|
+
expect(result6.entries.size).to eq(12)
|
199
|
+
compare_entry_texts(result6, expected)
|
200
|
+
expected_terminals(result6, %w[FALSE IF INTEGER TRUE])
|
201
|
+
|
202
|
+
######################
|
203
|
+
# Expectation chart[7]:
|
204
|
+
expected = [
|
205
|
+
'literal => INTEGER . | 6',
|
206
|
+
'literal. | 6',
|
207
|
+
'stmt => literal . | 6',
|
208
|
+
'stmt. | 6',
|
209
|
+
'stmt => IF boolean THEN stmt . | 3',
|
210
|
+
'stmt => IF boolean THEN stmt . ELSE stmt | 3',
|
211
|
+
'stmt. | 3',
|
212
|
+
'stmt => IF boolean THEN stmt . | 0',
|
213
|
+
'stmt => IF boolean THEN stmt . ELSE stmt | 0',
|
214
|
+
'stmt. | 0',
|
215
|
+
'program => stmt . | 0',
|
216
|
+
'program. | 0'
|
217
|
+
]
|
218
|
+
result7 = parse_result.chart[7]
|
219
|
+
expect(result7.entries.size).to eq(12)
|
220
|
+
compare_entry_texts(result7, expected)
|
221
|
+
expected_terminals(result7, %w[ELSE])
|
222
|
+
|
223
|
+
# Expectation chart[8]:
|
224
|
+
expected = [
|
225
|
+
'stmt => IF boolean THEN stmt ELSE . stmt | 3',
|
226
|
+
'stmt => IF boolean THEN stmt ELSE . stmt | 0',
|
227
|
+
'.stmt | 8',
|
228
|
+
'stmt => . IF boolean THEN stmt | 8',
|
229
|
+
'stmt => . IF boolean THEN stmt ELSE stmt | 8',
|
230
|
+
'stmt => . literal | 8',
|
231
|
+
'.literal | 8',
|
232
|
+
'literal => . boolean | 8',
|
233
|
+
'literal => . INTEGER | 8',
|
234
|
+
'.boolean | 8',
|
235
|
+
'boolean => . FALSE | 8',
|
236
|
+
'boolean => . TRUE | 8'
|
237
|
+
]
|
238
|
+
result8 = parse_result.chart[8]
|
239
|
+
expect(result8.entries.size).to eq(12)
|
240
|
+
compare_entry_texts(result8, expected)
|
241
|
+
expected_terminals(result8, %w[FALSE IF INTEGER TRUE])
|
242
|
+
|
243
|
+
######################
|
244
|
+
# Expectation chart[9]:
|
245
|
+
expected = [
|
246
|
+
'literal => INTEGER . | 8',
|
247
|
+
'literal. | 8',
|
248
|
+
'stmt => literal . | 8',
|
249
|
+
'stmt. | 8',
|
250
|
+
'stmt => IF boolean THEN stmt ELSE stmt . | 3',
|
251
|
+
'stmt => IF boolean THEN stmt ELSE stmt . | 0',
|
252
|
+
'stmt. | 3',
|
253
|
+
'stmt. | 0',
|
254
|
+
'stmt => IF boolean THEN stmt . | 0',
|
255
|
+
'stmt => IF boolean THEN stmt . ELSE stmt | 0',
|
256
|
+
'program => stmt . | 0',
|
257
|
+
'program. | 0'
|
258
|
+
]
|
259
|
+
result9 = parse_result.chart[9]
|
260
|
+
expect(result9.entries.size).to eq(12)
|
261
|
+
compare_entry_texts(result9, expected)
|
262
|
+
expected_terminals(result9, %w[ELSE])
|
263
|
+
|
264
|
+
######################
|
265
|
+
# Expectation chart[10]:
|
266
|
+
result10 = parse_result.chart[10]
|
267
|
+
expect(result10).to be_nil
|
268
|
+
|
269
|
+
# The parse is ambiguous since there is more than one dotted item
|
270
|
+
# that matches the stmt. | 0 exit node on chart[9]:
|
271
|
+
# stmt => IF boolean THEN stmt ELSE stmt . | 0
|
272
|
+
# stmt => IF boolean THEN stmt . | 0
|
273
|
+
#
|
274
|
+
# This is related to the "dangling else problem"
|
275
|
+
end
|
276
|
+
end # context
|
277
|
+
|
278
|
+
context 'Disambiguated parse: ' do
|
279
|
+
def match_else_with_if(grammar)
|
280
|
+
# Brittle code: assumes the IF..ELSE production is grammar.rules[2] and that ELSE sits at index 4 of its rhs
|
281
|
+
prod = grammar.rules[2]
|
282
|
+
constraint = Syntax::MatchClosest.new(prod.rhs.members, 4, 'IF')
|
283
|
+
prod.constraints << constraint
|
284
|
+
end
|
285
|
+
|
286
|
+
# Factory method. Builds a simple if-then-else grammar whose ELSE is constrained to match the closest IF.
|
287
|
+
def grammar_if_else
|
288
|
+
builder = Rley::Syntax::BaseGrammarBuilder.new do
|
289
|
+
add_terminals('IF', 'THEN', 'ELSE')
|
290
|
+
add_terminals('FALSE', 'TRUE', 'INTEGER')
|
291
|
+
|
292
|
+
rule 'program' => 'stmt'
|
293
|
+
rule 'stmt' => 'IF boolean THEN stmt'
|
294
|
+
|
295
|
+
# To prevent dangling else issue, the ELSE must match the closest preceding IF
|
296
|
+
# rule 'stmt' => 'IF boolean THEN stmt ELSE{closest IF} stmt'
|
297
|
+
rule 'stmt' => 'IF boolean THEN stmt ELSE stmt'
|
298
|
+
rule 'stmt' => 'literal'
|
299
|
+
rule 'literal' => 'boolean'
|
300
|
+
rule 'literal' => 'INTEGER'
|
301
|
+
rule 'boolean' => 'FALSE'
|
302
|
+
rule 'boolean' => 'TRUE'
|
303
|
+
end
|
304
|
+
|
305
|
+
grm = builder.grammar
|
306
|
+
match_else_with_if(grm)
|
307
|
+
|
308
|
+
grm
|
309
|
+
end
|
310
|
+
|
311
|
+
subject { GFGEarleyParser.new(grammar_if_else) }
|
312
|
+
|
313
|
+
it 'should cope with dangling else problem' do
|
314
|
+
tokens = tokenizer(input)
|
315
|
+
parse_result = subject.parse(tokens)
|
316
|
+
expect(parse_result.success?).to eq(true)
|
317
|
+
expect(parse_result.ambiguous?).to eq(true)
|
318
|
+
######################
|
319
|
+
# Expectation chart[0]:
|
320
|
+
expected = [
|
321
|
+
'.program | 0', # initialization
|
322
|
+
'program => . stmt | 0', # start rule
|
323
|
+
'.stmt | 0', # call rule
|
324
|
+
'stmt => . IF boolean THEN stmt | 0', # start rule
|
325
|
+
'stmt => . IF boolean THEN stmt ELSE stmt | 0', # start rule
|
326
|
+
'stmt => . literal | 0', # start rule
|
327
|
+
'.literal | 0', # call rule
|
328
|
+
'literal => . boolean | 0', # start rule
|
329
|
+
'literal => . INTEGER | 0', # start rule
|
330
|
+
'.boolean | 0', # call rule
|
331
|
+
'boolean => . FALSE | 0', # start rule
|
332
|
+
'boolean => . TRUE | 0' # start rule
|
333
|
+
]
|
334
|
+
compare_entry_texts(parse_result.chart[0], expected)
|
335
|
+
expected_terminals(parse_result.chart[0], %w[FALSE IF INTEGER TRUE])
|
336
|
+
|
337
|
+
# The parser should work as the previous version...
|
338
|
+
# we skip chart[1], chart[2] and chart[3]
|
339
|
+
######################
|
340
|
+
# Expectation chart[4]:
|
341
|
+
expected = [
|
342
|
+
'stmt => IF . boolean THEN stmt | 3',
|
343
|
+
'stmt => IF . boolean THEN stmt ELSE stmt | 3',
|
344
|
+
'.boolean | 4',
|
345
|
+
'boolean => . FALSE | 4',
|
346
|
+
'boolean => . TRUE | 4'
|
347
|
+
]
|
348
|
+
result4 = parse_result.chart[4]
|
349
|
+
expect(result4.entries.size).to eq(5)
|
350
|
+
compare_entry_texts(result4, expected)
|
351
|
+
expected_terminals(result4, %w[FALSE TRUE])
|
352
|
+
|
353
|
+
######################
|
354
|
+
# Before reading ELSE
|
355
|
+
# Expectation chart[7]:
|
356
|
+
expected = [
|
357
|
+
'literal => INTEGER . | 6',
|
358
|
+
'literal. | 6',
|
359
|
+
'stmt => literal . | 6',
|
360
|
+
'stmt. | 6',
|
361
|
+
'stmt => IF boolean THEN stmt . | 3',
|
362
|
+
'stmt => IF boolean THEN stmt . ELSE stmt | 3',
|
363
|
+
'stmt. | 3',
|
364
|
+
'stmt => IF boolean THEN stmt . | 0',
|
365
|
+
'stmt => IF boolean THEN stmt . ELSE stmt | 0',
|
366
|
+
'stmt. | 0',
|
367
|
+
'program => stmt . | 0',
|
368
|
+
'program. | 0'
|
369
|
+
]
|
370
|
+
result7 = parse_result.chart[7]
|
371
|
+
expect(result7.entries.size).to eq(12)
|
372
|
+
compare_entry_texts(result7, expected)
|
373
|
+
expected_terminals(result7, %w[ELSE])
|
374
|
+
|
375
|
+
######################
|
376
|
+
# After reading ELSE
|
377
|
+
# Expectation chart[8]:
|
378
|
+
expected = [
|
379
|
+
'stmt => IF boolean THEN stmt ELSE . stmt | 3',
|
380
|
+
# 'stmt => IF boolean THEN stmt ELSE . stmt | 0', # Excluded
|
381
|
+
'.stmt | 8',
|
382
|
+
'stmt => . IF boolean THEN stmt | 8',
|
383
|
+
'stmt => . IF boolean THEN stmt ELSE stmt | 8',
|
384
|
+
'stmt => . literal | 8',
|
385
|
+
'.literal | 8',
|
386
|
+
'literal => . boolean | 8',
|
387
|
+
'literal => . INTEGER | 8',
|
388
|
+
'.boolean | 8',
|
389
|
+
'boolean => . FALSE | 8',
|
390
|
+
'boolean => . TRUE | 8'
|
391
|
+
]
|
392
|
+
result8 = parse_result.chart[8]
|
393
|
+
found = parse_result.chart.search_entries(4, {before: 'IF'})
|
394
|
+
expect(result8.entries.size).to eq(11)
|
395
|
+
compare_entry_texts(result8, expected)
|
396
|
+
expected_terminals(result8, %w[FALSE IF INTEGER TRUE])
|
397
|
+
|
398
|
+
# How does it work?
|
399
|
+
# ELSE was just read at position 7
|
400
|
+
# We look backwards to nearest IF; there is one at position 3
|
401
|
+
# In chart[8], we should exclude the dotted item:
|
402
|
+
# 'stmt => IF boolean THEN stmt ELSE . stmt | 0'
|
403
|
+
# Reasoning?
|
404
|
+
# On chart[4], we find two entries for the IF .:
|
405
|
+
# 'stmt => IF . boolean THEN stmt | 3',
|
406
|
+
# 'stmt => IF . boolean THEN stmt ELSE stmt | 3'
|
407
|
+
# Only those productions that still apply at position 8 must be retained
|
408
|
+
# 'stmt => IF boolean THEN stmt ELSE . stmt | 3',
|
409
|
+
# 'stmt => IF boolean THEN stmt ELSE . stmt | 0', # To exclude
|
410
|
+
# Where to place the check?
|
411
|
+
# At the dotted item?
|
412
|
+
# call, return scan nodes
|
413
|
+
# So if one has an annotated production rule:
|
414
|
+
# stmt => IF boolean THEN stmt ELSE{ closest: IF } stmt
|
415
|
+
# then the dotted item:
|
416
|
+
# stmt => IF boolean THEN stmt ELSE . stmt
|
417
|
+
# should bear the constraint
|
418
|
+
|
419
|
+
######################
|
420
|
+
# Expectation chart[9]:
|
421
|
+
expected = [
|
422
|
+
'literal => INTEGER . | 8',
|
423
|
+
'literal. | 8',
|
424
|
+
'stmt => literal . | 8',
|
425
|
+
'stmt. | 8',
|
426
|
+
'stmt => IF boolean THEN stmt ELSE stmt . | 3',
|
427
|
+
# 'stmt => IF boolean THEN stmt ELSE stmt . | 0', # Excluded
|
428
|
+
'stmt. | 3',
|
429
|
+
'stmt => IF boolean THEN stmt . | 0',
|
430
|
+
'stmt => IF boolean THEN stmt . ELSE stmt | 0',
|
431
|
+
'stmt. | 0',
|
432
|
+
'program => stmt . | 0',
|
433
|
+
'program. | 0'
|
434
|
+
]
|
435
|
+
result9 = parse_result.chart[9]
|
436
|
+
expect(result9.entries.size).to eq(11)
|
437
|
+
compare_entry_texts(result9, expected)
|
438
|
+
expected_terminals(result9, ['ELSE'])
|
439
|
+
end
|
440
|
+
end # context
|
441
|
+
end # describe
|
442
|
+
end # module
|
443
|
+
end # module
|
444
|
+
|
445
|
+
|