rley 0.7.06 → 0.8.01
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +362 -62
- data/.travis.yml +6 -6
- data/CHANGELOG.md +20 -4
- data/LICENSE.txt +1 -1
- data/README.md +7 -7
- data/examples/NLP/engtagger.rb +193 -190
- data/examples/NLP/nano_eng/nano_en_demo.rb +7 -11
- data/examples/NLP/nano_eng/nano_grammar.rb +21 -21
- data/examples/NLP/pico_en_demo.rb +2 -2
- data/examples/data_formats/JSON/cli_options.rb +1 -1
- data/examples/data_formats/JSON/json_ast_builder.rb +21 -27
- data/examples/data_formats/JSON/json_ast_nodes.rb +12 -21
- data/examples/data_formats/JSON/json_demo.rb +1 -2
- data/examples/data_formats/JSON/json_grammar.rb +13 -13
- data/examples/data_formats/JSON/json_lexer.rb +8 -8
- data/examples/data_formats/JSON/json_minifier.rb +1 -1
- data/examples/general/calc_iter1/calc_ast_builder.rb +13 -10
- data/examples/general/calc_iter1/calc_ast_nodes.rb +23 -37
- data/examples/general/calc_iter1/calc_grammar.rb +7 -6
- data/examples/general/calc_iter1/calc_lexer.rb +6 -4
- data/examples/general/calc_iter1/spec/calculator_spec.rb +5 -5
- data/examples/general/calc_iter2/calc_ast_builder.rb +5 -3
- data/examples/general/calc_iter2/calc_ast_nodes.rb +27 -43
- data/examples/general/calc_iter2/calc_grammar.rb +12 -12
- data/examples/general/calc_iter2/calc_lexer.rb +11 -10
- data/examples/general/calc_iter2/spec/calculator_spec.rb +26 -26
- data/examples/general/left.rb +2 -2
- data/examples/general/right.rb +2 -2
- data/lib/rley.rb +1 -1
- data/lib/rley/base/dotted_item.rb +28 -31
- data/lib/rley/base/grm_items_builder.rb +6 -0
- data/lib/rley/constants.rb +2 -2
- data/lib/rley/engine.rb +22 -25
- data/lib/rley/formatter/asciitree.rb +3 -3
- data/lib/rley/formatter/bracket_notation.rb +1 -8
- data/lib/rley/formatter/debug.rb +6 -6
- data/lib/rley/formatter/json.rb +2 -2
- data/lib/rley/gfg/call_edge.rb +1 -1
- data/lib/rley/gfg/edge.rb +5 -5
- data/lib/rley/gfg/end_vertex.rb +2 -6
- data/lib/rley/gfg/epsilon_edge.rb +1 -5
- data/lib/rley/gfg/grm_flow_graph.rb +27 -23
- data/lib/rley/gfg/item_vertex.rb +10 -10
- data/lib/rley/gfg/non_terminal_vertex.rb +4 -4
- data/lib/rley/gfg/scan_edge.rb +1 -1
- data/lib/rley/gfg/shortcut_edge.rb +2 -2
- data/lib/rley/gfg/start_vertex.rb +4 -8
- data/lib/rley/gfg/vertex.rb +43 -39
- data/lib/rley/interface.rb +16 -0
- data/lib/rley/lexical/token_range.rb +6 -6
- data/lib/rley/notation/all_notation_nodes.rb +2 -0
- data/lib/rley/notation/ast_builder.rb +191 -0
- data/lib/rley/notation/ast_node.rb +44 -0
- data/lib/rley/notation/ast_visitor.rb +113 -0
- data/lib/rley/notation/grammar.rb +49 -0
- data/lib/rley/notation/grammar_builder.rb +504 -0
- data/lib/rley/notation/grouping_node.rb +23 -0
- data/lib/rley/notation/parser.rb +56 -0
- data/lib/rley/notation/sequence_node.rb +35 -0
- data/lib/rley/notation/symbol_node.rb +29 -0
- data/lib/rley/notation/tokenizer.rb +192 -0
- data/lib/rley/parse_forest_visitor.rb +5 -5
- data/lib/rley/parse_rep/ast_base_builder.rb +48 -11
- data/lib/rley/parse_rep/cst_builder.rb +5 -6
- data/lib/rley/parse_rep/parse_forest_builder.rb +22 -18
- data/lib/rley/parse_rep/parse_forest_factory.rb +3 -3
- data/lib/rley/parse_rep/parse_rep_creator.rb +14 -16
- data/lib/rley/parse_rep/parse_tree_builder.rb +4 -4
- data/lib/rley/parse_rep/parse_tree_factory.rb +27 -27
- data/lib/rley/parse_tree_visitor.rb +1 -1
- data/lib/rley/parser/error_reason.rb +4 -5
- data/lib/rley/parser/gfg_chart.rb +118 -26
- data/lib/rley/parser/gfg_parsing.rb +22 -33
- data/lib/rley/parser/parse_entry.rb +25 -31
- data/lib/rley/parser/parse_entry_set.rb +19 -16
- data/lib/rley/parser/parse_entry_tracker.rb +4 -4
- data/lib/rley/parser/parse_tracer.rb +13 -13
- data/lib/rley/parser/parse_walker_factory.rb +23 -28
- data/lib/rley/ptree/non_terminal_node.rb +7 -5
- data/lib/rley/ptree/parse_tree.rb +3 -3
- data/lib/rley/ptree/parse_tree_node.rb +5 -5
- data/lib/rley/ptree/terminal_node.rb +7 -7
- data/lib/rley/rley_error.rb +12 -12
- data/lib/rley/sppf/alternative_node.rb +6 -6
- data/lib/rley/sppf/composite_node.rb +7 -7
- data/lib/rley/sppf/epsilon_node.rb +3 -3
- data/lib/rley/sppf/leaf_node.rb +3 -3
- data/lib/rley/sppf/parse_forest.rb +16 -16
- data/lib/rley/sppf/sppf_node.rb +7 -8
- data/lib/rley/sppf/token_node.rb +3 -3
- data/lib/rley/syntax/{grammar_builder.rb → base_grammar_builder.rb} +61 -23
- data/lib/rley/syntax/grammar.rb +5 -5
- data/lib/rley/syntax/grm_symbol.rb +7 -7
- data/lib/rley/syntax/match_closest.rb +43 -0
- data/lib/rley/syntax/non_terminal.rb +9 -15
- data/lib/rley/syntax/production.rb +16 -10
- data/lib/rley/syntax/symbol_seq.rb +7 -9
- data/lib/rley/syntax/terminal.rb +4 -5
- data/lib/rley/syntax/verbatim_symbol.rb +3 -3
- data/lib/support/base_tokenizer.rb +19 -18
- data/spec/rley/base/dotted_item_spec.rb +2 -2
- data/spec/rley/engine_spec.rb +23 -21
- data/spec/rley/formatter/asciitree_spec.rb +7 -7
- data/spec/rley/formatter/bracket_notation_spec.rb +13 -13
- data/spec/rley/formatter/json_spec.rb +1 -1
- data/spec/rley/gfg/end_vertex_spec.rb +5 -5
- data/spec/rley/gfg/grm_flow_graph_spec.rb +2 -2
- data/spec/rley/gfg/item_vertex_spec.rb +10 -10
- data/spec/rley/gfg/non_terminal_vertex_spec.rb +3 -3
- data/spec/rley/gfg/shortcut_edge_spec.rb +1 -1
- data/spec/rley/gfg/start_vertex_spec.rb +5 -5
- data/spec/rley/gfg/vertex_spec.rb +3 -3
- data/spec/rley/lexical/token_range_spec.rb +16 -16
- data/spec/rley/lexical/token_spec.rb +2 -2
- data/spec/rley/notation/grammar_builder_spec.rb +302 -0
- data/spec/rley/notation/parser_spec.rb +184 -0
- data/spec/rley/notation/tokenizer_spec.rb +370 -0
- data/spec/rley/parse_forest_visitor_spec.rb +165 -163
- data/spec/rley/parse_rep/ambiguous_parse_spec.rb +44 -44
- data/spec/rley/parse_rep/ast_builder_spec.rb +6 -7
- data/spec/rley/parse_rep/cst_builder_spec.rb +5 -5
- data/spec/rley/parse_rep/groucho_spec.rb +24 -26
- data/spec/rley/parse_rep/parse_forest_builder_spec.rb +27 -27
- data/spec/rley/parse_rep/parse_forest_factory_spec.rb +8 -8
- data/spec/rley/parse_rep/parse_tree_factory_spec.rb +3 -3
- data/spec/rley/parse_tree_visitor_spec.rb +10 -8
- data/spec/rley/parser/dangling_else_spec.rb +445 -0
- data/spec/rley/parser/error_reason_spec.rb +6 -6
- data/spec/rley/parser/gfg_earley_parser_spec.rb +120 -12
- data/spec/rley/parser/gfg_parsing_spec.rb +6 -13
- data/spec/rley/parser/parse_entry_spec.rb +19 -19
- data/spec/rley/parser/parse_walker_factory_spec.rb +10 -10
- data/spec/rley/ptree/non_terminal_node_spec.rb +5 -3
- data/spec/rley/ptree/parse_tree_node_spec.rb +4 -4
- data/spec/rley/ptree/terminal_node_spec.rb +6 -6
- data/spec/rley/sppf/alternative_node_spec.rb +6 -6
- data/spec/rley/sppf/non_terminal_node_spec.rb +3 -3
- data/spec/rley/sppf/token_node_spec.rb +4 -4
- data/spec/rley/support/ambiguous_grammar_helper.rb +4 -5
- data/spec/rley/support/grammar_abc_helper.rb +3 -5
- data/spec/rley/support/grammar_ambig01_helper.rb +5 -6
- data/spec/rley/support/grammar_arr_int_helper.rb +5 -6
- data/spec/rley/support/grammar_b_expr_helper.rb +5 -6
- data/spec/rley/support/grammar_int_seq_helper.rb +51 -0
- data/spec/rley/support/grammar_l0_helper.rb +14 -17
- data/spec/rley/support/grammar_pb_helper.rb +8 -7
- data/spec/rley/support/grammar_sppf_helper.rb +3 -3
- data/spec/rley/syntax/{grammar_builder_spec.rb → base_grammar_builder_spec.rb} +35 -16
- data/spec/rley/syntax/grammar_spec.rb +6 -6
- data/spec/rley/syntax/grm_symbol_spec.rb +1 -1
- data/spec/rley/syntax/match_closest_spec.rb +46 -0
- data/spec/rley/syntax/non_terminal_spec.rb +8 -8
- data/spec/rley/syntax/production_spec.rb +17 -13
- data/spec/rley/syntax/symbol_seq_spec.rb +2 -2
- data/spec/rley/syntax/terminal_spec.rb +5 -5
- data/spec/rley/syntax/verbatim_symbol_spec.rb +1 -1
- data/spec/spec_helper.rb +0 -12
- data/spec/support/base_tokenizer_spec.rb +7 -2
- metadata +48 -74
- data/.simplecov +0 -7
- data/lib/rley/parser/parse_state.rb +0 -83
- data/lib/rley/parser/parse_state_tracker.rb +0 -59
- data/lib/rley/parser/state_set.rb +0 -101
- data/spec/rley/parser/parse_state_spec.rb +0 -125
- data/spec/rley/parser/parse_tracer_spec.rb +0 -200
- data/spec/rley/parser/state_set_spec.rb +0 -130
@@ -4,7 +4,7 @@ require_relative '../../spec_helper'
|
|
4
4
|
|
5
5
|
require_relative '../../../lib/rley/parser/gfg_earley_parser'
|
6
6
|
|
7
|
-
require_relative '../../../lib/rley/syntax/grammar_builder'
|
7
|
+
require_relative '../../../lib/rley/syntax/base_grammar_builder'
|
8
8
|
require_relative '../support/grammar_helper'
|
9
9
|
require_relative '../support/expectation_helper'
|
10
10
|
|
@@ -22,15 +22,15 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
22
22
|
# "SPPF-Style Parsing From Earley Recognizers" in
|
23
23
|
# Notes in Theoretical Computer Science 203, (2008), pp. 53-67
|
24
24
|
# contains a hidden left recursion and a cycle
|
25
|
-
builder = Syntax::GrammarBuilder.new do
|
25
|
+
builder = Syntax::BaseGrammarBuilder.new do
|
26
26
|
add_terminals('a', 'b')
|
27
27
|
rule 'Phi' => 'S'
|
28
|
-
rule 'S' =>
|
29
|
-
rule 'S' =>
|
28
|
+
rule 'S' => 'A T'
|
29
|
+
rule 'S' => 'a T'
|
30
30
|
rule 'A' => 'a'
|
31
|
-
rule 'A' =>
|
31
|
+
rule 'A' => 'B A'
|
32
32
|
rule 'B' => []
|
33
|
-
rule 'T' =>
|
33
|
+
rule 'T' => 'b b b'
|
34
34
|
end
|
35
35
|
builder.grammar
|
36
36
|
end
|
@@ -49,11 +49,11 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
49
49
|
end
|
50
50
|
|
51
51
|
# Emit a text representation of the current path.
|
52
|
-
def path_to_s
|
52
|
+
def path_to_s
|
53
53
|
text_parts = subject.curr_path.map do |path_element|
|
54
54
|
path_element.to_string(0)
|
55
55
|
end
|
56
|
-
|
56
|
+
text_parts.join('/')
|
57
57
|
end
|
58
58
|
|
59
59
|
context 'Initialization:' do
|
@@ -3,7 +3,7 @@
|
|
3
3
|
require_relative '../../spec_helper'
|
4
4
|
|
5
5
|
require_relative '../../../lib/rley/parser/gfg_earley_parser'
|
6
|
-
require_relative '../../../lib/rley/syntax/grammar_builder'
|
6
|
+
require_relative '../../../lib/rley/syntax/base_grammar_builder'
|
7
7
|
require_relative '../support/grammar_helper'
|
8
8
|
require_relative '../support/grammar_abc_helper'
|
9
9
|
require_relative '../support/expectation_helper'
|
@@ -38,11 +38,11 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
38
38
|
end
|
39
39
|
|
40
40
|
# Emit a text representation of the current path.
|
41
|
-
def path_to_s
|
41
|
+
def path_to_s
|
42
42
|
text_parts = subject.curr_path.map do |path_element|
|
43
43
|
path_element.to_string(0)
|
44
44
|
end
|
45
|
-
|
45
|
+
text_parts.join('/')
|
46
46
|
end
|
47
47
|
|
48
48
|
|
@@ -11,7 +11,7 @@ require_relative './support/grammar_sppf_helper'
|
|
11
11
|
require_relative '../../lib/rley/parse_tree_visitor'
|
12
12
|
|
13
13
|
module Rley # Open this namespace to avoid module qualifier prefixes
|
14
|
-
describe ParseTreeVisitor do
|
14
|
+
describe ParseTreeVisitor do
|
15
15
|
let(:grammar_abc) do
|
16
16
|
sandbox = Object.new
|
17
17
|
sandbox.extend(GrammarABCHelper)
|
@@ -45,7 +45,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
45
45
|
# Capital letters represent non-terminal nodes
|
46
46
|
let(:grm_abc_ptree1) do
|
47
47
|
engine = Rley::Engine.new
|
48
|
-
engine.use_grammar(grammar_abc)
|
48
|
+
engine.use_grammar(grammar_abc)
|
49
49
|
parse_result = engine.parse(grm_abc_tokens1)
|
50
50
|
ptree = engine.convert(parse_result)
|
51
51
|
ptree
|
@@ -161,6 +161,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
161
161
|
subject.end_visit_ptree(grm_abc_ptree1)
|
162
162
|
end
|
163
163
|
|
164
|
+
# rubocop: disable Naming/VariableNumber
|
164
165
|
it 'should begin the visit when requested' do
|
165
166
|
# Reminder: parse tree structure is
|
166
167
|
# S[0,5]
|
@@ -209,11 +210,11 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
209
210
|
expectations.each do |(msg, args)|
|
210
211
|
expect(listener1).to receive(msg).with(*args).ordered
|
211
212
|
end
|
212
|
-
|
213
|
+
|
213
214
|
# Here we go...
|
214
215
|
subject.start
|
215
216
|
end
|
216
|
-
|
217
|
+
|
217
218
|
it 'should also visit in pre-order' do
|
218
219
|
# Reminder: parse tree structure is
|
219
220
|
# S[0,5]
|
@@ -228,7 +229,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
228
229
|
root = grm_abc_ptree1.root
|
229
230
|
# Here we defeat encapsulation for the good cause
|
230
231
|
subject.instance_variable_set(:@traversal, :pre_order)
|
231
|
-
|
232
|
+
|
232
233
|
children = root.subnodes
|
233
234
|
big_a_1 = children[0]
|
234
235
|
big_a_1_children = big_a_1.subnodes
|
@@ -239,7 +240,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
239
240
|
expectations = [
|
240
241
|
[:before_ptree, [grm_abc_ptree1]]
|
241
242
|
# TODO: fix this test
|
242
|
-
# [:before_subnodes, [root, children]],
|
243
|
+
# [:before_subnodes, [root, children]],
|
243
244
|
# [:before_non_terminal, [root]],
|
244
245
|
|
245
246
|
# [:before_non_terminal, [big_a_1]],
|
@@ -267,10 +268,11 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
267
268
|
expectations.each do |(msg, args)|
|
268
269
|
expect(listener1).to receive(msg).with(*args).ordered
|
269
270
|
end
|
270
|
-
|
271
|
+
|
271
272
|
# Here we go...
|
272
273
|
subject.start
|
273
|
-
end
|
274
|
+
end
|
275
|
+
# rubocop: enable Naming/VariableNumber
|
274
276
|
end # context
|
275
277
|
end # describe
|
276
278
|
end # module
|
@@ -0,0 +1,445 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative '../../spec_helper'
|
4
|
+
require 'stringio'
|
5
|
+
require_relative '../../../lib/rley/syntax/match_closest'
|
6
|
+
require_relative '../../../lib/rley/syntax/non_terminal'
|
7
|
+
require_relative '../../../lib/rley/syntax/production'
|
8
|
+
require_relative '../../../lib/rley/syntax/base_grammar_builder'
|
9
|
+
require_relative '../../../lib/rley/lexical/token'
|
10
|
+
require_relative '../../../lib/rley/base/dotted_item'
|
11
|
+
require_relative '../../../lib/rley/parser/gfg_parsing'
|
12
|
+
|
13
|
+
require_relative '../support/expectation_helper'
|
14
|
+
|
15
|
+
# Load the class under test
|
16
|
+
require_relative '../../../lib/rley/parser/gfg_earley_parser'
|
17
|
+
|
18
|
+
module Rley # Open this namespace to avoid module qualifier prefixes
|
19
|
+
module Parser # Open this namespace to avoid module qualifier prefixes
|
20
|
+
describe GFGEarleyParser do
|
21
|
+
include ExpectationHelper # Mix-in with expectation on parse entry sets
|
22
|
+
|
23
|
+
Keyword = {
|
24
|
+
'else' => 'ELSE',
|
25
|
+
'false' => 'FALSE',
|
26
|
+
'if' => 'IF',
|
27
|
+
'then' => 'THEN',
|
28
|
+
'true' => 'TRUE'
|
29
|
+
}.freeze
|
30
|
+
|
31
|
+
def tokenizer(aTextToParse)
|
32
|
+
scanner = StringScanner.new(aTextToParse)
|
33
|
+
tokens = []
|
34
|
+
|
35
|
+
loop do
|
36
|
+
scanner.skip(/\s+/)
|
37
|
+
break if scanner.eos?
|
38
|
+
curr_pos = scanner.pos
|
39
|
+
lexeme = scanner.scan(/\S+/)
|
40
|
+
|
41
|
+
term_name = Keyword[lexeme]
|
42
|
+
unless term_name
|
43
|
+
if lexeme =~ /\d+/
|
44
|
+
term_name = 'INTEGER'
|
45
|
+
else
|
46
|
+
err_msg = "Unknown token '#{lexeme}'"
|
47
|
+
raise StandardError, err_msg
|
48
|
+
end
|
49
|
+
end
|
50
|
+
pos = Rley::Lexical::Position.new(1, curr_pos + 1)
|
51
|
+
tokens << Rley::Lexical::Token.new(lexeme, term_name, pos)
|
52
|
+
end
|
53
|
+
|
54
|
+
tokens
|
55
|
+
end
|
56
|
+
|
57
|
+
let(:input) { 'if false then if true then 1 else 2' }
|
58
|
+
|
59
|
+
context 'Ambiguous parse: ' do
|
60
|
+
# Factory method. Creates a grammar builder for a simple grammar.
|
61
|
+
def grammar_if_else_amb
|
62
|
+
builder = Rley::Syntax::BaseGrammarBuilder.new do
|
63
|
+
add_terminals('IF', 'THEN', 'ELSE')
|
64
|
+
add_terminals('FALSE', 'TRUE', 'INTEGER')
|
65
|
+
|
66
|
+
rule 'program' => 'stmt'
|
67
|
+
rule 'stmt' => 'IF boolean THEN stmt'
|
68
|
+
rule 'stmt' => 'IF boolean THEN stmt ELSE stmt'
|
69
|
+
rule 'stmt' => 'literal'
|
70
|
+
rule 'literal' => 'boolean'
|
71
|
+
rule 'literal' => 'INTEGER'
|
72
|
+
rule 'boolean' => 'FALSE'
|
73
|
+
rule 'boolean' => 'TRUE'
|
74
|
+
end
|
75
|
+
|
76
|
+
builder.grammar
|
77
|
+
end
|
78
|
+
|
79
|
+
subject { GFGEarleyParser.new(grammar_if_else_amb) }
|
80
|
+
|
81
|
+
it 'should parse a valid simple input' do
|
82
|
+
tokens = tokenizer(input)
|
83
|
+
parse_result = subject.parse(tokens)
|
84
|
+
expect(parse_result.success?).to eq(true)
|
85
|
+
expect(parse_result.ambiguous?).to eq(true)
|
86
|
+
######################
|
87
|
+
# Expectation chart[0]:
|
88
|
+
expected = [
|
89
|
+
'.program | 0', # initialization
|
90
|
+
'program => . stmt | 0', # start rule
|
91
|
+
'.stmt | 0', # call rule
|
92
|
+
'stmt => . IF boolean THEN stmt | 0', # start rule
|
93
|
+
'stmt => . IF boolean THEN stmt ELSE stmt | 0', # start rule
|
94
|
+
'stmt => . literal | 0', # start rule
|
95
|
+
'.literal | 0', # call rule
|
96
|
+
'literal => . boolean | 0', # start rule
|
97
|
+
'literal => . INTEGER | 0', # start rule
|
98
|
+
'.boolean | 0', # call rule
|
99
|
+
'boolean => . FALSE | 0', # start rule
|
100
|
+
'boolean => . TRUE | 0' # start rule
|
101
|
+
]
|
102
|
+
compare_entry_texts(parse_result.chart[0], expected)
|
103
|
+
expected_terminals(parse_result.chart[0], %w[FALSE IF INTEGER TRUE])
|
104
|
+
|
105
|
+
######################
|
106
|
+
# Expectation chart[1]:
|
107
|
+
expected = [
|
108
|
+
'stmt => IF . boolean THEN stmt | 0', # start rule
|
109
|
+
'stmt => IF . boolean THEN stmt ELSE stmt | 0', # start rule
|
110
|
+
'.boolean | 1',
|
111
|
+
'boolean => . FALSE | 1', # start rule
|
112
|
+
'boolean => . TRUE | 1' # start rule
|
113
|
+
]
|
114
|
+
result1 = parse_result.chart[1]
|
115
|
+
expect(result1.entries.size).to eq(5)
|
116
|
+
compare_entry_texts(result1, expected)
|
117
|
+
expected_terminals(result1, %w[FALSE TRUE])
|
118
|
+
|
119
|
+
######################
|
120
|
+
# Expectation chart[2]:
|
121
|
+
expected = [
|
122
|
+
'boolean => FALSE . | 1',
|
123
|
+
'boolean. | 1',
|
124
|
+
'stmt => IF boolean . THEN stmt | 0',
|
125
|
+
'stmt => IF boolean . THEN stmt ELSE stmt | 0'
|
126
|
+
]
|
127
|
+
result2 = parse_result.chart[2]
|
128
|
+
expect(result2.entries.size).to eq(4)
|
129
|
+
compare_entry_texts(result2, expected)
|
130
|
+
expected_terminals(result2, %w[THEN])
|
131
|
+
|
132
|
+
######################
|
133
|
+
# Expectation chart[3]:
|
134
|
+
expected = [
|
135
|
+
'stmt => IF boolean THEN . stmt | 0',
|
136
|
+
'stmt => IF boolean THEN . stmt ELSE stmt | 0',
|
137
|
+
'.stmt | 3',
|
138
|
+
'stmt => . IF boolean THEN stmt | 3',
|
139
|
+
'stmt => . IF boolean THEN stmt ELSE stmt | 3',
|
140
|
+
'stmt => . literal | 3',
|
141
|
+
'.literal | 3',
|
142
|
+
'literal => . boolean | 3',
|
143
|
+
'literal => . INTEGER | 3',
|
144
|
+
'.boolean | 3',
|
145
|
+
'boolean => . FALSE | 3',
|
146
|
+
'boolean => . TRUE | 3'
|
147
|
+
]
|
148
|
+
result3 = parse_result.chart[3]
|
149
|
+
expect(result3.entries.size).to eq(12)
|
150
|
+
compare_entry_texts(result3, expected)
|
151
|
+
expected_terminals(result3, %w[FALSE IF INTEGER TRUE])
|
152
|
+
|
153
|
+
|
154
|
+
######################
|
155
|
+
# Expectation chart[4]:
|
156
|
+
expected = [
|
157
|
+
'stmt => IF . boolean THEN stmt | 3',
|
158
|
+
'stmt => IF . boolean THEN stmt ELSE stmt | 3',
|
159
|
+
'.boolean | 4',
|
160
|
+
'boolean => . FALSE | 4',
|
161
|
+
'boolean => . TRUE | 4'
|
162
|
+
]
|
163
|
+
result4 = parse_result.chart[4]
|
164
|
+
expect(result4.entries.size).to eq(5)
|
165
|
+
compare_entry_texts(result4, expected)
|
166
|
+
expected_terminals(result4, %w[FALSE TRUE])
|
167
|
+
|
168
|
+
######################
|
169
|
+
# Expectation chart[5]:
|
170
|
+
expected = [
|
171
|
+
'boolean => TRUE . | 4',
|
172
|
+
'boolean. | 4',
|
173
|
+
'stmt => IF boolean . THEN stmt | 3',
|
174
|
+
'stmt => IF boolean . THEN stmt ELSE stmt | 3'
|
175
|
+
]
|
176
|
+
result5 = parse_result.chart[5]
|
177
|
+
expect(result5.entries.size).to eq(4)
|
178
|
+
compare_entry_texts(result5, expected)
|
179
|
+
expected_terminals(result5, %w[THEN])
|
180
|
+
|
181
|
+
######################
|
182
|
+
# Expectation chart[6]:
|
183
|
+
expected = [
|
184
|
+
'stmt => IF boolean THEN . stmt | 3',
|
185
|
+
'stmt => IF boolean THEN . stmt ELSE stmt | 3',
|
186
|
+
'.stmt | 6',
|
187
|
+
'stmt => . IF boolean THEN stmt | 6',
|
188
|
+
'stmt => . IF boolean THEN stmt ELSE stmt | 6',
|
189
|
+
'stmt => . literal | 6',
|
190
|
+
'.literal | 6',
|
191
|
+
'literal => . boolean | 6',
|
192
|
+
'literal => . INTEGER | 6',
|
193
|
+
'.boolean | 6',
|
194
|
+
'boolean => . FALSE | 6',
|
195
|
+
'boolean => . TRUE | 6'
|
196
|
+
]
|
197
|
+
result6 = parse_result.chart[6]
|
198
|
+
expect(result6.entries.size).to eq(12)
|
199
|
+
compare_entry_texts(result6, expected)
|
200
|
+
expected_terminals(result6, %w[FALSE IF INTEGER TRUE])
|
201
|
+
|
202
|
+
######################
|
203
|
+
# Expectation chart[7]:
|
204
|
+
expected = [
|
205
|
+
'literal => INTEGER . | 6',
|
206
|
+
'literal. | 6',
|
207
|
+
'stmt => literal . | 6',
|
208
|
+
'stmt. | 6',
|
209
|
+
'stmt => IF boolean THEN stmt . | 3',
|
210
|
+
'stmt => IF boolean THEN stmt . ELSE stmt | 3',
|
211
|
+
'stmt. | 3',
|
212
|
+
'stmt => IF boolean THEN stmt . | 0',
|
213
|
+
'stmt => IF boolean THEN stmt . ELSE stmt | 0',
|
214
|
+
'stmt. | 0',
|
215
|
+
'program => stmt . | 0',
|
216
|
+
'program. | 0'
|
217
|
+
]
|
218
|
+
result7 = parse_result.chart[7]
|
219
|
+
expect(result7.entries.size).to eq(12)
|
220
|
+
compare_entry_texts(result7, expected)
|
221
|
+
expected_terminals(result7, %w[ELSE])
|
222
|
+
|
223
|
+
# Expectation chart[8]:
|
224
|
+
expected = [
|
225
|
+
'stmt => IF boolean THEN stmt ELSE . stmt | 3',
|
226
|
+
'stmt => IF boolean THEN stmt ELSE . stmt | 0',
|
227
|
+
'.stmt | 8',
|
228
|
+
'stmt => . IF boolean THEN stmt | 8',
|
229
|
+
'stmt => . IF boolean THEN stmt ELSE stmt | 8',
|
230
|
+
'stmt => . literal | 8',
|
231
|
+
'.literal | 8',
|
232
|
+
'literal => . boolean | 8',
|
233
|
+
'literal => . INTEGER | 8',
|
234
|
+
'.boolean | 8',
|
235
|
+
'boolean => . FALSE | 8',
|
236
|
+
'boolean => . TRUE | 8'
|
237
|
+
]
|
238
|
+
result8 = parse_result.chart[8]
|
239
|
+
expect(result8.entries.size).to eq(12)
|
240
|
+
compare_entry_texts(result8, expected)
|
241
|
+
expected_terminals(result8, %w[FALSE IF INTEGER TRUE])
|
242
|
+
|
243
|
+
######################
|
244
|
+
# Expectation chart[9]:
|
245
|
+
expected = [
|
246
|
+
'literal => INTEGER . | 8',
|
247
|
+
'literal. | 8',
|
248
|
+
'stmt => literal . | 8',
|
249
|
+
'stmt. | 8',
|
250
|
+
'stmt => IF boolean THEN stmt ELSE stmt . | 3',
|
251
|
+
'stmt => IF boolean THEN stmt ELSE stmt . | 0',
|
252
|
+
'stmt. | 3',
|
253
|
+
'stmt. | 0',
|
254
|
+
'stmt => IF boolean THEN stmt . | 0',
|
255
|
+
'stmt => IF boolean THEN stmt . ELSE stmt | 0',
|
256
|
+
'program => stmt . | 0',
|
257
|
+
'program. | 0'
|
258
|
+
]
|
259
|
+
result9 = parse_result.chart[9]
|
260
|
+
expect(result9.entries.size).to eq(12)
|
261
|
+
compare_entry_texts(result9, expected)
|
262
|
+
expected_terminals(result9, %w[ELSE])
|
263
|
+
|
264
|
+
######################
|
265
|
+
# Expectation chart[10]:
|
266
|
+
result10 = parse_result.chart[10]
|
267
|
+
expect(result10).to be_nil
|
268
|
+
|
269
|
+
# The parse is ambiguous since there is more than one dotted item
|
270
|
+
# that matches the stmt. | 0 exit node on chart[9]:
|
271
|
+
# stmt => IF boolean THEN stmt ELSE stmt . | 0'
|
272
|
+
# stmt => IF boolean THEN stmt . | 0'
|
273
|
+
#
|
274
|
+
# This is related to the "dangling else problem"
|
275
|
+
end
|
276
|
+
end # context
|
277
|
+
|
278
|
+
context 'Disambiguated parse: ' do
|
279
|
+
def match_else_with_if(grammar)
|
280
|
+
# Brittle code
|
281
|
+
prod = grammar.rules[2]
|
282
|
+
constraint = Syntax::MatchClosest.new(prod.rhs.members, 4, 'IF')
|
283
|
+
prod.constraints << constraint
|
284
|
+
end
|
285
|
+
|
286
|
+
# Factory method. Creates a grammar builder for a simple grammar.
|
287
|
+
def grammar_if_else
|
288
|
+
builder = Rley::Syntax::BaseGrammarBuilder.new do
|
289
|
+
add_terminals('IF', 'THEN', 'ELSE')
|
290
|
+
add_terminals('FALSE', 'TRUE', 'INTEGER')
|
291
|
+
|
292
|
+
rule 'program' => 'stmt'
|
293
|
+
rule 'stmt' => 'IF boolean THEN stmt'
|
294
|
+
|
295
|
+
# To prevent dangling else issue, the ELSE must match the closest preceding IF
|
296
|
+
# rule 'stmt' => 'IF boolean THEN stmt ELSE{closest IF} stmt'
|
297
|
+
rule 'stmt' => 'IF boolean THEN stmt ELSE stmt'
|
298
|
+
rule 'stmt' => 'literal'
|
299
|
+
rule 'literal' => 'boolean'
|
300
|
+
rule 'literal' => 'INTEGER'
|
301
|
+
rule 'boolean' => 'FALSE'
|
302
|
+
rule 'boolean' => 'TRUE'
|
303
|
+
end
|
304
|
+
|
305
|
+
grm = builder.grammar
|
306
|
+
match_else_with_if(grm)
|
307
|
+
|
308
|
+
grm
|
309
|
+
end
|
310
|
+
|
311
|
+
subject { GFGEarleyParser.new(grammar_if_else) }
|
312
|
+
|
313
|
+
it 'should cope with dangling else problem' do
|
314
|
+
tokens = tokenizer(input)
|
315
|
+
parse_result = subject.parse(tokens)
|
316
|
+
expect(parse_result.success?).to eq(true)
|
317
|
+
expect(parse_result.ambiguous?).to eq(true)
|
318
|
+
######################
|
319
|
+
# Expectation chart[0]:
|
320
|
+
expected = [
|
321
|
+
'.program | 0', # initialization
|
322
|
+
'program => . stmt | 0', # start rule
|
323
|
+
'.stmt | 0', # call rule
|
324
|
+
'stmt => . IF boolean THEN stmt | 0', # start rule
|
325
|
+
'stmt => . IF boolean THEN stmt ELSE stmt | 0', # start rule
|
326
|
+
'stmt => . literal | 0', # start rule
|
327
|
+
'.literal | 0', # call rule
|
328
|
+
'literal => . boolean | 0', # start rule
|
329
|
+
'literal => . INTEGER | 0', # start rule
|
330
|
+
'.boolean | 0', # call rule
|
331
|
+
'boolean => . FALSE | 0', # start rule
|
332
|
+
'boolean => . TRUE | 0' # start rule
|
333
|
+
]
|
334
|
+
compare_entry_texts(parse_result.chart[0], expected)
|
335
|
+
expected_terminals(parse_result.chart[0], %w[FALSE IF INTEGER TRUE])
|
336
|
+
|
337
|
+
# The parser should work as the previous version...
|
338
|
+
# we skip chart[2] and chart[3]
|
339
|
+
######################
|
340
|
+
# Expectation chart[4]:
|
341
|
+
expected = [
|
342
|
+
'stmt => IF . boolean THEN stmt | 3',
|
343
|
+
'stmt => IF . boolean THEN stmt ELSE stmt | 3',
|
344
|
+
'.boolean | 4',
|
345
|
+
'boolean => . FALSE | 4',
|
346
|
+
'boolean => . TRUE | 4'
|
347
|
+
]
|
348
|
+
result4 = parse_result.chart[4]
|
349
|
+
expect(result4.entries.size).to eq(5)
|
350
|
+
compare_entry_texts(result4, expected)
|
351
|
+
expected_terminals(result4, %w[FALSE TRUE])
|
352
|
+
|
353
|
+
######################
|
354
|
+
# Before reading ELSE
|
355
|
+
# Expectation chart[7]:
|
356
|
+
expected = [
|
357
|
+
'literal => INTEGER . | 6',
|
358
|
+
'literal. | 6',
|
359
|
+
'stmt => literal . | 6',
|
360
|
+
'stmt. | 6',
|
361
|
+
'stmt => IF boolean THEN stmt . | 3',
|
362
|
+
'stmt => IF boolean THEN stmt . ELSE stmt | 3',
|
363
|
+
'stmt. | 3',
|
364
|
+
'stmt => IF boolean THEN stmt . | 0',
|
365
|
+
'stmt => IF boolean THEN stmt . ELSE stmt | 0',
|
366
|
+
'stmt. | 0',
|
367
|
+
'program => stmt . | 0',
|
368
|
+
'program. | 0'
|
369
|
+
]
|
370
|
+
result7 = parse_result.chart[7]
|
371
|
+
expect(result7.entries.size).to eq(12)
|
372
|
+
compare_entry_texts(result7, expected)
|
373
|
+
expected_terminals(result7, %w[ELSE])
|
374
|
+
|
375
|
+
######################
|
376
|
+
# After reading ELSE
|
377
|
+
# Expectation chart[8]:
|
378
|
+
expected = [
|
379
|
+
'stmt => IF boolean THEN stmt ELSE . stmt | 3',
|
380
|
+
# 'stmt => IF boolean THEN stmt ELSE . stmt | 0', # Excluded
|
381
|
+
'.stmt | 8',
|
382
|
+
'stmt => . IF boolean THEN stmt | 8',
|
383
|
+
'stmt => . IF boolean THEN stmt ELSE stmt | 8',
|
384
|
+
'stmt => . literal | 8',
|
385
|
+
'.literal | 8',
|
386
|
+
'literal => . boolean | 8',
|
387
|
+
'literal => . INTEGER | 8',
|
388
|
+
'.boolean | 8',
|
389
|
+
'boolean => . FALSE | 8',
|
390
|
+
'boolean => . TRUE | 8'
|
391
|
+
]
|
392
|
+
result8 = parse_result.chart[8]
|
393
|
+
found = parse_result.chart.search_entries(4, {before: 'IF'})
|
394
|
+
expect(result8.entries.size).to eq(11)
|
395
|
+
compare_entry_texts(result8, expected)
|
396
|
+
expected_terminals(result8, %w[FALSE IF INTEGER TRUE])
|
397
|
+
|
398
|
+
# How does it work?
|
399
|
+
# ELSE was just read at position 7
|
400
|
+
# We look backwards to nearest IF; there is one at position 3
|
401
|
+
# In chart[8], we should exclude the dotted item:
|
402
|
+
# 'stmt => IF boolean THEN stmt ELSE . stmt | 0'
|
403
|
+
# Reasoning?
|
404
|
+
# On chart[4], we find two entries for the IF .:
|
405
|
+
# 'stmt => IF . boolean THEN stmt | 3',
|
406
|
+
# 'stmt => IF . boolean THEN stmt ELSE stmt | 3'
|
407
|
+
# Only those productions that still apply at 8 must be retained
|
408
|
+
# 'stmt => IF boolean THEN stmt ELSE . stmt | 3',
|
409
|
+
# 'stmt => IF boolean THEN stmt ELSE . stmt | 0', # To exclude
|
410
|
+
# Where to place the check?
|
411
|
+
# At the dotted item?
|
412
|
+
# call, return scan nodes
|
413
|
+
# So if one has an annotated production rule:
|
414
|
+
# stmt => IF boolean THEN stmt ELSE{ closest: IF } stmt
|
415
|
+
# then the dotted item:
|
416
|
+
# stmt => IF boolean THEN stmt ELSE . stmt
|
417
|
+
# should bear the constraint
|
418
|
+
|
419
|
+
######################
|
420
|
+
# Expectation chart[9]:
|
421
|
+
expected = [
|
422
|
+
'literal => INTEGER . | 8',
|
423
|
+
'literal. | 8',
|
424
|
+
'stmt => literal . | 8',
|
425
|
+
'stmt. | 8',
|
426
|
+
'stmt => IF boolean THEN stmt ELSE stmt . | 3',
|
427
|
+
# 'stmt => IF boolean THEN stmt ELSE stmt . | 0', # Excluded
|
428
|
+
'stmt. | 3',
|
429
|
+
'stmt => IF boolean THEN stmt . | 0',
|
430
|
+
'stmt => IF boolean THEN stmt . ELSE stmt | 0',
|
431
|
+
'stmt. | 0',
|
432
|
+
'program => stmt . | 0',
|
433
|
+
'program. | 0'
|
434
|
+
]
|
435
|
+
result9 = parse_result.chart[9]
|
436
|
+
expect(result9.entries.size).to eq(11)
|
437
|
+
compare_entry_texts(result9, expected)
|
438
|
+
expected_terminals(result9, ['ELSE'])
|
439
|
+
end
|
440
|
+
end # context
|
441
|
+
end # describe
|
442
|
+
end # module
|
443
|
+
end # module
|
444
|
+
|
445
|
+
|