rley 0.2.15 → 0.3.00
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/gfg/call_edge.rb +30 -0
- data/lib/rley/gfg/edge.rb +4 -0
- data/lib/rley/gfg/end_vertex.rb +1 -1
- data/lib/rley/gfg/epsilon_edge.rb +0 -4
- data/lib/rley/gfg/grm_flow_graph.rb +32 -7
- data/lib/rley/gfg/item_vertex.rb +71 -25
- data/lib/rley/gfg/non_terminal_vertex.rb +10 -1
- data/lib/rley/gfg/return_edge.rb +31 -0
- data/lib/rley/gfg/scan_edge.rb +2 -1
- data/lib/rley/gfg/shortcut_edge.rb +26 -0
- data/lib/rley/gfg/start_vertex.rb +2 -2
- data/lib/rley/gfg/vertex.rb +27 -1
- data/lib/rley/parse_forest_visitor.rb +115 -0
- data/lib/rley/parser/base_parser.rb +27 -0
- data/lib/rley/parser/dotted_item.rb +11 -0
- data/lib/rley/parser/earley_parser.rb +3 -15
- data/lib/rley/parser/gfg_chart.rb +106 -0
- data/lib/rley/parser/gfg_earley_parser.rb +139 -0
- data/lib/rley/parser/gfg_parsing.rb +384 -0
- data/lib/rley/parser/parse_entry.rb +148 -0
- data/lib/rley/parser/parse_entry_set.rb +104 -0
- data/lib/rley/parser/parse_entry_tracker.rb +56 -0
- data/lib/rley/parser/parse_forest_builder.rb +229 -0
- data/lib/rley/parser/parse_forest_factory.rb +54 -0
- data/lib/rley/parser/parse_walker_factory.rb +237 -0
- data/lib/rley/ptree/token_range.rb +14 -1
- data/lib/rley/sppf/alternative_node.rb +34 -0
- data/lib/rley/sppf/composite_node.rb +27 -0
- data/lib/rley/sppf/epsilon_node.rb +27 -0
- data/lib/rley/sppf/leaf_node.rb +12 -0
- data/lib/rley/sppf/non_terminal_node.rb +38 -0
- data/lib/rley/sppf/parse_forest.rb +48 -0
- data/lib/rley/sppf/sppf_node.rb +24 -0
- data/lib/rley/sppf/token_node.rb +29 -0
- data/lib/rley/syntax/grammar_builder.rb +16 -12
- data/lib/rley/syntax/grm_symbol.rb +6 -0
- data/lib/rley/syntax/terminal.rb +5 -0
- data/spec/rley/gfg/call_edge_spec.rb +51 -0
- data/spec/rley/gfg/end_vertex_spec.rb +1 -0
- data/spec/rley/gfg/grm_flow_graph_spec.rb +24 -2
- data/spec/rley/gfg/item_vertex_spec.rb +75 -6
- data/spec/rley/gfg/non_terminal_vertex_spec.rb +14 -0
- data/spec/rley/gfg/return_edge_spec.rb +51 -0
- data/spec/rley/gfg/shortcut_edge_spec.rb +43 -0
- data/spec/rley/gfg/vertex_spec.rb +52 -37
- data/spec/rley/parse_forest_visitor_spec.rb +238 -0
- data/spec/rley/parser/dotted_item_spec.rb +29 -8
- data/spec/rley/parser/gfg_chart_spec.rb +138 -0
- data/spec/rley/parser/gfg_earley_parser_spec.rb +918 -0
- data/spec/rley/parser/gfg_parsing_spec.rb +565 -0
- data/spec/rley/parser/parse_entry_set_spec.rb +179 -0
- data/spec/rley/parser/parse_entry_spec.rb +208 -0
- data/spec/rley/parser/parse_forest_builder_spec.rb +382 -0
- data/spec/rley/parser/parse_forest_factory_spec.rb +81 -0
- data/spec/rley/parser/parse_walker_factory_spec.rb +235 -0
- data/spec/rley/parser/state_set_spec.rb +4 -0
- data/spec/rley/sppf/alternative_node_spec.rb +72 -0
- data/spec/rley/sppf/antecedence_graph.rb +87 -0
- data/spec/rley/sppf/forest_representation.rb +136 -0
- data/spec/rley/sppf/gfg_representation.rb +111 -0
- data/spec/rley/sppf/non_terminal_node_spec.rb +64 -0
- data/spec/rley/support/ambiguous_grammar_helper.rb +36 -36
- data/spec/rley/support/expectation_helper.rb +36 -0
- data/spec/rley/support/grammar_helper.rb +28 -0
- data/spec/rley/support/grammar_sppf_helper.rb +25 -0
- data/spec/rley/syntax/grammar_builder_spec.rb +5 -0
- data/spec/rley/syntax/non_terminal_spec.rb +4 -0
- data/spec/rley/syntax/terminal_spec.rb +4 -0
- metadata +58 -2
require_relative '../syntax/grammar'
require_relative 'grm_items_builder' # Use mix-in module
require_relative 'parse_tracer'
require_relative 'parsing'

module Rley # This module is used as a namespace
  module Parser # This module is used as a namespace
    # Abstract base class that factors out the behaviour shared by all
    # Earley-style parsers: it keeps a reference to the grammar and
    # pre-computes the dotted items for every production of that grammar.
    class BaseParser
      include GrmItemsBuilder # Mix-in that builds dotted items for a grammar

      # @return [Syntax::Grammar] The grammar of the language to parse.
      attr_reader :grammar

      # @return [Array] The dotted items/rules for the grammar productions.
      attr_reader :dotted_items

      # @param aGrammar [Syntax::Grammar] grammar from which the dotted
      #   items are derived.
      def initialize(aGrammar)
        @grammar = aGrammar
        @dotted_items = build_dotted_items(grammar) # Method from mix-in
      end
    end # class
  end # module
end # module

# End of file
|
@@ -100,7 +100,18 @@ module Rley # This module is used as a namespace
|
|
100
100
|
|
101
101
|
# An item with the dot in front of a terminal is called a shift item
|
102
102
|
def shift_item?()
|
103
|
+
return position == 0
|
103
104
|
end
|
105
|
+
|
106
|
+
# Return true if this dotted item has a dot one place
|
107
|
+
# to the right compared to the dotted item argument.
|
108
|
+
def successor_of?(another)
|
109
|
+
return false if production != another.production
|
110
|
+
to_the_left = prev_position
|
111
|
+
return false if to_the_left.nil?
|
112
|
+
return to_the_left == another.position
|
113
|
+
end
|
114
|
+
|
104
115
|
|
105
116
|
private
|
106
117
|
|
@@ -1,20 +1,9 @@
|
|
1
|
-
require_relative '
|
2
|
-
require_relative 'grm_items_builder' # Use mix-in module
|
3
|
-
require_relative 'parse_tracer'
|
4
|
-
require_relative 'parsing'
|
1
|
+
require_relative 'base_parser'
|
5
2
|
|
6
3
|
module Rley # This module is used as a namespace
|
7
4
|
module Parser # This module is used as a namespace
|
8
5
|
# Implementation of a parser that uses the Earley parsing algorithm.
|
9
|
-
class EarleyParser
|
10
|
-
include GrmItemsBuilder # Mix-in module for created dotted items of given grammar
|
11
|
-
|
12
|
-
# The grammar of the language.
|
13
|
-
attr_reader(:grammar)
|
14
|
-
|
15
|
-
# The dotted items/rules for the productions of the grammar
|
16
|
-
attr_reader(:dotted_items)
|
17
|
-
|
6
|
+
class EarleyParser < BaseParser
|
18
7
|
# A Hash that defines the mapping: non-terminal => [start dotted items]
|
19
8
|
attr_reader(:start_mapping)
|
20
9
|
|
@@ -24,8 +13,7 @@ module Rley # This module is used as a namespace
|
|
24
13
|
attr_reader(:next_mapping)
|
25
14
|
|
26
15
|
    # @param aGrammar [Syntax::Grammar] grammar of the language to parse.
    def initialize(aGrammar)
      # Superclass stores the grammar and builds the dotted items.
      super(aGrammar)
      # Build the lookup tables from the dotted items provided by BaseParser.
      @start_mapping = build_start_mapping(dotted_items)
      @next_mapping = build_next_mapping(dotted_items)
    end
|
require_relative 'parse_entry'
require_relative 'parse_entry_set'


module Rley # This module is used as a namespace
  module Parser # This module is used as a namespace
    # Also called a parse table. Chart data structure for a parse driven
    # by a Grammar Flow Graph.
    # Assuming that n == number of input tokens,
    # the chart is an array with n + 1 entry sets.
    class GFGChart
      # @return [Array<ParseEntrySet>] one entry set per input token, plus one.
      attr_reader :sets

      # @return [ParseTracer] tracer reporting the parse progress on stdout.
      #   Trace levels:
      #   0: No trace output (default case)
      #   1: Show trace of scanning and completion rules
      #   2: Same as 1 with the addition of the prediction rules
      attr_reader :tracer

      # @param tokenCount [Fixnum] The number of lexemes in the input to parse.
      # @param aGFGraph [GFG::GrmFlowGraph] the grammar flow graph.
      # @param aTracer [ParseTracer] A tracer object.
      def initialize(tokenCount, aGFGraph, aTracer)
        @tracer = aTracer
        @sets = Array.new(tokenCount + 1) { ParseEntrySet.new }
        # Seed the chart with the start vertex of the graph.
        push_entry(aGFGraph.start_vertex, 0, 0, :start_rule)
      end

      # Return the start (non-terminal) symbol of the grammar.
      def start_symbol
        sets.first.entries[0].vertex.non_terminal
      end

      # Access the entry set at the given position.
      def [](index)
        sets[index]
      end

      # Return the index value of the last non-empty entry set.
      def last_index
        first_empty = sets.find_index(&:empty?)
        return sets.size - 1 if first_empty.nil?

        first_empty.zero? ? 0 : first_empty - 1
      end

      # Push a parse entry for the chart entry with given index.
      # Traces the addition when the entry is new and tracing is enabled.
      def push_entry(aVertex, anOrigin, anIndex, aReason)
        candidate = ParseEntry.new(aVertex, anOrigin)
        result = self[anIndex].push_entry(candidate)
        if result == candidate && tracer.level > 0
          case aReason
          when :start_rule, :prediction
            tracer.trace_prediction(anIndex, candidate)
          when :scanning
            tracer.trace_scanning(anIndex, candidate)
          when :completion
            tracer.trace_completion(anIndex, candidate)
          else
            fail NotImplementedError, "Unknown push_entry mode #{aReason}"
          end
        end

        result
      end

      # Retrieve the first parse entry added to this chart.
      def initial_entry
        sets[0].first
      end

      # Retrieve the entry that corresponds to a complete and successful
      # parse. Success can be detected as follows: the last chart entry set
      # has at least one end entry for the start symbol with origin == 0.
      def accepting_entry
        # Retrieve all the end entries of the last non-empty set...
        end_entries = sets[last_index].entries.select(&:end_entry?)

        # ... now find the one for the start symbol with origin at zero.
        end_entries.find do |entry|
          entry.origin.zero? && entry.vertex.non_terminal == start_symbol
        end
      end
    end # class
  end # module
end # module

# End of file
|
require_relative 'base_parser'
require_relative '../gfg/grm_flow_graph'
require_relative 'gfg_parsing'

module Rley # This module is used as a namespace
  module Parser # This module is used as a namespace
    # Implementation of a parser that uses the Earley parsing algorithm,
    # driven by a Grammar Flow Graph (GFG).
    class GFGEarleyParser < BaseParser
      # The Grammar Flow graph for the given grammar
      attr_reader :gf_graph

      # @param aGrammar [Syntax::Grammar] grammar of the language to parse.
      def initialize(aGrammar)
        super(aGrammar)
        @gf_graph = GFG::GrmFlowGraph.new(dotted_items)
      end

      # Parse a sequence of input tokens.
      # @param aTokenSequence [Array] Array of Tokens objects returned by a
      # tokenizer/scanner/lexer.
      # @param aTraceLevel [Fixnum] The specified trace level.
      # The possible values are:
      # 0: No trace output (default case)
      # 1: Show trace of scanning and completion rules
      # 2: Same as of 1 with the addition of the prediction rules
      # @return [GFGParsing] an object that embeds the parse results.
      def parse(aTokenSequence, aTraceLevel = 0)
        tracer = ParseTracer.new(aTraceLevel, $stdout, aTokenSequence)
        result = GFGParsing.new(gf_graph, aTokenSequence, tracer)
        last_token_index = aTokenSequence.size
        (0..last_token_index).each do |i|
          # An empty entry set means no rule matched: syntax error.
          handle_error(result) if result.chart[i].empty?
          result.chart[i].each do |entry|
            # Is entry of the form? [A => alpha . B beta, k]...
            next_symbol = entry.next_symbol
            if next_symbol && next_symbol.kind_of?(Syntax::NonTerminal)
              # ...apply the Call rule
              call_rule(result, entry, i, tracer)
            end

            exit_rule(result, entry, i, tracer) if entry.exit_entry?
            start_rule(result, entry, i, tracer) if entry.start_entry?
            end_rule(result, entry, i, tracer) if entry.end_entry?
          end
          scan_rule(result, i, tracer) if i < last_token_index
        end

        return result
      end

      private

      # Let the current sigma set be the ith parse entry set.
      # This method is invoked when an entry is added to the parse entry set
      # and is of the form [A => alpha . B beta, k].
      # Then the entry [.B, i] is added to the current sigma set.
      # Gist: when an entry expects the non-terminal symbol B, then
      # add an entry with start vertex .B
      def call_rule(aParsing, anEntry, aPosition, aTracer)
        if aTracer.level > 1
          puts "Chart[#{aPosition}] Call rule applied upon #{anEntry}:"
        end
        aParsing.call_rule(anEntry, aPosition)
      end

      # Let the current sigma set be the ith parse entry set.
      # This method is invoked when an entry is added to a parse entry set
      # and the entry is of the form [.B, i].
      # then for every rule B => γ in the grammar an entry [B => . γ, i]
      # is added to the current sigma set.
      # Gist: for an entry corresponding to a start vertex, add an entry
      # for each entry edge in the graph.
      def start_rule(aParsing, anEntry, aPosition, aTracer)
        if aTracer.level > 1
          puts "Chart[#{aPosition}] Start rule applied upon #{anEntry}:"
        end
        aParsing.start_rule(anEntry, aPosition)
      end

      # This method must be invoked when an entry is added to a parse entry set
      # and is of the form [B => γ ., k] (the dot is at the end of the production.
      # Then entry [B., k] is added to the current entry set.
      # Gist: for an entry corresponding to a reduced production, add an entry
      # for each exit edge in the graph.
      def exit_rule(aParsing, anEntry, aPosition, aTracer)
        if aTracer.level > 1
          puts "Chart[#{aPosition}] Exit rule applied upon #{anEntry}:"
        end
        aParsing.exit_rule(anEntry, aPosition)
      end

      # This method is invoked when an entry of the form [B., k]
      # is added to a parse entry set with index j.
      # then for every entry of the form [A => α . B γ, i] in the kth sigma set
      # the entry [A => α B . γ, i] is added to the jth sigma set.
      def end_rule(aParsing, anEntry, aPosition, aTracer)
        if aTracer.level > 1
          puts "Chart[#{aPosition}] End rule applied upon #{anEntry}:"
        end
        aParsing.end_rule(anEntry, aPosition)
      end

      # Given that the terminal t is at the specified position,
      # Locate all entries in the current sigma set that expect t: [A => α . t γ, i]
      # and allow them to cross the edge, adding the node on the back side
      # of the edge as an entry to the next sigma set:
      # add an entry to the next sigma set [A => α t . γ, i]
      def scan_rule(aParsing, aPosition, aTracer)
        if aTracer.level > 1
          puts "Chart[#{aPosition}] Scan rule applied upon #{aParsing.tokens[aPosition]}:"
        end
        aParsing.scan_rule(aPosition)
      end

      # Raise an exception to indicate a syntax error.
      # Builds a message naming the offending lexeme and the terminal(s)
      # that were expected at that position.
      def handle_error(aParsing)
        # Retrieve the first empty state set
        pos = aParsing.chart.sets.find_index(&:empty?)
        lexeme_at_pos = aParsing.tokens[pos - 1].lexeme
        terminals = aParsing.chart.sets[pos - 1].expected_terminals
        term_names = terminals.map(&:name)
        err_msg = "Syntax error at or near token #{pos}"
        err_msg << ">>>#{lexeme_at_pos}<<<:\nExpected "
        if terminals.size > 1
          err_msg << "one of: ['#{term_names.join("', '")}'],"
        else
          err_msg << ": #{term_names[0]},"
        end
        err_msg << " found a '#{aParsing.tokens[pos - 1].terminal.name}'"
        fail StandardError, err_msg + ' instead.'
      end
    end # class
  end # module
end # module

# End of file
|
require_relative 'gfg_chart'
require_relative 'parse_entry_tracker'
require_relative 'parse_forest_builder'


module Rley # This module is used as a namespace
  module Parser # This module is used as a namespace
    # Encapsulates the state of a parse driven by a Grammar Flow Graph:
    # it owns the chart and implements the GFG inference rules
    # (call, start, exit, end, scan) that populate it.
    class GFGParsing
      # The link to the grammar flow graph
      attr_reader(:gf_graph)

      # The link to the chart object
      attr_reader(:chart)

      # The sequence of input token to parse
      attr_reader(:tokens)

      # A Hash with pairs of the form: parse entry => [ antecedent parse entries ]
      # It associates to every parse entry its antecedent(s), that is, the
      # parse entry/ies that caused the key parse entry to be created by
      # one of the GFG rules.
      attr_reader(:antecedence)

      # @param theGFG [GFG::GrmFlowGraph] the grammar flow graph driving the parse.
      # @param theTokens [Array] the input token sequence (defensively copied).
      # @param aTracer [ParseTracer] An object that traces the parsing.
      # The possible values are:
      # 0: No trace output (default case)
      # 1: Show trace of scanning and completion rules
      # 2: Same as of 1 with the addition of the prediction rules
      def initialize(theGFG, theTokens, aTracer)
        @gf_graph = theGFG
        @tokens = theTokens.dup
        @chart = GFGChart.new(tokens.size, gf_graph, aTracer)
        @antecedence = Hash.new { |hash, key| hash[key] = [] }
        # Touching the hash registers the seed chart entry with an
        # (empty) antecedent list.
        antecedence[chart[0].first]
      end

      # Let the current sigma set be the ith parse entry set.
      # This method is invoked when an entry is added to the parse entry set
      # and is of the form [A => alpha . B beta, k].
      # Then the entry [.B, i] is added to the current sigma set.
      # Gist: when an entry expects the non-terminal symbol B, then
      # add an entry with start vertex .B
      def call_rule(anEntry, aPosition)
        next_symbol = anEntry.next_symbol
        start_vertex = gf_graph.start_vertex_for[next_symbol]
        apply_rule(anEntry, start_vertex, aPosition, aPosition, :call_rule)
      end

      # Let the current sigma set be the ith parse entry set.
      # This method is invoked when an entry is added to a parse entry set
      # and the entry is of the form [.B, i].
      # then for every rule B => γ in the grammar an entry [B => . γ, i]
      # is added to the current sigma set.
      # Gist: for an entry corresponding to a start vertex, add an entry
      # for each entry edge in the graph.
      def start_rule(anEntry, aPosition)
        # Only entries created in the current set are expanded.
        return unless anEntry.origin == aPosition

        anEntry.vertex.edges.each do |a_start_edge|
          successor = a_start_edge.successor
          apply_rule(anEntry, successor, aPosition, aPosition, :start_rule)
        end
      end

      # This method must be invoked when an entry is added to a parse entry set
      # and is of the form [B => γ ., k] (the dot is at the end of the production.
      # Then entry [B., k] is added to the current entry set.
      # Gist: for an entry corresponding to a reduced production, add an entry
      # for each exit edge in the graph.
      def exit_rule(anEntry, aPosition)
        lhs = anEntry.vertex.lhs
        end_vertex = gf_graph.end_vertex_for[lhs]
        apply_rule(anEntry, end_vertex, anEntry.origin, aPosition, :exit_rule)
      end

      # This method is invoked when an entry of the form [B., k]
      # is added to a parse entry set with index j.
      # then for every entry of the form [A => α . B γ, i] in the kth sigma set
      # the entry [A => α B . γ, i] is added to the jth sigma set.
      def end_rule(anEntry, aPosition)
        nterm_k = anEntry.vertex.non_terminal
        origin_k = anEntry.origin
        set_k = chart[origin_k]

        # Retrieve all the entries that expect the non-terminal
        expecting_nterm_k = set_k.entries4n_term(nterm_k)
        expecting_nterm_k.each do |ntry|
          # Get the vertices after the expected non-terminal
          vertex_after_terminal = ntry.vertex.shortcut.successor
          apply_rule(anEntry, vertex_after_terminal, ntry.origin, aPosition, :end_rule)
        end
      end

      # Given that the terminal t is at the specified position,
      # Locate all entries in the current sigma set that expect t: [A => α . t γ, i]
      # and allow them to cross the edge, adding the node on the back side
      # of the edge as an entry to the next sigma set:
      # add an entry to the next sigma set [A => α t . γ, i]
      def scan_rule(aPosition)
        terminal = tokens[aPosition].terminal

        # Retrieve all the entries that expect the given terminal
        expecting_term = chart[aPosition].entries4term(terminal)

        # ... if the terminal isn't expected then we have an error
        handle_error(aPosition) if expecting_term.empty?

        expecting_term.each do |ntry|
          # Get the vertices after the expected terminal
          ntry.vertex.edges.each do |an_edge|
            vertex_after_terminal = an_edge.successor
            apply_rule(ntry, vertex_after_terminal, ntry.origin, aPosition + 1, :scan_rule)
          end
        end
      end


      # Return true if the parse was successful (= input tokens
      # followed the syntax specified by the grammar)
      def success?()
        return chart.accepting_entry() ? true : false
      end

      # Return true if there are more than one complete state
      # for the same lhs and same origin in any state set.
      def ambiguous?()
        found = chart.sets.find { |set| !set.ambiguities.empty? }
        return ! found.nil?
      end


=begin
      # Factory method. Builds a ParseForest from the parse result.
      # @return [ParseForest]
      # Algorithm:
      # set state_set_index = index of last entry set in chart
      # Search the completed parse state that corresponds to the full parse
      def parse_forest()
        state_tracker = new_state_tracker
        builder = forest_builder(state_tracker.state_set_index)

        loop do
          state_tracker.symbol_on_left
          # match_symbol = state_tracker.symbol_on_left
          # puts '--------------------'
          # puts "Active parse state: #{state_tracker.parse_state}"
          # puts "Matching symbol: #{match_symbol}"
          # puts 'Parse tree:'
          # puts builder.root.to_string(0)

          # Place the symbol on left of the dot in the parse tree
          done = insert_matched_symbol(state_tracker, builder)
          break if done
        end

        return builder.parse_forest
      end
=end

      # Retrieve the very first parse entry added to the chart.
      # This entry corresponds to the start vertex of the GF graph
      # with origin equal to zero.
      def initial_entry()
        return chart.initial_entry
      end

      # Retrieve the accepting parse entry that represents
      # a complete, successful parse
      # After a successful parse, the last chart entry set
      # has an end parse entry that involves the start symbol
      def accepting_entry()
        return chart.accepting_entry
      end

=begin



      # This method is called when a parse entry for chart entry at position
      # 'pos' expects a terminal as next symbol.
      # If the input token matches the terminal symbol then:
      # Retrieve all parse entrys for chart entry at 'aPosition'
      # that have the given terminal as next symbol.
      # For each s of the above entrys, push to chart entry aPosition + 1
      # a new entry like: <next dotted rule, s.origin, aPosition + 1>
      # In other words, we place the dotted rules in the next entry set
      # such that the dot appears after terminal.
      # @param aTerminal [Terminal] a terminal symbol that
      #   immediately follows a dot
      # @param aPosition [Fixnum] position in the input token sequence.
      # @param nextMapping [Proc or Lambda] code to evaluate in order to
      #   determine the "next" dotted rule for a given one.
      def scanning(aTerminal, aPosition, &nextMapping)
        curr_token = tokens[aPosition]
        return unless curr_token.terminal == aTerminal

        entrys = entrys_expecting(aTerminal, aPosition, false)
        entrys.each do |s|
          next_item = nextMapping.call(s.dotted_rule)
          push_entry(next_item, s.origin, aPosition + 1, :scanning)
        end
      end



      # This method is called when a parse entry at chart entry reaches the end
      # of a production.
      # For every entry in chart[aPosition] that is complete
      # (i.e. of the form: { dotted_rule: X -> γ •, origin: j}),
      # Find entrys s in chart[j] of the form
      # {dotted_rule: Y -> α • X β, origin: i}
      # In other words, rules that predicted the non-terminal X.
      # For each s, add to chart[aPosition] a entry of the form
      # { dotted_rule: Y → α X • β, origin: i})
      def completion(aState, aPosition, &nextMapping)
        curr_origin = aState.origin
        curr_lhs = aState.dotted_rule.lhs
        entrys = entrys_expecting(curr_lhs, curr_origin, false)
        entrys.each do |s|
          next_item = nextMapping.call(s.dotted_rule)
          push_entry(next_item, s.origin, aPosition, :completion)
        end
      end


      # The list of ParseState from the chart entry at given position
      # that expect the given terminal
      def entrys_expecting(aTerminal, aPosition, toSort)
        expecting = chart[aPosition].entrys_expecting(aTerminal)
        return expecting if !toSort || expecting.size < 2

        # Put predicted entrys ahead
        (predicted, others) = expecting.partition(&:predicted?)

        # Sort entry in reverse order of their origin value
        [predicted, others].each do |set|
          set.sort! { |a, b| b.origin <=> a.origin }
        end

        return predicted + others
      end




      # Insert in a parse tree the symbol on the left of the
      # current dotted rule.
      def insert_matched_symbol(aStateTracker, aBuilder)
        # Retrieve symbol before the dot in active parse entry
        match_symbol = aStateTracker.symbol_on_left

        # Retrieve tree node being processed
        tree_node = aBuilder.current_node

        done = false
        case [match_symbol.class, tree_node.class]
          when [Syntax::Terminal, PTree::TerminalNode]
            aStateTracker.to_prev_entry_set
            predecessor_entry_terminal(match_symbol, aStateTracker, aBuilder)

          when [NilClass, Rley::PTree::TerminalNode],
               [NilClass, PTree::NonTerminalNode]
            # Retrieve all parse entrys that expect the lhs
            new_entrys = entrys_expecting_lhs(aStateTracker, aBuilder)
            done = true if new_entrys.empty?
            # Select an unused parse entry
            aStateTracker.select_entry(new_entrys)

          when [Syntax::NonTerminal, PTree::NonTerminalNode]
            completed_entry_for(match_symbol, aStateTracker, aBuilder)
        end

        done ||= aBuilder.root == aBuilder.current_node
        return done
      end
=end
      private

      # Raise an exception to indicate a syntax error.
      def handle_error(aPosition)
        # Retrieve the actual token
        actual = tokens[aPosition].terminal
        lexeme_at_pos = tokens[aPosition].lexeme

        expected = chart.sets[aPosition].expected_terminals
        term_names = expected.map(&:name)
        err_msg = "Syntax error at or near token #{aPosition + 1}"
        err_msg << ">>>#{lexeme_at_pos}<<<:\nExpected "
        if expected.size > 1
          err_msg << "one of: ['#{term_names.join("', '")}'],"
        else
          err_msg << ": #{term_names[0]},"
        end
        err_msg << " found a '#{actual.name}'"
        fail StandardError, err_msg + ' instead.'
      end

      # Apply a GFG rule: push the consequent entry onto the chart and
      # record the antecedent entry that triggered it.
      # NOTE(review): the antecedent is tracked both in the `antecedence`
      # hash and on the entry itself — presumably redundant; confirm
      # before removing either.
      def apply_rule(antecedentEntry, aVertex, anOrigin, aPosition, aRuleId)
        consequent = push_entry(aVertex, anOrigin, aPosition, aRuleId)
        antecedence[consequent] << antecedentEntry
        consequent.add_antecedent(antecedentEntry)
      end

      # Push a parse entry (vertex + origin) to the
      # chart entry with given index if it isn't yet in the chart entry.
      def push_entry(aVertex, anOrigin, aChartIndex, aReason)
        fail StandardError, 'Vertex may not be nil' if aVertex.nil?
        chart.push_entry(aVertex, anOrigin, aChartIndex, aReason)
      end

      # Factory method. Initializes a ParseForestBuilder object
      # NOTE(review): relies on chart.start_dotted_rule, which is currently
      # commented out in GFGChart — verify before re-enabling callers.
      def forest_builder(anIndex)
        full_range = { low: 0, high: anIndex }
        start_production = chart.start_dotted_rule.production
        return ParseForestBuilder.new(start_production, full_range)
      end


      # Factory method. Creates and initializes a ParseEntryTracker instance.
      def new_entry_tracker()
        instance = ParseEntryTracker.new(chart.last_index)
        instance.parse_entry = accepting_entry

        return instance
      end
=begin

      # A terminal symbol is on the left of dot.
      # Go to the predecessor entry for the given terminal
      def predecessor_entry_terminal(_a_symb, aStateTracker, aTreeBuilder)
        index = aStateTracker.entry_set_index
        aTreeBuilder.current_node.range = { low: index, high: index + 1 }
        link_node_to_token(aTreeBuilder, aStateTracker.entry_set_index)
        unless aTreeBuilder.current_node.is_a?(PTree::TerminalNode)
          fail StandardError, 'Expected terminal node'
        end
        aTreeBuilder.move_back
        entry_set = chart[aStateTracker.entry_set_index]
        previous_entry = entry_set.predecessor_entry(aStateTracker.parse_entry)
        aStateTracker.parse_entry = previous_entry
      end


      # Retrieve a complete entry with given terminal symbol as lhs.
      def completed_entry_for(a_symb, aTracker, aTreeBuilder)
        new_entrys = chart[aTracker.entry_set_index].entrys_rewriting(a_symb)
        aTracker.select_entry(new_entrys)
        aTreeBuilder.range = { high: aTracker.entry_set_index }
        aTreeBuilder.use_complete_entry(aTracker.parse_entry)
        link_node_to_token(aTreeBuilder, aTracker.entry_set_index - 1)
        aTreeBuilder.move_down
      end


      def entrys_expecting_lhs(aStateTracker, aTreeBuilder)
        lhs = aStateTracker.curr_dotted_item.production.lhs
        new_entrys = entrys_expecting(lhs, aStateTracker.entry_set_index, true)
        new_entrys.reject! { |st| st == aStateTracker.parse_entry }
        # Filter out parse entrys with incompatible range
        if new_entrys.size > 1
          previous_node = aTreeBuilder.current_path[-3]
          new_entrys.select! do |parse_entry|
            parse_entry.dotted_rule.production.lhs == previous_node.symbol
          end
        end

        return new_entrys
      end

      # If the current node is a terminal node
      # then link the token to that node
      def link_node_to_token(aTreeBuilder, aStateSetIndex)
        return unless aTreeBuilder.current_node.is_a?(PTree::TerminalNode)
        return unless aTreeBuilder.current_node.token.nil?

        a_node = aTreeBuilder.current_node
        a_node.token = tokens[aStateSetIndex] unless a_node.token
      end

=end
    end # class
  end # module
end # module

# End of file
|