rley 0.2.15 → 0.3.00

Sign up to get free protection for your applications and to get access to all the features.
Files changed (72) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/lib/rley/constants.rb +1 -1
  4. data/lib/rley/gfg/call_edge.rb +30 -0
  5. data/lib/rley/gfg/edge.rb +4 -0
  6. data/lib/rley/gfg/end_vertex.rb +1 -1
  7. data/lib/rley/gfg/epsilon_edge.rb +0 -4
  8. data/lib/rley/gfg/grm_flow_graph.rb +32 -7
  9. data/lib/rley/gfg/item_vertex.rb +71 -25
  10. data/lib/rley/gfg/non_terminal_vertex.rb +10 -1
  11. data/lib/rley/gfg/return_edge.rb +31 -0
  12. data/lib/rley/gfg/scan_edge.rb +2 -1
  13. data/lib/rley/gfg/shortcut_edge.rb +26 -0
  14. data/lib/rley/gfg/start_vertex.rb +2 -2
  15. data/lib/rley/gfg/vertex.rb +27 -1
  16. data/lib/rley/parse_forest_visitor.rb +115 -0
  17. data/lib/rley/parser/base_parser.rb +27 -0
  18. data/lib/rley/parser/dotted_item.rb +11 -0
  19. data/lib/rley/parser/earley_parser.rb +3 -15
  20. data/lib/rley/parser/gfg_chart.rb +106 -0
  21. data/lib/rley/parser/gfg_earley_parser.rb +139 -0
  22. data/lib/rley/parser/gfg_parsing.rb +384 -0
  23. data/lib/rley/parser/parse_entry.rb +148 -0
  24. data/lib/rley/parser/parse_entry_set.rb +104 -0
  25. data/lib/rley/parser/parse_entry_tracker.rb +56 -0
  26. data/lib/rley/parser/parse_forest_builder.rb +229 -0
  27. data/lib/rley/parser/parse_forest_factory.rb +54 -0
  28. data/lib/rley/parser/parse_walker_factory.rb +237 -0
  29. data/lib/rley/ptree/token_range.rb +14 -1
  30. data/lib/rley/sppf/alternative_node.rb +34 -0
  31. data/lib/rley/sppf/composite_node.rb +27 -0
  32. data/lib/rley/sppf/epsilon_node.rb +27 -0
  33. data/lib/rley/sppf/leaf_node.rb +12 -0
  34. data/lib/rley/sppf/non_terminal_node.rb +38 -0
  35. data/lib/rley/sppf/parse_forest.rb +48 -0
  36. data/lib/rley/sppf/sppf_node.rb +24 -0
  37. data/lib/rley/sppf/token_node.rb +29 -0
  38. data/lib/rley/syntax/grammar_builder.rb +16 -12
  39. data/lib/rley/syntax/grm_symbol.rb +6 -0
  40. data/lib/rley/syntax/terminal.rb +5 -0
  41. data/spec/rley/gfg/call_edge_spec.rb +51 -0
  42. data/spec/rley/gfg/end_vertex_spec.rb +1 -0
  43. data/spec/rley/gfg/grm_flow_graph_spec.rb +24 -2
  44. data/spec/rley/gfg/item_vertex_spec.rb +75 -6
  45. data/spec/rley/gfg/non_terminal_vertex_spec.rb +14 -0
  46. data/spec/rley/gfg/return_edge_spec.rb +51 -0
  47. data/spec/rley/gfg/shortcut_edge_spec.rb +43 -0
  48. data/spec/rley/gfg/vertex_spec.rb +52 -37
  49. data/spec/rley/parse_forest_visitor_spec.rb +238 -0
  50. data/spec/rley/parser/dotted_item_spec.rb +29 -8
  51. data/spec/rley/parser/gfg_chart_spec.rb +138 -0
  52. data/spec/rley/parser/gfg_earley_parser_spec.rb +918 -0
  53. data/spec/rley/parser/gfg_parsing_spec.rb +565 -0
  54. data/spec/rley/parser/parse_entry_set_spec.rb +179 -0
  55. data/spec/rley/parser/parse_entry_spec.rb +208 -0
  56. data/spec/rley/parser/parse_forest_builder_spec.rb +382 -0
  57. data/spec/rley/parser/parse_forest_factory_spec.rb +81 -0
  58. data/spec/rley/parser/parse_walker_factory_spec.rb +235 -0
  59. data/spec/rley/parser/state_set_spec.rb +4 -0
  60. data/spec/rley/sppf/alternative_node_spec.rb +72 -0
  61. data/spec/rley/sppf/antecedence_graph.rb +87 -0
  62. data/spec/rley/sppf/forest_representation.rb +136 -0
  63. data/spec/rley/sppf/gfg_representation.rb +111 -0
  64. data/spec/rley/sppf/non_terminal_node_spec.rb +64 -0
  65. data/spec/rley/support/ambiguous_grammar_helper.rb +36 -36
  66. data/spec/rley/support/expectation_helper.rb +36 -0
  67. data/spec/rley/support/grammar_helper.rb +28 -0
  68. data/spec/rley/support/grammar_sppf_helper.rb +25 -0
  69. data/spec/rley/syntax/grammar_builder_spec.rb +5 -0
  70. data/spec/rley/syntax/non_terminal_spec.rb +4 -0
  71. data/spec/rley/syntax/terminal_spec.rb +4 -0
  72. metadata +58 -2
@@ -0,0 +1,27 @@
1
+ require_relative '../syntax/grammar'
2
+ require_relative 'grm_items_builder' # Use mix-in module
3
+ require_relative 'parse_tracer'
4
+ require_relative 'parsing'
5
+
6
+ module Rley # This module is used as a namespace
7
+ module Parser # This module is used as a namespace
8
+ # Abstract class for Earley parser.
9
+ class BaseParser
10
+ include GrmItemsBuilder # Mix-in module for created dotted items of given grammar
11
+
12
+ # The grammar of the language.
13
+ attr_reader(:grammar)
14
+
15
+ # The dotted items/rules for the productions of the grammar
16
+ attr_reader(:dotted_items)
17
+
18
+
19
+ def initialize(aGrammar)
20
+ @grammar = aGrammar
21
+ @dotted_items = build_dotted_items(grammar) # Method from mixin
22
+ end
23
+ end # class
24
+ end # module
25
+ end # module
26
+
27
+ # End of file
@@ -100,7 +100,18 @@ module Rley # This module is used as a namespace
100
100
 
101
101
  # An item with the dot in front of a terminal is called a shift item
102
102
  def shift_item?()
103
+ return position == 0
103
104
  end
105
+
106
+ # Return true if this dotted item has a dot one place
107
+ # to the right compared to the dotted item argument.
108
+ def successor_of?(another)
109
+ return false if production != another.production
110
+ to_the_left = prev_position
111
+ return false if to_the_left.nil?
112
+ return to_the_left == another.position
113
+ end
114
+
104
115
 
105
116
  private
106
117
 
@@ -1,20 +1,9 @@
1
- require_relative '../syntax/grammar'
2
- require_relative 'grm_items_builder' # Use mix-in module
3
- require_relative 'parse_tracer'
4
- require_relative 'parsing'
1
+ require_relative 'base_parser'
5
2
 
6
3
  module Rley # This module is used as a namespace
7
4
  module Parser # This module is used as a namespace
8
5
  # Implementation of a parser that uses the Earley parsing algorithm.
9
- class EarleyParser
10
- include GrmItemsBuilder # Mix-in module for created dotted items of given grammar
11
-
12
- # The grammar of the language.
13
- attr_reader(:grammar)
14
-
15
- # The dotted items/rules for the productions of the grammar
16
- attr_reader(:dotted_items)
17
-
6
+ class EarleyParser < BaseParser
18
7
  # A Hash that defines the mapping: non-terminal => [start dotted items]
19
8
  attr_reader(:start_mapping)
20
9
 
@@ -24,8 +13,7 @@ module Rley # This module is used as a namespace
24
13
  attr_reader(:next_mapping)
25
14
 
26
15
  def initialize(aGrammar)
27
- @grammar = aGrammar
28
- @dotted_items = build_dotted_items(grammar) # Method from mixin
16
+ super(aGrammar)
29
17
  @start_mapping = build_start_mapping(dotted_items)
30
18
  @next_mapping = build_next_mapping(dotted_items)
31
19
  end
@@ -0,0 +1,106 @@
1
+ require_relative 'parse_entry'
2
+ require_relative 'parse_entry_set'
3
+
4
+
5
+ module Rley # This module is used as a namespace
6
+ module Parser # This module is used as a namespace
7
+ # Also called a parse table.
8
+ # It is a Grammar Flow Graph implementation.
9
+ # Assuming that n == number of input tokens,
10
+ # the chart is an array with n + 1 entry sets.
11
+ class GFGChart
12
+ # An array of entry sets (one per input token + 1)
13
+ attr_reader(:sets)
14
+
15
+ # The level of trace details reported on stdout during the parse.
16
+ # The possible values are:
17
+ # 0: No trace output (default case)
18
+ # 1: Show trace of scanning and completion rules
19
+ # 2: Same as 1, with the addition of the prediction rules
20
+ attr_reader(:tracer)
21
+
22
+ # @param tokenCount [Fixnum] The number of lexemes in the input to parse.
23
+ # @param aTracer [ParseTracer] A tracer object.
24
+ def initialize(tokenCount, aGFGraph, aTracer)
25
+ @tracer = aTracer
26
+ @sets = Array.new(tokenCount + 1) { |_| ParseEntrySet.new }
27
+ push_entry(aGFGraph.start_vertex, 0, 0, :start_rule)
28
+ end
29
+
30
+ # The dotted item/rule used to seed the parse chart.
31
+ # It corresponds to the start production and a dot placed
32
+ # at the beginning of the rhs
33
+ # def start_dotted_rule()
34
+ # return self[0].entries.first.dotted_rule
35
+ # end
36
+
37
+ # Return the start (non-terminal) symbol of the grammar.
38
+ def start_symbol()
39
+ return sets.first.entries[0].vertex.non_terminal
40
+ end
41
+
42
+ # Access the entry set at given position
43
+ def [](index)
44
+ return sets[index]
45
+ end
46
+
47
+ # Return the index value of the last non-empty entry set.
48
+ def last_index()
49
+ first_empty = sets.find_index(&:empty?)
50
+ if first_empty.nil?
51
+ index = sets.size - 1
52
+ else
53
+ index = (first_empty == 0) ? 0 : first_empty - 1
54
+ end
55
+
56
+ return index
57
+ end
58
+
59
+ # Push a parse entry for the chart entry with given index
60
+ def push_entry(aVertex, anOrigin, anIndex, aReason)
61
+ new_entry = ParseEntry.new(aVertex, anOrigin)
62
+ pushed = self[anIndex].push_entry(new_entry)
63
+ if pushed == new_entry && tracer.level > 0
64
+ case aReason
65
+ when :start_rule, :prediction
66
+ tracer.trace_prediction(anIndex, new_entry)
67
+
68
+ when :scanning
69
+ tracer.trace_scanning(anIndex, new_entry)
70
+
71
+ when :completion
72
+ tracer.trace_completion(anIndex, new_entry)
73
+ else
74
+ fail NotImplementedError, "Unknown push_entry mode #{aReason}"
75
+ end
76
+ end
77
+
78
+ return pushed
79
+ end
80
+
81
+ # Retrieve the first parse entry added to this chart
82
+ def initial_entry()
83
+ return sets[0].first
84
+ end
85
+
86
+ # Retrieve the entry that corresponds to a complete and successful parse
87
+ def accepting_entry()
88
+ # Success can be detected as follows:
89
+ # The last chart entry set has at least one complete parse entry
90
+ # for the start symbol with an origin == 0
91
+
92
+ # Retrieve all the end entries (i.e. of the form [B., k])
93
+ last_entries = sets[last_index].entries.select(&:end_entry?)
94
+
95
+ # ... now find the end vertex for start symbol and with origin at zero...
96
+ success_entries = last_entries.select do |entry|
97
+ entry.origin == 0 && entry.vertex.non_terminal == start_symbol
98
+ end
99
+
100
+ return success_entries.first
101
+ end
102
+ end # class
103
+ end # module
104
+ end # module
105
+
106
+ # End of file
@@ -0,0 +1,139 @@
1
+ require_relative 'base_parser'
2
+ require_relative '../gfg/grm_flow_graph'
3
+ require_relative 'gfg_parsing'
4
+
5
+ module Rley # This module is used as a namespace
6
+ module Parser # This module is used as a namespace
7
+ # Implementation of a parser that uses the Earley parsing algorithm.
8
+ class GFGEarleyParser < BaseParser
9
+
10
+ # The Grammar Flow graph for the given grammar
11
+ attr_reader :gf_graph
12
+
13
+ def initialize(aGrammar)
14
+ super(aGrammar)
15
+ @gf_graph = GFG::GrmFlowGraph.new(dotted_items)
16
+ end
17
+
18
+ # Parse a sequence of input tokens.
19
+ # @param aTokenSequence [Array] Array of Token objects returned by a
20
+ # tokenizer/scanner/lexer.
21
+ # @param aTraceLevel [Fixnum] The specified trace level.
22
+ # The possible values are:
23
+ # 0: No trace output (default case)
24
+ # 1: Show trace of scanning and completion rules
25
+ # 2: Same as 1, with the addition of the prediction rules
26
+ # @return [Parsing] an object that embeds the parse results.
27
+ def parse(aTokenSequence, aTraceLevel = 0)
28
+ tracer = ParseTracer.new(aTraceLevel, $stdout, aTokenSequence)
29
+ result = GFGParsing.new(gf_graph, aTokenSequence, tracer)
30
+ last_token_index = aTokenSequence.size
31
+ (0..last_token_index).each do |i|
32
+ handle_error(result) if result.chart[i].empty?
33
+ result.chart[i].each do |entry|
34
+ # Is entry of the form? [A => alpha . B beta, k]...
35
+ next_symbol = entry.next_symbol
36
+ if next_symbol && next_symbol.kind_of?(Syntax::NonTerminal)
37
+ # ...apply the Call rule
38
+ call_rule(result, entry, i, tracer)
39
+ end
40
+
41
+ exit_rule(result, entry, i, tracer) if entry.exit_entry?
42
+ start_rule(result, entry, i, tracer) if entry.start_entry?
43
+ end_rule(result, entry, i, tracer) if entry.end_entry?
44
+ end
45
+ scan_rule(result, i, tracer) if i < last_token_index
46
+ end
47
+
48
+ return result
49
+ end
50
+
51
+ private
52
+ # Let the current sigma set be the ith parse entry set.
53
+ # This method is invoked when an entry is added to the parse entry set
54
+ # and is of the form [A => alpha . B beta, k].
55
+ # Then the entry [.B, i] is added to the current sigma set.
56
+ # Gist: when an entry expects the non-terminal symbol B, then
57
+ # add an entry with start vertex .B
58
+ def call_rule(aParsing, anEntry, aPosition, aTracer)
59
+ if aTracer.level > 1
60
+ puts "Chart[#{aPosition}] Call rule applied upon #{anEntry}:"
61
+ end
62
+ aParsing.call_rule(anEntry, aPosition)
63
+ end
64
+
65
+ # Let the current sigma set be the ith parse entry set.
66
+ # This method is invoked when an entry is added to a parse entry set
67
+ # and the entry is of the form [.B, i].
68
+ # then for every rule B => γ in the grammar an entry [B => . γ, i]
69
+ # is added to the current sigma set.
70
+ # Gist: for an entry corresponding to a start vertex, add an entry
71
+ # for each entry edge in the graph.
72
+ def start_rule(aParsing, anEntry, aPosition, aTracer)
73
+ if aTracer.level > 1
74
+ puts "Chart[#{aPosition}] Start rule applied upon #{anEntry}:"
75
+ end
76
+ aParsing.start_rule(anEntry, aPosition)
77
+ end
78
+
79
+ # This method must be invoked when an entry is added to a parse entry set
80
+ # and is of the form [B => γ ., k] (the dot is at the end of the production).
81
+ # Then entry [B., k] is added to the current entry set.
82
+ # Gist: for an entry corresponding to a reduced production, add an entry
83
+ # for each exit edge in the graph.
84
+ def exit_rule(aParsing, anEntry, aPosition, aTracer)
85
+ if aTracer.level > 1
86
+ puts "Chart[#{aPosition}] Exit rule applied upon #{anEntry}:"
87
+ end
88
+ aParsing.exit_rule(anEntry, aPosition)
89
+ end
90
+
91
+ # This method is invoked when an entry of the form [B., k]
92
+ # is added to a parse entry set with index j.
93
+ # then for every entry of the form [A => α . B γ, i] in the kth sigma set
94
+ # the entry [A => α B . γ, i] is added to the jth sigma set.
95
+ def end_rule(aParsing, anEntry, aPosition, aTracer)
96
+ if aTracer.level > 1
97
+ puts "Chart[#{aPosition}] End rule applied upon #{anEntry}:"
98
+ end
99
+ aParsing.end_rule(anEntry, aPosition)
100
+ end
101
+
102
+ # Given that the terminal t is at the specified position,
103
+ # Locate all entries in the current sigma set that expect t: [A => α . t γ, i]
104
+ # and allow them to cross the edge, adding the node on the back side
105
+ # of the edge as an entry to the next sigma set:
106
+ # add an entry to the next sigma set [A => α t . γ, i]
107
+ def scan_rule(aParsing, aPosition, aTracer)
108
+ if aTracer.level > 1
109
+ puts "Chart[#{aPosition}] Scan rule applied upon #{aParsing.tokens[aPosition]}:"
110
+ end
111
+ aParsing.scan_rule(aPosition)
112
+ end
113
+
114
+ # Raise an exception to indicate a syntax error.
115
+ def handle_error(aParsing)
116
+ # Retrieve the first empty state set
117
+ pos = aParsing.chart.sets.find_index(&:empty?)
118
+ lexeme_at_pos = aParsing.tokens[pos - 1].lexeme
119
+ puts "chart index: #{pos - 1}"
120
+ terminals = aParsing.chart.sets[pos - 1].expected_terminals
121
+ puts "count expected terminals #{terminals.size}"
122
+ entries = aParsing.chart.sets[pos - 1].entries.map(&:to_s).join("\n")
123
+ puts "Items #{entries}"
124
+ term_names = terminals.map(&:name)
125
+ err_msg = "Syntax error at or near token #{pos}"
126
+ err_msg << ">>>#{lexeme_at_pos}<<<:\nExpected "
127
+ if terminals.size > 1
128
+ err_msg << "one of: ['#{term_names.join("', '")}'],"
129
+ else
130
+ err_msg << ": #{term_names[0]},"
131
+ end
132
+ err_msg << " found a '#{aParsing.tokens[pos - 1].terminal.name}'"
133
+ fail StandardError, err_msg + ' instead.'
134
+ end
135
+ end # class
136
+ end # module
137
+ end # module
138
+
139
+ # End of file
@@ -0,0 +1,384 @@
1
+ require_relative 'gfg_chart'
2
+ require_relative 'parse_entry_tracker'
3
+ require_relative 'parse_forest_builder'
4
+
5
+
6
+ module Rley # This module is used as a namespace
7
+ module Parser # This module is used as a namespace
8
+ class GFGParsing
9
+ # The link to the grammar flow graph
10
+ attr_reader(:gf_graph)
11
+
12
+ # The link to the chart object
13
+ attr_reader(:chart)
14
+
15
+ # The sequence of input tokens to parse
16
+ attr_reader(:tokens)
17
+
18
+ # A Hash with pairs of the form: parse entry => [ antecedent parse entries ]
19
+ # It associates to every parse entry its antecedent(s), that is, the parse entry/ies
20
+ # that caused the key parse entry to be created by one of the gfg rules
21
+ attr_reader(:antecedence)
22
+
23
+ # @param aTracer [ParseTracer] An object that traces the parsing.
24
+ # The possible values are:
25
+ # 0: No trace output (default case)
26
+ # 1: Show trace of scanning and completion rules
27
+ # 2: Same as 1, with the addition of the prediction rules
28
+ def initialize(theGFG, theTokens, aTracer)
29
+ @gf_graph = theGFG
30
+ @tokens = theTokens.dup
31
+ @chart = GFGChart.new(tokens.size, gf_graph, aTracer)
32
+ @antecedence = Hash.new { |hash, key| hash[key] = [] }
33
+ antecedence[chart[0].first]
34
+ end
35
+
36
+ # Let the current sigma set be the ith parse entry set.
37
+ # This method is invoked when an entry is added to the parse entry set
38
+ # and is of the form [A => alpha . B beta, k].
39
+ # Then the entry [.B, i] is added to the current sigma set.
40
+ # Gist: when an entry expects the non-terminal symbol B, then
41
+ # add an entry with start vertex .B
42
+ def call_rule(anEntry, aPosition)
43
+ next_symbol = anEntry.next_symbol
44
+ start_vertex = gf_graph.start_vertex_for[next_symbol]
45
+ apply_rule(anEntry, start_vertex, aPosition, aPosition, :call_rule)
46
+ end
47
+
48
+ # Let the current sigma set be the ith parse entry set.
49
+ # This method is invoked when an entry is added to a parse entry set
50
+ # and the entry is of the form [.B, i].
51
+ # then for every rule B => γ in the grammar an entry [B => . γ, i]
52
+ # is added to the current sigma set.
53
+ # Gist: for an entry corresponding to a start vertex, add an entry
54
+ # for each entry edge in the graph.
55
+ def start_rule(anEntry, aPosition)
56
+ return unless anEntry.origin == aPosition
57
+
58
+ anEntry.vertex.edges.each do |a_start_edge|
59
+ successor = a_start_edge.successor
60
+ apply_rule(anEntry, successor, aPosition, aPosition, :start_rule)
61
+ end
62
+ end
63
+
64
+ # This method must be invoked when an entry is added to a parse entry set
65
+ # and is of the form [B => γ ., k] (the dot is at the end of the production).
66
+ # Then entry [B., k] is added to the current entry set.
67
+ # Gist: for an entry corresponding to a reduced production, add an entry
68
+ # for each exit edge in the graph.
69
+ def exit_rule(anEntry, aPosition)
70
+ lhs = anEntry.vertex.lhs
71
+ end_vertex = gf_graph.end_vertex_for[lhs]
72
+ apply_rule(anEntry, end_vertex, anEntry.origin, aPosition, :exit_rule)
73
+ end
74
+
75
+ # This method is invoked when an entry of the form [B., k]
76
+ # is added to a parse entry set with index j.
77
+ # then for every entry of the form [A => α . B γ, i] in the kth sigma set
78
+ # the entry [A => α B . γ, i] is added to the jth sigma set.
79
+ def end_rule(anEntry, aPosition)
80
+ nterm_k = anEntry.vertex.non_terminal
81
+ origin_k = anEntry.origin
82
+ set_k = chart[origin_k]
83
+
84
+ # Retrieve all the entries that expect the non-terminal
85
+ expecting_nterm_k = set_k.entries4n_term(nterm_k)
86
+ expecting_nterm_k.each do |ntry|
87
+ # Get the vertices after the expected non-terminal
88
+ vertex_after_terminal = ntry.vertex.shortcut.successor
89
+ apply_rule(anEntry, vertex_after_terminal, ntry.origin, aPosition, :end_rule)
90
+ end
91
+ end
92
+
93
+ # Given that the terminal t is at the specified position,
94
+ # Locate all entries in the current sigma set that expect t: [A => α . t γ, i]
95
+ # and allow them to cross the edge, adding the node on the back side
96
+ # of the edge as an entry to the next sigma set:
97
+ # add an entry to the next sigma set [A => α t . γ, i]
98
+ def scan_rule(aPosition)
99
+ terminal = tokens[aPosition].terminal
100
+
101
+ # Retrieve all the entries that expect the given terminal
102
+ expecting_term = chart[aPosition].entries4term(terminal)
103
+
104
+ # ... if the terminal isn't expected then we have an error
105
+ handle_error(aPosition) if expecting_term.empty?
106
+
107
+ expecting_term.each do |ntry|
108
+ # Get the vertices after the expected terminal
109
+ ntry.vertex.edges.each do |an_edge|
110
+ vertex_after_terminal = an_edge.successor
111
+ apply_rule(ntry, vertex_after_terminal, ntry.origin, aPosition + 1, :scan_rule)
112
+ end
113
+ end
114
+ end
115
+
116
+
117
+ # Return true if the parse was successful (= input tokens
118
+ # followed the syntax specified by the grammar)
119
+ def success?()
120
+ return chart.accepting_entry() ? true : false
121
+ end
122
+
123
+ # Return true if there is more than one complete state
124
+ # for the same lhs and same origin in any state set.
125
+ def ambiguous?()
126
+ found = chart.sets.find { |set| !set.ambiguities.empty? }
127
+ return ! found.nil?
128
+ end
129
+
130
+
131
+ =begin
132
+ # Factory method. Builds a ParseForest from the parse result.
133
+ # @return [ParseForest]
134
+ # Algorithm:
135
+ # set state_set_index = index of last entry set in chart
136
+ # Search the completed parse state that corresponds to the full parse
137
+ def parse_forest()
138
+ state_tracker = new_state_tracker
139
+ builder = forest_builder(state_tracker.state_set_index)
140
+
141
+ loop do
142
+ state_tracker.symbol_on_left
143
+ # match_symbol = state_tracker.symbol_on_left
144
+ # puts '--------------------'
145
+ # puts "Active parse state: #{state_tracker.parse_state}"
146
+ # puts "Matching symbol: #{match_symbol}"
147
+ # puts 'Parse tree:'
148
+ # puts builder.root.to_string(0)
149
+
150
+ # Place the symbol on left of the dot in the parse tree
151
+ done = insert_matched_symbol(state_tracker, builder)
152
+ break if done
153
+ end
154
+
155
+ return builder.parse_forest
156
+ end
157
+ =end
158
+
159
+ # Retrieve the very first parse entry added to the chart.
160
+ # This entry corresponds to the start vertex of the GF graph
161
+ # with origin equal to zero.
162
+ def initial_entry()
163
+ return chart.initial_entry
164
+ end
165
+
166
+ # Retrieve the accepting parse entry that represents
167
+ # a complete, successful parse
168
+ # After a successful parse, the last chart entry set
169
+ # has an end parse entry that involves the start symbol
170
+ def accepting_entry()
171
+ return chart.accepting_entry
172
+ end
173
+
174
+ =begin
175
+
176
+
177
+
178
+ # This method is called when a parse entry for chart entry at position
179
+ # 'pos' expects a terminal as next symbol.
180
+ # If the input token matches the terminal symbol then:
181
+ # Retrieve all parse entrys for chart entry at 'aPosition'
182
+ # that have the given terminal as next symbol.
183
+ # For each s of the above entrys, push to chart entry aPosition + 1
184
+ # a new entry like: <next dotted rule, s.origin, aPosition + 1>
185
+ # In other words, we place the dotted rules in the next entry set
186
+ # such that the dot appears after terminal.
187
+ # @param aTerminal [Terminal] a terminal symbol that
188
+ # immediately follows a dot
189
+ # @param aPosition [Fixnum] position in the input token sequence.
190
+ # @param nextMapping [Proc or Lambda] code to evaluate in order to
191
+ # determine the "next" dotted rule for a given one.
192
+ def scanning(aTerminal, aPosition, &nextMapping)
193
+ curr_token = tokens[aPosition]
194
+ return unless curr_token.terminal == aTerminal
195
+
196
+ entrys = entrys_expecting(aTerminal, aPosition, false)
197
+ entrys.each do |s|
198
+ next_item = nextMapping.call(s.dotted_rule)
199
+ push_entry(next_item, s.origin, aPosition + 1, :scanning)
200
+ end
201
+ end
202
+
203
+
204
+
205
+ # This method is called when a parse entry at chart entry reaches the end
206
+ # of a production.
207
+ # For every entry in chart[aPosition] that is complete
208
+ # (i.e. of the form: { dotted_rule: X -> γ •, origin: j}),
209
+ # Find entrys s in chart[j] of the form
210
+ # {dotted_rule: Y -> α • X β, origin: i}
211
+ # In other words, rules that predicted the non-terminal X.
212
+ # For each s, add to chart[aPosition] a entry of the form
213
+ # { dotted_rule: Y → α X • β, origin: i})
214
+ def completion(aState, aPosition, &nextMapping)
215
+ curr_origin = aState.origin
216
+ curr_lhs = aState.dotted_rule.lhs
217
+ entrys = entrys_expecting(curr_lhs, curr_origin, false)
218
+ entrys.each do |s|
219
+ next_item = nextMapping.call(s.dotted_rule)
220
+ push_entry(next_item, s.origin, aPosition, :completion)
221
+ end
222
+ end
223
+
224
+
225
+ # The list of ParseState from the chart entry at given position
226
+ # that expect the given terminal
227
+ def entrys_expecting(aTerminal, aPosition, toSort)
228
+ expecting = chart[aPosition].entrys_expecting(aTerminal)
229
+ return expecting if !toSort || expecting.size < 2
230
+
231
+ # Put predicted entrys ahead
232
+ (predicted, others) = expecting.partition(&:predicted?)
233
+
234
+ # Sort entry in reverse order of their origin value
235
+ [predicted, others].each do |set|
236
+ set.sort! { |a, b| b.origin <=> a.origin }
237
+ end
238
+
239
+ return predicted + others
240
+ end
241
+
242
+
243
+
244
+
245
+ # Insert in a parse tree the symbol on the left of the
246
+ # current dotted rule.
247
+ def insert_matched_symbol(aStateTracker, aBuilder)
248
+ # Retrieve symbol before the dot in active parse entry
249
+ match_symbol = aStateTracker.symbol_on_left
250
+
251
+ # Retrieve tree node being processed
252
+ tree_node = aBuilder.current_node
253
+
254
+ done = false
255
+ case [match_symbol.class, tree_node.class]
256
+ when [Syntax::Terminal, PTree::TerminalNode]
257
+ aStateTracker.to_prev_entry_set
258
+ predecessor_entry_terminal(match_symbol, aStateTracker, aBuilder)
259
+
260
+ when [NilClass, Rley::PTree::TerminalNode],
261
+ [NilClass, PTree::NonTerminalNode]
262
+ # Retrieve all parse entrys that expect the lhs
263
+ new_entrys = entrys_expecting_lhs(aStateTracker, aBuilder)
264
+ done = true if new_entrys.empty?
265
+ # Select an unused parse entry
266
+ aStateTracker.select_entry(new_entrys)
267
+
268
+ when [Syntax::NonTerminal, PTree::NonTerminalNode]
269
+ completed_entry_for(match_symbol, aStateTracker, aBuilder)
270
+ end
271
+
272
+ done ||= aBuilder.root == aBuilder.current_node
273
+ return done
274
+ end
275
+ =end
276
+ private
277
+
278
+ # Raise an exception to indicate a syntax error.
279
+ def handle_error(aPosition)
280
+ # Retrieve the actual token
281
+ actual = tokens[aPosition].terminal
282
+ lexeme_at_pos = tokens[aPosition].lexeme
283
+
284
+ expected = chart.sets[aPosition].expected_terminals
285
+ term_names = expected.map(&:name)
286
+ err_msg = "Syntax error at or near token #{aPosition + 1}"
287
+ err_msg << ">>>#{lexeme_at_pos}<<<:\nExpected "
288
+ if expected.size > 1
289
+ err_msg << "one of: ['#{term_names.join("', '")}'],"
290
+ else
291
+ err_msg << ": #{term_names[0]},"
292
+ end
293
+ err_msg << " found a '#{actual.name}'"
294
+ fail StandardError, err_msg + ' instead.'
295
+ end
296
+
297
+ def apply_rule(antecedentEntry, aVertex, anOrigin, aPosition, aRuleId)
298
+ consequent = push_entry(aVertex, anOrigin, aPosition, aRuleId)
299
+ antecedence[consequent] << antecedentEntry
300
+ consequent.add_antecedent(antecedentEntry)
301
+ end
302
+
303
+ # Push a parse entry (vertex + origin) to the
304
+ # chart entry with given index if it isn't yet in the chart entry.
305
+ def push_entry(aVertex, anOrigin, aChartIndex, aReason)
306
+ fail StandardError, 'Vertex may not be nil' if aVertex.nil?
307
+ chart.push_entry(aVertex, anOrigin, aChartIndex, aReason)
308
+ end
309
+
310
+ # Factory method. Initializes a ParseForestBuilder object
311
+ def forest_builder(anIndex)
312
+ full_range = { low: 0, high: anIndex }
313
+ start_production = chart.start_dotted_rule.production
314
+ return ParseForestBuilder.new(start_production, full_range)
315
+ end
316
+
317
+
318
+ # Factory method. Creates and initializes a ParseEntryTracker instance.
319
+ def new_entry_tracker()
320
+ instance = ParseEntryTracker.new(chart.last_index)
321
+ instance.parse_entry = accepting_entry
322
+
323
+ return instance
324
+ end
325
+ =begin
326
+
327
+ # A terminal symbol is on the left of dot.
328
+ # Go to the predecessor entry for the given terminal
329
+ def predecessor_entry_terminal(_a_symb, aStateTracker, aTreeBuilder)
330
+ index = aStateTracker.entry_set_index
331
+ aTreeBuilder.current_node.range = { low: index, high: index + 1 }
332
+ link_node_to_token(aTreeBuilder, aStateTracker.entry_set_index)
333
+ unless aTreeBuilder.current_node.is_a?(PTree::TerminalNode)
334
+ fail StandardError, 'Expected terminal node'
335
+ end
336
+ aTreeBuilder.move_back
337
+ entry_set = chart[aStateTracker.entry_set_index]
338
+ previous_entry = entry_set.predecessor_entry(aStateTracker.parse_entry)
339
+ aStateTracker.parse_entry = previous_entry
340
+ end
341
+
342
+
343
+ # Retrieve a complete entry with given terminal symbol as lhs.
344
+ def completed_entry_for(a_symb, aTracker, aTreeBuilder)
345
+ new_entrys = chart[aTracker.entry_set_index].entrys_rewriting(a_symb)
346
+ aTracker.select_entry(new_entrys)
347
+ aTreeBuilder.range = { high: aTracker.entry_set_index }
348
+ aTreeBuilder.use_complete_entry(aTracker.parse_entry)
349
+ link_node_to_token(aTreeBuilder, aTracker.entry_set_index - 1)
350
+ aTreeBuilder.move_down
351
+ end
352
+
353
+
354
+ def entrys_expecting_lhs(aStateTracker, aTreeBuilder)
355
+ lhs = aStateTracker.curr_dotted_item.production.lhs
356
+ new_entrys = entrys_expecting(lhs, aStateTracker.entry_set_index, true)
357
+ new_entrys.reject! { |st| st == aStateTracker.parse_entry }
358
+ # Filter out parse entrys with incompatible range
359
+ if new_entrys.size > 1
360
+ previous_node = aTreeBuilder.current_path[-3]
361
+ new_entrys.select! do |parse_entry|
362
+ parse_entry.dotted_rule.production.lhs == previous_node.symbol
363
+ end
364
+ end
365
+
366
+ return new_entrys
367
+ end
368
+
369
+ # If the current node is a terminal node
370
+ # then link the token to that node
371
+ def link_node_to_token(aTreeBuilder, aStateSetIndex)
372
+ return unless aTreeBuilder.current_node.is_a?(PTree::TerminalNode)
373
+ return unless aTreeBuilder.current_node.token.nil?
374
+
375
+ a_node = aTreeBuilder.current_node
376
+ a_node.token = tokens[aStateSetIndex] unless a_node.token
377
+ end
378
+
379
+ =end
380
+ end # class
381
+ end # module
382
+ end # module
383
+
384
+ # End of file