rley 0.2.15 → 0.3.00

Sign up to get free protection for your applications and to get access to all the features.
Files changed (72) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/lib/rley/constants.rb +1 -1
  4. data/lib/rley/gfg/call_edge.rb +30 -0
  5. data/lib/rley/gfg/edge.rb +4 -0
  6. data/lib/rley/gfg/end_vertex.rb +1 -1
  7. data/lib/rley/gfg/epsilon_edge.rb +0 -4
  8. data/lib/rley/gfg/grm_flow_graph.rb +32 -7
  9. data/lib/rley/gfg/item_vertex.rb +71 -25
  10. data/lib/rley/gfg/non_terminal_vertex.rb +10 -1
  11. data/lib/rley/gfg/return_edge.rb +31 -0
  12. data/lib/rley/gfg/scan_edge.rb +2 -1
  13. data/lib/rley/gfg/shortcut_edge.rb +26 -0
  14. data/lib/rley/gfg/start_vertex.rb +2 -2
  15. data/lib/rley/gfg/vertex.rb +27 -1
  16. data/lib/rley/parse_forest_visitor.rb +115 -0
  17. data/lib/rley/parser/base_parser.rb +27 -0
  18. data/lib/rley/parser/dotted_item.rb +11 -0
  19. data/lib/rley/parser/earley_parser.rb +3 -15
  20. data/lib/rley/parser/gfg_chart.rb +106 -0
  21. data/lib/rley/parser/gfg_earley_parser.rb +139 -0
  22. data/lib/rley/parser/gfg_parsing.rb +384 -0
  23. data/lib/rley/parser/parse_entry.rb +148 -0
  24. data/lib/rley/parser/parse_entry_set.rb +104 -0
  25. data/lib/rley/parser/parse_entry_tracker.rb +56 -0
  26. data/lib/rley/parser/parse_forest_builder.rb +229 -0
  27. data/lib/rley/parser/parse_forest_factory.rb +54 -0
  28. data/lib/rley/parser/parse_walker_factory.rb +237 -0
  29. data/lib/rley/ptree/token_range.rb +14 -1
  30. data/lib/rley/sppf/alternative_node.rb +34 -0
  31. data/lib/rley/sppf/composite_node.rb +27 -0
  32. data/lib/rley/sppf/epsilon_node.rb +27 -0
  33. data/lib/rley/sppf/leaf_node.rb +12 -0
  34. data/lib/rley/sppf/non_terminal_node.rb +38 -0
  35. data/lib/rley/sppf/parse_forest.rb +48 -0
  36. data/lib/rley/sppf/sppf_node.rb +24 -0
  37. data/lib/rley/sppf/token_node.rb +29 -0
  38. data/lib/rley/syntax/grammar_builder.rb +16 -12
  39. data/lib/rley/syntax/grm_symbol.rb +6 -0
  40. data/lib/rley/syntax/terminal.rb +5 -0
  41. data/spec/rley/gfg/call_edge_spec.rb +51 -0
  42. data/spec/rley/gfg/end_vertex_spec.rb +1 -0
  43. data/spec/rley/gfg/grm_flow_graph_spec.rb +24 -2
  44. data/spec/rley/gfg/item_vertex_spec.rb +75 -6
  45. data/spec/rley/gfg/non_terminal_vertex_spec.rb +14 -0
  46. data/spec/rley/gfg/return_edge_spec.rb +51 -0
  47. data/spec/rley/gfg/shortcut_edge_spec.rb +43 -0
  48. data/spec/rley/gfg/vertex_spec.rb +52 -37
  49. data/spec/rley/parse_forest_visitor_spec.rb +238 -0
  50. data/spec/rley/parser/dotted_item_spec.rb +29 -8
  51. data/spec/rley/parser/gfg_chart_spec.rb +138 -0
  52. data/spec/rley/parser/gfg_earley_parser_spec.rb +918 -0
  53. data/spec/rley/parser/gfg_parsing_spec.rb +565 -0
  54. data/spec/rley/parser/parse_entry_set_spec.rb +179 -0
  55. data/spec/rley/parser/parse_entry_spec.rb +208 -0
  56. data/spec/rley/parser/parse_forest_builder_spec.rb +382 -0
  57. data/spec/rley/parser/parse_forest_factory_spec.rb +81 -0
  58. data/spec/rley/parser/parse_walker_factory_spec.rb +235 -0
  59. data/spec/rley/parser/state_set_spec.rb +4 -0
  60. data/spec/rley/sppf/alternative_node_spec.rb +72 -0
  61. data/spec/rley/sppf/antecedence_graph.rb +87 -0
  62. data/spec/rley/sppf/forest_representation.rb +136 -0
  63. data/spec/rley/sppf/gfg_representation.rb +111 -0
  64. data/spec/rley/sppf/non_terminal_node_spec.rb +64 -0
  65. data/spec/rley/support/ambiguous_grammar_helper.rb +36 -36
  66. data/spec/rley/support/expectation_helper.rb +36 -0
  67. data/spec/rley/support/grammar_helper.rb +28 -0
  68. data/spec/rley/support/grammar_sppf_helper.rb +25 -0
  69. data/spec/rley/syntax/grammar_builder_spec.rb +5 -0
  70. data/spec/rley/syntax/non_terminal_spec.rb +4 -0
  71. data/spec/rley/syntax/terminal_spec.rb +4 -0
  72. metadata +58 -2
@@ -0,0 +1,148 @@
1
+ require_relative '../gfg/start_vertex'
2
+ require_relative '../gfg/end_vertex'
3
+ require_relative '../gfg/item_vertex'
4
+
5
+ module Rley # This module is used as a namespace
6
+ module Parser # This module is used as a namespace
7
+
8
+ # Responsibilities:
9
+ # - To know whether the vertex is a start, end or item vertex
10
+ # - To know the next symbol to expect
11
class ParseEntry
  # Link to a vertex of the GFG
  attr_reader(:vertex)

  # Links to preceding parse entries
  attr_reader(:antecedents)

  # The position in the input that matches the beginning of the rhs
  # of the production.
  attr_reader(:origin)

  # @param aVertex [Object] the GFG vertex this entry refers to (not nil).
  # @param theOrigin [Object] the origin position; stored as given.
  # @raise [StandardError] when aVertex is nil.
  def initialize(aVertex, theOrigin)
    @vertex = valid_vertex(aVertex)
    @origin = theOrigin
    @antecedents = []
  end

  # Add a link to an antecedent parse entry.
  # @param anAntecedent [ParseEntry] the preceding entry to register.
  # @return [Array] the updated list of antecedents.
  def add_antecedent(anAntecedent)
    antecedents << anAntecedent
  end

  # Equality comparison. A parse entry behaves as a value object:
  # two entries are equal when both their vertex and origin are equal.
  # Antecedents do not take part in the comparison.
  # @param other [Object] the object to compare with.
  # @return [Boolean] false when other doesn't expose a vertex and an origin
  #   (e.g. nil), instead of raising a NoMethodError.
  def ==(other)
    return true if equal?(other)
    return false unless other.respond_to?(:vertex) && other.respond_to?(:origin)

    (vertex == other.vertex) && (origin == other.origin)
  end

  # Returns true iff the vertex is a start vertex (i.e. of the form: .X)
  def start_entry?
    vertex.kind_of?(GFG::StartVertex)
  end

  # Returns true iff the vertex is at the start of rhs
  # (i.e. of the form: X => .Y)
  def entry_entry?
    return false unless vertex.kind_of?(GFG::ItemVertex)

    vertex.dotted_item.at_start?
  end

  # Returns true iff the vertex corresponds to a dotted item
  # X => Y
  def dotted_entry?
    vertex.kind_of?(GFG::ItemVertex)
  end

  # Returns true iff the vertex is at end of rhs (i.e. of the form: X => Y.)
  # The decision is delegated to the vertex itself.
  def exit_entry?
    vertex.complete?
  end

  # Returns true iff the vertex is an end vertex (i.e. of the form: X.)
  def end_entry?
    vertex.kind_of?(GFG::EndVertex)
  end

  # Return the symbol before the dot (if any), as reported by the vertex.
  def prev_symbol
    vertex.prev_symbol
  end

  # Return the symbol after the dot (if any), as reported by the vertex.
  def next_symbol
    vertex.next_symbol
  end

  # Return true if the entry has no antecedent entry
  def orphan?
    antecedents.empty?
  end

  # Give a String representation of itself.
  # The format of the text representation is
  # "label of the vertex" + " | " + origin
  # @return [String]
  def to_s
    "#{vertex.label} | #{origin}"
  end

  private

  # Return the validated GFG vertex
  # @raise [StandardError] when the vertex is nil.
  def valid_vertex(aVertex)
    raise StandardError, 'GFG vertex cannot be nil' if aVertex.nil?

    aVertex
  end
end # class
145
+ end # module
146
+ end # module
147
+
148
+ # End of file
@@ -0,0 +1,104 @@
1
+ require 'forwardable' # Delegation
2
+
3
+ require_relative '../syntax/terminal'
4
+ require_relative '../syntax/non_terminal'
5
+
6
+ module Rley # This module is used as a namespace
7
+ module Parser # This module is used as a namespace
8
+ # Responsibilities:
9
+ # - To know all the parse entries in the set
10
class ParseEntrySet
  extend Forwardable
  def_delegators :entries, :empty?, :size, :first, :last, :pop, :each

  # The set of parse entries
  attr_reader :entries

  def initialize
    @entries = []
    # Pairs of the form:
    # terminal symbol => [ parse entries expecting the terminal ]
    @entries4term = Hash.new { |hash, key| hash[key] = [] }
    # Pairs of the form:
    # non terminal symbol => [ parse entries expecting the non-terminal ]
    @entries4n_term = Hash.new { |hash, key| hash[key] = [] }
  end

  # Access the entry at given position
  def [](index)
    entries[index]
  end

  # Return the entries expecting the given terminal.
  # @return [Array] an empty array when no entry expects the terminal
  #   (fetch is used so that the lookup table isn't extended by a query).
  def entries4term(aTerminal)
    @entries4term.fetch(aTerminal, [])
  end

  # Return the entries expecting the given non-terminal.
  # @return [Array] an empty array when no entry expects the non-terminal.
  def entries4n_term(aNonTerminal)
    @entries4n_term.fetch(aNonTerminal, [])
  end

  # Append the given entry (if it isn't yet in the set)
  # to the list of parse entries
  # @param anEntry [ParseEntry] the parse entry to push.
  # @return [ParseEntry] the entry already in the set that is equal to
  #   the given one, otherwise the given entry itself (once added).
  def push_entry(anEntry)
    match = entries.find { |entry| entry == anEntry } # TODO: check performance
    return match if match

    @entries << anEntry
    add_lookup4symbol(anEntry)
    anEntry
  end

  # Return an Array of Arrays of ambiguous parse entries.
  # @return [Array<Array>] groups of two or more exit entries that share
  #   the same lhs symbol; empty when there is no such group.
  def ambiguities
    complete_entries = entries.select(&:exit_entry?)
    return [] if complete_entries.size <= 1

    # Group parse entries by (the identity of) the lhs symbol of their
    # dotted item.
    # NOTE(review): an earlier comment mentioned grouping by origin too,
    # but the origin was never part of the key -- confirm the intent.
    groupings = complete_entries.group_by do |entry|
      entry.vertex.dotted_item.lhs.object_id
    end

    # Retain the groups having more than one element.
    groupings.values.select { |a_group| a_group.size > 1 }
  end

  # The list of distinct expected terminal symbols. An expected symbol
  # is the symbol just after the dot in a parse entry of the set.
  def expected_terminals
    @entries4term.keys
  end

  private

  # Register the entry in the lookup table that matches the kind
  # of symbol it expects. Nothing is registered when the entry
  # expects no symbol.
  def add_lookup4symbol(anEntry)
    symb = anEntry.next_symbol
    case symb
    when Syntax::Terminal
      @entries4term[symb] << anEntry
    when Syntax::NonTerminal
      @entries4n_term[symb] << anEntry
    end
  end
end # class
102
+ end # module
103
+ end # module
104
+ # End of file
@@ -0,0 +1,56 @@
1
+ module Rley # This module is used as a namespace
2
+ module Parser # This module is used as a namespace
3
+ # Helper class that keeps track of the parse entries used
4
+ # while a Parsing instance is constructing a parse forest.
5
class ParseEntryTracker
  # The index of the current entry set
  attr_reader(:entry_set_index)

  # The current parse entry
  attr_reader(:parse_entry)

  # The already processed entries from current entry set
  # (a Hash with pairs of the form: parse entry => true)
  attr_reader(:processed_entries)

  # Constructor.
  # @param aEntrySetIndex [Integer] the index of the entry set to start with.
  def initialize(aEntrySetIndex)
    self.entry_set_index = aEntrySetIndex
  end

  # Write accessor. Sets the value of the entry set index.
  # As a side effect, the record of processed entries is reset.
  def entry_set_index=(anIndex)
    @entry_set_index = anIndex
    @processed_entries = {}
  end

  # Write accessor. Set the given parse entry as the current one
  # and mark it as processed.
  # @raise [StandardError] when the given entry is nil.
  def parse_entry=(aParseEntry)
    raise StandardError, 'Nil parse entry' if aParseEntry.nil?

    @parse_entry = aParseEntry
    processed_entries[parse_entry] = true
  end

  # Take the first provided entry that wasn't processed yet
  # and make it the current one.
  # @param theEntrys [Enumerable] the candidate parse entries.
  # @raise [StandardError] when every candidate was already processed.
  def select_entry(theEntrys)
    a_entry = theEntrys.find { |candidate| !processed_entries.include?(candidate) }
    self.parse_entry = a_entry
  end

  # The dotted item for the current parse entry.
  # A parse entry doesn't hold a dotted item itself: it is reached
  # through the GFG vertex of the entry.
  def curr_dotted_item
    parse_entry.vertex.dotted_item
  end

  # The symbol before the dot in the current dotted item (if any).
  def symbol_on_left
    curr_dotted_item.prev_symbol
  end

  # Notification that one begins with the previous entry set
  def to_prev_entry_set
    self.entry_set_index = entry_set_index - 1
  end
end # class
53
+ end # module
54
+ end # module
55
+
56
+ # End of file
@@ -0,0 +1,229 @@
1
+ require_relative '../sppf/epsilon_node'
2
+ require_relative '../sppf/non_terminal_node'
3
+ require_relative '../sppf/alternative_node'
4
+ require_relative '../sppf/parse_forest'
5
+
6
+ module Rley # This module is used as a namespace
7
+ module Parser # This module is used as a namespace
8
+ # Builder GoF pattern. Builder pattern builds a complex object
9
+ # (say, a parse forest) from simpler objects (terminal and non-terminal
10
+ # nodes) and using a step by step approach.
11
class ParseForestBuilder
  # Link to parse result
  attr_reader(:parsing)

  # Link to forest object (assigned when the first end entry is visited
  # while the current path is empty)
  attr_reader(:forest)

  # Link to current path (an Array of forest nodes; last one is
  # the current parent)
  attr_reader(:curr_path)

  # A hash with pairs of the form: visited parse entry => forest node
  attr_reader(:entry2node)

  # A hash with pairs of the form:
  # parent end entry => path to alternative node
  # This is needed for synchronizing backtracking
  attr_reader(:entry2path_to_alt)

  # @param aParsingResult [Object] the parse result; the builder reads
  #   its chart (for the last index) and its tokens.
  def initialize(aParsingResult)
    @parsing = aParsingResult
    @curr_path = []
    @entry2node = {}
    @entry2path_to_alt = {}
  end

  # Handle a notification about a parse entry. The processing
  # is dispatched according to the kind of GFG vertex of the entry.
  # @param anEvent [Symbol] one of: :visit, :backtrack, :revisit
  # @param anEntry [ParseEntry] the entry the event is about
  # @param anIndex [Integer] the index of the entry set of the entry
  # @raise [NotImplementedError] when the vertex kind is unsupported.
  def receive_event(anEvent, anEntry, anIndex)
    # puts "Event: #{anEvent} #{anEntry} #{anIndex}"
    case anEntry.vertex
    when GFG::StartVertex
      process_start_entry(anEvent, anEntry, anIndex)

    when GFG::ItemVertex
      process_item_entry(anEvent, anEntry, anIndex)

    when GFG::EndVertex
      process_end_entry(anEvent, anEntry, anIndex)
    else
      raise NotImplementedError,
            "Unsupported vertex type: #{anEntry.vertex.class}"
    end
  end

  # Return the current_parent node (nil when the path is empty)
  def curr_parent
    curr_path.last
  end

  private

  # A start entry closes the non-terminal being built: drop the
  # alternative nodes on top of the path, then one more node.
  # The arguments are unused; they are kept for a uniform signature.
  def process_start_entry(_anEvent, _anEntry, _anIndex)
    curr_path.pop while curr_parent.kind_of?(SPPF::AlternativeNode)
    curr_path.pop
  end

  # Process an entry with an end vertex (pattern: X.).
  # @raise [NotImplementedError] when the event is unknown.
  def process_end_entry(anEvent, anEntry, _anIndex)
    case anEvent
    when :visit
      if curr_path.empty?
        # Build parse forest with root node derived from the
        # accepting parse entry.
        @forest = create_forest(anEntry)
      else
        # Set the lower bound of the range of the current parent
        # to the origin of the entry
        curr_parent.range.assign(low: anEntry.origin)
        @entry2node[anEntry] = curr_parent
        if anEntry.antecedents.size > 1
          # Store current path for later backtracking
          # puts "Store backtrack context #{anEntry}"
          # puts "path [#{curr_path.join(', ')}]"
          entry2path_to_alt[anEntry] = curr_path.dup
          curr_parent.refinement = :or

          create_alternative_node(anEntry.antecedents.first)
        end
      end

    when :backtrack
      # Restore path
      @curr_path = entry2path_to_alt[anEntry].dup
      # puts "Restore path #{curr_path.join(', ')}]"
      # The number of alternatives built so far gives the index
      # of the next antecedent to explore
      antecedent_index = curr_parent.subnodes.size
      # puts "Current parent #{curr_parent.to_string(0)}"
      # puts "Antecedent index #{antecedent_index}"
      create_alternative_node(anEntry.antecedents[antecedent_index])

    when :revisit
      # Remove most recent entry in path
      @curr_path.pop

      # Remove also its reference in parent
      curr_parent.subnodes.pop

      # Retrieve the already existing node corresponding to re-visited entry
      popular = @entry2node[anEntry]

      # Share with parent
      curr_parent.add_subnode(popular)

    else
      raise NotImplementedError, "Unsupported event: #{anEvent.inspect}"
    end
  end

  # Process a dotted item entry (pattern is: X => α . β).
  # The action depends on the symbol just before the dot:
  # - a terminal: a token node is added to the current parent;
  # - a non-terminal: a non-terminal node is added to the current
  #   parent and becomes the new current parent;
  # - no symbol (dot at the beginning): an epsilon node is added, but
  #   only when the production is empty.
  def process_item_entry(_anEvent, anEntry, anIndex)
    # Retrieve the grammar symbol before the dot (if any)
    prev_symbol = anEntry.prev_symbol
    case prev_symbol
    when Syntax::Terminal
      # Create a token node and add it as subnode of current_parent
      create_token_node(anEntry, anIndex)

    when Syntax::NonTerminal
      # Create a node with the non-terminal before the dot,
      # with upper bound set to the index of the entry set,
      # add the new node as subnode of current_parent,
      # append the new node to the curr_path
      range = { high: anIndex }
      create_non_terminal_node(anEntry, range, prev_symbol)

    when NilClass # Dot at the beginning of production
      if anEntry.vertex.dotted_item.production.empty?
        # Empty rhs => create an epsilon node
        create_epsilon_node(anEntry, anIndex)
      end
    end
  end

  # Create a parse forest. Its root is a non-terminal node that spans
  # from position 0 up to the last index of the chart.
  def create_forest(anEntry)
    full_range = { low: 0, high: parsing.chart.last_index }
    root_node = create_non_terminal_node(anEntry, full_range)
    Rley::SPPF::ParseForest.new(root_node)
  end

  # Factory method. Build and return an SPPF non-terminal node.
  # The node is recorded for the entry, added as subnode of the current
  # parent (if any) and appended to the current path.
  # @param nonTSymb [Object, nil] the non-terminal of the node; when nil,
  #   the non-terminal of the vertex of the entry is taken.
  def create_non_terminal_node(anEntry, aRange, nonTSymb = nil)
    non_terminal = nonTSymb.nil? ? anEntry.vertex.non_terminal : nonTSymb
    new_node = Rley::SPPF::NonTerminalNode.new(non_terminal, aRange)
    entry2node[anEntry] = new_node
    add_subnode(new_node)

    new_node
  end

  # Add an alternative node to the forest. The node gets the range
  # of the current parent and becomes the new current parent.
  def create_alternative_node(anEntry)
    alternative = Rley::SPPF::AlternativeNode.new(anEntry.vertex,
                                                  curr_parent.range)
    add_subnode(alternative)

    alternative
  end

  # Build a node for the token at position anIndex - 1 and record
  # the node retained by the forest for the given entry.
  def create_token_node(anEntry, anIndex)
    token_position = anIndex - 1
    curr_token = parsing.tokens[token_position]
    new_node = SPPF::TokenNode.new(curr_token, token_position)
    candidate = add_node_to_forest(new_node)
    entry2node[anEntry] = candidate

    candidate
  end

  # Build an epsilon node at the given position and record
  # the node retained by the forest for the given entry.
  def create_epsilon_node(anEntry, anIndex)
    new_node = SPPF::EpsilonNode.new(anIndex)
    candidate = add_node_to_forest(new_node)
    entry2node[anEntry] = candidate

    candidate
  end

  # Add the given node if not yet present in parse forest.
  # When a node with the same key is already known by the forest,
  # that existing node is used instead. In both cases, the retained node
  # becomes a subnode of the current parent (without joining the path).
  # @return [Object] the node retained by the forest.
  def add_node_to_forest(aNode)
    key_node = aNode.key
    if forest.include?(key_node)
      new_node = forest.key2node[key_node]
    else
      new_node = aNode
      forest.key2node[key_node] = new_node
      # puts "FOREST ADD #{key_node}"
    end
    add_subnode(new_node, false)

    new_node
  end

  # Add the given node as sub-node of current parent node
  # Optionally add the node to the current path
  def add_subnode(aNode, addToPath = true)
    curr_parent.add_subnode(aNode) unless curr_path.empty?
    curr_path << aNode if addToPath
  end
end # class
226
+ end # module
227
+ end # module
228
+
229
+ # End of file