rley 0.2.15 → 0.3.00

Sign up to get free protection for your applications and to get access to all the features.
Files changed (72) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/lib/rley/constants.rb +1 -1
  4. data/lib/rley/gfg/call_edge.rb +30 -0
  5. data/lib/rley/gfg/edge.rb +4 -0
  6. data/lib/rley/gfg/end_vertex.rb +1 -1
  7. data/lib/rley/gfg/epsilon_edge.rb +0 -4
  8. data/lib/rley/gfg/grm_flow_graph.rb +32 -7
  9. data/lib/rley/gfg/item_vertex.rb +71 -25
  10. data/lib/rley/gfg/non_terminal_vertex.rb +10 -1
  11. data/lib/rley/gfg/return_edge.rb +31 -0
  12. data/lib/rley/gfg/scan_edge.rb +2 -1
  13. data/lib/rley/gfg/shortcut_edge.rb +26 -0
  14. data/lib/rley/gfg/start_vertex.rb +2 -2
  15. data/lib/rley/gfg/vertex.rb +27 -1
  16. data/lib/rley/parse_forest_visitor.rb +115 -0
  17. data/lib/rley/parser/base_parser.rb +27 -0
  18. data/lib/rley/parser/dotted_item.rb +11 -0
  19. data/lib/rley/parser/earley_parser.rb +3 -15
  20. data/lib/rley/parser/gfg_chart.rb +106 -0
  21. data/lib/rley/parser/gfg_earley_parser.rb +139 -0
  22. data/lib/rley/parser/gfg_parsing.rb +384 -0
  23. data/lib/rley/parser/parse_entry.rb +148 -0
  24. data/lib/rley/parser/parse_entry_set.rb +104 -0
  25. data/lib/rley/parser/parse_entry_tracker.rb +56 -0
  26. data/lib/rley/parser/parse_forest_builder.rb +229 -0
  27. data/lib/rley/parser/parse_forest_factory.rb +54 -0
  28. data/lib/rley/parser/parse_walker_factory.rb +237 -0
  29. data/lib/rley/ptree/token_range.rb +14 -1
  30. data/lib/rley/sppf/alternative_node.rb +34 -0
  31. data/lib/rley/sppf/composite_node.rb +27 -0
  32. data/lib/rley/sppf/epsilon_node.rb +27 -0
  33. data/lib/rley/sppf/leaf_node.rb +12 -0
  34. data/lib/rley/sppf/non_terminal_node.rb +38 -0
  35. data/lib/rley/sppf/parse_forest.rb +48 -0
  36. data/lib/rley/sppf/sppf_node.rb +24 -0
  37. data/lib/rley/sppf/token_node.rb +29 -0
  38. data/lib/rley/syntax/grammar_builder.rb +16 -12
  39. data/lib/rley/syntax/grm_symbol.rb +6 -0
  40. data/lib/rley/syntax/terminal.rb +5 -0
  41. data/spec/rley/gfg/call_edge_spec.rb +51 -0
  42. data/spec/rley/gfg/end_vertex_spec.rb +1 -0
  43. data/spec/rley/gfg/grm_flow_graph_spec.rb +24 -2
  44. data/spec/rley/gfg/item_vertex_spec.rb +75 -6
  45. data/spec/rley/gfg/non_terminal_vertex_spec.rb +14 -0
  46. data/spec/rley/gfg/return_edge_spec.rb +51 -0
  47. data/spec/rley/gfg/shortcut_edge_spec.rb +43 -0
  48. data/spec/rley/gfg/vertex_spec.rb +52 -37
  49. data/spec/rley/parse_forest_visitor_spec.rb +238 -0
  50. data/spec/rley/parser/dotted_item_spec.rb +29 -8
  51. data/spec/rley/parser/gfg_chart_spec.rb +138 -0
  52. data/spec/rley/parser/gfg_earley_parser_spec.rb +918 -0
  53. data/spec/rley/parser/gfg_parsing_spec.rb +565 -0
  54. data/spec/rley/parser/parse_entry_set_spec.rb +179 -0
  55. data/spec/rley/parser/parse_entry_spec.rb +208 -0
  56. data/spec/rley/parser/parse_forest_builder_spec.rb +382 -0
  57. data/spec/rley/parser/parse_forest_factory_spec.rb +81 -0
  58. data/spec/rley/parser/parse_walker_factory_spec.rb +235 -0
  59. data/spec/rley/parser/state_set_spec.rb +4 -0
  60. data/spec/rley/sppf/alternative_node_spec.rb +72 -0
  61. data/spec/rley/sppf/antecedence_graph.rb +87 -0
  62. data/spec/rley/sppf/forest_representation.rb +136 -0
  63. data/spec/rley/sppf/gfg_representation.rb +111 -0
  64. data/spec/rley/sppf/non_terminal_node_spec.rb +64 -0
  65. data/spec/rley/support/ambiguous_grammar_helper.rb +36 -36
  66. data/spec/rley/support/expectation_helper.rb +36 -0
  67. data/spec/rley/support/grammar_helper.rb +28 -0
  68. data/spec/rley/support/grammar_sppf_helper.rb +25 -0
  69. data/spec/rley/syntax/grammar_builder_spec.rb +5 -0
  70. data/spec/rley/syntax/non_terminal_spec.rb +4 -0
  71. data/spec/rley/syntax/terminal_spec.rb +4 -0
  72. metadata +58 -2
@@ -0,0 +1,54 @@
1
+ require_relative 'parse_walker_factory'
2
+ require_relative 'parse_forest_builder'
3
+
4
+ module Rley # This module is used as a namespace
5
+ module Parser # This module is used as a namespace
6
# Utility class that helps to create a ParseForest from
# a given Parsing object.
# It wires together a walker (an Enumerator built by ParseWalkerFactory)
# and a ParseForestBuilder: every event produced by the walker is
# forwarded to the builder.
class ParseForestFactory
  # Link to Parsing object (= results of recognizer)
  attr_reader(:parsing)

  # @param aParsingResult [Object] The parsing object. It is stored as is
  #   and later handed over to the walker factory and to the forest builder.
  def initialize(aParsingResult)
    @parsing = aParsingResult
  end

  # Factory method that produces the parse forest.
  # Pulls the events one by one from the walker and passes each of them
  # (splatted) to the builder's receive_event method.
  # @return [Object] the value of the builder's forest attribute
  def build_parse_forest()
    a_walker = walker(parsing)
    a_builder = builder(parsing)

    # Kernel#loop rescues the StopIteration raised by Enumerator#next
    # once the walker is exhausted; no explicit rescue clause is needed.
    loop do
      event = a_walker.next
      a_builder.receive_event(*event)
    end

    return a_builder.forest
  end

  private

  # Create a Parsing walker, that is, an object
  # that will iterate over the relevant nodes (= parsing entries)
  # of a GFGParsing
  # @param aParseResult [Object] the parsing object to walk through
  # @return [Enumerator] the result of ParseWalkerFactory#build_walker
  def walker(aParseResult)
    ParseWalkerFactory.new.build_walker(aParseResult)
  end

  # Create a Builder, that is, an object
  # that will create piece by piece the forest
  # @param aParseResult [Object] the parsing object given to the builder
  # @return [ParseForestBuilder]
  def builder(aParseResult)
    ParseForestBuilder.new(aParseResult)
  end
end # class
51
+ end # module
52
+ end # module
53
+
54
+ # End of file
@@ -0,0 +1,237 @@
1
+ require 'set'
2
+
3
+ module Rley # This module is used as a namespace
4
+ module Parser # This module is used as a namespace
5
# Mutable state shared by the steps of a parse walk.
ParseWalkerContext = Struct.new(
  # Parse entry currently being visited
  :curr_entry,
  # Sigma set index of current parse entry
  :entry_set_index,
  # The set of already visited parse entries
  :visitees,
  # A Hash non-terminal symbol => start entry
  :nterm2start,
  # A stack of parse entries
  :return_stack,
  # A stack of WalkerBacktrackpoint objects (pushed when a backtrack
  # point is added, popped once all its antecedents were tried)
  :backtrack_points
)
13
+
14
# Snapshot of the walker state taken at a parse entry that has
# several antecedents, so that the walk can resume from there later.
WalkerBacktrackpoint = Struct.new(
  # Sigma set index of current parse entry
  :entry_set_index,
  # A stack of parse entries
  :return_stack,
  # The parse entry being visited
  :visitee,
  # Index (in visitee.antecedents) of the antecedent selected so far
  :antecedent_index
)
20
+
21
+
22
# A factory that creates an enumerator
# that itself walks through a given parsing graph.
# The walker yields visit events.
# Terminology warning: this class implements an external iterator
# for a given GFGParsing object. In other words, its instances are objects
# distinct from the GFGParsing.
# This is different from the internal iterators, usually implemented in Ruby
# with an each method.
# Allows to perform a backwards traversal over the relevant parse entries.
# Backwards traversal means that the traversal starts from the accepting
# (final) parse entries and goes to the initial parse entry.
# Relevant parse entries are parse entries that "count" in the parse
# (i.e. they belong to a path that leads to the accepting parse entry)
class ParseWalkerFactory
  # Build an Enumerator that will yield the parse entries as it
  # walks backwards on the parse graph
  # @param aParseResult [Object] the parsing; it must respond to
  #   chart (itself responding to last_index) and to accepting_entry
  # @return [Enumerator] an enumerator yielding event Arrays of the form
  #   [:visit | :revisit | :backtrack, parse entry, sigma set index]
  def build_walker(aParseResult)
    # Local context for the enumerator
    ctx = init_context(aParseResult)

    walker = Enumerator.new do |receiver| # 'receiver' is a Yielder
      # At this point: current entry == accepting entry

      loop do
        event = visit_entry(ctx.curr_entry, ctx)
        receiver << event unless event.nil?

        if ctx.curr_entry.orphan? # No antecedent?...
          break if ctx.backtrack_points.empty?

          receiver << use_backtrack_point(ctx)
          # A visit can be silent (nil event) when the entry was already
          # visited: never push nil to the consumer.
          event = visit_entry(ctx.curr_entry, ctx)
          receiver << event unless event.nil?
        end

        result = jump_to_antecedent(ctx, aParseResult)
        # Emit detection of scan edge if any...
        receiver << result[0] if result.size > 1
        ctx.curr_entry = result.last
      end
    end

    return walker
  end

  private

  # Context factory method
  # @param aParseResult [Object] the parsing to walk through
  # @return [ParseWalkerContext] context positioned on the accepting entry
  #   and on the last sigma set index of the chart
  def init_context(aParseResult)
    context = ParseWalkerContext.new
    context.entry_set_index = aParseResult.chart.last_index
    context.curr_entry = aParseResult.accepting_entry
    context.visitees = Set.new
    context.nterm2start = {}
    context.return_stack = []
    context.backtrack_points = []

    return context
  end

  # Visit the given parse entry and update the context accordingly.
  # @param anEntry [Object] the parse entry to visit
  # @param aContext [ParseWalkerContext]
  # @return [Array, nil] an event [event symbol, entry, index] or nil
  #   when an already visited start/item entry is met again
  # @raise [NotImplementedError] when a revisited entry is linked to
  #   an unexpected kind of vertex
  def visit_entry(anEntry, aContext)
    index = aContext.entry_set_index

    # Remember the start entry of each non-terminal: needed when
    # the related end entry is revisited.
    if anEntry.start_entry?
      aContext.nterm2start[anEntry.vertex.non_terminal] = anEntry
    end

    unless aContext.visitees.include?(anEntry)
      # first time visit
      aContext.visitees << anEntry
      return [:visit, anEntry, index]
    end

    # multiple time visit
    case anEntry.vertex
    when GFG::EndVertex
      # Jump to related start entry...
      new_entry = aContext.nterm2start[anEntry.vertex.non_terminal]
      aContext.curr_entry = new_entry
      aContext.entry_set_index = new_entry.origin
      return [:revisit, anEntry, index]

    when GFG::StartVertex, GFG::ItemVertex
      # Skip start and item entries while revisiting
      return nil
    else
      fail NotImplementedError
    end
  end

  # Placeholder: currently always returns nil.
  # NOTE(review): looks like an unfinished scan edge detection - confirm.
  # @param ctx [ParseWalkerContext]
  # @return [nil]
  def detect_scan_edge(ctx)
    return nil unless ctx.curr_entry.dotted_entry?
  end

  # Given the current entry from context object
  # Go to the parse entry that is one of its antecedent
  # The context object is updated
  # @param aContext [ParseWalkerContext]
  # @param aParseResult [Object] the parsing being walked
  # @return [Array] empty when the current entry has no antecedent,
  #   otherwise an Array whose last element is the selected antecedent
  def jump_to_antecedent(aContext, aParseResult)
    return [] if aContext.curr_entry.orphan?

    if aContext.curr_entry.antecedents.size == 1
      entries = antecedent_of(aContext, aParseResult)
    else
      entries = select_antecedent(aContext)
    end

    return entries
  end

  # Handle the case of an entry having one antecedent only
  # @param aContext [ParseWalkerContext]
  # @param aParseResult [Object] the parsing being walked (not used here)
  # @return [Array] one-element Array with the antecedent entry
  def antecedent_of(aContext, aParseResult)
    new_entry = aContext.curr_entry.antecedents.first

    case new_entry.vertex
    when GFG::EndVertex
      # Return edge encountered
      # Push current entry onto stack
      aContext.return_stack << aContext.curr_entry
    else
      traversed_edge = new_entry.vertex.edges.first
      if traversed_edge.is_a?(GFG::ScanEdge)
        # Scan edge encountered, decrease sigma set index
        aContext.entry_set_index -= 1
      end
    end

    return [new_entry]
  end

  # Handle the case of an entry having multiple antecedents
  # @param aContext [ParseWalkerContext]
  # @return [Array] one-element Array with the selected antecedent
  # @raise [StandardError] when the current entry is linked neither
  #   to an end vertex nor to a start vertex
  def select_antecedent(aContext)
    case aContext.curr_entry.vertex
    when GFG::EndVertex
      # An end vertex with multiple antecedents requires
      # a backtrack point for a correct graph traversal
      bp = add_backtrack_point(aContext)
      new_entry = bp.visitee.antecedents[bp.antecedent_index]

    when GFG::StartVertex
      # A start vertex with multiple antecedents: select the calling
      # entry that matches the top of the return stack
      new_entry = select_calling_entry(aContext)
    else
      fail StandardError, "Internal error"
    end

    return [new_entry]
  end

  # Record the current walker state as a new backtrack point
  # pointing to the first antecedent of the current entry.
  # @param aContext [ParseWalkerContext]
  # @return [WalkerBacktrackpoint] the backtrack point just pushed
  def add_backtrack_point(aContext)
    bp = WalkerBacktrackpoint.new

    bp.entry_set_index = aContext.entry_set_index
    # Copy: the context's stack will be modified later on
    bp.return_stack = aContext.return_stack.dup
    bp.visitee = aContext.curr_entry
    bp.antecedent_index = 0
    aContext.backtrack_points << bp

    return bp
  end

  # Resume the walk from the most recent backtrack point, moving on
  # to the next antecedent of its visitee.
  # @param aContext [ParseWalkerContext]
  # @return [Array] the event [:backtrack, visitee, sigma set index]
  def use_backtrack_point(aContext)
    bp = aContext.backtrack_points.last
    bp.antecedent_index += 1

    # Restore state
    aContext.entry_set_index = bp.entry_set_index
    aContext.return_stack = bp.return_stack.dup
    aContext.curr_entry = bp.visitee.antecedents[bp.antecedent_index]

    # Drop backtrack point if useless in future
    if bp.antecedent_index == bp.visitee.antecedents.size - 1
      aContext.backtrack_points.pop
    end

    # Emit a backtrack event
    return [:backtrack, bp.visitee, aContext.entry_set_index]
  end

  # From the antecedent of the current parse entry
  # Retrieve the one that corresponds to the parse entry on
  # top of return stack
  # Observation: calling parse entry is a parse entry linked
  # to an item vertex
  # @param aContext [ParseWalkerContext] its return stack is popped
  # @return [Object] the matching antecedent, or the current entry
  #   when no antecedent matches
  def select_calling_entry(aContext)
    # Retrieve top of stack
    tos = aContext.return_stack.pop
    tos_dotted_item = tos.vertex.dotted_item

    antecedents = aContext.curr_entry.antecedents
    new_entry = antecedents.find do |antecd|
      item = antecd.vertex.dotted_item
      (antecd.origin == tos.origin) && tos_dotted_item.successor_of?(item)
    end

    # TODO: double-check validity of next line
    new_entry = aContext.curr_entry unless new_entry

    return new_entry
  end
end # class
236
+ end # module
237
+ end # module
@@ -1,5 +1,13 @@
1
1
  module Rley # This module is used as a namespace
2
2
  module PTree # This module is used as a namespace
3
+ # A token range (also called an extent) represents an interval
4
+ # of token positions that is matched by a given grammar symbol.
5
+ # For instance consider the expression E: 3 + 11,
6
+ # let's assume that the integer literal '3' is the fifth input token and
7
+ # that the '+' and '11' tokens are respectively at position 6 and 7;
8
+ # then the token range associated with E is [5, 7]
9
+ # While the parse tree/forest is being constructed the boundaries of the token range
10
+ # can be temporarily undefined (= set to nil)
3
11
  class TokenRange
4
12
  # The index of the lower bound of token range
5
13
  attr_reader(:low)
@@ -58,7 +66,12 @@ module Rley # This module is used as a namespace
58
66
  high_text = high.nil? ? '?' : high.to_s
59
67
 
60
68
  return "[#{low_text}, #{high_text}]"
61
- end
69
+ end
70
+
71
# Generate a String identifier for the range: the object ids of
# the low and high bounds, separated by a dash.
def keystr()
  [low, high].map(&:object_id).join('-')
end
62
75
 
63
76
  private
64
77
 
@@ -0,0 +1,34 @@
1
+ require_relative 'composite_node'
2
+
3
+ module Rley # This module is used as a namespace
4
+ module SPPF # This module is used as a namespace
5
# A parse forest node that is a child of a parent node
# having an :or refinement.
class AlternativeNode < CompositeNode
  # label: GFG vertex label
  # symbol: link to lhs symbol
  attr_reader :label, :symbol

  # @param aVertex [ItemVertex] A GFG vertex that corresponds to
  #   a dotted item (with the dot at the end) for the alternative under
  #   consideration.
  # @param aRange [TokenRange]
  def initialize(aVertex, aRange)
    super(aRange)
    @label = aVertex.label
    @symbol = aVertex.dotted_item.lhs
  end

  # Emit a (formatted) string representation of the node.
  # Mainly used for diagnosis/debugging purposes.
  # @param indentation [Object] passed through to range.to_string
  # @return [String]
  def to_string(indentation)
    format('Alt(%s)%s', label, range.to_string(indentation))
  end
end # class
32
+ end # module
33
+ end # module
34
+ # End of file
@@ -0,0 +1,27 @@
1
+ require_relative 'sppf_node'
2
+
3
+ module Rley # This module is used as a namespace
4
+ module SPPF # This module is used as a namespace
5
# Abstract class. Generalizes the forest nodes that can have
# one or more child nodes.
class CompositeNode < SPPFNode
  # Array of sub-nodes.
  attr_reader :subnodes

  # @param aRange [Object] range argument handed over to the superclass
  def initialize(aRange)
    super
    @subnodes = []
  end

  # Insert the given node in front of the existing sub-nodes.
  # @param aSubnode [Object] the child node to add
  # @return [Array] the updated array of sub-nodes
  def add_subnode(aSubnode)
    subnodes.insert(0, aSubnode)
  end

  # Memoized key of the node: its string representation
  # computed with an indentation of 0.
  def key()
    @key = to_string(0) unless @key
    @key
  end
end # class
25
+ end # module
26
+ end # module
27
+ # End of file
@@ -0,0 +1,27 @@
1
+ require_relative 'leaf_node'
2
+
3
+ module Rley # This module is used as a namespace
4
+ module SPPF # This module is used as a namespace
5
# A leaf node of a parse forest that matches an empty
# string from the input.
class EpsilonNode < LeafNode
  # aPosition is the position of the token in the input stream.
  # Both bounds of the node's range are set to that position.
  def initialize(aPosition)
    super({ low: aPosition, high: aPosition })
  end

  # Emit a (formatted) string representation of the node.
  # Mainly used for diagnosis/debugging purposes.
  # @param indentation [Object] passed through to range.to_string
  # @return [String]
  def to_string(indentation)
    format('_%s', range.to_string(indentation))
  end

  # Memoized key of the node: its string representation
  # computed with an indentation of 0.
  def key()
    @key = to_string(0) unless @key
    @key
  end
end # class
25
+ end # module
26
+ end # module
27
+ # End of file
@@ -0,0 +1,12 @@
1
+ require_relative 'sppf_node'
2
+
3
+ module Rley # This module is used as a namespace
4
+ module SPPF # This module is used as a namespace
5
# Abstract class. Generalizes the forest nodes that
# have no child node.
class LeafNode < SPPFNode; end
10
+ end # module
11
+ end # module
12
+ # End of file
@@ -0,0 +1,38 @@
1
+ require_relative 'composite_node'
2
+
3
+ module Rley # This module is used as a namespace
4
+ module SPPF # This module is used as a namespace
5
# A parse forest node that matches exactly one
# non-terminal symbol.
class NonTerminalNode < CompositeNode
  # Link to the non-terminal symbol
  attr_reader :symbol

  # Indication on how the sub-nodes contribute to the 'success'
  # of parent node. Possible values: :and, :or
  attr_accessor :refinement

  # @param aNonTerminal [Object] the non-terminal symbol of the node
  # @param aRange [Object] range argument handed over to the superclass
  def initialize(aNonTerminal, aRange)
    super(aRange)
    @symbol = aNonTerminal
    @refinement = :and # Default refinement
  end

  # Add a child node: appended at the end when the refinement is :or,
  # otherwise the superclass behaviour applies.
  # @param aSubnode [Object] the child node to add
  def add_subnode(aSubnode)
    return subnodes.push(aSubnode) if refinement == :or

    super(aSubnode)
  end

  # Emit a (formatted) string representation of the node.
  # Mainly used for diagnosis/debugging purposes.
  # @param indentation [Object] passed through to range.to_string
  # @return [String]
  def to_string(indentation)
    [symbol.name, range.to_string(indentation)].join
  end
end # class
36
+ end # module
37
+ end # module
38
+ # End of file