rley 0.1.12 → 0.2.00

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,6 @@
1
1
  require_relative 'chart'
2
- require_relative '../ptree/parse_tree'
2
+ require_relative 'parse_state_tracker'
3
+ require_relative 'parse_tree_builder'
3
4
 
4
5
 
5
6
  module Rley # This module is used as a namespace
@@ -9,7 +10,7 @@ module Rley # This module is used as a namespace
9
10
 
10
11
  # The sequence of input tokens to parse
11
12
  attr_reader(:tokens)
12
-
13
+
13
14
  def initialize(startDottedRule, theTokens)
14
15
  @tokens = theTokens.dup
15
16
  @chart = Chart.new(startDottedRule, tokens.size)
@@ -31,49 +32,32 @@ module Rley # This module is used as a namespace
31
32
  # set state_set_index = index of last state set in chart
32
33
  # Search the completed parse state that corresponds to the full parse
33
34
  def parse_tree()
34
- state_set_index = chart.state_sets.size - 1
35
- parse_state = end_parse_state
36
- full_range = { low: 0, high: state_set_index }
37
- start_production = chart.start_dotted_rule.production
38
- ptree = PTree::ParseTree.new(start_production, full_range)
39
- return ptree if parse_state.nil?
35
+ state_tracker = new_state_tracker
36
+ builder = tree_builder(state_tracker.state_set_index)
37
+
40
38
  loop do
41
- curr_dotted_item = parse_state.dotted_rule
42
39
  # Look at the symbol on left of the dot
43
- curr_symbol = curr_dotted_item.prev_symbol
40
+ curr_symbol = state_tracker.symbol_on_left
41
+
44
42
  case curr_symbol
45
43
  when Syntax::Terminal
46
- state_set_index -= 1
47
- parse_state = predecessor_state_terminal(ptree, state_set_index,
48
- parse_state)
49
-
44
+ state_tracker.to_prev_state_set
45
+ predecessor_state_terminal(curr_symbol, state_tracker, builder)
46
+
50
47
  when Syntax::NonTerminal
51
- # Retrieve complete states with curr_symbol as lhs
52
- new_states = chart[state_set_index].states_rewriting(curr_symbol)
53
- # TODO: make this more robust
54
- parse_state = new_states[0]
55
- curr_dotted_item = parse_state.dotted_rule
56
- # Additional check
57
- if ptree.current_node.symbol != curr_dotted_item.production.lhs
58
- ptree.step_back(state_set_index)
59
- end
60
- ptree.current_node.range = { low: parse_state.origin }
61
- node_range = ptree.current_node.range
62
- ptree.add_children(curr_dotted_item.production, node_range)
63
- link_node_to_token(ptree, state_set_index - 1)
64
-
48
+ completed_state_for(curr_symbol, state_tracker, builder)
49
+
65
50
  when NilClass # No symbol on the left of dot
66
- lhs = curr_dotted_item.production.lhs
67
- new_states = states_expecting(lhs, state_set_index, true)
51
+ # Retrieve all parse states that expect the lhs
52
+ new_states = states_expecting_lhs(state_tracker)
68
53
  break if new_states.empty?
69
- # TODO: make this more robust
70
- parse_state = new_states[0]
71
- ptree.step_up(state_set_index)
72
- ptree.current_node.range = { low: parse_state.origin }
73
- break if ptree.root == ptree.current_node
54
+
55
+ select_expecting_state(new_states, state_tracker, builder)
56
+ break if builder.root == builder.current_node
74
57
  end
75
58
  end
76
- return ptree
59
+
60
+ return builder.parse_tree
77
61
  end
78
62
 
79
63
 
@@ -102,7 +86,7 @@ module Rley # This module is used as a namespace
102
86
  def scanning(aTerminal, aPosition, &nextMapping)
103
87
  curr_token = tokens[aPosition]
104
88
  return unless curr_token.terminal == aTerminal
105
-
89
+
106
90
  states = states_expecting(aTerminal, aPosition, false)
107
91
  states.each do |s|
108
92
  next_item = nextMapping.call(s.dotted_rule)
@@ -114,9 +98,9 @@ module Rley # This module is used as a namespace
114
98
 
115
99
  # This method is called when a parse state at chart entry reaches the end
116
100
  # of a production.
117
- # For every state in chart[aPosition] that is complete
101
+ # For every state in chart[aPosition] that is complete
118
102
  # (i.e. of the form: { dotted_rule: X -> γ •, origin: j}),
119
- # Find states s in chart[j] of the form
103
+ # Find states s in chart[j] of the form
120
104
  # {dotted_rule: Y -> α • X β, origin: i}
121
105
  # In other words, rules that predicted the non-terminal X.
122
106
  # For each s, add to chart[aPosition] a state of the form
@@ -137,22 +121,20 @@ module Rley # This module is used as a namespace
137
121
  def states_expecting(aTerminal, aPosition, toSort)
138
122
  expecting = chart[aPosition].states_expecting(aTerminal)
139
123
  return expecting if !toSort || expecting.size < 2
140
-
124
+
141
125
  # Put predicted states ahead
142
126
  (predicted, others) = expecting.partition { |state| state.predicted? }
143
-
127
+
144
128
  # Sort state in reverse order of their origin value
145
129
  [predicted, others].each do |set|
146
130
  set.sort! { |a,b| b.origin <=> a.origin }
147
131
  end
148
-
132
+
149
133
  return predicted + others
150
134
  end
151
-
152
- private
153
-
154
- # Retrieve full parse state.
155
- # After a successful parse, the last chart entry
135
+
136
+ # Retrieve the parse state that represents a complete, successful parse.
137
+ # After a successful parse, the last chart entry
156
138
  # has a parse state that involves the start production and
157
139
  # has a dot positioned at the end of its rhs.
158
140
  def end_parse_state()
@@ -162,25 +144,80 @@ module Rley # This module is used as a namespace
162
144
  candidate_states = last_chart_entry.states_for(start_production)
163
145
  return candidate_states.find(&:complete?)
164
146
  end
147
+
148
+ private
149
+
150
+ # Factory method. Creates and initializes a ParseStateTracker instance.
151
+ def new_state_tracker()
152
+ instance = ParseStateTracker.new(chart.last_index)
153
+ instance.parse_state = end_parse_state
154
+
155
+ return instance
156
+ end
157
+
165
158
 
159
+ # A terminal symbol is on the left of dot.
166
160
  # Go to the predecessor state for the given terminal
167
- def predecessor_state_terminal(aParseTree, aStateSetIndex, current_state)
168
- aParseTree.step_back(aStateSetIndex)
169
- link_node_to_token(aParseTree, aStateSetIndex)
170
- state_set = chart[aStateSetIndex]
171
- state_set.predecessor_state(current_state)
161
+ def predecessor_state_terminal(a_symb, aStateTracker, aTreeBuilder)
162
+ aTreeBuilder.current_node.range = { low: aStateTracker.state_set_index }
163
+ link_node_to_token(aTreeBuilder, aStateTracker.state_set_index)
164
+ unless aTreeBuilder.current_node.is_a?(PTree::TerminalNode)
165
+ pp aTreeBuilder.root
166
+ pp aTreeBuilder.current_node
167
+ fail StandardError, "Expected terminal node"
168
+ end
169
+ aTreeBuilder.move_back
170
+ state_set = chart[aStateTracker.state_set_index]
171
+ previous_state = state_set.predecessor_state(aStateTracker.parse_state)
172
+ aStateTracker.parse_state = previous_state
172
173
  end
174
+
175
+
176
+ # Retrieve a complete state with given symbol as lhs.
177
+ def completed_state_for(a_symb, aStateTracker, aTreeBuilder)
178
+ new_states = chart[aStateTracker.state_set_index].states_rewriting(a_symb)
179
+ aStateTracker.select_state(new_states)
180
+ aTreeBuilder.range = { high: aStateTracker.state_set_index }
181
+ aTreeBuilder.use_complete_state(aStateTracker.parse_state)
182
+ link_node_to_token(aTreeBuilder, aStateTracker.state_set_index - 1)
183
+ aTreeBuilder.move_down
184
+ end
185
+
186
+
187
+ def states_expecting_lhs(aStateTracker)
188
+ lhs = aStateTracker.curr_dotted_item.production.lhs
189
+ new_states = states_expecting(lhs, aStateTracker.state_set_index, true)
173
190
 
191
+ return new_states
192
+ end
174
193
 
194
+ def select_expecting_state(theStates, aStateTracker, aTreeBuilder)
195
+ # Select an unused parse state
196
+ aStateTracker.select_state(theStates)
197
+
198
+ aTreeBuilder.range = { low: aStateTracker.state_set_index }
199
+ aTreeBuilder.move_back
200
+ aTreeBuilder.range = { low: aStateTracker.parse_state.origin }
201
+ end
202
+
203
+
175
204
  # If the current node is a terminal node
176
205
  # then link the token to that node
177
- def link_node_to_token(aParseTree, aStateSetIndex)
178
- if aParseTree.current_node.is_a?(PTree::TerminalNode)
179
- a_node = aParseTree.current_node
206
+ def link_node_to_token(aTreeBuilder, aStateSetIndex)
207
+ if aTreeBuilder.current_node.is_a?(PTree::TerminalNode)
208
+ a_node = aTreeBuilder.current_node
180
209
  a_node.token = tokens[aStateSetIndex] unless a_node.token
181
210
  end
182
211
  end
212
+
213
+ # Factory method. Initializes a ParseTreeBuilder object
214
+ def tree_builder(anIndex)
215
+ full_range = { low: 0, high: anIndex }
216
+ start_production = chart.start_dotted_rule.production
217
+ return ParseTreeBuilder.new(start_production, full_range)
218
+ end
183
219
 
220
+
184
221
  end # class
185
222
  end # module
186
223
  end # module
@@ -7,19 +7,11 @@ module Rley # This module is used as a namespace
7
7
  # The root node of the tree
8
8
  attr_reader(:root)
9
9
 
10
- # The path to current node
11
- attr_reader(:current_path)
12
-
13
- def initialize(aProduction, aRange)
14
- @root = NonTerminalNode.new(aProduction.lhs, aRange)
15
- @current_path = [ @root ]
16
- add_children(aProduction, aRange)
10
+ # @param theRootNode [ParseTreeNode] The root node of the parse tree.
11
+ def initialize(theRootNode)
12
+ @root = theRootNode
17
13
  end
18
14
 
19
- # Return the active node.
20
- def current_node()
21
- return current_path.last
22
- end
23
15
 
24
16
  # Part of the 'visitee' role in the Visitor design pattern.
25
17
  # A visitee is expected to accept the visit from a visitor object
@@ -33,81 +25,6 @@ module Rley # This module is used as a namespace
33
25
  aVisitor.end_visit_ptree(self)
34
26
  end
35
27
 
36
- # Add children to the current node.
37
- # The children nodes correspond to the rhs of the production.
38
- # Update the range in the children given the passed range object.
39
- # Pre-condition: the current node refers to the same (non-terminal)
40
- # symbol of the lhs of the given produiction.
41
- # @param aProduction [Production] A production rule
42
- # @param aRange [TokenRange]
43
- def add_children(aProduction, aRange)
44
- if aProduction.lhs != current_node.symbol
45
- msg = "Internal error. Expected symbol was #{aProduction.lhs} but current node is #{current_node.symbol}"
46
- fail StandardError, msg
47
- end
48
-
49
- aProduction.rhs.each do |symb|
50
- case symb
51
- when Syntax::Terminal
52
- new_node = TerminalNode.new(symb, {})
53
- when Syntax::NonTerminal
54
- new_node = NonTerminalNode.new(symb, {})
55
- end
56
-
57
- current_node.add_child(new_node)
58
- end
59
-
60
- children = current_node.children
61
- children.first.range = low_bound(aRange)
62
- children.last.range = high_bound(aRange)
63
- return if children.empty?
64
-
65
- path_increment = [children.size - 1, children.last]
66
- @current_path.concat(path_increment)
67
- end
68
-
69
- # Move the current node to the parent node.
70
- # @param _tokenPos [Fixnum] position of the matching input token
71
- def step_up(_tokenPos)
72
- current_path.pop(2)
73
- end
74
-
75
-
76
-
77
- # Move the current node to the previous sibling node.
78
- # @param tokenPos [Fixnum] position of the matching input token
79
- def step_back(tokenPos)
80
- (pos, last_node) = current_path[-2, 2]
81
- last_node.range = low_bound(low: tokenPos)
82
-
83
- return if pos <= 0
84
- current_path.pop(2)
85
- new_pos = pos - 1
86
- new_curr_node = current_path.last.children[new_pos]
87
- current_path << new_pos
88
- current_path << new_curr_node
89
- new_curr_node.range = high_bound(high: tokenPos)
90
- end
91
-
92
- private
93
-
94
- def low_bound(aRange)
95
- result = case aRange
96
- when Hash then aRange[:low]
97
- when TokenRange then aRange.low
98
- end
99
-
100
- return { low: result }
101
- end
102
-
103
- def high_bound(aRange)
104
- result = case aRange
105
- when Hash then aRange[:high]
106
- when TokenRange then aRange.high
107
- end
108
-
109
- return { high: result }
110
- end
111
28
  end # class
112
29
  end # module
113
30
  end # module
@@ -22,6 +22,8 @@ module Rley # This module is used as a namespace
22
22
  result = low == other[:low] && high == other[:high]
23
23
  when TokenRange
24
24
  result = low == other.low && high == other.high
25
+ when Array
26
+ result = low == other[0] && high == other[1]
25
27
  end
26
28
 
27
29
  return result
@@ -41,6 +41,12 @@ module Rley # This module is used as a namespace
41
41
  def non_terminals()
42
42
  return symbols.select { |s| s.kind_of?(NonTerminal) }
43
43
  end
44
+
45
+ # @return [Production] The start production of the grammar (i.e.
46
+ # the rule that specifies the syntax for the start symbol).
47
+ def start_production()
48
+ return rules[0]
49
+ end
44
50
 
45
51
  private
46
52
 
@@ -6,7 +6,7 @@ require_relative 'grammar'
6
6
 
7
7
  module Rley # This module is used as a namespace
8
8
  module Syntax # This module is used as a namespace
9
- # Builder pattern. Builder pattern builds a complex object
9
+ # Builder GoF pattern. Builder pattern builds a complex object
10
10
  # (say, a grammar) from simpler objects (terminals and productions)
11
11
  # and using a step by step approach.
12
12
  class GrammarBuilder
@@ -43,9 +43,9 @@ module Rley # Re-open the module to get rid of qualified names
43
43
  # Generated tree has the following structure:
44
44
  # S[0,5]
45
45
  # +- A[0,5]
46
- # +- a[0,0]
46
+ # +- a[0,1]
47
47
  # +- A[1,4]
48
- # | +- a[1,1]
48
+ # | +- a[1,2]
49
49
  # | +- A[2,3]
50
50
  # | | +- b[2,3]
51
51
  # | +- c[3,4]
@@ -30,9 +30,13 @@ module Rley # Open this namespace to avoid module qualifier prefixes
30
30
  expect(subject.state_sets.size).to eq(count_token + 1)
31
31
  end
32
32
 
33
- it 'should the start dotted rule' do
33
+ it 'should know the start dotted rule' do
34
34
  expect(subject.start_dotted_rule).to eq(dotted_rule)
35
35
  end
36
+
37
+ it 'should have at least one non-empty state set' do
38
+ expect(subject.last_index).to eq(0)
39
+ end
36
40
  end # context
37
41
  end # describe
38
42
  end # module
@@ -0,0 +1,179 @@
1
+ require_relative '../../spec_helper'
2
+ require_relative '../../../lib/rley/parser/token'
3
+ require_relative '../../../lib/rley/parser/earley_parser'
4
+ require_relative '../../../lib/rley/parser/parsing'
5
+ # Load the class under test
6
+ require_relative '../../../lib/rley/parser/parse_tree_builder'
7
+ require_relative '../support/grammar_abc_helper'
8
+
9
+ module Rley # Open this namespace to avoid module qualifier prefixes
10
+ module Parser # Open this namespace to avoid module qualifier prefixes
11
+ describe ParseTreeBuilder do
12
+ include GrammarABCHelper # Mix-in module with builder for grammar abc
13
+
14
+ let(:grammar_abc) do
15
+ builder = grammar_abc_builder
16
+ builder.grammar
17
+ end
18
+
19
+ let(:capital_a) { grammar_abc.name2symbol['A'] }
20
+ let(:capital_s) { grammar_abc.name2symbol['S'] }
21
+ let(:small_a) { grammar_abc.name2symbol['a'] }
22
+ let(:small_b) { grammar_abc.name2symbol['b'] }
23
+ let(:small_c) { grammar_abc.name2symbol['c'] }
24
+
25
+ let(:start_prod) { grammar_abc.start_production }
26
+
27
+ let(:tokens_abc) do
28
+ %w(a a b c c).map do |letter|
29
+ Token.new(letter, grammar_abc.name2symbol[letter])
30
+ end
31
+ end
32
+
33
+ let(:sample_parsing) do
34
+ parser = EarleyParser.new(grammar_abc)
35
+ result = parser.parse(tokens_abc)
36
+ end
37
+
38
+ subject { ParseTreeBuilder.new(start_prod, {low: 0, high: 5}) }
39
+
40
+ context 'Initialization:' do
41
+ it 'should be created with a proposition and a range' do
42
+ expect { ParseTreeBuilder.new(start_prod, {}) }.not_to raise_error
43
+ end
44
+
45
+ it 'should have a root node at start' do
46
+ expect(subject.root.symbol).to eq(capital_s)
47
+ end
48
+
49
+ it "should have current path at start" do
50
+ expect(subject.current_path).not_to be_empty
51
+ end
52
+
53
+ it "should have current node at start" do
54
+ expect(subject.current_node.symbol).to eq(capital_a)
55
+ end
56
+ end # context
57
+
58
+ context 'Adding nodes to parse tree:' do
59
+ it 'should process parse state for a non-terminal node' do
60
+ # Expectation:
61
+ # S[0, 5]
62
+ # +- A[0,5]
63
+ expect(subject.root.symbol).to eq(capital_s)
64
+ expect(subject.root.children.size).to eq(1)
65
+ child1 = subject.root.children[0]
66
+ expect(child1.symbol).to eq(capital_a)
67
+ expect(child1.range.low).to eq(0)
68
+ expect(child1.range.high).to eq(5)
69
+ expect(subject.current_node).to eq(child1)
70
+
71
+ # Add children to A
72
+ other_state = sample_parsing.chart.state_sets.last.states.first
73
+ subject.use_complete_state(other_state)
74
+
75
+ # Tree is:
76
+ # S[0,5]
77
+ # +- A[0,5]
78
+ # +- a[0, ?]
79
+ # +- A[?, ?]
80
+ # +- c[?, 5]
81
+ expect(child1.children.size).to eq(3) # a A c
82
+ %w(a A c).each_with_index do |letter, i|
83
+ grm_symbol = grammar_abc.name2symbol[letter]
84
+ expect(child1.children[i].symbol).to eq(grm_symbol)
85
+ end
86
+ expect(child1.children[0].range.low).to eq(0)
87
+ expect(child1.children[-1].range.high).to eq(5)
88
+
89
+ subject.move_down # ... to c
90
+ subject.range = {low: 4}
91
+ expect(child1.children[-1].range.low).to eq(4)
92
+ expect(child1.children.last).to eq(subject.current_node)
93
+ subject.move_back # ... to A
94
+ expect(subject.current_node).to eq(child1.children[1])
95
+ grand_child_A = subject.current_node
96
+
97
+ other_state = sample_parsing.chart.state_sets[4].first
98
+ subject.use_complete_state(other_state)
99
+ expect(grand_child_A.children.size).to eq(3) # a A c
100
+ %w(a A c).each_with_index do |letter, i|
101
+ grm_symbol = grammar_abc.name2symbol[letter]
102
+ expect(grand_child_A.children[i].symbol).to eq(grm_symbol)
103
+ end
104
+ end
105
+ end # context
106
+
107
+ context 'Moving the current node:' do
108
+ it 'should move down to last child' do
109
+ # Tree is:
110
+ # S[0,?]
111
+ # +- A[0,?]
112
+
113
+ # Add children to A
114
+ parse_state = sample_parsing.chart.state_sets.last.states.first
115
+ subject.use_complete_state(parse_state)
116
+
117
+ # Tree is:
118
+ # S[0,?]
119
+ # +- A[0,?]
120
+ # +- a[0, ?]
121
+ # +- A[?, ?]
122
+ # +- c[?, ?]
123
+ subject.move_down # ...to grand-child c
124
+ expect(subject.current_node.symbol).to eq(small_c)
125
+
126
+
127
+ subject.move_back # ...to grand-child A
128
+ expect(subject.current_node.symbol).to eq(capital_a)
129
+
130
+ # Add more children
131
+ other_state = sample_parsing.chart.state_sets[4].states.first
132
+ subject.use_complete_state(other_state)
133
+
134
+ # Tree is:
135
+ # S[0,?]
136
+ # +- A[0,?]
137
+ # +- a[0, ?]
138
+ # +- A[?, ?]
139
+ # +- a[?, ?]
140
+ # +- A[?, ?]
141
+ # +- c [?, ?]
142
+ # +- c[?, ?]
143
+
144
+ subject.move_down # ...to grand-grand-child c
145
+ expect(subject.current_node.symbol).to eq(small_c)
146
+
147
+ subject.move_back # ...to grand-grand-child A
148
+ expect(subject.current_node.symbol).to eq(capital_a)
149
+
150
+ subject.move_back # ...to grand-grand-child a
151
+ expect(subject.current_node.symbol).to eq(small_a)
152
+
153
+ subject.move_back # ...to grand-child A
154
+ expect(subject.current_node.symbol).to eq(capital_a)
155
+
156
+ subject.move_back # ...to grand-child a
157
+ expect(subject.current_node.symbol).to eq(small_a)
158
+
159
+ subject.move_back # ...to child A
160
+ expect(subject.current_node.symbol).to eq(capital_a)
161
+
162
+ subject.move_back # ...to S
163
+ expect(subject.current_node.symbol).to eq(capital_s)
164
+ end
165
+ end # context
166
+
167
+ context 'Parse tree building:' do
168
+ it 'should build a parse tree' do
169
+ expect(subject.parse_tree).to be_kind_of(PTree::ParseTree)
170
+ actual = subject.parse_tree
171
+ expect(actual.root).to eq(subject.root)
172
+ end
173
+ end # context
174
+
175
+ end # describe
176
+ end # module
177
+ end # module
178
+
179
+ # End of file