rley 0.1.12 → 0.2.00

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,5 +1,6 @@
1
1
  require_relative 'chart'
2
- require_relative '../ptree/parse_tree'
2
+ require_relative 'parse_state_tracker'
3
+ require_relative 'parse_tree_builder'
3
4
 
4
5
 
5
6
  module Rley # This module is used as a namespace
@@ -9,7 +10,7 @@ module Rley # This module is used as a namespace
9
10
 
10
11
  # The sequence of input tokens to parse
11
12
  attr_reader(:tokens)
12
-
13
+
13
14
  def initialize(startDottedRule, theTokens)
14
15
  @tokens = theTokens.dup
15
16
  @chart = Chart.new(startDottedRule, tokens.size)
@@ -31,49 +32,32 @@ module Rley # This module is used as a namespace
31
32
  # set state_set_index = index of last state set in chart
32
33
  # Search the completed parse state that corresponds to the full parse
33
34
  def parse_tree()
34
- state_set_index = chart.state_sets.size - 1
35
- parse_state = end_parse_state
36
- full_range = { low: 0, high: state_set_index }
37
- start_production = chart.start_dotted_rule.production
38
- ptree = PTree::ParseTree.new(start_production, full_range)
39
- return ptree if parse_state.nil?
35
+ state_tracker = new_state_tracker
36
+ builder = tree_builder(state_tracker.state_set_index)
37
+
40
38
  loop do
41
- curr_dotted_item = parse_state.dotted_rule
42
39
  # Look at the symbol on left of the dot
43
- curr_symbol = curr_dotted_item.prev_symbol
40
+ curr_symbol = state_tracker.symbol_on_left
41
+
44
42
  case curr_symbol
45
43
  when Syntax::Terminal
46
- state_set_index -= 1
47
- parse_state = predecessor_state_terminal(ptree, state_set_index,
48
- parse_state)
49
-
44
+ state_tracker.to_prev_state_set
45
+ predecessor_state_terminal(curr_symbol, state_tracker, builder)
46
+
50
47
  when Syntax::NonTerminal
51
- # Retrieve complete states with curr_symbol as lhs
52
- new_states = chart[state_set_index].states_rewriting(curr_symbol)
53
- # TODO: make this more robust
54
- parse_state = new_states[0]
55
- curr_dotted_item = parse_state.dotted_rule
56
- # Additional check
57
- if ptree.current_node.symbol != curr_dotted_item.production.lhs
58
- ptree.step_back(state_set_index)
59
- end
60
- ptree.current_node.range = { low: parse_state.origin }
61
- node_range = ptree.current_node.range
62
- ptree.add_children(curr_dotted_item.production, node_range)
63
- link_node_to_token(ptree, state_set_index - 1)
64
-
48
+ completed_state_for(curr_symbol, state_tracker, builder)
49
+
65
50
  when NilClass # No symbol on the left of dot
66
- lhs = curr_dotted_item.production.lhs
67
- new_states = states_expecting(lhs, state_set_index, true)
51
+ # Retrieve all parse states that expect the lhs
52
+ new_states = states_expecting_lhs(state_tracker)
68
53
  break if new_states.empty?
69
- # TODO: make this more robust
70
- parse_state = new_states[0]
71
- ptree.step_up(state_set_index)
72
- ptree.current_node.range = { low: parse_state.origin }
73
- break if ptree.root == ptree.current_node
54
+
55
+ select_expecting_state(new_states, state_tracker, builder)
56
+ break if builder.root == builder.current_node
74
57
  end
75
58
  end
76
- return ptree
59
+
60
+ return builder.parse_tree
77
61
  end
78
62
 
79
63
 
@@ -102,7 +86,7 @@ module Rley # This module is used as a namespace
102
86
  def scanning(aTerminal, aPosition, &nextMapping)
103
87
  curr_token = tokens[aPosition]
104
88
  return unless curr_token.terminal == aTerminal
105
-
89
+
106
90
  states = states_expecting(aTerminal, aPosition, false)
107
91
  states.each do |s|
108
92
  next_item = nextMapping.call(s.dotted_rule)
@@ -114,9 +98,9 @@ module Rley # This module is used as a namespace
114
98
 
115
99
  # This method is called when a parse state at chart entry reaches the end
116
100
  # of a production.
117
- # For every state in chart[aPosition] that is complete
101
+ # For every state in chart[aPosition] that is complete
118
102
  # (i.e. of the form: { dotted_rule: X -> γ •, origin: j}),
119
- # Find states s in chart[j] of the form
103
+ # Find states s in chart[j] of the form
120
104
  # {dotted_rule: Y -> α • X β, origin: i}
121
105
  # In other words, rules that predicted the non-terminal X.
122
106
  # For each s, add to chart[aPosition] a state of the form
@@ -137,22 +121,20 @@ module Rley # This module is used as a namespace
137
121
  def states_expecting(aTerminal, aPosition, toSort)
138
122
  expecting = chart[aPosition].states_expecting(aTerminal)
139
123
  return expecting if !toSort || expecting.size < 2
140
-
124
+
141
125
  # Put predicted states ahead
142
126
  (predicted, others) = expecting.partition { |state| state.predicted? }
143
-
127
+
144
128
  # Sort state in reverse order of their origin value
145
129
  [predicted, others].each do |set|
146
130
  set.sort! { |a,b| b.origin <=> a.origin }
147
131
  end
148
-
132
+
149
133
  return predicted + others
150
134
  end
151
-
152
- private
153
-
154
- # Retrieve full parse state.
155
- # After a successful parse, the last chart entry
135
+
136
+ # Retrieve the parse state that represents a complete, successful parse.
137
+ # After a successful parse, the last chart entry
156
138
  # has a parse state that involves the start production and
157
139
  # has a dot positioned at the end of its rhs.
158
140
  def end_parse_state()
@@ -162,25 +144,80 @@ module Rley # This module is used as a namespace
162
144
  candidate_states = last_chart_entry.states_for(start_production)
163
145
  return candidate_states.find(&:complete?)
164
146
  end
147
+
148
+ private
149
+
150
+ # Factory method. Creates and initializes a ParseStateTracker instance.
151
+ def new_state_tracker()
152
+ instance = ParseStateTracker.new(chart.last_index)
153
+ instance.parse_state = end_parse_state
154
+
155
+ return instance
156
+ end
157
+
165
158
 
159
+ # A terminal symbol is on the left of dot.
166
160
  # Go to the predecessor state for the given terminal
167
- def predecessor_state_terminal(aParseTree, aStateSetIndex, current_state)
168
- aParseTree.step_back(aStateSetIndex)
169
- link_node_to_token(aParseTree, aStateSetIndex)
170
- state_set = chart[aStateSetIndex]
171
- state_set.predecessor_state(current_state)
161
+ def predecessor_state_terminal(a_symb, aStateTracker, aTreeBuilder)
162
+ aTreeBuilder.current_node.range = { low: aStateTracker.state_set_index }
163
+ link_node_to_token(aTreeBuilder, aStateTracker.state_set_index)
164
+ unless aTreeBuilder.current_node.is_a?(PTree::TerminalNode)
165
+ pp aTreeBuilder.root
166
+ pp aTreeBuilder.current_node
167
+ fail StandardError, "Expected terminal node"
168
+ end
169
+ aTreeBuilder.move_back
170
+ state_set = chart[aStateTracker.state_set_index]
171
+ previous_state = state_set.predecessor_state(aStateTracker.parse_state)
172
+ aStateTracker.parse_state = previous_state
172
173
  end
174
+
175
+
176
+ # Retrieve a complete state with given symbol as lhs.
177
+ def completed_state_for(a_symb, aStateTracker, aTreeBuilder)
178
+ new_states = chart[aStateTracker.state_set_index].states_rewriting(a_symb)
179
+ aStateTracker.select_state(new_states)
180
+ aTreeBuilder.range = { high: aStateTracker.state_set_index }
181
+ aTreeBuilder.use_complete_state(aStateTracker.parse_state)
182
+ link_node_to_token(aTreeBuilder, aStateTracker.state_set_index - 1)
183
+ aTreeBuilder.move_down
184
+ end
185
+
186
+
187
+ def states_expecting_lhs(aStateTracker)
188
+ lhs = aStateTracker.curr_dotted_item.production.lhs
189
+ new_states = states_expecting(lhs, aStateTracker.state_set_index, true)
173
190
 
191
+ return new_states
192
+ end
174
193
 
194
+ def select_expecting_state(theStates, aStateTracker, aTreeBuilder)
195
+ # Select an unused parse state
196
+ aStateTracker.select_state(theStates)
197
+
198
+ aTreeBuilder.range = { low: aStateTracker.state_set_index }
199
+ aTreeBuilder.move_back
200
+ aTreeBuilder.range = { low: aStateTracker.parse_state.origin }
201
+ end
202
+
203
+
175
204
  # If the current node is a terminal node
176
205
  # then link the token to that node
177
- def link_node_to_token(aParseTree, aStateSetIndex)
178
- if aParseTree.current_node.is_a?(PTree::TerminalNode)
179
- a_node = aParseTree.current_node
206
+ def link_node_to_token(aTreeBuilder, aStateSetIndex)
207
+ if aTreeBuilder.current_node.is_a?(PTree::TerminalNode)
208
+ a_node = aTreeBuilder.current_node
180
209
  a_node.token = tokens[aStateSetIndex] unless a_node.token
181
210
  end
182
211
  end
212
+
213
+ # Factory method. Initializes a ParseTreeBuilder object
214
+ def tree_builder(anIndex)
215
+ full_range = { low: 0, high: anIndex }
216
+ start_production = chart.start_dotted_rule.production
217
+ return ParseTreeBuilder.new(start_production, full_range)
218
+ end
183
219
 
220
+
184
221
  end # class
185
222
  end # module
186
223
  end # module
@@ -7,19 +7,11 @@ module Rley # This module is used as a namespace
7
7
  # The root node of the tree
8
8
  attr_reader(:root)
9
9
 
10
- # The path to current node
11
- attr_reader(:current_path)
12
-
13
- def initialize(aProduction, aRange)
14
- @root = NonTerminalNode.new(aProduction.lhs, aRange)
15
- @current_path = [ @root ]
16
- add_children(aProduction, aRange)
10
+ # @param theRootNode [ParseTreeNode] The root node of the parse tree.
11
+ def initialize(theRootNode)
12
+ @root = theRootNode
17
13
  end
18
14
 
19
- # Return the active node.
20
- def current_node()
21
- return current_path.last
22
- end
23
15
 
24
16
  # Part of the 'visitee' role in the Visitor design pattern.
25
17
  # A visitee is expected to accept the visit from a visitor object
@@ -33,81 +25,6 @@ module Rley # This module is used as a namespace
33
25
  aVisitor.end_visit_ptree(self)
34
26
  end
35
27
 
36
- # Add children to the current node.
37
- # The children nodes correspond to the rhs of the production.
38
- # Update the range in the children given the passed range object.
39
- # Pre-condition: the current node refers to the same (non-terminal)
40
- # symbol of the lhs of the given production.
41
- # @param aProduction [Production] A production rule
42
- # @param aRange [TokenRange]
43
- def add_children(aProduction, aRange)
44
- if aProduction.lhs != current_node.symbol
45
- msg = "Internal error. Expected symbol was #{aProduction.lhs} but current node is #{current_node.symbol}"
46
- fail StandardError, msg
47
- end
48
-
49
- aProduction.rhs.each do |symb|
50
- case symb
51
- when Syntax::Terminal
52
- new_node = TerminalNode.new(symb, {})
53
- when Syntax::NonTerminal
54
- new_node = NonTerminalNode.new(symb, {})
55
- end
56
-
57
- current_node.add_child(new_node)
58
- end
59
-
60
- children = current_node.children
61
- children.first.range = low_bound(aRange)
62
- children.last.range = high_bound(aRange)
63
- return if children.empty?
64
-
65
- path_increment = [children.size - 1, children.last]
66
- @current_path.concat(path_increment)
67
- end
68
-
69
- # Move the current node to the parent node.
70
- # @param _tokenPos [Fixnum] position of the matching input token
71
- def step_up(_tokenPos)
72
- current_path.pop(2)
73
- end
74
-
75
-
76
-
77
- # Move the current node to the previous sibling node.
78
- # @param tokenPos [Fixnum] position of the matching input token
79
- def step_back(tokenPos)
80
- (pos, last_node) = current_path[-2, 2]
81
- last_node.range = low_bound(low: tokenPos)
82
-
83
- return if pos <= 0
84
- current_path.pop(2)
85
- new_pos = pos - 1
86
- new_curr_node = current_path.last.children[new_pos]
87
- current_path << new_pos
88
- current_path << new_curr_node
89
- new_curr_node.range = high_bound(high: tokenPos)
90
- end
91
-
92
- private
93
-
94
- def low_bound(aRange)
95
- result = case aRange
96
- when Hash then aRange[:low]
97
- when TokenRange then aRange.low
98
- end
99
-
100
- return { low: result }
101
- end
102
-
103
- def high_bound(aRange)
104
- result = case aRange
105
- when Hash then aRange[:high]
106
- when TokenRange then aRange.high
107
- end
108
-
109
- return { high: result }
110
- end
111
28
  end # class
112
29
  end # module
113
30
  end # module
@@ -22,6 +22,8 @@ module Rley # This module is used as a namespace
22
22
  result = low == other[:low] && high == other[:high]
23
23
  when TokenRange
24
24
  result = low == other.low && high == other.high
25
+ when Array
26
+ result = low == other[0] && high == other[1]
25
27
  end
26
28
 
27
29
  return result
@@ -41,6 +41,12 @@ module Rley # This module is used as a namespace
41
41
  def non_terminals()
42
42
  return symbols.select { |s| s.kind_of?(NonTerminal) }
43
43
  end
44
+
45
+ # @return [Production] The start production of the grammar (i.e.
46
+ # the rule that specifies the syntax for the start symbol).
47
+ def start_production()
48
+ return rules[0]
49
+ end
44
50
 
45
51
  private
46
52
 
@@ -6,7 +6,7 @@ require_relative 'grammar'
6
6
 
7
7
  module Rley # This module is used as a namespace
8
8
  module Syntax # This module is used as a namespace
9
- # Builder pattern. Builder pattern builds a complex object
9
+ # Builder GoF pattern. Builder pattern builds a complex object
10
10
  # (say, a grammar) from simpler objects (terminals and productions)
11
11
  # and using a step by step approach.
12
12
  class GrammarBuilder
@@ -43,9 +43,9 @@ module Rley # Re-open the module to get rid of qualified names
43
43
  # Generated tree has the following structure:
44
44
  # S[0,5]
45
45
  # +- A[0,5]
46
- # +- a[0,0]
46
+ # +- a[0,1]
47
47
  # +- A[1,4]
48
- # | +- a[1,1]
48
+ # | +- a[1,2]
49
49
  # | +- A[2,3]
50
50
  # | | +- b[2,3]
51
51
  # | +- c[3,4]
@@ -30,9 +30,13 @@ module Rley # Open this namespace to avoid module qualifier prefixes
30
30
  expect(subject.state_sets.size).to eq(count_token + 1)
31
31
  end
32
32
 
33
- it 'should the start dotted rule' do
33
+ it 'should know the start dotted rule' do
34
34
  expect(subject.start_dotted_rule).to eq(dotted_rule)
35
35
  end
36
+
37
+ it 'should have at least one non-empty state set' do
38
+ expect(subject.last_index).to eq(0)
39
+ end
36
40
  end # context
37
41
  end # describe
38
42
  end # module
@@ -0,0 +1,179 @@
1
+ require_relative '../../spec_helper'
2
+ require_relative '../../../lib/rley/parser/token'
3
+ require_relative '../../../lib/rley/parser/earley_parser'
4
+ require_relative '../../../lib/rley/parser/parsing'
5
+ # Load the class under test
6
+ require_relative '../../../lib/rley/parser/parse_tree_builder'
7
+ require_relative '../support/grammar_abc_helper'
8
+
9
+ module Rley # Open this namespace to avoid module qualifier prefixes
10
+ module Parser # Open this namespace to avoid module qualifier prefixes
11
+ describe ParseTreeBuilder do
12
+ include GrammarABCHelper # Mix-in module with builder for grammar abc
13
+
14
+ let(:grammar_abc) do
15
+ builder = grammar_abc_builder
16
+ builder.grammar
17
+ end
18
+
19
+ let(:capital_a) { grammar_abc.name2symbol['A'] }
20
+ let(:capital_s) { grammar_abc.name2symbol['S'] }
21
+ let(:small_a) { grammar_abc.name2symbol['a'] }
22
+ let(:small_b) { grammar_abc.name2symbol['b'] }
23
+ let(:small_c) { grammar_abc.name2symbol['c'] }
24
+
25
+ let(:start_prod) { grammar_abc.start_production }
26
+
27
+ let(:tokens_abc) do
28
+ %w(a a b c c).map do |letter|
29
+ Token.new(letter, grammar_abc.name2symbol[letter])
30
+ end
31
+ end
32
+
33
+ let(:sample_parsing) do
34
+ parser = EarleyParser.new(grammar_abc)
35
+ result = parser.parse(tokens_abc)
36
+ end
37
+
38
+ subject { ParseTreeBuilder.new(start_prod, {low: 0, high: 5}) }
39
+
40
+ context 'Initialization:' do
41
+ it 'should be created with a proposition and a range' do
42
+ expect { ParseTreeBuilder.new(start_prod, {}) }.not_to raise_error
43
+ end
44
+
45
+ it 'should have a root node at start' do
46
+ expect(subject.root.symbol).to eq(capital_s)
47
+ end
48
+
49
+ it "should have current path at start" do
50
+ expect(subject.current_path).not_to be_empty
51
+ end
52
+
53
+ it "should have current node at start" do
54
+ expect(subject.current_node.symbol).to eq(capital_a)
55
+ end
56
+ end # context
57
+
58
+ context 'Adding nodes to parse tree:' do
59
+ it 'should process parse state for a non-terminal node' do
60
+ # Expectation:
61
+ # S[0, 5]
62
+ # +- A[0,5]
63
+ expect(subject.root.symbol).to eq(capital_s)
64
+ expect(subject.root.children.size).to eq(1)
65
+ child1 = subject.root.children[0]
66
+ expect(child1.symbol).to eq(capital_a)
67
+ expect(child1.range.low).to eq(0)
68
+ expect(child1.range.high).to eq(5)
69
+ expect(subject.current_node).to eq(child1)
70
+
71
+ # Add children to A
72
+ other_state = sample_parsing.chart.state_sets.last.states.first
73
+ subject.use_complete_state(other_state)
74
+
75
+ # Tree is:
76
+ # S[0,5]
77
+ # +- A[0,5]
78
+ # +- a[0, ?]
79
+ # +- A[?, ?]
80
+ # +- c[?, 5]
81
+ expect(child1.children.size).to eq(3) # a A c
82
+ %w(a A c).each_with_index do |letter, i|
83
+ grm_symbol = grammar_abc.name2symbol[letter]
84
+ expect(child1.children[i].symbol).to eq(grm_symbol)
85
+ end
86
+ expect(child1.children[0].range.low).to eq(0)
87
+ expect(child1.children[-1].range.high).to eq(5)
88
+
89
+ subject.move_down # ... to c
90
+ subject.range = {low: 4}
91
+ expect(child1.children[-1].range.low).to eq(4)
92
+ expect(child1.children.last).to eq(subject.current_node)
93
+ subject.move_back # ... to A
94
+ expect(subject.current_node).to eq(child1.children[1])
95
+ grand_child_A = subject.current_node
96
+
97
+ other_state = sample_parsing.chart.state_sets[4].first
98
+ subject.use_complete_state(other_state)
99
+ expect(grand_child_A.children.size).to eq(3) # a A c
100
+ %w(a A c).each_with_index do |letter, i|
101
+ grm_symbol = grammar_abc.name2symbol[letter]
102
+ expect(grand_child_A.children[i].symbol).to eq(grm_symbol)
103
+ end
104
+ end
105
+ end # context
106
+
107
+ context 'Moving the current node:' do
108
+ it 'should move down to last child' do
109
+ # Tree is:
110
+ # S[0,?]
111
+ # +- A[0,?]
112
+
113
+ # Add children to A
114
+ parse_state = sample_parsing.chart.state_sets.last.states.first
115
+ subject.use_complete_state(parse_state)
116
+
117
+ # Tree is:
118
+ # S[0,?]
119
+ # +- A[0,?]
120
+ # +- a[0, ?]
121
+ # +- A[?, ?]
122
+ # +- c[?, ?]
123
+ subject.move_down # ...to grand-child c
124
+ expect(subject.current_node.symbol).to eq(small_c)
125
+
126
+
127
+ subject.move_back # ...to grand-child A
128
+ expect(subject.current_node.symbol).to eq(capital_a)
129
+
130
+ # Add more children
131
+ other_state = sample_parsing.chart.state_sets[4].states.first
132
+ subject.use_complete_state(other_state)
133
+
134
+ # Tree is:
135
+ # S[0,?]
136
+ # +- A[0,?]
137
+ # +- a[0, ?]
138
+ # +- A[?, ?]
139
+ # +- a[?, ?]
140
+ # +- A[?, ?]
141
+ # +- c [?, ?]
142
+ # +- c[?, ?]
143
+
144
+ subject.move_down # ...to grand-grand-child c
145
+ expect(subject.current_node.symbol).to eq(small_c)
146
+
147
+ subject.move_back # ...to grand-grand-child A
148
+ expect(subject.current_node.symbol).to eq(capital_a)
149
+
150
+ subject.move_back # ...to grand-grand-child a
151
+ expect(subject.current_node.symbol).to eq(small_a)
152
+
153
+ subject.move_back # ...to grand-child A
154
+ expect(subject.current_node.symbol).to eq(capital_a)
155
+
156
+ subject.move_back # ...to grand-child a
157
+ expect(subject.current_node.symbol).to eq(small_a)
158
+
159
+ subject.move_back # ...to child A
160
+ expect(subject.current_node.symbol).to eq(capital_a)
161
+
162
+ subject.move_back # ...to S
163
+ expect(subject.current_node.symbol).to eq(capital_s)
164
+ end
165
+ end # context
166
+
167
+ context 'Parse tree building:' do
168
+ it 'should build a parse tree' do
169
+ expect(subject.parse_tree).to be_kind_of(PTree::ParseTree)
170
+ actual = subject.parse_tree
171
+ expect(actual.root).to eq(subject.root)
172
+ end
173
+ end # context
174
+
175
+ end # describe
176
+ end # module
177
+ end # module
178
+
179
+ # End of file